
Commit a83b3b8

bmind7 and maryamziaa authored

[Bugfix] Fix CUDA/CPU mismatch in threaded training (#6245)

* Ensure tensors use default device in torch policy and utils

Co-authored-by: maryam-zia <maryam.zia@unity3d.com>
1 parent a277771 commit a83b3b8
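The commit message is terse, so here is a minimal sketch of the failure mode and of the fix pattern the diffs below apply. It is illustrative only: the default_device() shown here is a stand-in for mlagents.torch_utils.default_device, which the changed files import, and the tensor shapes are made up.

import torch

def default_device() -> torch.device:
    # Stand-in for mlagents.torch_utils.default_device(); the real helper returns
    # the device the trainer was configured to use.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = torch.nn.Linear(128, 1).to(default_device())

# Before the fix: factory calls such as torch.zeros() default to the CPU, so on a
# CUDA run the threaded trainer mixed CPU and CUDA tensors and PyTorch raised an
# error along the lines of "Expected all tensors to be on the same device".
memory_cpu = torch.zeros((1, 1, 128))

# After the fix: allocate directly on the trainer's device.
memory = torch.zeros((1, 1, 128), device=default_device())
out = model(memory)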

File tree

6 files changed: +34 -22 lines

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
ml-agents/mlagents/trainers/poca/optimizer_torch.py
ml-agents/mlagents/trainers/policy/torch_policy.py
ml-agents/mlagents/trainers/torch_entities/components/reward_providers/gail_reward_provider.py
ml-agents/mlagents/trainers/torch_entities/networks.py
ml-agents/mlagents/trainers/torch_entities/utils.py


‎ml-agents/mlagents/trainers/optimizer/torch_optimizer.py‎

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 from typing import Dict, Optional, Tuple, List
-from mlagents.torch_utils import torch
+from mlagents.torch_utils import torch, default_device
 import numpy as np
 from collections import defaultdict

@@ -162,7 +162,7 @@ def get_trajectory_value_estimates(
             memory = self.critic_memory_dict[agent_id]
         else:
             memory = (
-                torch.zeros((1, 1, self.critic.memory_size))
+                torch.zeros((1, 1, self.critic.memory_size), device=default_device())
                 if self.policy.use_recurrent
                 else None
             )

‎ml-agents/mlagents/trainers/poca/optimizer_torch.py‎

Lines changed: 2 additions & 2 deletions

@@ -608,12 +608,12 @@ def get_trajectory_and_baseline_value_estimates(
             _init_baseline_mem = self.baseline_memory_dict[agent_id]
         else:
             _init_value_mem = (
-                torch.zeros((1, 1, self.critic.memory_size))
+                torch.zeros((1, 1, self.critic.memory_size), device=default_device())
                 if self.policy.use_recurrent
                 else None
             )
             _init_baseline_mem = (
-                torch.zeros((1, 1, self.critic.memory_size))
+                torch.zeros((1, 1, self.critic.memory_size), device=default_device())
                 if self.policy.use_recurrent
                 else None
             )

‎ml-agents/mlagents/trainers/policy/torch_policy.py‎

Lines changed: 11 additions & 6 deletions

@@ -69,13 +69,17 @@ def export_memory_size(self) -> int:
         return self._export_m_size

     def _extract_masks(self, decision_requests: DecisionSteps) -> np.ndarray:
+        device = default_device()
         mask = None
         if self.behavior_spec.action_spec.discrete_size > 0:
             num_discrete_flat = np.sum(self.behavior_spec.action_spec.discrete_branches)
-            mask = torch.ones([len(decision_requests), num_discrete_flat])
+            mask = torch.ones(
+                [len(decision_requests), num_discrete_flat], device=device
+            )
             if decision_requests.action_mask is not None:
                 mask = torch.as_tensor(
-                    1 - np.concatenate(decision_requests.action_mask, axis=1)
+                    1 - np.concatenate(decision_requests.action_mask, axis=1),
+                    device=device,
                 )
         return mask

@@ -91,11 +95,12 @@ def evaluate(
         """
         obs = decision_requests.obs
         masks = self._extract_masks(decision_requests)
-        tensor_obs = [torch.as_tensor(np_ob) for np_ob in obs]
+        device = default_device()
+        tensor_obs = [torch.as_tensor(np_ob, device=device) for np_ob in obs]

-        memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
-            0
-        )
+        memories = torch.as_tensor(
+            self.retrieve_memories(global_agent_ids), device=device
+        ).unsqueeze(0)
         with torch.no_grad():
             action, run_out, memories = self.actor.get_action_and_stats(
                 tensor_obs, masks=masks, memories=memories
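For context on the evaluate() hunk above: observations arrive from the environment as numpy arrays, and torch.as_tensor() without a device argument leaves them on the CPU. A minimal illustration of the difference (the shapes and the device variable are invented for this sketch, not taken from the file):

import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

np_obs = np.random.rand(1, 84).astype(np.float32)   # observation from the env
cpu_obs = torch.as_tensor(np_obs)                    # stays on the CPU
dev_obs = torch.as_tensor(np_obs, device=device)     # copied to the trainer's device

# Only the second form can be fed to a CUDA-resident actor network without a
# device-mismatch error.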

‎ml-agents/mlagents/trainers/torch_entities/components/reward_providers/gail_reward_provider.py‎

Lines changed: 9 additions & 7 deletions

@@ -143,7 +143,7 @@ def compute_estimate(
         if self._settings.use_actions:
             actions = self.get_action_input(mini_batch)
             dones = torch.as_tensor(
-                mini_batch[BufferKey.DONE], dtype=torch.float
+                mini_batch[BufferKey.DONE], dtype=torch.float, device=default_device()
             ).unsqueeze(1)
             action_inputs = torch.cat([actions, dones], dim=1)
             hidden, _ = self.encoder(inputs, action_inputs)

@@ -162,7 +162,7 @@ def compute_loss(
         """
         Given a policy mini_batch and an expert mini_batch, computes the loss of the discriminator.
         """
-        total_loss = torch.zeros(1)
+        total_loss = torch.zeros(1, device=default_device())
         stats_dict: Dict[str, np.ndarray] = {}
         policy_estimate, policy_mu = self.compute_estimate(
             policy_batch, use_vail_noise=True

@@ -219,21 +219,23 @@ def compute_gradient_magnitude(
         expert_inputs = self.get_state_inputs(expert_batch)
         interp_inputs = []
         for policy_input, expert_input in zip(policy_inputs, expert_inputs):
-            obs_epsilon = torch.rand(policy_input.shape)
+            obs_epsilon = torch.rand(policy_input.shape, device=policy_input.device)
             interp_input = obs_epsilon * policy_input + (1 - obs_epsilon) * expert_input
             interp_input.requires_grad = True  # For gradient calculation
             interp_inputs.append(interp_input)
         if self._settings.use_actions:
             policy_action = self.get_action_input(policy_batch)
             expert_action = self.get_action_input(expert_batch)
-            action_epsilon = torch.rand(policy_action.shape)
+            action_epsilon = torch.rand(
+                policy_action.shape, device=policy_action.device
+            )
             policy_dones = torch.as_tensor(
-                policy_batch[BufferKey.DONE], dtype=torch.float
+                policy_batch[BufferKey.DONE], dtype=torch.float, device=default_device()
             ).unsqueeze(1)
             expert_dones = torch.as_tensor(
-                expert_batch[BufferKey.DONE], dtype=torch.float
+                expert_batch[BufferKey.DONE], dtype=torch.float, device=default_device()
             ).unsqueeze(1)
-            dones_epsilon = torch.rand(policy_dones.shape)
+            dones_epsilon = torch.rand(policy_dones.shape, device=policy_dones.device)
             action_inputs = torch.cat(
                 [
                     action_epsilon * policy_action
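One detail worth noting in the gradient-penalty hunk: the random interpolation coefficients are allocated on the device of the tensor they perturb (policy_input.device, policy_action.device, policy_dones.device) rather than via default_device(), so they stay colocated with their inputs however those were created. A rough, self-contained sketch of that pattern (the tensors below are invented for illustration):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
policy_input = torch.rand(64, 32, device=device)   # stand-in for a policy batch
expert_input = torch.rand(64, 32, device=device)   # stand-in for an expert batch

# torch.rand() defaults to the CPU; taking the device from the tensor being
# perturbed keeps the interpolation on the same device as the inputs.
epsilon = torch.rand(policy_input.shape, device=policy_input.device)
interp = epsilon * policy_input + (1 - epsilon) * expert_input
interp.requires_grad = True  # needed for the gradient-penalty backward pass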

‎ml-agents/mlagents/trainers/torch_entities/networks.py‎

Lines changed: 4 additions & 2 deletions

@@ -1,7 +1,7 @@
 from typing import Callable, List, Dict, Tuple, Optional, Union, Any
 import abc

-from mlagents.torch_utils import torch, nn
+from mlagents.torch_utils import torch, nn, default_device

 from mlagents_envs.base_env import ActionSpec, ObservationSpec, ObservationType
 from mlagents.trainers.torch_entities.action_model import ActionModel

@@ -87,7 +87,9 @@ def update_normalization(self, buffer: AgentBuffer) -> None:
         obs = ObsUtil.from_buffer(buffer, len(self.processors))
         for vec_input, enc in zip(obs, self.processors):
             if isinstance(enc, VectorInput):
-                enc.update_normalization(torch.as_tensor(vec_input.to_ndarray()))
+                enc.update_normalization(
+                    torch.as_tensor(vec_input.to_ndarray(), device=default_device())
+                )

     def copy_normalization(self, other_encoder: "ObservationEncoder") -> None:
         if self.normalize:

‎ml-agents/mlagents/trainers/torch_entities/utils.py‎

Lines changed: 6 additions & 3 deletions

@@ -1,5 +1,5 @@
 from typing import List, Optional, Tuple, Dict
-from mlagents.torch_utils import torch, nn
+from mlagents.torch_utils import torch, nn, default_device
 from mlagents.trainers.torch_entities.layers import LinearEncoder, Initialization
 import numpy as np

@@ -233,7 +233,8 @@ def list_to_tensor(
         Converts a list of numpy arrays into a tensor. MUCH faster than
         calling as_tensor on the list directly.
         """
-        return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)
+        device = default_device()
+        return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype, device=device)

     @staticmethod
     def list_to_tensor_list(

@@ -243,8 +244,10 @@ def list_to_tensor_list(
         Converts a list of numpy arrays into a list of tensors. MUCH faster than
         calling as_tensor on the list directly.
         """
+        device = default_device()
         return [
-            torch.as_tensor(np.asanyarray(_arr), dtype=dtype) for _arr in ndarray_list
+            torch.as_tensor(np.asanyarray(_arr), dtype=dtype, device=device)
+            for _arr in ndarray_list
         ]

     @staticmethod

0 commit comments
