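Use run-time batch size in UniZero multitask _forward_learn; point LibMTL path and Atari/DMC multitask configs at the fs-computility cluster (Atari: 26 games, seeds 1-2)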

puyuan · puyuan · commit 158e4a0d6b21 · 2025-06-01T10:17:19.000Z
diff --git a/lzero/policy/unizero_multitask.py b/lzero/policy/unizero_multitask.py
@@ -17,7 +17,9 @@
 from .utils import configure_optimizers_nanogpt
 import sys
 
-sys.path.append('/cpfs04/user/puyuan/code/LibMTL')
+# sys.path.append('/cpfs04/user/puyuan/code/LibMTL')
+sys.path.append('/fs-computility/niuyazhe/puyuan/code/LibMTL')
+
 from LibMTL.weighting.MoCo_unizero import MoCo as GradCorrect
 from LibMTL.weighting.moco_generic import GenericMoCo, MoCoCfg
 from LibMTL.weighting.moco_fast import FastMoCo, MoCoCfg
@@ -634,11 +636,13 @@ def _forward_learn(self, data: Tuple[torch.Tensor], task_weights=None, ignore_gr
             # rank = get_rank()
             # print(f'Rank {rank}: cfg.policy.task_id : {self._cfg.task_id}, self._cfg.batch_size {self._cfg.batch_size}')
 
-            target_reward = target_reward.view(self._cfg.batch_size[task_id], -1)
-            target_value = target_value.view(self._cfg.batch_size[task_id], -1)
+            cur_batch_size = target_reward.size(0)          # run-time batch
+
+            target_reward = target_reward.view(cur_batch_size, -1)
+            target_value = target_value.view(cur_batch_size, -1)
 
-            target_reward = target_reward.view(self._cfg.batch_size[task_id], -1)
-            target_value = target_value.view(self._cfg.batch_size[task_id], -1)
+            # target_reward = target_reward.view(self._cfg.batch_size[task_id], -1)
+            # target_value = target_value.view(self._cfg.batch_size[task_id], -1)
 
             # assert obs_batch.size(0) == self._cfg.batch_size == target_reward.size(0)
 
@@ -654,10 +658,10 @@ def _forward_learn(self, data: Tuple[torch.Tensor], task_weights=None, ignore_gr
             batch_for_gpt = {}
             if isinstance(self._cfg.model.observation_shape, int) or len(self._cfg.model.observation_shape) == 1:
                 batch_for_gpt['observations'] = torch.cat((obs_batch, obs_target_batch), dim=1).reshape(
-                    self._cfg.batch_size[task_id], -1, self._cfg.model.observation_shape)
+                    cur_batch_size, -1, self._cfg.model.observation_shape)
             elif len(self._cfg.model.observation_shape) == 3:
                 batch_for_gpt['observations'] = torch.cat((obs_batch, obs_target_batch), dim=1).reshape(
-                    self._cfg.batch_size[task_id], -1, *self._cfg.model.observation_shape)
+                    cur_batch_size, -1, *self._cfg.model.observation_shape)
 
             batch_for_gpt['actions'] = action_batch.squeeze(-1)
             batch_for_gpt['rewards'] = target_reward_categorical[:, :-1]
diff --git a/zoo/atari/config/atari_unizero_multitask_segment_ddp_config.py b/zoo/atari/config/atari_unizero_multitask_segment_ddp_config.py
@@ -184,8 +184,8 @@ def create_config(env_id, action_space_size, collector_env_num, evaluator_env_nu
             reanalyze_ratio=reanalyze_ratio,
             n_episode=n_episode,
             replay_buffer_size=int(5e5),
-            eval_freq=int(1e4), # TODO: 
-            # eval_freq=int(2e4),
+            # eval_freq=int(1e4), # TODO: 8games
+            eval_freq=int(2e4),  # TODO: 26games
             collector_env_num=collector_env_num,
             evaluator_env_num=evaluator_env_num,
             buffer_reanalyze_freq=buffer_reanalyze_freq,
@@ -204,12 +204,12 @@ def generate_configs(env_id_list, action_space_size, collector_env_num, n_episod
 
 
     # ========= TODO: global BENCHMARK_NAME =========
-    exp_name_prefix = f'data_unizero_atari_mt_20250527/atari_{len(env_id_list)}games_orig_simnorm-kl_vit_moe8_tran-nlayer{num_layers}_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
+    # exp_name_prefix = f'data_unizero_atari_mt_20250527/atari_{len(env_id_list)}games_orig_simnorm-kl_vit_moe8_tran-nlayer{num_layers}_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
 
     # exp_name_prefix = f'data_unizero_atari_mt_20250522/atari_{len(env_id_list)}games_orig_tran-nlayer{num_layers}_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
     # exp_name_prefix = f'data_unizero_atari_mt_20250527/atari_{len(env_id_list)}games_orig_simnorm-kl_vit_moco-v2_tran-nlayer{num_layers}_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
 
-    exp_name_prefix = f'data_unizero_atari_mt_20250530/atari_{len(env_id_list)}games_orig_vit_ln-mse_moe8_tran-nlayer{num_layers}_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
+    exp_name_prefix = f'data_unizero_atari_mt_20250601/atari_{len(env_id_list)}games_orig_vit_ln-mse_moe8_tran-nlayer{num_layers}_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
 
     # exp_name_prefix = f'data_unizero_atari_mt_20250521/atari_{len(env_id_list)}games_orig_simnorm-kl_vit_moe8_taskembed128_tran-nlayer{num_layers}_rr1_brf{buffer_reanalyze_freq}_not-share-head_seed{seed}/'
 
@@ -254,7 +254,7 @@ def create_env_manager():
 
         =========== volce atari8 =========================
         cd /fs-computility/niuyazhe/puyuan/code/LightZero/
-        python -m torch.distributed.launch --nproc_per_node=4 --master_port=29502 /fs-computility/niuyazhe/puyuan/code/LightZero/zoo/atari/config/atari_unizero_multitask_segment_ddp_config.py 2>&1 | tee /fs-computility/niuyazhe/puyuan/code/LightZero/log/20250509/uz_mt_atari8_orig_vit_ln-mse_moe8_nlayer8_brf002_seed12.log
+        python -m torch.distributed.launch --nproc_per_node=8 --master_port=29502 /fs-computility/niuyazhe/puyuan/code/LightZero/zoo/atari/config/atari_unizero_multitask_segment_ddp_config.py 2>&1 | tee /fs-computility/niuyazhe/puyuan/code/LightZero/log/20250509/uz_mt_atari26_orig_vit_ln-mse_moe8_nlayer8_brf002_seed12.log
 
 
         =========== cpfs atari8 =========================
@@ -306,7 +306,7 @@ def create_env_manager():
     import os
 
 
-    num_games = 8 # 26 # 8
+    num_games = 26 # 26 # 8
     num_layers = 8 # ==============TODO==============
     action_space_size = 18
     collector_env_num = 8
@@ -383,7 +383,7 @@ def create_env_manager():
 
 
     import torch.distributed as dist
-    for seed in [1]:
+    for seed in [1,2]:
         configs = generate_configs(env_id_list, action_space_size, collector_env_num, n_episode, evaluator_env_num,
                                    num_simulations, reanalyze_ratio, batch_sizes, num_unroll_steps, infer_context_length,
                                    norm_type, seed, buffer_reanalyze_freq, reanalyze_batch_size, reanalyze_partition,
diff --git a/zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_balance_config.py b/zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_balance_config.py
@@ -139,10 +139,11 @@ def create_config(env_id, observation_shape_list, action_space_size_list, collec
                     lora_alpha=1,
                     lora_dropout=0.0,
                     lora_scale_init=1,
-                    # min_stage0_iters=10000,
-                    # max_stage_iters=20000,
+
                     min_stage0_iters=10000,
                     max_stage_iters=5000,
+                    # min_stage0_iters=10,
+                    # max_stage_iters=20,
                 ),
             ),
             use_task_exploitation_weight=False, # TODO
@@ -211,7 +212,7 @@ def generate_configs(env_id_list: List[str],
     configs = []
     # ========= TODO: global BENCHMARK_NAME =========
 
-    exp_name_prefix = f'data_suz_dmc_mt_balance_20250526/dmc_{len(env_id_list)}tasks_frameskip4_balance-stage-total-{curriculum_stage_num}_stage0-10k-5k_moe8_nlayer8_not-share-head_brf{buffer_reanalyze_freq}_seed{seed}/'
+    exp_name_prefix = f'data_suz_dmc_mt_balance_20250601/dmc_{len(env_id_list)}tasks_frameskip4_balance-stage-total-{curriculum_stage_num}_stage0-10k-5k_moe8_nlayer8_not-share-head_brf{buffer_reanalyze_freq}_seed{seed}/'
 
     # exp_name_prefix = f'data_lz/data_suz_dmc_mt_20250409_moco/dmc_{len(env_id_list)}tasks_notaskembed_nlayer8_not-share-head_final-ln_bs64_brf{buffer_reanalyze_freq}_seed{seed}/'
     
@@ -266,6 +267,9 @@ def create_env_manager():
     Overview:
         This script should be executed with <nproc_per_node> GPUs.
         Run the following command to launch the script:
+        cd /fs-computility/niuyazhe/puyuan/code/LightZero/
+        python -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 /fs-computility/niuyazhe/puyuan/code/LightZero/zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_balance_config.py 2>&1 | tee /fs-computility/niuyazhe/puyuan/code/LightZero/log/20250509/uz_mt_dmc18_ln_balance_moe8_stage5_stage0-10k-5k_nlayer8.log
+
         cd /cpfs04/user/puyuan/code/LightZero/
         python -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 /cpfs04/user/puyuan/code/LightZero/zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_balance_config.py 2>&1 | tee /cpfs04/user/puyuan/code/LightZero/log/20250522_cpfs/uz_mt_dmc18_ln_balance_moe8_stage5_stage0-5k-10k_nlayer8.log
         torchrun --nproc_per_node=8 ./zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_config.py
@@ -385,7 +389,7 @@ def create_env_manager():
     # evaluator_env_num = 2
     # num_simulations = 1
     # total_batch_size = 8
-    # batch_size = [2 for _ in range(len(env_id_list))]
+    # batch_size = [3 for _ in range(len(env_id_list))]
     # =======================================
 
     seed = 0  # You can iterate over multiple seeds if needed