opendilab
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
Lines changed: 99 additions & 15 deletions
diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
Lines changed: 1 addition & 0 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/README.zh.md b/README.zh.md
Lines changed: 27 additions & 15 deletions
diff --git a/docs/source/tutorials/envs/customize_envs.md b/docs/source/tutorials/envs/customize_envs.md
Lines changed: 8 additions & 3 deletions
diff --git a/lzero/config/meta.py b/lzero/config/meta.py
Lines changed: 1 addition & 1 deletion
diff --git a/lzero/mcts/buffer/game_buffer_unizero.py b/lzero/mcts/buffer/game_buffer_unizero.py
Lines changed: 5 additions & 5 deletions
diff --git a/lzero/mcts/buffer/game_segment.py b/lzero/mcts/buffer/game_segment.py
Lines changed: 1 addition & 1 deletion
diff --git a/lzero/mcts/ptree/ptree_ez.py b/lzero/mcts/ptree/ptree_ez.py
Lines changed: 2 additions & 2 deletions
diff --git a/lzero/mcts/ptree/ptree_mz.py b/lzero/mcts/ptree/ptree_mz.py
Lines changed: 2 additions & 2 deletions
@@ -38,10 +38,10 @@ jobs:
         run: |
           make zip
           ls -al dist
-      - name: Upload packed files to artifacts
-        uses: actions/upload-artifact@v2
+      - name: Upload packed files to artifacts (source)
+        uses: actions/upload-artifact@v4
         with:
-          name: build-artifacts-all
+          name: build-artifacts-source
           path: ./dist/*
 
   wheel_build:
@@ -102,35 +102,119 @@ jobs:
         run: |
           ls -al ./wheelhouse
           mv wheelhouse dist
-      - name: Upload packed files to artifacts
-        uses: actions/upload-artifact@v2
+      - name: Upload packed files to artifacts (wheels)
+        uses: actions/upload-artifact@v4
         with:
-          name: build-artifacts-all
+          name: build-artifacts-wheels-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.architecture }}
           path: ./dist/*
 
+  wheel_aggregate:
+    name: Aggregate all wheels
+    runs-on: ubuntu-20.04
+    needs: wheel_build
+    steps:
+      - name: Create aggregation directory
+        run: mkdir -p aggregated_wheels_all
+
+      - name: Download wheel ubuntu-20.04, 3.7, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.7-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.8, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.8-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.9, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.9-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.10, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.10-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.11, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.11-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.7, aarch64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.7-aarch64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.8, aarch64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.8-aarch64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.9, aarch64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.9-aarch64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.10, aarch64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.10-aarch64
+          path: aggregated_wheels_all
+      - name: Download wheel ubuntu-20.04, 3.11, aarch64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-ubuntu-20.04-3.11-aarch64
+          path: aggregated_wheels_all
+
+      - name: Download wheel macos-13, 3.7, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-macos-13-3.7-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel macos-13, 3.8, x86_64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-macos-13-3.8-x86_64
+          path: aggregated_wheels_all
+      - name: Download wheel macos-13, 3.7, arm64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-macos-13-3.7-arm64
+          path: aggregated_wheels_all
+      - name: Download wheel macos-13, 3.8, arm64
+        uses: actions/download-artifact@v4
+        with:
+          name: build-artifacts-wheels-macos-13-3.8-arm64
+          path: aggregated_wheels_all
+
+      - name: Upload unified wheels artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-artifacts-wheels
+          path: aggregated_wheels_all
+
   # the publishing can only be processed on linux system
   wheel_publish:
     name: Publish the wheels to pypi
     runs-on: ubuntu-20.04
     needs:
       - source_build
-      - wheel_build
+      - wheel_aggregate
     strategy:
       fail-fast: false
       matrix:
         python:
           - '3.8.7'
 
     steps:
-      - name: Download packed files to artifacts
-        uses: actions/download-artifact@v3
+      - name: Download unified wheels artifact
+        uses: actions/download-artifact@v4
         with:
-          name: build-artifacts-all
+          name: build-artifacts-wheels
           path: ./dist
-      - name: Show the buildings
-        shell: bash
-        run: |
-          ls -al ./dist
+      - name: Show the aggregated wheels
+        run: ls -al ./dist
       - name: Upload distribution 📦 to github release
         uses: svenstaro/upload-release-action@v2
         with:
@@ -145,4 +229,4 @@ jobs:
           password: ${{ secrets.PYPI_API_TOKEN }}
           verbose: true
           skip_existing: true
-          packages_dir: dist/
+          packages_dir: dist/
@@ -38,12 +38,12 @@ jobs:
           make zip
           ls -al dist
       - name: Upload packed files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: build-artifacts-source-pack
           path: ./dist/*
       - name: Upload packed files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: build-artifacts-all
           path: ./dist/*
@@ -108,12 +108,12 @@ jobs:
           ls -al ./wheelhouse
           mv wheelhouse dist
       - name: Upload packed files to artifacts
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: build-artifacts-${{ runner.os }}-cp${{ matrix.python }}-${{ matrix.architecture }}
           path: ./dist/*
       - name: Upload packed files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: build-artifacts-all
           path: ./dist/*
 
@@ -100,6 +100,7 @@ jobs:
         run: |
             make clean build unittest
       - name: Upload coverage to Codecov
+        if: ${{ env.OS_NAME == 'MacOS' }}
         uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
 
@@ -1449,4 +1449,5 @@ events.*
 # pooltool-specific stuff
 !/assets/pooltool/**
 lzero/mcts/ctree/ctree_alphazero/pybind11
+
 zoo/jericho/envs/z-machine-games-master
@@ -1,4 +1,4 @@
-2025.04.01 (v0.2.0)
+2025.04.09 (v0.2.0)
 - env: Add Metadrive environment and configurations (#192)
 - env: Add Sampled MuZero/UniZero and DMC environment with related configurations (#260)
 - env: Polish Chess environment and its render method; add unittests and configurations (#272)
 
@@ -28,7 +28,7 @@
 [![GitHub license](https://img.shields.io/github/license/opendilab/LightZero)](https://github.com/opendilab/LightZero/blob/master/LICENSE)
 [![discord badge](https://dcbadge.vercel.app/api/server/dkZS2JF56X?style=flat)](https://discord.gg/dkZS2JF56X)
 
-Updated on 2025.04.01 LightZero-v0.2.0
+Updated on 2025.04.09 LightZero-v0.2.0
 
 English | [简体中文(Simplified Chinese)](https://github.com/opendilab/LightZero/blob/main/README.zh.md) | [Documentation](https://opendilab.github.io/LightZero) | [LightZero Paper](https://arxiv.org/abs/2310.08348) | [🔥UniZero Paper](https://arxiv.org/abs/2406.10667) | [🔥ReZero Paper](https://arxiv.org/abs/2404.16364)
 
 
@@ -27,7 +27,7 @@
 [![Contributors](https://img.shields.io/github/contributors/opendilab/LightZero)](https://github.com/opendilab/LightZero/graphs/contributors)
 [![GitHub license](https://img.shields.io/github/license/opendilab/LightZero)](https://github.com/opendilab/LightZero/blob/master/LICENSE)
 
-最近更新于 2025.04.01 LightZero-v0.2.0
+最近更新于 2025.04.09 LightZero-v0.2.0
 
 [English](https://github.com/opendilab/LightZero/blob/main/README.md) | 简体中文 | [文档](https://opendilab.github.io/LightZero) | [LightZero 论文](https://arxiv.org/abs/2310.08348) | [🔥UniZero 论文](https://arxiv.org/abs/2406.10667) | [🔥ReZero 论文](https://arxiv.org/abs/2404.16364)
 
@@ -52,25 +52,37 @@
 **LightZero** 的目标是**标准化 MCTS 算法族，以加速相关研究和应用。** [Benchmark](#benchmark) 中介绍了目前所有已实现算法的性能比较。
 
 ### 导航
-- [概览](#概览)
+- [LightZero](#lightzero)
+  - [🔍 背景](#-背景)
+  - [🎨 概览](#-概览)
     - [导航](#导航)
-    - [特点](#特点)
-    - [框架结构](#框架结构)
-    - [集成算法](#集成算法)
-- [安装方法](#安装方法)
-- [快速开始](#快速开始)
-- [文档](#文档)
-- [基线算法比较](#基线算法比较)
-- [MCTS相关笔记](#MCTS-相关笔记)
+    - [💥 特点](#-特点)
+    - [🧩 框架结构](#-框架结构)
+    - [🎁 集成算法](#-集成算法)
+  - [⚙️ 安装方法](#️-安装方法)
+    - [使用 Docker 进行安装](#使用-docker-进行安装)
+  - [🚀 快速开始](#-快速开始)
+  - [📚 文档](#-文档)
+  - [📊 基线算法比较](#-基线算法比较)
+  - [📝 MCTS 相关笔记](#-mcts-相关笔记)
     - [论文笔记](#论文笔记)
     - [算法框架图](#算法框架图)
-- [MCTS相关论文](#MCTS-相关论文)
+  - [MCTS 相关论文](#mcts-相关论文)
     - [重要论文](#重要论文)
+      - [LightZero Implemented series](#lightzero-implemented-series)
+      - [AlphaGo series](#alphago-series)
+      - [MuZero series](#muzero-series)
+      - [MCTS Analysis](#mcts-analysis)
+      - [MCTS Application](#mcts-application)
     - [其他论文](#其他论文)
-- [反馈意见和贡献](#反馈意见和贡献)
-- [引用](#引用)
-- [致谢](#致谢)
-- [许可证](#许可证)
+      - [ICML](#icml)
+      - [ICLR](#iclr)
+      - [NeurIPS](#neurips)
+      - [Other Conference or Journal](#other-conference-or-journal)
+  - [💬 反馈意见和贡献](#-反馈意见和贡献)
+  - [🌏 引用](#-引用)
+  - [💓 致谢](#-致谢)
+  - [🏷️ 许可证](#️-许可证)
 
 ### 💥 特点
 **轻量**：LightZero 中集成了多种 MCTS 族算法，能够在同一框架下轻量化地解决多种属性的决策问题。
 
@@ -81,12 +81,17 @@ In a custom environment, you need to provide properties for observation space an
 
 ```python
 @property
-defobservation_space(self):
-    return self.env.observation_space
+def observation_space(self):
+    return self._observation_space
 
 @property
 def action_space(self):
-    return self.env.action_space
+    return self._action_space
+    
+@property
+def legal_actions(self):
+    # get the actual legal actions
+    return np.arange(self._action_space.n)
 ```
 
 ### 6. **Render Method**<br>
 
@@ -7,7 +7,7 @@
 __TITLE__ = "LightZero"
 
 #: Version of this project.
-__VERSION__ = "0.1.0"
+__VERSION__ = "0.2.0"
 
 #: Short description of the project, will be included in ``setup.py``.
 __DESCRIPTION__ = 'A lightweight and efficient MCTS/AlphaZero/MuZero algorithm toolkits.'
 
@@ -76,7 +76,7 @@ def sample(
         )
 
         # target policy
-        batch_target_policies_re = self._compute_target_policy_reanalyzed(policy_re_context, policy._target_model, current_batch[1]) # current_batch[1] is batch_action
+        batch_target_policies_re = self._compute_target_policy_reanalyzed(policy_re_context, policy._target_model, current_batch[1], current_batch[-1]) # current_batch[1] is batch_action
         batch_target_policies_non_re = self._compute_target_policy_non_reanalyzed(
             policy_non_re_context, self._cfg.model.action_space_size
         )
@@ -235,7 +235,7 @@ def reanalyze_buffer(
         # obtain the current_batch and prepare target context
         policy_re_context, current_batch = self._make_batch_for_reanalyze(batch_size)
         # target policy
-        self._compute_target_policy_reanalyzed(policy_re_context, policy._target_model, current_batch[1], current_batch[-1] )
+        self._compute_target_policy_reanalyzed(policy_re_context, policy._target_model, current_batch[1], current_batch[-1])
 
     def _make_batch_for_reanalyze(self, batch_size: int) -> Tuple[Any]:
         """
@@ -432,7 +432,7 @@ def _compute_target_policy_reanalyzed(self, policy_re_context: List[Any], model:
             # =============== NOTE: The key difference with MuZero =================
             # To obtain the target policy from MCTS guided by the recent target model
             # TODO: batch_obs (policy_obs_list) is at timestep t, batch_action is at timestep t
-            m_output = model.initial_inference(batch_obs, batch_action[:self.reanalyze_num], start_pos=batch_timestep)  # NOTE: :self.reanalyze_num
+            m_output = model.initial_inference(batch_obs, batch_action[:self.reanalyze_num], start_pos=batch_timestep[:self.reanalyze_num])  # NOTE: :self.reanalyze_num
             # =======================================================================
 
             if not model.training:
@@ -459,13 +459,13 @@ def _compute_target_policy_reanalyzed(self, policy_re_context: List[Any], model:
                 roots = MCTSCtree.roots(transition_batch_size, legal_actions)
                 roots.prepare(self._cfg.root_noise_weight, noises, reward_pool, policy_logits_pool, to_play)
                 # do MCTS for a new policy with the recent target model
-                MCTSCtree(self._cfg).search(roots, model, latent_state_roots, to_play, batch_timestep)
+                MCTSCtree(self._cfg).search(roots, model, latent_state_roots, to_play, batch_timestep[:self.reanalyze_num])
             else:
                 # python mcts_tree
                 roots = MCTSPtree.roots(transition_batch_size, legal_actions)
                 roots.prepare(self._cfg.root_noise_weight, noises, reward_pool, policy_logits_pool, to_play)
                 # do MCTS for a new policy with the recent target model
-                MCTSPtree(self._cfg).search(roots, model, latent_state_roots, to_play, batch_timestep)
+                MCTSPtree(self._cfg).search(roots, model, latent_state_roots, to_play, batch_timestep[:self.reanalyze_num])
 
             roots_legal_actions_list = legal_actions
             roots_distributions = roots.get_distributions()
 
@@ -135,7 +135,7 @@ def append(
             obs: np.ndarray,
             reward: np.ndarray,
             action_mask: np.ndarray = None,
-            to_play: List = [-1],
+            to_play: int = -1,
             timestep: int = 0,
             chance: int = 0,
     ) -> None:
 
@@ -239,7 +239,7 @@ def prepare(
             noises: List[float],
             value_prefixs: List[float],
             policies: List[List[float]],
-            to_play: List = [-1]
+            to_play: Union[int, List] = -1
     ) -> None:
         """
         Overview:
@@ -261,7 +261,7 @@ def prepare(
             self.roots[i].add_exploration_noise(root_noise_weight, noises[i])
             self.roots[i].visit_count += 1
 
-    def prepare_no_noise(self, value_prefixs: List[float], policies: List[List[float]], to_play: List = [-1]) -> None:
+    def prepare_no_noise(self, value_prefixs: List[float], policies: List[List[float]], to_play: Union[int, List] = -1) -> None:
         """
         Overview:
             Expand the roots without noise.
 
@@ -220,7 +220,7 @@ def prepare(
             noises: List[float],
             rewards: List[float],
             policies: List[List[float]],
-            to_play: List = [-1]
+            to_play: Union[int, List] = -1
     ) -> None:
         """
         Overview:
@@ -241,7 +241,7 @@ def prepare(
             self.roots[i].add_exploration_noise(root_noise_weight, noises[i])
             self.roots[i].visit_count += 1
 
-    def prepare_no_noise(self, rewards: List[float], policies: List[List[float]], to_play: List = [-1]) -> None:
+    def prepare_no_noise(self, rewards: List[float], policies: List[List[float]], to_play: Union[int, List] = -1) -> None:
         """
         Overview:
             Expand the roots without noise.
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-2025.04.01 (v0.2.0)` |
|  | `1` | `+2025.04.09 (v0.2.0)` |
| `2` | `2` | `- env: Add Metadrive environment and configurations (#192)` |
| `3` | `3` | `- env: Add Sampled MuZero/UniZero and DMC environment with related configurations (#260)` |
| `4` | `4` | `- env: Polish Chess environment and its render method; add unittests and configurations (#272)` |