Commit 7b92936

zhangyuqin1998 authored and lvdongyi committed
[Auto Parallel] Fix ckpt_converter for auto_parallel (PaddlePaddle#9136)
1 parent 839dd33 · commit 7b92936

File tree

2 files changed, +5 −5 lines changed


paddlenlp/trainer/auto_trainer.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -780,5 +780,8 @@ def _load_from_checkpoint(self, resume_from_checkpoint=None):
             raise ValueError(f"Can't find a valid checkpoint at {resume_from_checkpoint}")
         self._load_ckpt_func(state_dict, ckpt_path)
 
+        if self.args.to_static:
+            self.model_wrapped.set_state_dict(model_state_dict)
+            self.model_wrapped.set_state_dict(optim_state_dict)
         # release memory
         del state_dict
```
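
For context, the added branch relies on Paddle's `Layer.set_state_dict`, which copies matching entries from a dict of tensors into a layer's parameters in place. Below is a minimal sketch of that call using an illustrative toy layer rather than the trainer's wrapped model; the names are not taken from the trainer.

```python
import paddle

# Toy layer standing in for the wrapped model (illustrative only).
layer = paddle.nn.Linear(4, 2)

# A plain dict of tensors, e.g. as produced by a checkpoint loader.
state_dict = {name: paddle.zeros_like(value) for name, value in layer.state_dict().items()}

# Copy the checkpointed values into the layer's parameters in place.
layer.set_state_dict(state_dict)
```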

paddlenlp/trainer/utils/ckpt_converter.py

Lines changed: 2 additions & 5 deletions
```diff
@@ -148,11 +148,8 @@ def load_from_hybrid_parallel_checkpoint(self):
 
         # In this scenario, the data type of the model state is bfloat16.
         for param_name, param_value in model_params.items():
-            if param_value.is_dist():
-                master_weight = self.auto_parallel_state_dict[param_name + ".master_weight"]
-                cast_master_weight = paddle.cast(master_weight._local_value(), param_value.dtype)
-                paddle.assign(cast_master_weight, param_value._local_value())
-            else:
+            if param_value._is_initialized():
+                # These codes are compatible for both dense tensor and dist tensor
                 master_weight = self.auto_parallel_state_dict[param_name + ".master_weight"]
                 cast_master_weight = paddle.cast(master_weight, param_value.dtype)
                 paddle.assign(cast_master_weight, param_value)
```
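
The retained branch follows the usual master-weight pattern: cast the higher-precision master weight to the parameter's dtype, then copy it into the parameter in place with `paddle.assign`. Below is a minimal sketch of that pattern with illustrative dtypes and shapes (the checkpoint path above targets bfloat16 parameters).

```python
import paddle

# Illustrative stand-ins: a low-precision parameter and its float32 master weight.
param_value = paddle.zeros([4], dtype="float16")
master_weight = paddle.ones([4], dtype="float32")

# Cast the master weight down to the parameter's dtype, then copy it in place,
# mirroring the paddle.cast / paddle.assign pair in the new code path.
cast_master_weight = paddle.cast(master_weight, param_value.dtype)
paddle.assign(cast_master_weight, param_value)
```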
