
Commit 487428b

Fix shared weights sync for PipelineLayer (#7772)
* fix shared weights sync
* fix typo
1 parent ff1e910 commit 487428b

File tree

1 file changed: +16 -1 lines changed

paddlenlp/transformers/model_utils.py

Lines changed: 16 additions & 1 deletion
@@ -41,7 +41,10 @@
 )
 from huggingface_hub.utils import EntryNotFoundError
 from paddle import Tensor
-from paddle.distributed.fleet.meta_parallel.parallel_layers import SharedLayerDesc
+from paddle.distributed.fleet.meta_parallel.parallel_layers import (
+    PipelineLayer,
+    SharedLayerDesc,
+)
 from paddle.nn import Embedding, Layer
 
 # TODO(fangzeyang) Temporary fix and replace by paddle framework downloader later
@@ -933,6 +936,18 @@ def _post_init(self, original_init, *args, **kwargs):
         ):
             self.init_weights()
 
+        # Note:
+        # 1. PipelineLayer will create parameters for each layer and
+        #    call `_synchronize_shared_weights()` to synchronize the shared parameters.
+        # 2. When setting the model `state_dict`, `_synchronize_shared_weights` will be called to
+        #    synchronize the shared parameters.
+        # However, `self._init_weights` will re-initialize the parameters without
+        # synchronizing the shared parameters. If the following step does not load a checkpoint,
+        # the shared parameters will be different.
+
+        if isinstance(self, PipelineLayer):
+            self._synchronize_shared_weights()
+
     def _init_weights(self, layer):
         """
         Initialize the weights. This method should be overridden by derived class.
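
The note in the diff describes the failure mode this commit fixes: a PipelineLayer keeps an independent copy of each logically shared parameter (for example tied embeddings) on every stage that uses it, and `_init_weights` re-initializes those copies independently, so without a follow-up `_synchronize_shared_weights()` call, and without a checkpoint load to overwrite them, the copies silently diverge. Below is a minimal, framework-free sketch of that effect; `Stage`, `reinit`, and `synchronize_shared_weights` are illustrative stand-ins for the Paddle machinery, not PaddleNLP or Paddle APIs, and NumPy arrays stand in for the real parameters.

import numpy as np

rng = np.random.default_rng(0)


class Stage:
    """One pipeline stage holding its own copy of a logically shared weight."""

    def __init__(self, shared_weight):
        self.shared_weight = shared_weight.copy()

    def reinit(self):
        # Mimics `_init_weights`: every stage re-initializes its copy
        # independently, so the copies stop matching.
        self.shared_weight = rng.normal(size=self.shared_weight.shape)


def synchronize_shared_weights(stages):
    # Mimics `_synchronize_shared_weights`: broadcast the owning stage's
    # copy to every other stage that shares the parameter.
    owner = stages[0]
    for stage in stages[1:]:
        stage.shared_weight = owner.shared_weight.copy()


initial = rng.normal(size=(4, 8))
stages = [Stage(initial), Stage(initial)]

# Right after construction the copies match (PipelineLayer syncs them once).
assert np.allclose(stages[0].shared_weight, stages[1].shared_weight)

# Re-initialization without a follow-up sync leaves the copies different ...
for stage in stages:
    stage.reinit()
assert not np.allclose(stages[0].shared_weight, stages[1].shared_weight)

# ... which is what the new `isinstance(self, PipelineLayer)` branch repairs
# by calling `_synchronize_shared_weights()` again after `init_weights()`.
synchronize_shared_weights(stages)
assert np.allclose(stages[0].shared_weight, stages[1].shared_weight)

The actual fix keeps the re-sync inside `_post_init`, so it runs immediately after `self.init_weights()` and before any optional checkpoint load.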
