13
13
# limitations under the License.
14
14
from __future__ import annotations
15
15
16
- import json
17
16
import os
18
- from functools import partial
19
17
20
- import numpy as np
21
18
import paddle
22
- from tqdm import tqdm
23
19
24
- from paddlenlp .transformers import AutoConfig
25
20
from paddlenlp .transformers .model_utils import (
26
- _add_variant ,
27
21
dtype_guard ,
28
- load_state_dict ,
22
+ load_tp_checkpoint ,
29
23
no_init_weights ,
30
24
)
31
25
from paddlenlp .transformers .utils import (
32
26
ContextManagers ,
33
27
is_paddle_support_lazy_init ,
34
28
is_safetensors_available ,
35
- paddlenlp_load ,
36
29
)
37
- from paddlenlp .utils .env import (
38
- PADDLE_WEIGHTS_INDEX_NAME ,
39
- SAFE_MASTER_WEIGHTS_INDEX_NAME ,
40
- SAFE_PEFT_WEIGHTS_INDEX_NAME ,
41
- SAFE_WEIGHTS_INDEX_NAME ,
42
- )
43
-
44
# Prefer PaddleNLP's fast safetensors loaders; fall back to the reference
# `safetensors` package when they are unavailable.
try:
    from paddlenlp.utils.safetensors import fast_load_file as safe_load_file
    from paddlenlp.utils.safetensors import fast_safe_open as safe_open
except ImportError:
    # Fix: was a bare `except:`, which would also swallow SystemExit /
    # KeyboardInterrupt. Only an import failure should trigger the fallback.
    from safetensors import safe_open
    from safetensors.numpy import load_file as safe_load_file
50
-
51
-
52
def load_sharded_checkpoint(folder, variant=None, return_numpy=False):
    """
    Load a checkpoint from *folder*, merging shards if the checkpoint is sharded.

    This load is performed efficiently: each shard is loaded into RAM one at a
    time and merged into the result, so only one shard is resident at once on
    top of the accumulated state dict.

    Args:
        folder (`str` or `os.PathLike`): A path to a folder containing the sharded checkpoint.
        variant (`str`): The model variant.
        return_numpy (`bool`): Whether to return numpy array instead of paddle tensor.
    """
    # Single-file checkpoints win over sharded indexes; check them first.
    pdparams_path = os.path.join(folder, _add_variant("model_state.pdparams", variant))
    if os.path.isfile(pdparams_path):
        return paddle.load(pdparams_path, return_numpy=return_numpy)

    lora_pdparams_path = os.path.join(folder, _add_variant("lora_model_state.pdparams", variant))
    if os.path.isfile(lora_pdparams_path):
        return paddle.load(lora_pdparams_path, return_numpy=return_numpy)

    single_safetensors_path = os.path.join(folder, _add_variant("model.safetensors", variant))
    if os.path.isfile(single_safetensors_path):
        state_dict = safe_load_file(single_safetensors_path)
        if not return_numpy:
            # Convert numpy arrays to paddle tensors without copying.
            for name in list(state_dict.keys()):
                if isinstance(state_dict[name], np.ndarray):
                    state_dict[name] = paddle.Tensor(state_dict.pop(name), zero_copy=True)
        return state_dict

    # Sharded checkpoint: locate an index file. Preference order: safetensors
    # index, then master-weights index, then paddle index, then peft index.
    index_file = os.path.join(folder, _add_variant(PADDLE_WEIGHTS_INDEX_NAME, variant))
    safe_index_file = os.path.join(folder, _add_variant(SAFE_WEIGHTS_INDEX_NAME, variant))
    safe_master_file = os.path.join(folder, _add_variant(SAFE_MASTER_WEIGHTS_INDEX_NAME, variant))
    safe_peft_file = os.path.join(folder, _add_variant(SAFE_PEFT_WEIGHTS_INDEX_NAME, variant))

    if os.path.isfile(safe_index_file):
        load_index, load_safe = safe_index_file, True
    elif os.path.isfile(safe_master_file):
        load_index, load_safe = safe_master_file, True
    elif os.path.isfile(index_file):
        load_index, load_safe = index_file, False
    elif os.path.isfile(safe_peft_file):
        load_index, load_safe = safe_peft_file, True
    else:
        raise ValueError(f"Could not find {index_file} or {safe_index_file} or {safe_peft_file}")

    with open(load_index, "r", encoding="utf-8") as f:
        index = json.load(f)

    # The index maps parameter name -> shard file; load each unique shard once.
    shard_files = list(set(index["weight_map"].values()))
    if load_safe:
        loader = safe_load_file
    else:
        loader = partial(paddlenlp_load, map_location="np" if return_numpy else "cpu")

    merged = {}
    for shard_file in tqdm(shard_files):
        merged.update(loader(os.path.join(folder, shard_file)))

    if not return_numpy:
        # Convert any numpy arrays to paddle tensors without copying.
        for name in list(merged.keys()):
            if isinstance(merged[name], np.ndarray):
                merged[name] = paddle.Tensor(merged.pop(name), zero_copy=True)

    return merged
123
-
124
-
125
def load_tp_checkpoint(folder, cls, config, return_numpy=False):
    """
    Load a tensor-parallel checkpoint efficiently: everything happens on CPU and
    no model weights need to be initialized first.

    Args:
        folder (`str` or `os.PathLike`): A path to a folder containing the model checkpoint.
        cls (`type`): The model class (supplies the tensor-parallel conversion hooks).
        config (`AutoConfig`): The model config.
            NOTE(review): this argument is immediately overwritten by
            ``AutoConfig.from_pretrained(folder)`` below, so the caller-supplied
            value is effectively ignored — confirm this is intentional.
        return_numpy (`bool`): Whether to load the checkpoint as numpy arrays.
    """
    config = AutoConfig.from_pretrained(folder)
    if config.tensor_parallel_degree == 1 or config.tensor_parallel_degree == -1:
        # Not tensor-parallel: defer to the plain (possibly sharded) loader.
        return load_sharded_checkpoint(folder, return_numpy=return_numpy)

    # Fix: format the rank as two zero-padded digits ("tp00" ... "tp10").
    # The previous f-string hard-coded a leading "0" ("tp0{rank}"), which
    # produced "tp010" instead of "tp10" for ranks >= 10 and silently missed
    # the pre-split per-rank file. Output is identical for ranks 0-9.
    rank_model_path = os.path.join(folder, f"model_state.tp{config.tensor_parallel_rank:0>2d}.pdparams")
    model_path = os.path.join(folder, "model_state.pdparams")
    safe_model_path = os.path.join(folder, "model.safetensors")

    if os.path.exists(rank_model_path):
        # Weights already split for this rank: load them directly.
        return paddle.load(rank_model_path, return_numpy=return_numpy)

    if os.path.exists(model_path):
        # Single full pdparams file: split it for this rank.
        state_dict = cls.convert_tensor_parallel(model_path, config)
    elif os.path.exists(safe_model_path):
        # Single safetensors file: derive the TP split actions from its keys.
        with safe_open(safe_model_path, framework="np", device="cpu") as f:
            loaded_keys = f.keys()
        tp_actions = cls.get_tensor_parallel_convert_actions(config, loaded_keys)
        state_dict = load_state_dict(safe_model_path, tp_actions)
    else:  # sharded safetensors files
        resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path(
            pretrained_model_name_or_path=folder,
            use_safetensors=True,
        )
        if len(resolved_sharded_files) > 1:
            resolved_sharded_files = tqdm(resolved_sharded_files, desc="Loading checkpoint shards")
        loaded_state_dict_keys = sharded_metadata["all_checkpoint_keys"]
        tp_actions = cls.get_tensor_parallel_convert_actions(config, loaded_state_dict_keys, ignore_error=True)
        state_dict = {}
        for shard_file in resolved_sharded_files:
            shard_state_dict = load_state_dict(
                shard_file,
                tp_actions,
                loaded_state_dict_keys,
            )
            state_dict.update(shard_state_dict)

    if return_numpy:
        # Move any remaining paddle tensors to host memory as numpy arrays.
        for key in list(state_dict.keys()):
            if not isinstance(state_dict[key], np.ndarray):
                state_dict[key] = state_dict.pop(key).cpu().numpy()
    return state_dict
175
30
176
31
177
- def infererence_model_from_pretrained (cls , pretrained_model_name_or_path , args , kwargs ):
32
+ def infererence_model_from_pretrained (cls , pretrained_model_name_or_path , args , kwargs , return_numpy = True ):
178
33
r"""
179
34
Instantiate a pretrained model configuration from a pre-trained model name or path.
180
35
"""
@@ -203,7 +58,7 @@ def infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args,
203
58
with ContextManagers (init_contexts ):
204
59
model = cls (config )
205
60
206
- resolved_archive_file , resolved_sharded_files , sharded_metadata , is_sharded = cls ._resolve_model_file_path (
61
+ resolved_archive_file , _ , _ , _ = cls ._resolve_model_file_path (
207
62
pretrained_model_name_or_path ,
208
63
cache_dir = cache_dir ,
209
64
subfolder = subfolder ,
@@ -216,7 +71,7 @@ def infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args,
216
71
)
217
72
218
73
model_path = os .path .dirname (resolved_archive_file )
219
- state_dict = load_tp_checkpoint (model_path , cls , config , return_numpy = True )
74
+ state_dict = load_tp_checkpoint (model_path , cls , config , return_numpy = return_numpy )
220
75
model .set_state_dict (state_dict )
221
76
222
77
return model
0 commit comments