@@ -338,12 +338,12 @@ def resolve_weight_file_from_hf_hub(repo_id: str, cache_dir: str, support_conver
         support_conversion (bool): whether to support converting a pytorch weight file to a paddle weight file
         subfolder (str, optional): An optional value corresponding to a folder inside the repo.
     """
-    if hf_file_exists(repo_id, "model_state.pdparams", subfolder=subfolder):
-        file_name = "model_state.pdparams"
+    if hf_file_exists(repo_id, PADDLE_WEIGHT_FILE_NAME, subfolder=subfolder):
+        file_name = PADDLE_WEIGHT_FILE_NAME
     elif hf_file_exists(repo_id, PYTORCH_WEIGHT_FILE_NAME, subfolder=subfolder):
         if not support_conversion:
             raise EntryNotFoundError(
-                f"can not download `model_state.pdparams from https://huggingface.co/{repo_id}` "
+                f"can not download `{PADDLE_WEIGHT_FILE_NAME} from https://huggingface.co/{repo_id}` "
                 "and current model doesn't support conversion from pytorch weight file to paddle weight file"
             )
         file_name = PYTORCH_WEIGHT_FILE_NAME
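This hunk replaces hard-coded file names with module-level constants. As a minimal sketch of the resolution order, assuming `PADDLE_WEIGHT_FILE_NAME` and `PYTORCH_WEIGHT_FILE_NAME` hold the literals the hunk removes (their definition site is not shown in this diff), and with `hf_file_exists` stubbed for illustration only:

```python
PADDLE_WEIGHT_FILE_NAME = "model_state.pdparams"  # assumed value (the literal being replaced)
PYTORCH_WEIGHT_FILE_NAME = "pytorch_model.bin"    # assumed value

def hf_file_exists(repo_id: str, filename: str, subfolder=None) -> bool:
    ...  # stub standing in for the real Hugging Face Hub lookup

def pick_weight_file(repo_id: str, support_conversion: bool, subfolder=None) -> str:
    # Prefer the native Paddle checkpoint; fall back to the PyTorch one
    # only if this model supports pytorch-to-paddle conversion.
    if hf_file_exists(repo_id, PADDLE_WEIGHT_FILE_NAME, subfolder=subfolder):
        return PADDLE_WEIGHT_FILE_NAME
    if hf_file_exists(repo_id, PYTORCH_WEIGHT_FILE_NAME, subfolder=subfolder):
        if not support_conversion:
            raise FileNotFoundError(f"no paddle weights in {repo_id} and conversion unsupported")
        return PYTORCH_WEIGHT_FILE_NAME
    raise FileNotFoundError(f"no known weight file in {repo_id}")
```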
@@ -448,7 +448,7 @@ class is a pretrained model class adding layers on top of the base model,
     # TODO: more flexible resource handle, namedtuple with fields as:
     #   resource_name, saved_file, handle_name_for_load(None for used as __init__
     #   arguments), handle_name_for_save
-    resource_files_names = {"model_state": "model_state.pdparams"}
+    resource_files_names = {"model_state": PADDLE_WEIGHT_FILE_NAME}
     pretrained_resource_files_map = {}
     base_model_prefix = ""
     main_input_name = "input_ids"
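`resource_files_names` maps a logical resource name to the file name expected inside a saved model directory. A hypothetical illustration of how such a mapping is typically consumed (the helper below is illustrative, not PaddleNLP API):

```python
import os

PADDLE_WEIGHT_FILE_NAME = "model_state.pdparams"  # assumed value, as above
resource_files_names = {"model_state": PADDLE_WEIGHT_FILE_NAME}

def resolve_resource_paths(model_dir: str) -> dict:
    # Turn each logical resource name into a concrete path under model_dir.
    return {name: os.path.join(model_dir, fname) for name, fname in resource_files_names.items()}

# e.g. {'model_state': './my-model/model_state.pdparams'}
print(resolve_resource_paths("./my-model"))
```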
@@ -583,6 +583,22 @@ def model_name_list(self):
         # Todo: return all model name
         return list(self.pretrained_init_configuration.keys())
 
+    def get_memory_footprint(self, return_buffers=True):
+        r"""
+        Get the memory footprint of a model. This will return the memory footprint of the current model in bytes.
+        Useful to benchmark the memory footprint of the current model and design some tests.
+
+        Arguments:
+            return_buffers (`bool`, *optional*, defaults to `True`):
+                Whether to return the size of the buffer tensors in the computation of the memory footprint. Buffers
+                are tensors that do not require gradients and are not registered as parameters.
+        """
+        mem = sum([param.numel().item() * param.element_size() for param in self.parameters()])
+        if return_buffers:
+            mem_bufs = sum([buf.numel().item() * buf.element_size() for buf in self.buffers()])
+            mem = mem + mem_bufs
+        return mem
+
     def get_input_embeddings(self) -> nn.Embedding:
         """get input embedding of model
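A quick usage sketch for the new method; the model name is illustrative, and the snippet assumes a standard PaddleNLP model load on the class patched here:

```python
from paddlenlp.transformers import AutoModel

# Illustrative model name; any subclass of the patched base class gains the method.
model = AutoModel.from_pretrained("bert-base-uncased")

mem_total = model.get_memory_footprint()                       # parameters + buffers
mem_params = model.get_memory_footprint(return_buffers=False)  # parameters only
print(f"total: {mem_total / 1024**2:.1f} MiB "
      f"(buffers: {(mem_total - mem_params) / 1024**2:.1f} MiB)")
```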
@@ -1275,7 +1291,7 @@ def from_pretrained(
         else:
             # 4. loading the state dict
-            if config.tensor_parallel_degree > 1 and model_weight_file.endswith("model_state.pdparams"):
+            if config.tensor_parallel_degree > 1 and model_weight_file.endswith(PADDLE_WEIGHT_FILE_NAME):
                 model_state_dict = cls.convert_tensor_parallel(model_weight_file, config)
             else:
                 model_state_dict = paddle.load(model_weight_file, return_numpy=load_state_as_np)
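For reference on the non-parallel branch: `paddle.load(path, return_numpy=True)` returns the checkpoint values as numpy arrays rather than paddle tensors, deferring tensor creation during deserialization. A minimal demonstration, independent of the tensor-parallel branch above:

```python
import paddle

# Save and reload a tiny state dict to show the return_numpy switch.
state = {"w": paddle.randn([2, 2])}
paddle.save(state, "tiny.pdparams")

as_tensors = paddle.load("tiny.pdparams")                   # values are paddle.Tensor
as_numpy = paddle.load("tiny.pdparams", return_numpy=True)  # values are numpy.ndarray
print(type(as_tensors["w"]), type(as_numpy["w"]))
```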