|
43 | 43 | GenerationBlockInferenceModel,
|
44 | 44 | GenerationInferenceModel,
|
45 | 45 | )
|
46 |
| -from paddlenlp.experimental.transformers.utils import load_tp_checkpoint |
| 46 | +from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained |
47 | 47 | from paddlenlp.transformers import LlamaConfig, LlamaPretrainedModel
|
48 | 48 | from paddlenlp.transformers.conversion_utils import split_param_func
|
49 | 49 | from paddlenlp.transformers.llama.modeling import LlamaLMHead
|
|
52 | 52 | CausalLMOutputWithCrossAttentions,
|
53 | 53 | )
|
54 | 54 | from paddlenlp.transformers.model_utils import (
|
55 |
| - dtype_guard, |
56 | 55 | dy2st_nocheck_guard_context,
|
57 |
| - no_init_weights, |
58 | 56 | register_base_model,
|
59 | 57 | )
|
60 |
| -from paddlenlp.transformers.utils import ( |
61 |
| - ContextManagers, |
62 |
| - is_paddle_support_lazy_init, |
63 |
| - is_safetensors_available, |
64 |
| -) |
65 | 58 | from paddlenlp.utils.log import logger
|
66 | 59 |
|
67 | 60 | __all__ = [
|
@@ -1147,47 +1140,7 @@ def __init__(self, config):
|
1147 | 1140 |
|
1148 | 1141 | @classmethod
|
1149 | 1142 | def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
|
1150 |
| - config = kwargs.pop("config", None) |
1151 |
| - cache_dir = kwargs.pop("cache_dir", None) |
1152 |
| - dtype = kwargs.pop("dtype", None) |
1153 |
| - if dtype is None: |
1154 |
| - dtype = config.dtype |
1155 |
| - subfolder = kwargs.pop("subfolder", None) |
1156 |
| - if subfolder is None: |
1157 |
| - subfolder = "" |
1158 |
| - variant = kwargs.pop("variant", None) |
1159 |
| - use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) |
1160 |
| - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) |
1161 |
| - |
1162 |
| - init_contexts = [] |
1163 |
| - if low_cpu_mem_usage or config.quantization_config.is_weight_quantize(): |
1164 |
| - # Instantiate model. |
1165 |
| - init_contexts.append(no_init_weights(_enable=True)) |
1166 |
| - if is_paddle_support_lazy_init(): |
1167 |
| - init_contexts.append(paddle.LazyGuard()) |
1168 |
| - if dtype: |
1169 |
| - init_contexts.append(dtype_guard(dtype)) |
1170 |
| - |
1171 |
| - # init the model |
1172 |
| - with ContextManagers(init_contexts): |
1173 |
| - model = cls(config) |
1174 |
| - |
1175 |
| - resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path( |
1176 |
| - pretrained_model_name_or_path, |
1177 |
| - cache_dir=cache_dir, |
1178 |
| - subfolder=subfolder, |
1179 |
| - from_hf_hub=False, |
1180 |
| - from_aistudio=False, |
1181 |
| - config=config, |
1182 |
| - convert_from_torch=False, |
1183 |
| - use_safetensors=use_safetensors, |
1184 |
| - variant=variant, |
1185 |
| - ) |
1186 |
| - |
1187 |
| - model_path = os.path.dirname(resolved_archive_file) |
1188 |
| - state_dict = load_tp_checkpoint(model_path, cls, config) |
1189 |
| - model.set_state_dict(state_dict) |
1190 |
| - return model |
| 1143 | + return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs) |
1191 | 1144 |
|
1192 | 1145 | @classmethod
|
1193 | 1146 | def get_cache_kvs_shape(
|
@@ -1284,47 +1237,7 @@ def __init__(self, config):
|
1284 | 1237 |
|
1285 | 1238 | @classmethod
|
1286 | 1239 | def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
|
1287 |
| - config = kwargs.pop("config", None) |
1288 |
| - cache_dir = kwargs.pop("cache_dir", None) |
1289 |
| - dtype = kwargs.pop("dtype", None) |
1290 |
| - if dtype is None: |
1291 |
| - dtype = config.dtype |
1292 |
| - subfolder = kwargs.pop("subfolder", None) |
1293 |
| - if subfolder is None: |
1294 |
| - subfolder = "" |
1295 |
| - variant = kwargs.pop("variant", None) |
1296 |
| - use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) |
1297 |
| - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) |
1298 |
| - |
1299 |
| - init_contexts = [] |
1300 |
| - if low_cpu_mem_usage or config.quantization_config.is_weight_quantize(): |
1301 |
| - # Instantiate model. |
1302 |
| - init_contexts.append(no_init_weights(_enable=True)) |
1303 |
| - if is_paddle_support_lazy_init(): |
1304 |
| - init_contexts.append(paddle.LazyGuard()) |
1305 |
| - if dtype: |
1306 |
| - init_contexts.append(dtype_guard(dtype)) |
1307 |
| - |
1308 |
| - # init the model |
1309 |
| - with ContextManagers(init_contexts): |
1310 |
| - model = cls(config) |
1311 |
| - |
1312 |
| - resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path( |
1313 |
| - pretrained_model_name_or_path, |
1314 |
| - cache_dir=cache_dir, |
1315 |
| - subfolder=subfolder, |
1316 |
| - from_hf_hub=False, |
1317 |
| - from_aistudio=False, |
1318 |
| - config=config, |
1319 |
| - convert_from_torch=False, |
1320 |
| - use_safetensors=use_safetensors, |
1321 |
| - variant=variant, |
1322 |
| - ) |
1323 |
| - |
1324 |
| - model_path = os.path.dirname(resolved_archive_file) |
1325 |
| - state_dict = load_tp_checkpoint(model_path, cls, config) |
1326 |
| - model.set_state_dict(state_dict) |
1327 |
| - return model |
| 1240 | + return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs) |
1328 | 1241 |
|
1329 | 1242 | @classmethod
|
1330 | 1243 | def get_cache_kvs_shape(
|
@@ -1561,48 +1474,7 @@ def get_tensor_parallel_split_mappings(num_layers):
|
1561 | 1474 |
|
1562 | 1475 | @classmethod
|
1563 | 1476 | def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
|
1564 |
| - config = kwargs.pop("config", None) |
1565 |
| - cache_dir = kwargs.pop("cache_dir", None) |
1566 |
| - dtype = kwargs.pop("dtype", None) |
1567 |
| - if dtype is None: |
1568 |
| - dtype = config.dtype |
1569 |
| - subfolder = kwargs.pop("subfolder", None) |
1570 |
| - if subfolder is None: |
1571 |
| - subfolder = "" |
1572 |
| - variant = kwargs.pop("variant", None) |
1573 |
| - use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) |
1574 |
| - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) |
1575 |
| - |
1576 |
| - init_contexts = [] |
1577 |
| - if low_cpu_mem_usage or config.quantization_config.is_weight_quantize(): |
1578 |
| - # Instantiate model. |
1579 |
| - init_contexts.append(no_init_weights(_enable=True)) |
1580 |
| - if is_paddle_support_lazy_init(): |
1581 |
| - init_contexts.append(paddle.LazyGuard()) |
1582 |
| - if dtype: |
1583 |
| - init_contexts.append(dtype_guard(dtype)) |
1584 |
| - |
1585 |
| - # init the model |
1586 |
| - with ContextManagers(init_contexts): |
1587 |
| - model = cls(config) |
1588 |
| - |
1589 |
| - resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path( |
1590 |
| - pretrained_model_name_or_path, |
1591 |
| - cache_dir=cache_dir, |
1592 |
| - subfolder=subfolder, |
1593 |
| - from_hf_hub=False, |
1594 |
| - from_aistudio=False, |
1595 |
| - config=config, |
1596 |
| - convert_from_torch=False, |
1597 |
| - use_safetensors=use_safetensors, |
1598 |
| - variant=variant, |
1599 |
| - ) |
1600 |
| - |
1601 |
| - model_path = os.path.dirname(resolved_archive_file) |
1602 |
| - state_dict = load_tp_checkpoint(model_path, cls, config) |
1603 |
| - model.set_state_dict(state_dict) |
1604 |
| - |
1605 |
| - return model |
| 1477 | + return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs) |
1606 | 1478 |
|
1607 | 1479 | @classmethod
|
1608 | 1480 | def get_cache_kvs_shape(
|
|
0 commit comments