Commit ea2f41c

update return_numpy=True

1 parent cf7b2fb · commit ea2f41c

6 files changed: +72 −338 lines changed

paddlenlp/experimental/transformers/chatglm/modeling.py

Lines changed: 2 additions & 51 deletions

@@ -13,8 +13,6 @@
 # limitations under the License.
 from __future__ import annotations
 
-import os
-
 import paddle
 import paddle.nn.functional as F
 from paddle import nn
@@ -29,23 +27,16 @@
 from paddlenlp.experimental.transformers.generation_utils import (
     GenerationInferenceModel,
 )
-from paddlenlp.experimental.transformers.utils import load_tp_checkpoint
+from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
 from paddlenlp.transformers import ChatGLMConfig, ChatGLMPretrainedModel
 from paddlenlp.transformers.model_outputs import (
     BaseModelOutputWithPastAndCrossAttentions,
     CausalLMOutputWithPast,
 )
 from paddlenlp.transformers.model_utils import (
-    dtype_guard,
     dy2st_nocheck_guard_context,
-    no_init_weights,
     register_base_model,
 )
-from paddlenlp.transformers.utils import (
-    ContextManagers,
-    is_paddle_support_lazy_init,
-    is_safetensors_available,
-)
 
 __all__ = ["ChatGLMForCausalLMInferenceModel"]
 
@@ -593,47 +584,7 @@ def __init__(self, config: ChatGLMConfig):
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
-        config = kwargs.pop("config", None)
-        cache_dir = kwargs.pop("cache_dir", None)
-        dtype = kwargs.pop("dtype", None)
-        if dtype is None:
-            dtype = config.dtype
-        subfolder = kwargs.pop("subfolder", None)
-        if subfolder is None:
-            subfolder = ""
-        variant = kwargs.pop("variant", None)
-        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
-
-        init_contexts = []
-        if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
-            # Instantiate model.
-            init_contexts.append(no_init_weights(_enable=True))
-            if is_paddle_support_lazy_init():
-                init_contexts.append(paddle.LazyGuard())
-        if dtype:
-            init_contexts.append(dtype_guard(dtype))
-
-        # init the model
-        with ContextManagers(init_contexts):
-            model = cls(config)
-
-        resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path(
-            pretrained_model_name_or_path,
-            cache_dir=cache_dir,
-            subfolder=subfolder,
-            from_hf_hub=False,
-            from_aistudio=False,
-            config=config,
-            convert_from_torch=False,
-            use_safetensors=use_safetensors,
-            variant=variant,
-        )
-
-        model_path = os.path.dirname(resolved_archive_file)
-        state_dict = load_tp_checkpoint(model_path, cls, config)
-        model.set_state_dict(state_dict)
-        return model
+        return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)
 
     @classmethod
     def get_cache_kvs_shape(

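All five from_pretrained overrides touched by this commit previously duplicated the same checkpoint-loading boilerplate; the diff collapses each into a one-line call to a shared helper. Below is a minimal sketch of infererence_model_from_pretrained (the misspelled name is the actual identifier, defined in paddlenlp/experimental/transformers/utils.py), reconstructed on the assumption that the helper simply hoists the removed body; the real definition may differ in detail.

# Sketch of the shared loader, reconstructed from the body removed above.
# The actual helper lives in paddlenlp/experimental/transformers/utils.py;
# treat this as an illustration, not the committed source.
import os

import paddle

from paddlenlp.experimental.transformers.utils import load_tp_checkpoint
from paddlenlp.transformers.model_utils import dtype_guard, no_init_weights
from paddlenlp.transformers.utils import (
    ContextManagers,
    is_paddle_support_lazy_init,
    is_safetensors_available,
)


def infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs):
    """Instantiate an inference model and load its (possibly tensor-parallel) weights."""
    config = kwargs.pop("config", None)
    cache_dir = kwargs.pop("cache_dir", None)
    dtype = kwargs.pop("dtype", None)
    if dtype is None:
        dtype = config.dtype
    subfolder = kwargs.pop("subfolder", None) or ""
    variant = kwargs.pop("variant", None)
    use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
    low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)

    # Skip eager weight init (and use lazy tensors where supported) so quantized
    # or low-memory loads never materialize throwaway random weights.
    init_contexts = []
    if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
        init_contexts.append(no_init_weights(_enable=True))
        if is_paddle_support_lazy_init():
            init_contexts.append(paddle.LazyGuard())
    if dtype:
        init_contexts.append(dtype_guard(dtype))

    with ContextManagers(init_contexts):
        model = cls(config)

    # Resolve the checkpoint location on disk, then load it with
    # tensor-parallel splitting and push the weights into the model.
    resolved_archive_file, _, _, _ = cls._resolve_model_file_path(
        pretrained_model_name_or_path,
        cache_dir=cache_dir,
        subfolder=subfolder,
        from_hf_hub=False,
        from_aistudio=False,
        config=config,
        convert_from_torch=False,
        use_safetensors=use_safetensors,
        variant=variant,
    )
    model_path = os.path.dirname(resolved_archive_file)
    state_dict = load_tp_checkpoint(model_path, cls, config)
    model.set_state_dict(state_dict)
    return model

Consolidating the logic this way means a future change to checkpoint resolution or quantized loading lands in one place instead of five.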
paddlenlp/experimental/transformers/gpt/modeling.py

Lines changed: 2 additions & 51 deletions

@@ -13,8 +13,6 @@
 # limitations under the License.
 from __future__ import annotations
 
-import os
-
 import paddle
 from paddle import nn
 from paddle.distributed import fleet
@@ -28,24 +26,17 @@
 from paddlenlp.experimental.transformers.generation_utils import (
     GenerationInferenceModel,
 )
-from paddlenlp.experimental.transformers.utils import load_tp_checkpoint
+from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
 from paddlenlp.transformers import GPTConfig, GPTPretrainedModel
 from paddlenlp.transformers.gpt.modeling import GPTEmbeddings, parallel_matmul
 from paddlenlp.transformers.model_outputs import (
     BaseModelOutputWithPastAndCrossAttentions,
     CausalLMOutputWithCrossAttentions,
 )
 from paddlenlp.transformers.model_utils import (
-    dtype_guard,
     dy2st_nocheck_guard_context,
-    no_init_weights,
     register_base_model,
 )
-from paddlenlp.transformers.utils import (
-    ContextManagers,
-    is_paddle_support_lazy_init,
-    is_safetensors_available,
-)
 
 __all__ = ["GPTInferenceModel", "GPTForCausalLMInferenceModel"]
 
@@ -456,47 +447,7 @@ def __init__(self, config):
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
-        config = kwargs.pop("config", None)
-        cache_dir = kwargs.pop("cache_dir", None)
-        dtype = kwargs.pop("dtype", None)
-        if dtype is None:
-            dtype = config.dtype
-        subfolder = kwargs.pop("subfolder", None)
-        if subfolder is None:
-            subfolder = ""
-        variant = kwargs.pop("variant", None)
-        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
-
-        init_contexts = []
-        if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
-            # Instantiate model.
-            init_contexts.append(no_init_weights(_enable=True))
-            if is_paddle_support_lazy_init():
-                init_contexts.append(paddle.LazyGuard())
-        if dtype:
-            init_contexts.append(dtype_guard(dtype))
-
-        # init the model
-        with ContextManagers(init_contexts):
-            model = cls(config)
-
-        resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path(
-            pretrained_model_name_or_path,
-            cache_dir=cache_dir,
-            subfolder=subfolder,
-            from_hf_hub=False,
-            from_aistudio=False,
-            config=config,
-            convert_from_torch=False,
-            use_safetensors=use_safetensors,
-            variant=variant,
-        )
-
-        model_path = os.path.dirname(resolved_archive_file)
-        state_dict = load_tp_checkpoint(model_path, cls, config)
-        model.set_state_dict(state_dict)
-        return model
+        return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)
 
     @classmethod
     def get_cache_kvs_shape(

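Because the refactor only moves the loading logic, call sites are unaffected. A hedged usage sketch follows; the checkpoint name gpt2-en is illustrative and not taken from this commit.

from paddlenlp.experimental.transformers.gpt.modeling import (
    GPTForCausalLMInferenceModel,
)
from paddlenlp.transformers import GPTConfig

# The shared loader reads config.dtype and config.quantization_config
# while loading, so a config object is still passed in explicitly.
config = GPTConfig.from_pretrained("gpt2-en")
model = GPTForCausalLMInferenceModel.from_pretrained(
    "gpt2-en",
    config=config,
    dtype="float16",
)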
paddlenlp/experimental/transformers/llama/modeling.py

Lines changed: 4 additions & 132 deletions

@@ -43,7 +43,7 @@
     GenerationBlockInferenceModel,
     GenerationInferenceModel,
 )
-from paddlenlp.experimental.transformers.utils import load_tp_checkpoint
+from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
 from paddlenlp.transformers import LlamaConfig, LlamaPretrainedModel
 from paddlenlp.transformers.conversion_utils import split_param_func
 from paddlenlp.transformers.llama.modeling import LlamaLMHead
@@ -52,16 +52,9 @@
     CausalLMOutputWithCrossAttentions,
 )
 from paddlenlp.transformers.model_utils import (
-    dtype_guard,
     dy2st_nocheck_guard_context,
-    no_init_weights,
     register_base_model,
 )
-from paddlenlp.transformers.utils import (
-    ContextManagers,
-    is_paddle_support_lazy_init,
-    is_safetensors_available,
-)
 from paddlenlp.utils.log import logger
 
 __all__ = [
@@ -1147,47 +1140,7 @@ def __init__(self, config):
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
-        config = kwargs.pop("config", None)
-        cache_dir = kwargs.pop("cache_dir", None)
-        dtype = kwargs.pop("dtype", None)
-        if dtype is None:
-            dtype = config.dtype
-        subfolder = kwargs.pop("subfolder", None)
-        if subfolder is None:
-            subfolder = ""
-        variant = kwargs.pop("variant", None)
-        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
-
-        init_contexts = []
-        if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
-            # Instantiate model.
-            init_contexts.append(no_init_weights(_enable=True))
-            if is_paddle_support_lazy_init():
-                init_contexts.append(paddle.LazyGuard())
-        if dtype:
-            init_contexts.append(dtype_guard(dtype))
-
-        # init the model
-        with ContextManagers(init_contexts):
-            model = cls(config)
-
-        resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path(
-            pretrained_model_name_or_path,
-            cache_dir=cache_dir,
-            subfolder=subfolder,
-            from_hf_hub=False,
-            from_aistudio=False,
-            config=config,
-            convert_from_torch=False,
-            use_safetensors=use_safetensors,
-            variant=variant,
-        )
-
-        model_path = os.path.dirname(resolved_archive_file)
-        state_dict = load_tp_checkpoint(model_path, cls, config)
-        model.set_state_dict(state_dict)
-        return model
+        return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)
 
     @classmethod
     def get_cache_kvs_shape(
@@ -1284,47 +1237,7 @@ def __init__(self, config):
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
-        config = kwargs.pop("config", None)
-        cache_dir = kwargs.pop("cache_dir", None)
-        dtype = kwargs.pop("dtype", None)
-        if dtype is None:
-            dtype = config.dtype
-        subfolder = kwargs.pop("subfolder", None)
-        if subfolder is None:
-            subfolder = ""
-        variant = kwargs.pop("variant", None)
-        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
-
-        init_contexts = []
-        if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
-            # Instantiate model.
-            init_contexts.append(no_init_weights(_enable=True))
-            if is_paddle_support_lazy_init():
-                init_contexts.append(paddle.LazyGuard())
-        if dtype:
-            init_contexts.append(dtype_guard(dtype))
-
-        # init the model
-        with ContextManagers(init_contexts):
-            model = cls(config)
-
-        resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path(
-            pretrained_model_name_or_path,
-            cache_dir=cache_dir,
-            subfolder=subfolder,
-            from_hf_hub=False,
-            from_aistudio=False,
-            config=config,
-            convert_from_torch=False,
-            use_safetensors=use_safetensors,
-            variant=variant,
-        )
-
-        model_path = os.path.dirname(resolved_archive_file)
-        state_dict = load_tp_checkpoint(model_path, cls, config)
-        model.set_state_dict(state_dict)
-        return model
+        return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)
 
     @classmethod
     def get_cache_kvs_shape(
@@ -1561,48 +1474,7 @@ def get_tensor_parallel_split_mappings(num_layers):
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
-        config = kwargs.pop("config", None)
-        cache_dir = kwargs.pop("cache_dir", None)
-        dtype = kwargs.pop("dtype", None)
-        if dtype is None:
-            dtype = config.dtype
-        subfolder = kwargs.pop("subfolder", None)
-        if subfolder is None:
-            subfolder = ""
-        variant = kwargs.pop("variant", None)
-        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
-
-        init_contexts = []
-        if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
-            # Instantiate model.
-            init_contexts.append(no_init_weights(_enable=True))
-            if is_paddle_support_lazy_init():
-                init_contexts.append(paddle.LazyGuard())
-        if dtype:
-            init_contexts.append(dtype_guard(dtype))
-
-        # init the model
-        with ContextManagers(init_contexts):
-            model = cls(config)
-
-        resolved_archive_file, resolved_sharded_files, sharded_metadata, is_sharded = cls._resolve_model_file_path(
-            pretrained_model_name_or_path,
-            cache_dir=cache_dir,
-            subfolder=subfolder,
-            from_hf_hub=False,
-            from_aistudio=False,
-            config=config,
-            convert_from_torch=False,
-            use_safetensors=use_safetensors,
-            variant=variant,
-        )
-
-        model_path = os.path.dirname(resolved_archive_file)
-        state_dict = load_tp_checkpoint(model_path, cls, config)
-        model.set_state_dict(state_dict)
-
-        return model
+        return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)
 
     @classmethod
     def get_cache_kvs_shape(

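Note the forwarding convention in the new one-liners: args and kwargs are passed to the helper as a plain tuple and dict (args, kwargs) rather than re-unpacked with * and **, so the helper can destructively pop options out of the very dict it was handed. A minimal illustration of the pattern, with all names hypothetical:

class _ExampleModel:
    @classmethod
    def from_pretrained(cls, path, *args, **kwargs):
        # Forward the containers themselves; the helper consumes options.
        return _shared_loader(cls, path, args, kwargs)

def _shared_loader(cls, path, args, kwargs):
    dtype = kwargs.pop("dtype", None)  # option consumed here, exactly once
    return (cls.__name__, path, dtype, args, dict(kwargs))

print(_ExampleModel.from_pretrained("./ckpt", dtype="float16"))
# -> ('_ExampleModel', './ckpt', 'float16', (), {})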