23
23
from paddlenlp import __version__
24
24
from paddlenlp .utils .downloader import COMMUNITY_MODEL_PREFIX , get_path_from_url
25
25
from paddlenlp .utils .env import HF_CACHE_HOME , MODEL_HOME
26
- from paddlenlp .utils .import_utils import is_fast_tokenizer_available
26
+ from paddlenlp .utils .import_utils import import_module , is_fast_tokenizer_available
27
27
from paddlenlp .utils .log import logger
28
28
29
29
__all__ = [
@@ -154,13 +154,31 @@ def _get_tokenizer_class_from_config(cls, pretrained_model_name_or_path, config_
154
154
155
155
if init_class :
156
156
class_name = cls ._name_mapping [init_class ]
157
- import_class = importlib . import_module (f"paddlenlp.transformers.{ class_name } .tokenizer" )
157
+ import_class = import_module (f"paddlenlp.transformers.{ class_name } .tokenizer" )
158
158
tokenizer_class = getattr (import_class , init_class )
159
159
if use_fast :
160
- for fast_tokenizer_class , name in cls ._fast_name_mapping .items ():
161
- if name == class_name :
162
- import_class = importlib .import_module (f"paddlenlp.transformers.{ class_name } .fast_tokenizer" )
163
- tokenizer_class = getattr (import_class , fast_tokenizer_class )
160
+ if is_fast_tokenizer_available ():
161
+ is_support_fast_tokenizer = False
162
+ init_class_prefix = init_class [:- 9 ]
163
+ for fast_tokenizer_class , name in cls ._fast_name_mapping .items ():
164
+ fast_tokenizer_class_prefix = fast_tokenizer_class [:- 9 ]
165
+ if name == class_name and fast_tokenizer_class_prefix .startswith (init_class_prefix ):
166
+ is_support_fast_tokenizer = True
167
+ import_class = import_module (f"paddlenlp.transformers.{ class_name } .fast_tokenizer" )
168
+ tokenizer_class = getattr (import_class , fast_tokenizer_class )
169
+ break
170
+ if not is_support_fast_tokenizer :
171
+ logger .warning (
172
+ f"The tokenizer { tokenizer_class } doesn't have the fast version."
173
+ " Please check the map `paddlenlp.transformers.auto.tokenizer.FAST_TOKENIZER_MAPPING_NAMES`"
174
+ " to see which fast tokenizers are currently supported."
175
+ )
176
+ else :
177
+ logger .warning (
178
+ "Can't find the fast_tokenizer package, "
179
+ "please ensure install fast_tokenizer correctly. "
180
+ "You can install fast_tokenizer by `pip install fast-tokenizer-python`."
181
+ )
164
182
return tokenizer_class
165
183
# If no `init_class`, we use pattern recognition to recognize the tokenizer class.
166
184
else :
@@ -170,7 +188,7 @@ def _get_tokenizer_class_from_config(cls, pretrained_model_name_or_path, config_
170
188
if pattern in pretrained_model_name_or_path .lower ():
171
189
init_class = key
172
190
class_name = cls ._name_mapping [init_class ]
173
- import_class = importlib . import_module (f"paddlenlp.transformers.{ class_name } .tokenizer" )
191
+ import_class = import_module (f"paddlenlp.transformers.{ class_name } .tokenizer" )
174
192
tokenizer_class = getattr (import_class , init_class )
175
193
return tokenizer_class
176
194
0 commit comments