
Commit c4d3a2f

Revert "[Tokenier] Enable padding_side as call time kwargs (#9161)" (#9192)
This reverts commit c5e6db5.
1 parent c5e6db5 commit c4d3a2f
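
In practice, this revert removes the call-time padding_side keyword that #9161 had added to these tokenizers' __call__ and _pad signatures; the padding side is once again taken only from the tokenizer instance's padding_side attribute. A minimal usage sketch of the difference (the checkpoint name and inputs are illustrative, not taken from this commit):

from paddlenlp.transformers import AutoTokenizer

# Illustrative checkpoint name, used here only for the sketch.
tokenizer = AutoTokenizer.from_pretrained("facebook/llama-7b")

# Before this revert (#9161), the side could be chosen per call:
#   batch = tokenizer(["a", "b c d"], padding=True, padding_side="left")
# After this revert, that keyword is gone; configure the instance instead:
tokenizer.padding_side = "left"
batch = tokenizer(["a", "b c d"], padding=True)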

File tree

15 files changed, +57 -186 lines changed


paddlenlp/transformers/artist/tokenizer.py

Lines changed: 0 additions & 2 deletions
@@ -225,7 +225,6 @@ def __call__(
         return_offsets_mapping=False,
         add_special_tokens=True,
         pad_to_multiple_of=None,
-        padding_side=None,
         return_tensors=None,
         verbose: bool = True,
         **kwargs
@@ -248,7 +247,6 @@ def __call__(
             return_offsets_mapping,
             add_special_tokens,
             pad_to_multiple_of,
-            padding_side,
             return_tensors,
             verbose,
             **kwargs,

paddlenlp/transformers/bloom/tokenizer.py

Lines changed: 3 additions & 7 deletions
@@ -18,7 +18,7 @@
 import os
 import shutil
 from functools import lru_cache
-from typing import Dict, Literal, Optional, Union
+from typing import Dict, Optional, Union
 
 import numpy as np
 from paddle.utils import try_import
@@ -360,7 +360,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -376,16 +375,13 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -398,7 +394,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
         )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask

paddlenlp/transformers/chatglm/tokenizer.py

Lines changed: 2 additions & 4 deletions
@@ -14,7 +14,7 @@
 
 """Tokenization classes for ChatGLM."""
 import os
-from typing import Dict, List, Literal, Optional, Union
+from typing import Dict, List, Optional, Union
 
 import numpy as np
 import sentencepiece as spm
@@ -218,15 +218,13 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy=PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         # Load from model defaults
         if return_attention_mask is None:
             return_attention_mask = "attention_mask" in self.model_input_names or "attention_mask" in encoded_inputs
 
-        padding_side = padding_side if padding_side is not None else self.padding_side
-        assert padding_side == "left"
+        assert self.padding_side == "left"
         required_input = encoded_inputs[self.model_input_names[0]]
         seq_length = len(required_input)
 
paddlenlp/transformers/chatglm_v2/tokenizer.py

Lines changed: 4 additions & 9 deletions
@@ -15,7 +15,7 @@
 
 import os
 import re
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
 from sentencepiece import SentencePieceProcessor
@@ -244,7 +244,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -260,22 +259,18 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
-            >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
+            `>= 7.5` (Volta).
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
         # Load from model defaults
-        padding_side = padding_side if padding_side is not None else self.padding_side
-        assert padding_side == "left"
+        assert self.padding_side == "left"
 
         required_input = encoded_inputs[self.model_input_names[0]]
         seq_length = len(required_input)
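
Both ChatGLM tokenizers keep their hard requirement of left padding, but after the revert the check runs against the instance attribute (assert self.padding_side == "left") instead of a call-time argument. A short sketch of what that means for callers (the checkpoint name is illustrative, not taken from this commit):

from paddlenlp.transformers import ChatGLMTokenizer

# Illustrative checkpoint name.
tokenizer = ChatGLMTokenizer.from_pretrained("THUDM/chatglm-6b")

# _pad() asserts self.padding_side == "left"; there is no per-call override,
# so the side has to be set on the instance before padding is requested.
tokenizer.padding_side = "left"
batch = tokenizer(["hello", "hello world"], padding=True)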

paddlenlp/transformers/dallebart/tokenizer.py

Lines changed: 0 additions & 2 deletions
@@ -464,7 +464,6 @@ def __call__(
         return_offsets_mapping=False,
         add_special_tokens=True,
         pad_to_multiple_of=None,
-        padding_side=None,
         return_tensors=None,
         verbose: bool = True,
         **kwargs
@@ -498,7 +497,6 @@ def __call__(
             return_offsets_mapping,
             add_special_tokens,
             pad_to_multiple_of,
-            padding_side,
             return_tensors,
             verbose,
             **kwargs,

paddlenlp/transformers/gemma/tokenizer.py

Lines changed: 2 additions & 6 deletions
@@ -15,7 +15,7 @@
 
 import os
 from shutil import copyfile
-from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import sentencepiece as spm
@@ -323,7 +323,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -346,9 +345,6 @@ def _pad(
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -363,7 +359,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
        )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask

paddlenlp/transformers/gpt/tokenizer.py

Lines changed: 3 additions & 7 deletions
@@ -17,7 +17,7 @@
 import os
 import shutil
 from functools import lru_cache
-from typing import Dict, Literal, Optional, Union
+from typing import Dict, Optional, Union
 
 import jieba
 import numpy as np
@@ -584,7 +584,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -600,16 +599,13 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -624,7 +620,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
         )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask

paddlenlp/transformers/llama/tokenizer.py

Lines changed: 5 additions & 13 deletions
@@ -15,7 +15,7 @@
 
 import os
 from shutil import copyfile
-from typing import Dict, List, Literal, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import sentencepiece as spm
@@ -232,7 +232,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -248,16 +247,13 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -272,7 +268,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
         )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask
@@ -525,7 +521,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -541,16 +536,13 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -565,7 +557,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
         )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask

paddlenlp/transformers/mamba/tokenizer.py

Lines changed: 3 additions & 7 deletions
@@ -18,7 +18,7 @@
 import os
 import shutil
 from functools import lru_cache
-from typing import Dict, Literal, Optional, Union
+from typing import Dict, Optional, Union
 
 import numpy as np
 from paddle.utils import try_import
@@ -302,7 +302,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -318,16 +317,13 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -342,7 +338,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
         )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask

paddlenlp/transformers/qwen/tokenizer.py

Lines changed: 3 additions & 7 deletions
@@ -17,7 +17,7 @@
 import base64
 import os
 import unicodedata
-from typing import Collection, Dict, List, Literal, Optional, Set, Tuple, Union
+from typing import Collection, Dict, List, Optional, Set, Tuple, Union
 
 import numpy as np
 
@@ -255,7 +255,6 @@ def _pad(
         max_length: Optional[int] = None,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
-        padding_side: Optional[Literal["right", "left"]] = None,
         return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
@@ -271,16 +270,13 @@ def _pad(
             - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
             - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
             - PaddingStrategy.DO_NOT_PAD: Do not pad
-            The tokenizer padding sides are defined in `padding_side` argument:
+            The tokenizer padding sides are defined in self.padding_side:
 
                 - 'left': pads on the left of the sequences
                 - 'right': pads on the right of the sequences
         pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
             This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
             >= 7.5 (Volta).
-        padding_side: (optional) The side on which the model should have padding applied.
-            Should be selected between ['right', 'left'].
-            Default value is picked from the class attribute of the same name.
         return_attention_mask:
             (optional) Set to False to avoid returning attention mask (default: set to model specifics)
         """
@@ -295,7 +291,7 @@ def _pad(
 
         required_input = encoded_inputs[self.model_input_names[0]]
         encoded_inputs = super()._pad(
-            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, padding_side, return_attention_mask
+            encoded_inputs, max_length, padding_strategy, pad_to_multiple_of, return_attention_mask
        )
         if attention_mask is not None and len(np.shape(attention_mask)) > 2:
             encoded_inputs["attention_mask"] = attention_mask
