From 52a18de38f31065e2e60b5f172db666c26997aef Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Tue, 23 Aug 2022 03:01:51 +0000
Subject: [PATCH 001/159] fix multi-layer-inherit
---
.../transformers/tokenizer_utils_base.py | 7 +++
tests/transformers/test_tokenizer_util.py | 57 +++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 tests/transformers/test_tokenizer_util.py
diff --git a/paddlenlp/transformers/tokenizer_utils_base.py b/paddlenlp/transformers/tokenizer_utils_base.py
index bccbb02b2ecc..04dafd6acb43 100644
--- a/paddlenlp/transformers/tokenizer_utils_base.py
+++ b/paddlenlp/transformers/tokenizer_utils_base.py
@@ -1353,8 +1353,15 @@ class PretrainedTokenizerBase(SpecialTokensMixin):
truncation_side: str = "right"
slow_tokenizer_class = None
+ # tag for init_kwargs
+ _have_done_init: bool = False
+
def __init__(self, **kwargs):
# inputs and kwargs for saving and re-loading (see ``from_pretrained`` and ``save_pretrained``)
+ if self._have_done_init:
+ return
+ self._have_done_init = True
+
self.init_inputs = ()
self.init_kwargs = copy.deepcopy(kwargs)
self.name_or_path = kwargs.pop("name_or_path", "")
diff --git a/tests/transformers/test_tokenizer_util.py b/tests/transformers/test_tokenizer_util.py
new file mode 100644
index 000000000000..c222ad5fd461
--- /dev/null
+++ b/tests/transformers/test_tokenizer_util.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import unittest
+import json
+import tempfile
+
+from paddlenlp.transformers.tokenizer_utils import PretrainedTokenizer
+
+
+class EmptyTokenizer(PretrainedTokenizer):
+ # Test fixture: accepts a/b, does nothing with them and never calls super().__init__() itself.
+ def __init__(self, a=1, b=2):
+ pass
+
+
+class SubEmptyTokenizer(EmptyTokenizer):
+ # Second inheritance level: forwards its own c/d to EmptyTokenizer as a/b.
+ def __init__(self, c=3, d=4):
+ super().__init__(a=c, b=d)
+
+
+class TokenizerUtilsTest(unittest.TestCase):
+    """Init kwargs of a two-level tokenizer subclass are tracked and saved."""
+    def test_multi_inherit(self):
+        tokenizer = SubEmptyTokenizer()
+        # The outermost constructor's argument name ('c') must be recorded.
+        self.assertIn('c', tokenizer.init_kwargs)
+        self.assertEqual(tokenizer.init_kwargs['c'], 3)
+
+    def test_config(self):
+        # TemporaryDirectory cleans up on exit; a bare mkdtemp() leaks the dir.
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            tokenizer = SubEmptyTokenizer()
+            tokenizer.save_pretrained(tmpdirname)
+
+            with open(os.path.join(tmpdirname, "tokenizer_config.json"),
+                      'r',
+                      encoding='utf-8') as f:
+                data = json.load(f)
+
+        self.assertIn('c', data)
+        self.assertEqual(data['c'], 3)
+        self.assertEqual(data['tokenizer_class'], "SubEmptyTokenizer")
From 7b50b5450a5cffd58c415b2126eb82bae39c073c Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 24 Aug 2022 06:29:58 +0000
Subject: [PATCH 002/159] update bert model unittest
---
tests/transformers/bert/test_modeling.py | 156 +++++++++++++----------
1 file changed, 88 insertions(+), 68 deletions(-)
diff --git a/tests/transformers/bert/test_modeling.py b/tests/transformers/bert/test_modeling.py
index 7b2a7e093a86..c95a3597c950 100644
--- a/tests/transformers/bert/test_modeling.py
+++ b/tests/transformers/bert/test_modeling.py
@@ -15,44 +15,45 @@
import unittest
import paddle
+from parameterized import parameterized_class
from paddlenlp.transformers import BertModel, BertForQuestionAnswering, BertForSequenceClassification,\
BertForTokenClassification, BertForPretraining, BertForMultipleChoice, BertForMaskedLM, BertPretrainedModel
-from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
-from ...testing_utils import slow
+
+from tests.testing_utils import slow
+from tests.transformers.test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
class BertModelTester:
- def __init__(
- self,
- parent,
- batch_size=13,
- seq_length=7,
- is_training=True,
- use_input_mask=True,
- use_token_type_ids=True,
- use_labels=True,
- vocab_size=99,
- hidden_size=32,
- num_hidden_layers=5,
- num_attention_heads=4,
- intermediate_size=37,
- hidden_act="gelu",
- hidden_dropout_prob=0.1,
- attention_probs_dropout_prob=0.1,
- max_position_embeddings=512,
- type_vocab_size=16,
- initializer_range=0.02,
- pad_token_id=0,
- pool_act="tanh",
- fuse=False,
- type_sequence_label_size=2,
- num_labels=3,
- num_choices=4,
- num_classes=3,
- scope=None,
- ):
+ def __init__(self,
+ parent,
+ batch_size=13,
+ seq_length=7,
+ is_training=True,
+ use_input_mask=True,
+ use_token_type_ids=True,
+ use_labels=True,
+ vocab_size=99,
+ hidden_size=32,
+ num_hidden_layers=5,
+ num_attention_heads=4,
+ intermediate_size=37,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ initializer_range=0.02,
+ pad_token_id=0,
+ pool_act="tanh",
+ fuse=False,
+ type_sequence_label_size=2,
+ num_labels=3,
+ num_choices=4,
+ num_classes=3,
+ scope=None,
+ return_dict=False):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
@@ -79,6 +80,7 @@ def __init__(
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
+ self.return_dict = return_dict
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -140,8 +142,10 @@ def create_and_check_model(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids)
- result = model(input_ids, token_type_ids=token_type_ids)
- result = model(input_ids)
+ result = model(input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids, return_dict=self.return_dict)
self.parent.assertEqual(
result[0].shape,
[self.batch_size, self.seq_length, self.hidden_size])
@@ -163,7 +167,8 @@ def create_and_check_for_masked_lm(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- labels=token_labels)
+ labels=token_labels,
+ return_dict=self.return_dict)
self.parent.assertEqual(
result[1].shape,
[self.batch_size, self.seq_length, self.vocab_size])
@@ -181,12 +186,15 @@ def create_and_check_model_past_large_inputs(
model = BertModel(**config)
model.eval()
+ self.return_dict = False
+
# first forward pass
outputs = model(input_ids,
attention_mask=input_mask,
use_cache=True,
- return_dict=True)
- past_key_values = outputs.past_key_values
+ return_dict=self.return_dict)
+ past_key_values = outputs.past_key_values if self.return_dict else outputs[
+ 2]
# create hypothetical multiple next token and extent to next_input_ids
next_tokens = ids_tensor((self.batch_size, 3), self.vocab_size)
@@ -196,15 +204,26 @@ def create_and_check_model_past_large_inputs(
next_input_ids = paddle.concat([input_ids, next_tokens], axis=-1)
next_attention_mask = paddle.concat([input_mask, next_mask], axis=-1)
- output_from_no_past = model(next_input_ids,
- attention_mask=next_attention_mask,
- output_hidden_states=True,
- return_dict=True)["hidden_states"][0]
- output_from_past = model(next_tokens,
- attention_mask=next_attention_mask,
- past_key_values=past_key_values,
- output_hidden_states=True,
- return_dict=True)["hidden_states"][0]
+ outputs = model(next_input_ids,
+ attention_mask=next_attention_mask,
+ output_hidden_states=True,
+ return_dict=self.return_dict)
+
+ if self.return_dict:
+ output_from_no_past = outputs['hidden_states'][0]
+ else:
+ output_from_no_past = outputs[2][0]
+
+ outputs = model(next_tokens,
+ attention_mask=next_attention_mask,
+ past_key_values=past_key_values,
+ output_hidden_states=True,
+ return_dict=self.return_dict)
+
+ if self.return_dict:
+ output_from_past = outputs['hidden_states'][0]
+ else:
+ output_from_past = outputs[2][0]
# select random slice
random_slice_idx = ids_tensor((1, ), output_from_past.shape[-1]).item()
@@ -235,13 +254,12 @@ def create_and_check_for_pretraining(
):
model = BertForPretraining(BertModel(**config))
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- labels=token_labels,
- next_sentence_label=sequence_labels,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ labels=token_labels,
+ next_sentence_label=sequence_labels,
+ return_dict=self.return_dict)
self.parent.assertEqual(
result[1].shape,
[self.batch_size, self.seq_length, self.vocab_size])
@@ -266,12 +284,11 @@ def create_and_check_for_multiple_choice(
[-1, self.num_choices, -1])
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(
[-1, self.num_choices, -1])
- result = model(
- multiple_choice_inputs_ids,
- attention_mask=multiple_choice_input_mask,
- token_type_ids=multiple_choice_token_type_ids,
- labels=choice_labels,
- )
+ result = model(multiple_choice_inputs_ids,
+ attention_mask=multiple_choice_input_mask,
+ token_type_ids=multiple_choice_token_type_ids,
+ labels=choice_labels,
+ return_dict=self.return_dict)
self.parent.assertEqual(result[1].shape,
[self.batch_size, self.num_choices])
@@ -287,13 +304,12 @@ def create_and_check_for_question_answering(
):
model = BertForQuestionAnswering(BertModel(**config))
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- start_positions=sequence_labels,
- end_positions=sequence_labels,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ start_positions=sequence_labels,
+ end_positions=sequence_labels,
+ return_dict=self.return_dict)
self.parent.assertEqual(result[1].shape,
[self.batch_size, self.seq_length])
self.parent.assertEqual(result[2].shape,
@@ -315,7 +331,8 @@ def create_and_check_for_sequence_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- labels=sequence_labels)
+ labels=sequence_labels,
+ return_dict=self.return_dict)
self.parent.assertEqual(result[1].shape,
[self.batch_size, self.num_classes])
@@ -335,7 +352,8 @@ def create_and_check_for_token_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- labels=token_labels)
+ labels=token_labels,
+ return_dict=self.return_dict)
self.parent.assertEqual(
result[1].shape,
[self.batch_size, self.seq_length, self.num_classes])
@@ -359,8 +377,10 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
+@parameterized_class(("return_dict", ), [[True], [False]])
class BertModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = BertModel
+ return_dict = False
all_model_classes = (
BertModel,
@@ -373,7 +393,7 @@ class BertModelTest(ModelTesterMixin, unittest.TestCase):
)
def setUp(self):
- self.model_tester = BertModelTester(self)
+ self.model_tester = BertModelTester(self, return_dict=self.return_dict)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From b1629c0cdfdc5c96e83dc733f5325500697d457f Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 24 Aug 2022 06:35:49 +0000
Subject: [PATCH 003/159] update requirements.txt
---
requirements.txt | 1 +
1 file changed, 1 insertion(+)
diff --git a/requirements.txt b/requirements.txt
index f92e71484d98..ab13cb3a4db1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ paddlefsl
sentencepiece
paddle2onnx
protobuf>=3.1.0, <=3.20.0
+parameterized
\ No newline at end of file
From d6f58c836f3de7cca2f3d14a249dfbd3d4b41ed2 Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 24 Aug 2022 06:57:47 +0000
Subject: [PATCH 004/159] update ernie modeling test
---
tests/transformers/ernie/test_modeling.py | 61 +++++++++++++++--------
1 file changed, 40 insertions(+), 21 deletions(-)
diff --git a/tests/transformers/ernie/test_modeling.py b/tests/transformers/ernie/test_modeling.py
index d6735344263a..7cd6ab575143 100644
--- a/tests/transformers/ernie/test_modeling.py
+++ b/tests/transformers/ernie/test_modeling.py
@@ -15,9 +15,11 @@
import unittest
import paddle
+from parameterized import parameterized_class
from paddlenlp.transformers import ErnieModel, ErnieForQuestionAnswering, ErnieForSequenceClassification,\
ErnieForTokenClassification, ErnieForPretraining, ErnieForMultipleChoice, ErnieForMaskedLM, ErniePretrainedModel
+
from ...transformers.test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
from ...testing_utils import slow
@@ -49,6 +51,7 @@ def __init__(
num_choices=4,
num_classes=3,
scope=None,
+ return_dict=False,
):
self.parent = parent
self.batch_size = batch_size
@@ -73,6 +76,7 @@ def __init__(
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
+ self.return_dict = return_dict
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -118,9 +122,12 @@ def create_and_check_model(
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- result = model(input_ids, token_type_ids=token_type_ids)
- result = model(input_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids, return_dict=self.return_dict)
self.parent.assertEqual(
result[0].shape,
[self.batch_size, self.seq_length, self.hidden_size])
@@ -138,7 +145,11 @@ def create_and_check_for_masked_lm(
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.vocab_size])
@@ -158,11 +169,12 @@ def create_and_check_for_multiple_choice(
[-1, self.num_choices, -1])
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(
[-1, self.num_choices, -1])
- result = model(
- multiple_choice_inputs_ids,
- attention_mask=multiple_choice_input_mask,
- token_type_ids=multiple_choice_token_type_ids,
- )
+ result = model(multiple_choice_inputs_ids,
+ attention_mask=multiple_choice_input_mask,
+ token_type_ids=multiple_choice_token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
self.parent.assertEqual(result.shape,
[self.batch_size, self.num_choices])
@@ -170,11 +182,10 @@ def create_and_check_for_question_answering(self, config, input_ids,
token_type_ids, input_mask):
model = ErnieForQuestionAnswering(ErnieModel(**config))
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
self.parent.assertEqual(result[0].shape,
[self.batch_size, self.seq_length])
self.parent.assertEqual(result[1].shape,
@@ -190,11 +201,13 @@ def create_and_check_for_sequence_classification(
model = ErnieForSequenceClassification(ErnieModel(**config),
num_classes=self.num_classes)
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(result.shape,
[self.batch_size, self.num_classes])
@@ -210,7 +223,11 @@ def create_and_check_for_token_classification(
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.num_classes])
@@ -230,8 +247,10 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
+@parameterized_class(("return_dict", ), [[False], [True]])
class ErnieModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = ErnieModel
+ return_dict = False
all_model_classes = (
ErnieModel,
@@ -244,7 +263,7 @@ class ErnieModelTest(ModelTesterMixin, unittest.TestCase):
)
def setUp(self):
- self.model_tester = ErnieModelTester(self)
+ self.model_tester = ErnieModelTester(self, return_dict=self.return_dict)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From 522becff954ae2b851b582b7bd06d16f8d69f22a Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 24 Aug 2022 07:25:47 +0000
Subject: [PATCH 005/159] update roberta unittest
---
paddlenlp/transformers/roberta/modeling.py | 8 ++-
tests/transformers/roberta/test_modeling.py | 75 ++++++++++++++-------
2 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/paddlenlp/transformers/roberta/modeling.py b/paddlenlp/transformers/roberta/modeling.py
index 50584a7376d7..f852663a7932 100644
--- a/paddlenlp/transformers/roberta/modeling.py
+++ b/paddlenlp/transformers/roberta/modeling.py
@@ -810,9 +810,13 @@ def forward(self,
loss = loss_fct(logits.reshape((-1, self.num_classes)),
labels.reshape((-1, )))
if not return_dict:
+
output = (logits, ) + outputs[2:]
- return ((loss, ) + output) if loss is not None else (
- output[0] if len(output) == 1 else output)
+ if loss is not None:
+ return (loss, ) + output
+ if len(output) == 1:
+ return output[0]
+ return output
return TokenClassifierOutput(
loss=loss,
diff --git a/tests/transformers/roberta/test_modeling.py b/tests/transformers/roberta/test_modeling.py
index 66e350c411ec..f38dbc0277b1 100644
--- a/tests/transformers/roberta/test_modeling.py
+++ b/tests/transformers/roberta/test_modeling.py
@@ -15,6 +15,7 @@
import unittest
import paddle
+from parameterized import parameterized_class
from paddlenlp.transformers import (
RobertaPretrainedModel,
@@ -35,10 +36,7 @@
class RobertaModelTester:
- def __init__(
- self,
- parent,
- ):
+ def __init__(self, parent, return_dict: bool = False):
self.parent = parent
self.batch_size = 13
self.seq_length = 7
@@ -64,6 +62,7 @@ def __init__(
self.num_labels = 3
self.num_choices = 4
self.scope = None
+ self.return_dict = return_dict
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -121,14 +120,23 @@ def create_and_check_model(self, config, input_ids, token_type_ids,
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- result = model(input_ids, token_type_ids=token_type_ids)
- result = model(input_ids, return_dict=True)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids, return_dict=self.return_dict)
+
+ if self.return_dict:
+ last_hidden_state = result.last_hidden_state
+ pooler_output = result.pooler_output
+ else:
+ last_hidden_state, pooler_output = result[0], result[1]
self.parent.assertEqual(
- result.last_hidden_state.shape,
+ last_hidden_state.shape,
[self.batch_size, self.seq_length, self.hidden_size])
- self.parent.assertEqual(result.pooler_output.shape,
+ self.parent.assertEqual(pooler_output.shape,
[self.batch_size, self.hidden_size])
def create_and_check_for_causal_lm(
@@ -143,10 +151,12 @@ def create_and_check_for_causal_lm(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=True)
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(
- result.logits.shape,
- [self.batch_size, self.seq_length, self.vocab_size])
+ result.shape, [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_for_masked_lm(
self,
@@ -160,10 +170,12 @@ def create_and_check_for_masked_lm(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=True)
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(
- result.logits.shape,
- [self.batch_size, self.seq_length, self.vocab_size])
+ result.shape, [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_for_token_classification(self, config, input_ids,
token_type_ids, input_mask):
@@ -174,10 +186,13 @@ def create_and_check_for_token_classification(self, config, input_ids,
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=True)
+ return_dict=self.return_dict)
+
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(
- result.logits.shape,
- [self.batch_size, self.seq_length, self.num_labels])
+ result.shape, [self.batch_size, self.seq_length, self.num_labels])
def create_and_check_for_multiple_choice(self, config, input_ids,
token_type_ids, input_mask):
@@ -192,8 +207,11 @@ def create_and_check_for_multiple_choice(self, config, input_ids,
result = model(multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
- return_dict=True)
- self.parent.assertEqual(result.logits.shape,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
+ self.parent.assertEqual(result.shape,
[self.batch_size, self.num_choices])
def create_and_check_for_question_answering(self, config, input_ids,
@@ -203,10 +221,16 @@ def create_and_check_for_question_answering(self, config, input_ids,
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=True)
- self.parent.assertEqual(result.start_logits.shape,
+ return_dict=self.return_dict)
+
+ if self.return_dict:
+ start_logits, end_logits = result.start_logits, result.end_logits
+ else:
+ start_logits, end_logits = result[0], result[1]
+
+ self.parent.assertEqual(start_logits.shape,
[self.batch_size, self.seq_length])
- self.parent.assertEqual(result.end_logits.shape,
+ self.parent.assertEqual(end_logits.shape,
[self.batch_size, self.seq_length])
def prepare_config_and_inputs_for_common(self):
@@ -225,8 +249,10 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
+@parameterized_class(("return_dict", ), [[True], [False]])
class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = RobertaModel
+ return_dict = False
all_model_classes = (
RobertaForCausalLM,
@@ -240,7 +266,8 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
all_generative_model_classes = (RobertaForCausalLM, )
def setUp(self):
- self.model_tester = RobertaModelTester(self)
+ self.model_tester = RobertaModelTester(self,
+ return_dict=self.return_dict)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From 0f133c04acda5b49325562dd12446854218256a0 Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 24 Aug 2022 08:37:49 +0000
Subject: [PATCH 006/159] update roformer modeling testing
---
tests/transformers/roformer/test_modeling.py | 109 ++++++++++++-------
1 file changed, 70 insertions(+), 39 deletions(-)
diff --git a/tests/transformers/roformer/test_modeling.py b/tests/transformers/roformer/test_modeling.py
index 23a46ddc3c93..786dad6819b2 100644
--- a/tests/transformers/roformer/test_modeling.py
+++ b/tests/transformers/roformer/test_modeling.py
@@ -18,12 +18,14 @@
from dataclasses import dataclass, fields, Field
import paddle
+from parameterized import parameterized_class
-from paddlenlp.transformers import (
- RoFormerModel, RoFormerPretrainedModel, RoFormerForPretraining,
- RoFormerForSequenceClassification, RoFormerForTokenClassification,
- RoFormerForQuestionAnswering, RoFormerForMultipleChoice,
- RoFormerForMaskedLM)
+from paddlenlp.transformers import (RoFormerModel, RoFormerPretrainedModel,
+ RoFormerForSequenceClassification,
+ RoFormerForTokenClassification,
+ RoFormerForQuestionAnswering,
+ RoFormerForMultipleChoice,
+ RoFormerForMaskedLM)
from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
from ...testing_utils import slow
@@ -75,11 +77,10 @@ class RoFormerModelTestConfig(RoFormerModelTestModelConfig):
class RoFormerModelTester:
- def __init__(
- self,
- parent,
- config: Optional[RoFormerModelTestConfig] = None,
- ):
+ def __init__(self,
+ parent,
+ config: Optional[RoFormerModelTestConfig] = None,
+ return_dict: bool = False):
self.parent = parent
self.config: RoFormerModelTestConfig = config or RoFormerModelTestConfig(
)
@@ -87,6 +88,7 @@ def __init__(
self.is_training = self.config.is_training
self.num_classes = self.config.num_classes
self.num_choices = self.config.num_choices
+ self.return_dict = return_dict
def prepare_config_and_inputs(self):
config = self.config
@@ -109,6 +111,11 @@ def prepare_config_and_inputs(self):
def get_config(self) -> dict:
return self.config.model_kwargs
+    def __getattr__(self, key: str):
+        if key == "config" or not hasattr(self.config, key):  # unset config must not recurse
+            raise AttributeError(f'attribute <{key}> not exist')
+        return getattr(self.config, key)
+
def create_and_check_model(
self,
config,
@@ -120,9 +127,16 @@ def create_and_check_model(
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- result = model(input_ids, token_type_ids=token_type_ids)
- result = model(input_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ result = model(input_ids, return_dict=self.return_dict)
+
+ if self.return_dict:
+ result = [result.last_hidden_state, result.pooler_output]
+
self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.hidden_size
@@ -151,11 +165,12 @@ def create_and_check_for_multiple_choice(
input_mask = input_mask.unsqueeze(1).expand(
[-1, self.config.num_choices, -1])
- result = model(
- multiple_choice_inputs_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(multiple_choice_inputs_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
self.parent.assertEqual(
result.shape, [self.config.batch_size, self.config.num_choices])
@@ -163,15 +178,20 @@ def create_and_check_for_question_answering(self, config, input_ids,
token_type_ids, input_mask):
model = RoFormerForQuestionAnswering(RoFormerModel(**config))
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ start_logits, end_logits = result.start_logits, result.end_logits
+ else:
+ start_logits, end_logits = result[0], result[1]
+
self.parent.assertEqual(
- result[0].shape, [self.config.batch_size, self.config.seq_length])
+ start_logits.shape,
+ [self.config.batch_size, self.config.seq_length])
self.parent.assertEqual(
- result[1].shape, [self.config.batch_size, self.config.seq_length])
+ end_logits.shape, [self.config.batch_size, self.config.seq_length])
def create_and_check_for_token_classification(
self,
@@ -185,7 +205,11 @@ def create_and_check_for_token_classification(
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(result.shape, [
self.config.batch_size, self.config.seq_length,
self.config.num_classes
@@ -202,7 +226,11 @@ def create_and_check_for_masked_lm(
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(result.shape, [
self.config.batch_size, self.config.seq_length,
self.config.vocab_size
@@ -218,11 +246,13 @@ def create_and_check_for_sequence_classification(
model = RoFormerForSequenceClassification(
RoFormerModel(**config), num_classes=self.config.num_classes)
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+
self.parent.assertEqual(
result.shape, [self.config.batch_size, self.config.num_classes])
@@ -242,18 +272,19 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
+@parameterized_class(("return_dict", ), [[True], [False]])
class RoFormerModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = RoFormerModel
+ return_dict: bool = False
- all_model_classes = (
- RoFormerModel,
- RoFormerForMultipleChoice,
- RoFormerForPretraining,
- RoFormerForSequenceClassification,
- )
+ all_model_classes = (RoFormerModel, RoFormerForSequenceClassification,
+ RoFormerForTokenClassification,
+ RoFormerForQuestionAnswering,
+ RoFormerForMultipleChoice, RoFormerForMaskedLM)
def setUp(self):
- self.model_tester = RoFormerModelTester(self)
+ self.model_tester = RoFormerModelTester(self,
+ return_dict=self.return_dict)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From d4b41227bb9746fe4a964dd7428e44beeee2de6a Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 24 Aug 2022 13:07:19 +0000
Subject: [PATCH 007/159] complete ernie label loss
---
tests/transformers/ernie/test_modeling.py | 140 ++++++++++++++++------
1 file changed, 104 insertions(+), 36 deletions(-)
diff --git a/tests/transformers/ernie/test_modeling.py b/tests/transformers/ernie/test_modeling.py
index 7cd6ab575143..468c9818ca65 100644
--- a/tests/transformers/ernie/test_modeling.py
+++ b/tests/transformers/ernie/test_modeling.py
@@ -20,39 +20,38 @@
from paddlenlp.transformers import ErnieModel, ErnieForQuestionAnswering, ErnieForSequenceClassification,\
ErnieForTokenClassification, ErnieForPretraining, ErnieForMultipleChoice, ErnieForMaskedLM, ErniePretrainedModel
-from ...transformers.test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
+from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
from ...testing_utils import slow
class ErnieModelTester:
- def __init__(
- self,
- parent,
- batch_size=13,
- seq_length=7,
- is_training=True,
- use_input_mask=True,
- use_token_type_ids=True,
- vocab_size=99,
- hidden_size=32,
- num_hidden_layers=5,
- num_attention_heads=4,
- intermediate_size=37,
- hidden_act="gelu",
- hidden_dropout_prob=0.1,
- attention_probs_dropout_prob=0.1,
- max_position_embeddings=512,
- type_vocab_size=2,
- initializer_range=0.02,
- pad_token_id=0,
- type_sequence_label_size=2,
- num_labels=3,
- num_choices=4,
- num_classes=3,
- scope=None,
- return_dict=False,
- ):
+ def __init__(self,
+ parent,
+ batch_size=13,
+ seq_length=7,
+ is_training=True,
+ use_input_mask=True,
+ use_token_type_ids=True,
+ vocab_size=99,
+ hidden_size=32,
+ num_hidden_layers=5,
+ num_attention_heads=4,
+ intermediate_size=37,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=2,
+ initializer_range=0.02,
+ pad_token_id=0,
+ type_sequence_label_size=2,
+ num_labels=3,
+ num_choices=4,
+ num_classes=3,
+ scope=None,
+ return_dict: bool = False,
+ use_labels: bool = False):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
@@ -77,6 +76,7 @@ def __init__(
self.num_choices = num_choices
self.scope = scope
self.return_dict = return_dict
+ self.use_labels = use_labels
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -92,8 +92,19 @@ def prepare_config_and_inputs(self):
token_type_ids = ids_tensor([self.batch_size, self.seq_length],
self.type_vocab_size)
+ sequence_labels = None
+ token_labels = None
+ choice_labels = None
+
+ if self.use_labels:
+ sequence_labels = ids_tensor([self.batch_size],
+ self.type_sequence_label_size)
+ token_labels = ids_tensor([self.batch_size, self.seq_length],
+ self.num_labels)
+ choice_labels = ids_tensor([self.batch_size], self.num_choices)
+
config = self.get_config()
- return config, input_ids, token_type_ids, input_mask
+ return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self):
return {
@@ -117,6 +128,9 @@ def create_and_check_model(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = ErnieModel(**config)
model.eval()
@@ -140,15 +154,21 @@ def create_and_check_for_masked_lm(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = ErnieForMaskedLM(ErnieModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=token_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.vocab_size])
@@ -159,6 +179,9 @@ def create_and_check_for_multiple_choice(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = ErnieForMultipleChoice(ErnieModel(**config),
num_choices=self.num_choices)
@@ -172,23 +195,45 @@ def create_and_check_for_multiple_choice(
result = model(multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
+ labels=choice_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
+
self.parent.assertEqual(result.shape,
[self.batch_size, self.num_choices])
- def create_and_check_for_question_answering(self, config, input_ids,
- token_type_ids, input_mask):
+ def create_and_check_for_question_answering(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
model = ErnieForQuestionAnswering(ErnieModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ start_positions=sequence_labels,
+ end_positions=sequence_labels,
return_dict=self.return_dict)
- self.parent.assertEqual(result[0].shape,
+
+ if self.return_dict:
+ start_logits, end_logits = result.start_logits, result.end_logits
+ elif self.use_labels:
+ start_logits, end_logits = result[1], result[2]
+ else:
+ start_logits, end_logits = result[0], result[1]
+
+ self.parent.assertEqual(start_logits.shape,
[self.batch_size, self.seq_length])
- self.parent.assertEqual(result[1].shape,
+ self.parent.assertEqual(end_logits.shape,
[self.batch_size, self.seq_length])
def create_and_check_for_sequence_classification(
@@ -197,6 +242,9 @@ def create_and_check_for_sequence_classification(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = ErnieForSequenceClassification(ErnieModel(**config),
num_classes=self.num_classes)
@@ -204,9 +252,12 @@ def create_and_check_for_sequence_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=sequence_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(result.shape,
[self.batch_size, self.num_classes])
@@ -217,6 +268,9 @@ def create_and_check_for_token_classification(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = ErnieForTokenClassification(ErnieModel(**config),
num_classes=self.num_classes)
@@ -224,9 +278,12 @@ def create_and_check_for_token_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=token_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.num_classes])
@@ -238,6 +295,9 @@ def prepare_config_and_inputs_for_common(self):
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
@@ -247,10 +307,16 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
-@parameterized_class(("return_dict", ), [[False], [True]])
+@parameterized_class(("return_dict", "use_labels"), [
+ [False, False],
+ [False, True],
+ [True, False],
+ [True, True],
+])
class ErnieModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = ErnieModel
- return_dict = False
+ return_dict: bool = False
+ use_labels: bool = False
all_model_classes = (
ErnieModel,
@@ -263,7 +329,9 @@ class ErnieModelTest(ModelTesterMixin, unittest.TestCase):
)
def setUp(self):
- self.model_tester = ErnieModelTester(self, return_dict=self.return_dict)
+ self.model_tester = ErnieModelTester(self,
+ use_labels=self.use_labels,
+ return_dict=self.return_dict)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From 200af6de8d728d0c9819ad540633b28cdff1da76 Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Thu, 25 Aug 2022 02:45:09 +0000
Subject: [PATCH 008/159] complete ernie/roberta/roformer unittest
---
tests/transformers/roberta/test_modeling.py | 150 ++++++++++++++++---
tests/transformers/roformer/test_modeling.py | 90 +++++++++--
2 files changed, 212 insertions(+), 28 deletions(-)
diff --git a/tests/transformers/roberta/test_modeling.py b/tests/transformers/roberta/test_modeling.py
index f38dbc0277b1..f63eb6bfacab 100644
--- a/tests/transformers/roberta/test_modeling.py
+++ b/tests/transformers/roberta/test_modeling.py
@@ -28,7 +28,7 @@
RobertaModel,
)
-from ...transformers.test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
+from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
from ...testing_utils import slow
ROBERTA_TINY = "sshleifer/tiny-distilroberta-base"
@@ -36,7 +36,10 @@
class RobertaModelTester:
- def __init__(self, parent, return_dict: bool = False):
+ def __init__(self,
+ parent,
+ return_dict: bool = False,
+ use_labels: bool = False):
self.parent = parent
self.batch_size = 13
self.seq_length = 7
@@ -63,6 +66,7 @@ def __init__(self, parent, return_dict: bool = False):
self.num_choices = 4
self.scope = None
self.return_dict = return_dict
+ self.use_labels = use_labels
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -78,8 +82,18 @@ def prepare_config_and_inputs(self):
token_type_ids = ids_tensor([self.batch_size, self.seq_length],
self.type_vocab_size)
+ sequence_labels = None
+ token_labels = None
+ choice_labels = None
+ if self.use_labels:
+ sequence_labels = ids_tensor([self.batch_size],
+ self.type_sequence_label_size)
+ token_labels = ids_tensor([self.batch_size, self.seq_length],
+ self.num_labels)
+ choice_labels = ids_tensor([self.batch_size], self.num_choices)
+
config = self.get_config()
- return config, input_ids, token_type_ids, input_mask
+ return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self):
return {
@@ -105,6 +119,9 @@ def prepare_config_and_inputs_for_decoder(self):
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
) = self.prepare_config_and_inputs()
return (
@@ -112,10 +129,21 @@ def prepare_config_and_inputs_for_decoder(self):
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
)
- def create_and_check_model(self, config, input_ids, token_type_ids,
- input_mask):
+ def create_and_check_model(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
model = RobertaModel(**config)
model.eval()
result = model(input_ids,
@@ -145,15 +173,21 @@ def create_and_check_for_causal_lm(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RobertaForCausalLM(RobertaModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=token_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.vocab_size])
@@ -164,21 +198,35 @@ def create_and_check_for_masked_lm(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RobertaForMaskedLM(RobertaModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=token_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.vocab_size])
- def create_and_check_for_token_classification(self, config, input_ids,
- token_type_ids, input_mask):
+ def create_and_check_for_token_classification(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
model = RobertaForTokenClassification(RobertaModel(**config),
num_classes=self.num_labels,
dropout=None)
@@ -186,16 +234,53 @@ def create_and_check_for_token_classification(self, config, input_ids,
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.return_dict,
+ labels=token_labels)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(
result.shape, [self.batch_size, self.seq_length, self.num_labels])
- def create_and_check_for_multiple_choice(self, config, input_ids,
- token_type_ids, input_mask):
+ def create_and_check_for_sequence_classification(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
+ model = RobertaForSequenceClassification(RobertaModel(**config),
+ num_classes=self.num_labels)
+ model.eval()
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ labels=sequence_labels,
+ return_dict=self.return_dict)
+ if self.return_dict:
+ result = result.logits
+ elif self.use_labels:
+ result = result[1]
+
+ self.parent.assertEqual(result.shape,
+ [self.batch_size, self.num_labels])
+
+ def create_and_check_for_multiple_choice(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
model = RobertaForMultipleChoice(RobertaModel(**config))
model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(
@@ -207,24 +292,40 @@ def create_and_check_for_multiple_choice(self, config, input_ids,
result = model(multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.return_dict,
+ labels=choice_labels)
+
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(result.shape,
[self.batch_size, self.num_choices])
- def create_and_check_for_question_answering(self, config, input_ids,
- token_type_ids, input_mask):
+ def create_and_check_for_question_answering(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
model = RobertaForQuestionAnswering(RobertaModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.return_dict,
+ start_positions=sequence_labels,
+ end_positions=sequence_labels)
if self.return_dict:
start_logits, end_logits = result.start_logits, result.end_logits
+ elif self.use_labels:
+ start_logits, end_logits = result[1], result[2]
else:
start_logits, end_logits = result[0], result[1]
@@ -240,6 +341,9 @@ def prepare_config_and_inputs_for_common(self):
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
@@ -249,10 +353,16 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
-@parameterized_class(("return_dict", ), [[True], [False]])
+@parameterized_class(("return_dict", "use_labels"), [
+ [False, False],
+ [False, True],
+ [True, False],
+ [True, True],
+])
class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = RobertaModel
- return_dict = False
+ return_dict: bool = False
+ use_labels: bool = False
all_model_classes = (
RobertaForCausalLM,
@@ -267,7 +377,8 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = RobertaModelTester(self,
- return_dict=self.return_dict)
+ return_dict=self.return_dict,
+ use_labels=self.use_labels)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
@@ -282,6 +393,11 @@ def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
+ def test_for_sequence_classification(self):
+ config_and_inputs = self.model_tester.prepare_config_and_inputs()
+ self.model_tester.create_and_check_for_sequence_classification(
+ *config_and_inputs)
+
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(
diff --git a/tests/transformers/roformer/test_modeling.py b/tests/transformers/roformer/test_modeling.py
index 786dad6819b2..17f36d107825 100644
--- a/tests/transformers/roformer/test_modeling.py
+++ b/tests/transformers/roformer/test_modeling.py
@@ -69,6 +69,7 @@ class RoFormerModelTestConfig(RoFormerModelTestModelConfig):
is_training: bool = False
use_input_mask: bool = False
use_token_type_ids: bool = True
+ type_sequence_label_size: int = 2
# used for sequence classification
num_classes: int = 3
@@ -80,7 +81,8 @@ class RoFormerModelTester:
def __init__(self,
parent,
config: Optional[RoFormerModelTestConfig] = None,
- return_dict: bool = False):
+ return_dict: bool = False,
+ use_labels: bool = False):
self.parent = parent
self.config: RoFormerModelTestConfig = config or RoFormerModelTestConfig(
)
@@ -88,7 +90,10 @@ def __init__(self,
self.is_training = self.config.is_training
self.num_classes = self.config.num_classes
self.num_choices = self.config.num_choices
+
+ self.type_sequence_label_size = self.config.type_sequence_label_size
self.return_dict = return_dict
+ self.use_labels = use_labels
def prepare_config_and_inputs(self):
config = self.config
@@ -105,8 +110,19 @@ def prepare_config_and_inputs(self):
token_type_ids = ids_tensor([config.batch_size, config.seq_length],
config.type_vocab_size)
+ sequence_labels = None
+ token_labels = None
+ choice_labels = None
+
+ if self.use_labels:
+ sequence_labels = ids_tensor([self.batch_size],
+ self.type_sequence_label_size)
+ token_labels = ids_tensor([self.batch_size, self.seq_length],
+ self.num_classes)
+ choice_labels = ids_tensor([self.batch_size], self.num_choices)
+
config = self.get_config()
- return config, input_ids, token_type_ids, input_mask
+ return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self) -> dict:
return self.config.model_kwargs
@@ -122,6 +138,9 @@ def create_and_check_model(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RoFormerModel(**config)
model.eval()
@@ -150,6 +169,9 @@ def create_and_check_for_multiple_choice(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RoFormerForMultipleChoice(RoFormerModel(**config),
num_choices=self.config.num_choices)
@@ -168,22 +190,40 @@ def create_and_check_for_multiple_choice(
result = model(multiple_choice_inputs_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.return_dict,
+ labels=choice_labels)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
+
self.parent.assertEqual(
result.shape, [self.config.batch_size, self.config.num_choices])
- def create_and_check_for_question_answering(self, config, input_ids,
- token_type_ids, input_mask):
+ def create_and_check_for_question_answering(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ):
model = RoFormerForQuestionAnswering(RoFormerModel(**config))
model.eval()
- result = model(input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ result = model(
+ input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.return_dict,
+ start_positions=sequence_labels,
+ end_positions=sequence_labels,
+ )
if self.return_dict:
start_logits, end_logits = result.start_logits, result.end_logits
+ elif self.use_labels:
+ start_logits, end_logits = result[1], result[2]
else:
start_logits, end_logits = result[0], result[1]
@@ -199,6 +239,9 @@ def create_and_check_for_token_classification(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RoFormerForTokenClassification(RoFormerModel(**config),
num_classes=self.num_classes)
@@ -206,9 +249,12 @@ def create_and_check_for_token_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=token_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(result.shape, [
self.config.batch_size, self.config.seq_length,
@@ -221,15 +267,21 @@ def create_and_check_for_masked_lm(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RoFormerForMaskedLM(RoFormerModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=token_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(result.shape, [
self.config.batch_size, self.config.seq_length,
@@ -242,6 +294,9 @@ def create_and_check_for_sequence_classification(
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
):
model = RoFormerForSequenceClassification(
RoFormerModel(**config), num_classes=self.config.num_classes)
@@ -249,9 +304,12 @@ def create_and_check_for_sequence_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
+ labels=sequence_labels,
return_dict=self.return_dict)
if self.return_dict:
result = result.logits
+ elif self.use_labels:
+ result = result[1]
self.parent.assertEqual(
result.shape, [self.config.batch_size, self.config.num_classes])
@@ -263,6 +321,9 @@ def prepare_config_and_inputs_for_common(self):
input_ids,
token_type_ids,
input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
@@ -272,10 +333,16 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
-@parameterized_class(("return_dict", ), [[True], [False]])
+@parameterized_class(("return_dict", "use_labels"), [
+ [False, False],
+ [False, True],
+ [True, False],
+ [True, True],
+])
class RoFormerModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = RoFormerModel
return_dict: bool = False
+ use_labels: bool = False
all_model_classes = (RoFormerModel, RoFormerForSequenceClassification,
RoFormerForTokenClassification,
@@ -284,7 +351,8 @@ class RoFormerModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = RoFormerModelTester(self,
- return_dict=self.return_dict)
+ return_dict=self.return_dict,
+ use_labels=self.use_labels)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From 26daca4cf15e36ccd1ef73f1d08441d272a917fa Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Thu, 25 Aug 2022 12:09:05 +0000
Subject: [PATCH 009/159] update label/loss
---
tests/transformers/bert/test_modeling.py | 45 +++++----
tests/transformers/ernie/test_modeling.py | 75 +++++++-------
tests/transformers/roberta/test_modeling.py | 100 +++++++++----------
tests/transformers/roformer/test_modeling.py | 80 +++++++--------
4 files changed, 145 insertions(+), 155 deletions(-)
diff --git a/tests/transformers/bert/test_modeling.py b/tests/transformers/bert/test_modeling.py
index 9e841d46f139..81ef6ded0f2d 100644
--- a/tests/transformers/bert/test_modeling.py
+++ b/tests/transformers/bert/test_modeling.py
@@ -169,8 +169,13 @@ def create_and_check_for_masked_lm(
token_type_ids=token_type_ids,
labels=token_labels,
return_dict=self.return_dict)
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
+
self.parent.assertEqual(
- result[1].shape,
+ result[0].shape,
[self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_model_past_large_inputs(
@@ -186,8 +191,6 @@ def create_and_check_model_past_large_inputs(
model = BertModel(**config)
model.eval()
- self.return_dict = False
-
# first forward pass
outputs = model(input_ids,
attention_mask=input_mask,
@@ -209,10 +212,7 @@ def create_and_check_model_past_large_inputs(
output_hidden_states=True,
return_dict=self.return_dict)
- if self.return_dict:
- output_from_no_past = outputs['hidden_states'][0]
- else:
- output_from_no_past = outputs[2][0]
+ output_from_no_past = outputs[2][0]
outputs = model(next_tokens,
attention_mask=next_attention_mask,
@@ -220,10 +220,7 @@ def create_and_check_model_past_large_inputs(
output_hidden_states=True,
return_dict=self.return_dict)
- if self.return_dict:
- output_from_past = outputs['hidden_states'][0]
- else:
- output_from_past = outputs[2][0]
+ output_from_past = outputs[2][0]
# select random slice
random_slice_idx = ids_tensor((1, ), output_from_past.shape[-1]).item()
@@ -257,13 +254,11 @@ def create_and_check_for_pretraining(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- labels=token_labels,
- next_sentence_label=sequence_labels,
return_dict=self.return_dict)
self.parent.assertEqual(
- result[1].shape,
+ result[0].shape,
[self.batch_size, self.seq_length, self.vocab_size])
- self.parent.assertEqual(result[2].shape, [self.batch_size, 2])
+ self.parent.assertEqual(result[1].shape, [self.batch_size, 2])
def create_and_check_for_multiple_choice(
self,
@@ -289,7 +284,12 @@ def create_and_check_for_multiple_choice(
token_type_ids=multiple_choice_token_type_ids,
labels=choice_labels,
return_dict=self.return_dict)
- self.parent.assertEqual(result[1].shape,
+ if choice_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
+
+ self.parent.assertEqual(result[0].shape,
[self.batch_size, self.num_choices])
def create_and_check_for_question_answering(
@@ -310,6 +310,11 @@ def create_and_check_for_question_answering(
start_positions=sequence_labels,
end_positions=sequence_labels,
return_dict=self.return_dict)
+ if sequence_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
+
- self.parent.assertEqual(result[1].shape,
+ self.parent.assertEqual(result[0].shape,
[self.batch_size, self.seq_length])
- self.parent.assertEqual(result[2].shape,
+ self.parent.assertEqual(result[1].shape,
@@ -377,10 +382,16 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
-@parameterized_class(("return_dict", ), [[True], [False]])
+@parameterized_class(("return_dict", "use_labels"), [
+ [False, False],
+ [False, True],
+ [True, False],
+ [True, True],
+])
class BertModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = BertModel
return_dict = False
+ use_labels = False
all_model_classes = (
BertModel,
diff --git a/tests/transformers/ernie/test_modeling.py b/tests/transformers/ernie/test_modeling.py
index 468c9818ca65..69f072897bc0 100644
--- a/tests/transformers/ernie/test_modeling.py
+++ b/tests/transformers/ernie/test_modeling.py
@@ -49,9 +49,7 @@ def __init__(self,
num_labels=3,
num_choices=4,
num_classes=3,
- scope=None,
- return_dict: bool = False,
- use_labels: bool = False):
+ scope=None):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
@@ -75,8 +73,6 @@ def __init__(self,
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
- self.return_dict = return_dict
- self.use_labels = use_labels
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -96,7 +92,7 @@ def prepare_config_and_inputs(self):
token_labels = None
choice_labels = None
- if self.use_labels:
+ if self.parent.use_labels:
sequence_labels = ids_tensor([self.batch_size],
self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length],
@@ -137,11 +133,11 @@ def create_and_check_model(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.parent.return_dict)
result = model(input_ids,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
- result = model(input_ids, return_dict=self.return_dict)
+ return_dict=self.parent.return_dict)
+ result = model(input_ids, return_dict=self.parent.return_dict)
self.parent.assertEqual(
result[0].shape,
[self.batch_size, self.seq_length, self.hidden_size])
@@ -164,14 +160,16 @@ def create_and_check_for_masked_lm(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.batch_size, self.seq_length, self.vocab_size])
+ result[0].shape,
+ [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_for_multiple_choice(
self,
@@ -196,13 +194,13 @@ def create_and_check_for_multiple_choice(
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
labels=choice_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+ if choice_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
- self.parent.assertEqual(result.shape,
+ self.parent.assertEqual(result[0].shape,
[self.batch_size, self.num_choices])
def create_and_check_for_question_answering(
@@ -222,11 +220,9 @@ def create_and_check_for_question_answering(
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
- return_dict=self.return_dict)
+ return_dict=self.parent.return_dict)
- if self.return_dict:
- start_logits, end_logits = result.start_logits, result.end_logits
- elif self.use_labels:
+ if sequence_labels is not None:
start_logits, end_logits = result[1], result[2]
else:
start_logits, end_logits = result[0], result[1]
@@ -253,13 +249,13 @@ def create_and_check_for_sequence_classification(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=sequence_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+ if sequence_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
- self.parent.assertEqual(result.shape,
+ self.parent.assertEqual(result[0].shape,
[self.batch_size, self.num_classes])
def create_and_check_for_token_classification(
@@ -279,14 +275,15 @@ def create_and_check_for_token_classification(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.batch_size, self.seq_length, self.num_classes])
+ result[0].shape,
+ [self.batch_size, self.seq_length, self.num_classes])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
@@ -329,9 +326,7 @@ class ErnieModelTest(ModelTesterMixin, unittest.TestCase):
)
def setUp(self):
- self.model_tester = ErnieModelTester(self,
- use_labels=self.use_labels,
- return_dict=self.return_dict)
+ self.model_tester = ErnieModelTester(self)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
diff --git a/tests/transformers/roberta/test_modeling.py b/tests/transformers/roberta/test_modeling.py
index f63eb6bfacab..b7a2d7a8610f 100644
--- a/tests/transformers/roberta/test_modeling.py
+++ b/tests/transformers/roberta/test_modeling.py
@@ -36,10 +36,7 @@
class RobertaModelTester:
- def __init__(self,
- parent,
- return_dict: bool = False,
- use_labels: bool = False):
+ def __init__(self, parent):
self.parent = parent
self.batch_size = 13
self.seq_length = 7
@@ -65,8 +62,6 @@ def __init__(self,
self.num_labels = 3
self.num_choices = 4
self.scope = None
- self.return_dict = return_dict
- self.use_labels = use_labels
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length],
@@ -85,7 +80,7 @@ def prepare_config_and_inputs(self):
sequence_labels = None
token_labels = None
choice_labels = None
- if self.use_labels:
+ if self.parent.use_labels:
sequence_labels = ids_tensor([self.batch_size],
self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length],
@@ -149,22 +144,16 @@ def create_and_check_model(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.parent.return_dict)
result = model(input_ids,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
- result = model(input_ids, return_dict=self.return_dict)
-
- if self.return_dict:
- last_hidden_state = result.last_hidden_state
- pooler_output = result.pooler_output
- else:
- last_hidden_state, pooler_output = result[0], result[1]
+ return_dict=self.parent.return_dict)
+ result = model(input_ids, return_dict=self.parent.return_dict)
self.parent.assertEqual(
- last_hidden_state.shape,
+ result[0].shape,
[self.batch_size, self.seq_length, self.hidden_size])
- self.parent.assertEqual(pooler_output.shape,
+ self.parent.assertEqual(result[1].shape,
[self.batch_size, self.hidden_size])
def create_and_check_for_causal_lm(
@@ -183,14 +172,15 @@ def create_and_check_for_causal_lm(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.batch_size, self.seq_length, self.vocab_size])
+ result[0].shape,
+ [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_for_masked_lm(
self,
@@ -208,14 +198,16 @@ def create_and_check_for_masked_lm(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.batch_size, self.seq_length, self.vocab_size])
+ result[0].shape,
+ [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_for_token_classification(
self,
@@ -234,16 +226,17 @@ def create_and_check_for_token_classification(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict,
+ return_dict=self.parent.return_dict,
labels=token_labels)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.batch_size, self.seq_length, self.num_labels])
+ result[0].shape,
+ [self.batch_size, self.seq_length, self.num_labels])
def create_and_check_for_sequence_classification(
self,
@@ -262,13 +255,14 @@ def create_and_check_for_sequence_classification(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=sequence_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
- self.parent.assertEqual(result.shape,
+ self.parent.assertEqual(result[0].shape,
[self.batch_size, self.num_labels])
def create_and_check_for_multiple_choice(
@@ -292,15 +286,15 @@ def create_and_check_for_multiple_choice(
result = model(multiple_choice_inputs_ids,
attention_mask=multiple_choice_input_mask,
token_type_ids=multiple_choice_token_type_ids,
- return_dict=self.return_dict,
+ return_dict=self.parent.return_dict,
labels=choice_labels)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
- self.parent.assertEqual(result.shape,
+ self.parent.assertEqual(result[0].shape,
[self.batch_size, self.num_choices])
def create_and_check_for_question_answering(
@@ -318,13 +312,11 @@ def create_and_check_for_question_answering(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict,
+ return_dict=self.parent.return_dict,
start_positions=sequence_labels,
end_positions=sequence_labels)
- if self.return_dict:
- start_logits, end_logits = result.start_logits, result.end_logits
- elif self.use_labels:
+ if sequence_labels is not None:
start_logits, end_logits = result[1], result[2]
else:
start_logits, end_logits = result[0], result[1]
@@ -376,9 +368,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
all_generative_model_classes = (RobertaForCausalLM, )
def setUp(self):
- self.model_tester = RobertaModelTester(self,
- return_dict=self.return_dict,
- use_labels=self.use_labels)
+ self.model_tester = RobertaModelTester(self)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
diff --git a/tests/transformers/roformer/test_modeling.py b/tests/transformers/roformer/test_modeling.py
index 17f36d107825..a26b4a46cb2e 100644
--- a/tests/transformers/roformer/test_modeling.py
+++ b/tests/transformers/roformer/test_modeling.py
@@ -80,9 +80,7 @@ class RoFormerModelTester:
def __init__(self,
parent,
- config: Optional[RoFormerModelTestConfig] = None,
- return_dict: bool = False,
- use_labels: bool = False):
+ config: Optional[RoFormerModelTestConfig] = None):
self.parent = parent
self.config: RoFormerModelTestConfig = config or RoFormerModelTestConfig(
)
@@ -92,8 +90,6 @@ def __init__(self,
self.num_choices = self.config.num_choices
self.type_sequence_label_size = self.config.type_sequence_label_size
- self.return_dict = return_dict
- self.use_labels = use_labels
def prepare_config_and_inputs(self):
config = self.config
@@ -114,7 +110,7 @@ def prepare_config_and_inputs(self):
token_labels = None
choice_labels = None
- if self.use_labels:
+ if self.parent.use_labels:
sequence_labels = ids_tensor([self.batch_size],
self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length],
@@ -147,14 +143,11 @@ def create_and_check_model(
result = model(input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
+ return_dict=self.parent.return_dict)
result = model(input_ids,
token_type_ids=token_type_ids,
- return_dict=self.return_dict)
- result = model(input_ids, return_dict=self.return_dict)
-
- if self.return_dict:
- result = [result.last_hidden_state, result.pooler_output]
+ return_dict=self.parent.return_dict)
+ result = model(input_ids, return_dict=self.parent.return_dict)
self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
@@ -190,15 +183,16 @@ def create_and_check_for_multiple_choice(
result = model(multiple_choice_inputs_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict,
+ return_dict=self.parent.return_dict,
labels=choice_labels)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.config.batch_size, self.config.num_choices])
+ result[0].shape, [self.config.batch_size, self.config.num_choices])
def create_and_check_for_question_answering(
self,
@@ -216,13 +210,12 @@ def create_and_check_for_question_answering(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
- return_dict=self.return_dict,
+ return_dict=self.parent.return_dict,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
- if self.return_dict:
- start_logits, end_logits = result.start_logits, result.end_logits
- elif self.use_labels:
+
+ if sequence_labels is not None:
start_logits, end_logits = result[1], result[2]
else:
start_logits, end_logits = result[0], result[1]
@@ -250,13 +243,14 @@ def create_and_check_for_token_classification(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
- self.parent.assertEqual(result.shape, [
+ self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.num_classes
])
@@ -277,13 +271,14 @@ def create_and_check_for_masked_lm(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=token_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
- self.parent.assertEqual(result.shape, [
+ self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.vocab_size
])
@@ -305,14 +300,15 @@ def create_and_check_for_sequence_classification(
attention_mask=input_mask,
token_type_ids=token_type_ids,
labels=sequence_labels,
- return_dict=self.return_dict)
- if self.return_dict:
- result = result.logits
- elif self.use_labels:
- result = result[1]
+ return_dict=self.parent.return_dict)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
self.parent.assertEqual(
- result.shape, [self.config.batch_size, self.config.num_classes])
+ result[0].shape, [self.config.batch_size, self.config.num_classes])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
@@ -350,9 +346,7 @@ class RoFormerModelTest(ModelTesterMixin, unittest.TestCase):
RoFormerForMultipleChoice, RoFormerForMaskedLM)
def setUp(self):
- self.model_tester = RoFormerModelTester(self,
- return_dict=self.return_dict,
- use_labels=self.use_labels)
+ self.model_tester = RoFormerModelTester(self)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
From 3a90faa9164793d01182cb9a0ab029c4010af833 Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Tue, 30 Aug 2022 11:52:02 +0000
Subject: [PATCH 010/159] update refactor code
---
paddlenlp/transformers/tokenizer_utils_base.py | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/paddlenlp/transformers/tokenizer_utils_base.py b/paddlenlp/transformers/tokenizer_utils_base.py
index 244a3a809eab..88f66541b959 100644
--- a/paddlenlp/transformers/tokenizer_utils_base.py
+++ b/paddlenlp/transformers/tokenizer_utils_base.py
@@ -1353,17 +1353,12 @@ class PretrainedTokenizerBase(SpecialTokensMixin):
truncation_side: str = "right"
slow_tokenizer_class = None
- # tag for init_kwargs
- _have_done_init: bool = False
-
def __init__(self, **kwargs):
# inputs and kwargs for saving and re-loading (see ``from_pretrained`` and ``save_pretrained``)
- if self._have_done_init:
- return
- self._have_done_init = True
-
self.init_inputs = ()
- self.init_kwargs = copy.deepcopy(kwargs)
+
+ self.init_kwargs = getattr(self, "init_kwargs",
+ None) or copy.deepcopy(kwargs)
self.name_or_path = kwargs.pop("name_or_path", "")
self._processor_class = kwargs.pop("processor_class", None)
From 13a13aff17b3b2cc35ba33c0520bf1b64193e470 Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 31 Aug 2022 08:02:01 +0000
Subject: [PATCH 011/159] remove unrelated requirements
---
requirements.txt | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index ab13cb3a4db1..474b2ca14a5e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,5 +9,4 @@ tqdm
paddlefsl
sentencepiece
paddle2onnx
-protobuf>=3.1.0, <=3.20.0
-parameterized
\ No newline at end of file
+protobuf>=3.1.0, <=3.20.0
\ No newline at end of file
From 58e5e7e4bb0dccf61eb32725cdc9619c34f579a8 Mon Sep 17 00:00:00 2001
From: yujun <50394665+JunnYu@users.noreply.github.com>
Date: Tue, 6 Sep 2022 19:28:05 +0800
Subject: [PATCH 012/159] add license
---
.../stable_diffusion_utils/LICENSE | 82 +++++++++++++++++++
.../stable_diffusion_utils/README.md | 78 ++++++++++++++++++
2 files changed, 160 insertions(+)
create mode 100644 paddlenlp/transformers/stable_diffusion_utils/LICENSE
create mode 100644 paddlenlp/transformers/stable_diffusion_utils/README.md
diff --git a/paddlenlp/transformers/stable_diffusion_utils/LICENSE b/paddlenlp/transformers/stable_diffusion_utils/LICENSE
new file mode 100644
index 000000000000..928aa738f243
--- /dev/null
+++ b/paddlenlp/transformers/stable_diffusion_utils/LICENSE
@@ -0,0 +1,82 @@
+Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors
+
+CreativeML Open RAIL-M
+dated August 22, 2022
+
+Section I: PREAMBLE
+
+Multimodal generative models are being widely adopted and used, and have the potential to transform the way artists, among other individuals, conceive and benefit from AI or ML technologies as a tool for content creation.
+
+Notwithstanding the current and potential benefits that these artifacts can bring to society at large, there are also concerns about potential misuses of them, either due to their technical limitations or ethical considerations.
+
+In short, this license strives for both the open and responsible downstream use of the accompanying model. When it comes to the open character, we took inspiration from open source permissive licenses regarding the grant of IP rights. Referring to the downstream responsible use, we added use-based restrictions not permitting the use of the Model in very specific scenarios, in order for the licensor to be able to enforce the license in case potential misuses of the Model may occur. At the same time, we strive to promote open and responsible research on generative models for art and content generation.
+
+Even though downstream derivative versions of the model could be released under different licensing terms, the latter will always have to include - at minimum - the same use-based restrictions as the ones in the original license (this license). We believe in the intersection between open and responsible AI development; thus, this License aims to strike a balance between both in order to enable responsible open-science in the field of AI.
+
+This License governs the use of the model (and its derivatives) and is informed by the model card associated with the model.
+
+NOW THEREFORE, You and Licensor agree as follows:
+
+1. Definitions
+
+- "License" means the terms and conditions for use, reproduction, and Distribution as defined in this document.
+- "Data" means a collection of information and/or content extracted from the dataset used with the Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not licensed under this License.
+- "Output" means the results of operating a Model as embodied in informational content resulting therefrom.
+- "Model" means any accompanying machine-learning based assemblies (including checkpoints), consisting of learnt weights, parameters (including optimizer states), corresponding to the model architecture as embodied in the Complementary Material, that have been trained or tuned, in whole or in part on the Data, using the Complementary Material.
+- "Derivatives of the Model" means all modifications to the Model, works based on the Model, or any other model which is created or initialized by transfer of patterns of the weights, parameters, activations or output of the Model, to the other model, in order to cause the other model to perform similarly to the Model, including - but not limited to - distillation methods entailing the use of intermediate data representations or methods based on the generation of synthetic data by the Model for training the other model.
+- "Complementary Material" means the accompanying source code and scripts used to define, run, load, benchmark or evaluate the Model, and used to prepare data for training or evaluation, if any. This includes any accompanying documentation, tutorials, examples, etc, if any.
+- "Distribution" means any transmission, reproduction, publication or other sharing of the Model or Derivatives of the Model to a third party, including providing the Model as a hosted service made available by electronic or other remote means - e.g. API-based or web access.
+- "Licensor" means the copyright owner or entity authorized by the copyright owner that is granting the License, including the persons or entities that may have rights in the Model and/or distributing the Model.
+- "You" (or "Your") means an individual or Legal Entity exercising permissions granted by this License and/or making use of the Model for whichever purpose and in any field of use, including usage of the Model in an end-use application - e.g. chatbot, translator, image generator.
+- "Third Parties" means individuals or legal entities that are not under common control with Licensor or You.
+- "Contribution" means any work of authorship, including the original version of the Model and any modifications or additions to that Model or Derivatives of the Model thereof, that is intentionally submitted to Licensor for inclusion in the Model by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Model, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
+- "Contributor" means Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Model.
+
+Section II: INTELLECTUAL PROPERTY RIGHTS
+
+Both copyright and patent grants apply to the Model, Derivatives of the Model and Complementary Material. The Model and Derivatives of the Model are subject to additional terms as described in Section III.
+
+2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare, publicly display, publicly perform, sublicense, and distribute the Complementary Material, the Model, and Derivatives of the Model.
+3. Grant of Patent License. Subject to the terms and conditions of this License and where and as applicable, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this paragraph) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Model and the Complementary Material, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Model to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Model and/or Complementary Material or a Contribution incorporated within the Model and/or Complementary Material constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for the Model and/or Work shall terminate as of the date such litigation is asserted or filed.
+
+Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION
+
+4. Distribution and Redistribution. You may host for Third Party remote access purposes (e.g. software-as-a-service), reproduce and distribute copies of the Model or Derivatives of the Model thereof in any medium, with or without modifications, provided that You meet the following conditions:
+Use-based restrictions as referenced in paragraph 5 MUST be included as an enforceable provision by You in any type of legal agreement (e.g. a license) governing the use and/or distribution of the Model or Derivatives of the Model, and You shall give notice to subsequent users You Distribute to, that the Model or Derivatives of the Model are subject to paragraph 5. This provision does not apply to the use of Complementary Material.
+You must give any Third Party recipients of the Model or Derivatives of the Model a copy of this License;
+You must cause any modified files to carry prominent notices stating that You changed the files;
+You must retain all copyright, patent, trademark, and attribution notices excluding those notices that do not pertain to any part of the Model, Derivatives of the Model.
+You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions - respecting paragraph 4.a. - for use, reproduction, or Distribution of Your modifications, or for any such Derivatives of the Model as a whole, provided Your use, reproduction, and Distribution of the Model otherwise complies with the conditions stated in this License.
+5. Use-based restrictions. The restrictions set forth in Attachment A are considered Use-based restrictions. Therefore You cannot use the Model and the Derivatives of the Model for the specified restricted uses. You may use the Model subject to this License, including only for lawful purposes and in accordance with the License. Use may include creating any content with, finetuning, updating, running, training, evaluating and/or reparametrizing the Model. You shall require all of Your users who use the Model or a Derivative of the Model to comply with the terms of this paragraph (paragraph 5).
+6. The Output You Generate. Except as set forth herein, Licensor claims no rights in the Output You generate using the Model. You are accountable for the Output you generate and its subsequent uses. No use of the output can contravene any provision as stated in the License.
+
+Section IV: OTHER PROVISIONS
+
+7. Updates and Runtime Restrictions. To the maximum extent permitted by law, Licensor reserves the right to restrict (remotely or otherwise) usage of the Model in violation of this License, update the Model through electronic means, or modify the Output of the Model based on updates. You shall undertake reasonable efforts to use the latest version of the Model.
+8. Trademarks and related. Nothing in this License permits You to make use of Licensors’ trademarks, trade names, logos or to otherwise suggest endorsement or misrepresent the relationship between the parties; and any rights not expressly granted herein are reserved by the Licensors.
+9. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Model and the Complementary Material (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Model, Derivatives of the Model, and the Complementary Material and assume any risks associated with Your exercise of permissions under this License.
+10. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Model and the Complementary Material (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
+11. Accepting Warranty or Additional Liability. While redistributing the Model, Derivatives of the Model and the Complementary Material thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
+12. If any provision of this License is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein.
+
+END OF TERMS AND CONDITIONS
+
+
+
+
+Attachment A
+
+Use Restrictions
+
+You agree not to use the Model or Derivatives of the Model:
+- In any way that violates any applicable national, federal, state, local or international law or regulation;
+- For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
+- To generate or disseminate verifiably false information and/or content with the purpose of harming others;
+- To generate or disseminate personal identifiable information that can be used to harm an individual;
+- To defame, disparage or otherwise harass others;
+- For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation;
+- For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics;
+- To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
+- For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories;
+- To provide medical advice and medical results interpretation;
+- To generate or disseminate information for the purpose to be used for administration of justice, law enforcement, immigration or asylum processes, such as predicting an individual will commit fraud/crime commitment (e.g. by text profiling, drawing causal relationships between assertions made in documents, indiscriminate and arbitrarily-targeted use).
\ No newline at end of file
diff --git a/paddlenlp/transformers/stable_diffusion_utils/README.md b/paddlenlp/transformers/stable_diffusion_utils/README.md
new file mode 100644
index 000000000000..92fd00ba260d
--- /dev/null
+++ b/paddlenlp/transformers/stable_diffusion_utils/README.md
@@ -0,0 +1,78 @@
+## Stable Diffusion模型
+
+**Stable Diffusion** 是由 **CompVis**、**Stability AI** 和 **LAION** 的研究人员和工程师开源的文图生成模型。他们使用 **LAION-5B** 数据库子集中 512x512 大小的图像进行训练。**LAION-5B** 是目前规模最大、可免费访问的多模态数据集。
+
+
+

+
+
+
+- **论文地址**:[High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)
+- **原版模型**:https://github.com/CompVis/stable-diffusion
+- **pytorch版模型**:https://huggingface.co/CompVis/stable-diffusion
+- **Huggingface团队有关该模型的介绍**:https://huggingface.co/blog/stable_diffusion
+
+
+## Reference
+- https://github.com/huggingface/diffusers
+```bibtex
+ @InProceedings{Rombach_2022_CVPR,
+ author = {Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Bj\"orn},
+ title = {High-Resolution Image Synthesis With Latent Diffusion Models},
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2022},
+ pages = {10684-10695}
+ }
+ ```
+
+## License
+[The CreativeML OpenRAIL M license](https://huggingface.co/spaces/CompVis/stable-diffusion-license) is an [Open RAIL M license](https://www.licenses.ai/blog/2022/8/18/naming-convention-of-responsible-ai-licenses), adapted from the work that [BigScience](https://bigscience.huggingface.co/) and [the RAIL Initiative](https://www.licenses.ai/) are jointly carrying out in the area of responsible AI licensing. See also [the article about the BLOOM Open RAIL license](https://bigscience.huggingface.co/blog/the-bigscience-rail-license) on which this license is based.
+
+
+### Misuse, Malicious Use, and Out-of-Scope Use
+_Note: This section is taken from the [DALLE-MINI model card](https://huggingface.co/dalle-mini/dalle-mini), but applies in the same way to Stable Diffusion v1_.
+
+
+The model should not be used to intentionally create or disseminate images that create hostile or alienating environments for people. This includes generating images that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes.
+
+#### Out-of-Scope Use
+The model was not trained to produce factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model.
+
+#### Misuse and Malicious Use
+Using the model to generate content that is cruel to individuals is a misuse of this model. This includes, but is not limited to:
+
+- Generating demeaning, dehumanizing, or otherwise harmful representations of people or their environments, cultures, religions, etc.
+- Intentionally promoting or propagating discriminatory content or harmful stereotypes.
+- Impersonating individuals without their consent.
+- Sexual content without consent of the people who might see it.
+- Mis- and disinformation
+- Representations of egregious violence and gore
+- Sharing of copyrighted or licensed material in violation of its terms of use.
+- Sharing content that is an alteration of copyrighted or licensed material in violation of its terms of use.
+
+## Limitations and Bias
+
+### Limitations
+
+- The model does not achieve perfect photorealism
+- The model cannot render legible text
+- The model does not perform well on more difficult tasks which involve compositionality, such as rendering an image corresponding to “A red cube on top of a blue sphere”
+- Faces and people in general may not be generated properly.
+- The model was trained mainly with English captions and will not work as well in other languages.
+- The autoencoding part of the model is lossy
+- The model was trained on a large-scale dataset
+ [LAION-5B](https://laion.ai/blog/laion-5b/) which contains adult material
+ and is not fit for product use without additional safety mechanisms and
+ considerations.
+- No additional measures were used to deduplicate the dataset. As a result, we observe some degree of memorization for images that are duplicated in the training data.
+ The training data can be searched at [https://rom1504.github.io/clip-retrieval/](https://rom1504.github.io/clip-retrieval/) to possibly assist in the detection of memorized images.
+
+### Bias
+
+While the capabilities of image generation models are impressive, they can also reinforce or exacerbate social biases.
+Stable Diffusion v1 was trained on subsets of [LAION-2B(en)](https://laion.ai/blog/laion-5b/),
+which consists of images that are primarily limited to English descriptions.
+Texts and images from communities and cultures that use other languages are likely to be insufficiently accounted for.
+This affects the overall output of the model, as white and western cultures are often set as the default. Further, the
+ability of the model to generate content with non-English prompts is significantly worse than with English-language prompts.
From b88fc4ea5bf07e44d5a5eaf8beb174a900135568 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=B4=E9=AB=98=E5=8D=87?=
Date: Tue, 6 Sep 2022 20:02:02 +0800
Subject: [PATCH 013/159] Update setup.py and README Examples (#3208)
---
pipelines/README.md | 22 +++++++++++++------
.../Install_windows.md | 5 +++--
.../frequently-asked-question/README.md | 6 +++--
.../question-answering/Install_windows.md | 4 +++-
.../examples/question-answering/README.md | 6 +++--
.../semantic-search/Install_windows.md | 4 +++-
.../examples/semantic-search/Neural_Search.md | 6 +++--
pipelines/examples/semantic-search/README.md | 6 +++--
pipelines/setup.py | 17 +++++++++-----
9 files changed, 52 insertions(+), 24 deletions(-)
diff --git a/pipelines/README.md b/pipelines/README.md
index 02a97f30ada1..1125e89070a0 100644
--- a/pipelines/README.md
+++ b/pipelines/README.md
@@ -77,21 +77,29 @@ python setup.py install
from pipelines.document_stores import FAISSDocumentStore
from pipelines.nodes import DensePassageRetriever, ErnieRanker
-# Step1: Initialize a FaissDocumentStore to store texts of documents
+# Step1: Prepare the data
+documents = [
+ {'content': '金钱龟不分品种,只有生长地之分,在我国主要分布于广东、广西、福建、海南、香港、澳门等地,在国外主要分布于越南等亚热带国家和地区。',
+ 'meta': {'name': 'test1.txt'}},
+ {'content': '衡量酒水的价格的因素很多的,酒水的血统(也就是那里产的,采用什么工艺等);存储的时间等等,酒水是一件很难标准化得商品,只要你敢要价,有买的那就值那个钱。',
+ 'meta': {'name': 'test2.txt'}}
+]
+
+# Step2: Initialize a FaissDocumentStore to store texts of documents
document_store = FAISSDocumentStore(embedding_dim=768)
document_store.write_documents(documents)
-# Step2: Initialize a DenseRetriever and build ANN index
-retriever = DensePassageRetriever(document_store=document_store, query_embedding_model="rocketqa-zh-dureader-query-encoder")
+# Step3: Initialize a DenseRetriever and build ANN index
+retriever = DensePassageRetriever(document_store=document_store, query_embedding_model="rocketqa-zh-base-query-encoder",embed_title=False)
document_store.update_embeddings(retriever)
-# Step3: Initialize a Ranker
-ranker = ErnieRanker(model_name_or_path="rocketqa-zh-dureader-cross-encoder")
+# Step4: Initialize a Ranker
+ranker = ErnieRanker(model_name_or_path="rocketqa-base-cross-encoder")
-# Step4: Initialize a SemanticSearchPipeline and ask questions
+# Step5: Initialize a SemanticSearchPipeline and ask questions
from pipelines import SemanticSearchPipeline
pipeline = SemanticSearchPipeline(retriever, ranker)
-prediction = pipeline.run(query="亚马逊河流的相关介绍")
+prediction = pipeline.run(query="衡量酒水的价格的因素有哪些?")
```
### 快速部署
diff --git a/pipelines/examples/frequently-asked-question/Install_windows.md b/pipelines/examples/frequently-asked-question/Install_windows.md
index 30236378799f..bec6f73f4c76 100644
--- a/pipelines/examples/frequently-asked-question/Install_windows.md
+++ b/pipelines/examples/frequently-asked-question/Install_windows.md
@@ -8,11 +8,12 @@
a. 依赖安装:
我们预置了基于[ 8000 多条保险行业问答数据](https://github.com/SophonPlus/ChineseNlpCorpus/blob/master/datasets/baoxianzhidao/intro.ipynb)搭建保险FAQ智能问答的代码示例,您可以通过如下命令快速体验智能问答的效果
```bash
-
git clone https://github.com/tvst/htbuilder.git
cd htbuilder/
python setup install
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
diff --git a/pipelines/examples/frequently-asked-question/README.md b/pipelines/examples/frequently-asked-question/README.md
index f3d5e793253f..e7e8d560d0bd 100644
--- a/pipelines/examples/frequently-asked-question/README.md
+++ b/pipelines/examples/frequently-asked-question/README.md
@@ -41,9 +41,11 @@ b. 硬件环境:
c. 依赖安装:
首先需要安装PaddlePaddle,PaddlePaddle的安装请参考文档[官方安装文档](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html),然后安装下面的依赖:
```bash
-pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
```
【注意】以下的所有的流程都只需要在`pipelines`根目录下进行,不需要跳转目录
diff --git a/pipelines/examples/question-answering/Install_windows.md b/pipelines/examples/question-answering/Install_windows.md
index d27dc4d33735..5e2cec507d68 100644
--- a/pipelines/examples/question-answering/Install_windows.md
+++ b/pipelines/examples/question-answering/Install_windows.md
@@ -12,7 +12,9 @@ a. 依赖安装:
git clone https://github.com/tvst/htbuilder.git
cd htbuilder/
python setup install
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
diff --git a/pipelines/examples/question-answering/README.md b/pipelines/examples/question-answering/README.md
index 06120e173189..ddde9f567586 100644
--- a/pipelines/examples/question-answering/README.md
+++ b/pipelines/examples/question-answering/README.md
@@ -47,9 +47,11 @@ b. 硬件环境:
c. 依赖安装:
首先需要安装PaddlePaddle,PaddlePaddle的安装请参考文档[官方安装文档](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html),然后安装下面的依赖:
```bash
-pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
```
【注意】以下的所有的流程都只需要在`pipelines`根目录下进行,不需要跳转目录
diff --git a/pipelines/examples/semantic-search/Install_windows.md b/pipelines/examples/semantic-search/Install_windows.md
index 183eaa7ccdc5..51fd6eb94bd8 100644
--- a/pipelines/examples/semantic-search/Install_windows.md
+++ b/pipelines/examples/semantic-search/Install_windows.md
@@ -11,7 +11,9 @@ a. 依赖安装:
git clone https://github.com/tvst/htbuilder.git
cd htbuilder/
python setup install
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
diff --git a/pipelines/examples/semantic-search/Neural_Search.md b/pipelines/examples/semantic-search/Neural_Search.md
index ac68a0c47cca..73d30bb024be 100644
--- a/pipelines/examples/semantic-search/Neural_Search.md
+++ b/pipelines/examples/semantic-search/Neural_Search.md
@@ -23,9 +23,11 @@ b. 硬件环境:
c. 依赖安装:
首先需要安装PaddlePaddle,PaddlePaddle的安装请参考文档[官方安装文档](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html),然后安装下面的依赖:
```bash
-pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
```
【注意】以下的所有的流程都只需要在`pipelines`根目录下进行,不需要跳转目录
diff --git a/pipelines/examples/semantic-search/README.md b/pipelines/examples/semantic-search/README.md
index b556e45a715f..6bffbd1376ea 100644
--- a/pipelines/examples/semantic-search/README.md
+++ b/pipelines/examples/semantic-search/README.md
@@ -52,9 +52,11 @@ b. 硬件环境:
c. 依赖安装:
首先需要安装PaddlePaddle,PaddlePaddle的安装请参考文档[官方安装文档](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html),然后安装下面的依赖:
```bash
-pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
-# 1) 安装 pipelines package
+# pip 一键安装
+pip install --upgrade paddle-pipelines -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 或者通过源码安装最新版本
cd ${HOME}/PaddleNLP/pipelines/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
```
【注意】以下的所有的流程都只需要在`pipelines`根目录下进行,不需要跳转目录
diff --git a/pipelines/setup.py b/pipelines/setup.py
index 840e81f1ba38..1e17f871dfe6 100644
--- a/pipelines/setup.py
+++ b/pipelines/setup.py
@@ -14,21 +14,28 @@
import os
import setuptools
import sys
+import io
import pipelines
-import platform
-long_description = "PIPELINES: An End to End Natural Language Proceessing Development Kit Based on ERNIE"
+description = "Paddle-Pipelines: An End to End Natural Language Proceessing Development Kit Based on PaddleNLP"
with open("requirements.txt") as fin:
REQUIRED_PACKAGES = fin.read()
+
+def read(*names, **kwargs):
+ with io.open(os.path.join(os.path.dirname(__file__), *names),
+ encoding=kwargs.get("encoding", "utf8")) as fp:
+ return fp.read()
+
+
setuptools.setup(name="paddle-pipelines",
version=pipelines.__version__,
author="PaddlePaddle Speech and Language Team",
author_email="paddlenlp@baidu.com",
- description=long_description,
- long_description=long_description,
- long_description_content_type="text/plain",
+ description=description,
+ long_description=read("README.md"),
+ long_description_content_type="text/markdown",
url="https://github.com/PaddlePaddle/PaddleNLP",
packages=setuptools.find_packages(
where='.',
From 19e03c4e71981854dc38ccaa9b17fc2ba7db9a94 Mon Sep 17 00:00:00 2001
From: limingshu <61349199+JamesLim-sy@users.noreply.github.com>
Date: Tue, 6 Sep 2022 20:46:14 +0800
Subject: [PATCH 014/159] Move token_num fetch out of train cycle (#3089)
---
examples/machine_translation/transformer/train.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/examples/machine_translation/transformer/train.py b/examples/machine_translation/transformer/train.py
index 3141ee534d97..d5c290554044 100644
--- a/examples/machine_translation/transformer/train.py
+++ b/examples/machine_translation/transformer/train.py
@@ -201,6 +201,7 @@ def do_train(args):
(args.trg_vocab_size - 1) + 1e-20))
step_idx = 0
+ tokens_sum = 0
# For benchmark
reader_cost_avg = AverageStatistical()
@@ -225,7 +226,6 @@ def do_train(args):
logits = transformer(src_word=src_word, trg_word=trg_word)
sum_cost, avg_cost, token_num = criterion(logits, lbl_word)
- tokens_per_cards = token_num.numpy()
scaled = scaler.scale(avg_cost) # scale the loss
scaled.backward() # do backward
@@ -238,7 +238,6 @@ def do_train(args):
else:
logits = transformer(src_word=src_word, trg_word=trg_word)
sum_cost, avg_cost, token_num = criterion(logits, lbl_word)
- tokens_per_cards = token_num.numpy()
avg_cost.backward()
@@ -248,7 +247,9 @@ def do_train(args):
train_batch_cost = time.time() - batch_start
reader_cost_avg.record(train_reader_cost)
batch_cost_avg.record(train_batch_cost)
- batch_ips_avg.record(train_batch_cost, tokens_per_cards)
+ batch_ips_avg.record(train_batch_cost, 0)
+
+ tokens_sum += token_num
# Profile for model benchmark
if args.profiler_options is not None:
@@ -258,6 +259,9 @@ def do_train(args):
if step_idx % args.print_step == 0 and (args.benchmark
or rank == 0):
total_avg_cost = avg_cost.numpy()
+ tokens_sum_val = tokens_sum.numpy()
+ batch_ips_avg.record(0, tokens_sum_val)
+ tokens_sum = 0
if step_idx == 0:
logger.info(
From b7437055f07235d37301c846dc2843e63606a12b Mon Sep 17 00:00:00 2001
From: chenxiaozeng
Date: Tue, 6 Sep 2022 21:07:51 +0800
Subject: [PATCH 015/159] Add finance course (#3207)
* add finance course group code
Co-authored-by: tianxin
---
README_cn.md | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index 35ac23e052b9..004637fda899 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -30,13 +30,34 @@
**PaddleNLP**是一款**简单易用**且**功能强大**的自然语言处理开发库。聚合业界**优质预训练模型**并提供**开箱即用**的开发体验,覆盖NLP多场景的模型库搭配**产业实践范例**可满足开发者**灵活定制**的需求。
## News 📢
-* 📝 2022.8.1 **PaddleNLP v2.3.5**发布!新增[**CodeGen**](./examples/code_generation/codegen) 对话式程序生成大模型,支持Taskflow一键调用;通用信息抽取技术英文模型[**UIE-en**](./model_zoo/uie)正式发布,支持英文各项信息抽取工作; [**RGL**](./examples/few_shot/RGL)是百度自研的 Prompt-based tuning 小样本学习算法,论文被 Findings of NAACL 2022 接收,欢迎大家使用!
-* 🍭 2022.6.29 **PaddleNLP v2.3.4**发布 全系列中文预训练小模型发布,快速提升预训练模型部署效率,通用信息抽取技术[**UIE Tiny**](./model_zoo/uie) 系列模型全新升级,支持速度更快效果更好的UIE小模型。
-* 🔥 2022.5.16 [**PaddleNLP v2.3**](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.0)全新发布!🎉
- * 💎 发布通用信息抽取技术[**UIE**](./model_zoo/uie),单模型支持实体识别、关系和事件抽取、情感分析等多种开放域信息抽取任务,不限领域和抽取目标,支持**一键抽取**与全流程**小样本**高效定制开发。
- * 😊 发布文心大模型[**ERNIE 3.0**](./model_zoo/ernie-3.0)轻量级模型,在[CLUE](https://www.cluebenchmarks.com/)上实现同规模结构效果最佳,并提供**🗜️无损压缩**和**⚙️全场景部署**方案。
- * 🏥 发布中文医疗领域预训练模型[**ERNIE-Health**](./model_zoo/ernie-health),[CBLUE](https://github.com/CBLUEbenchmark/CBLUE)中文医疗信息处理评测冠军模型。
- * 💬 发布大规模百亿开放域对话预训练模型[**PLATO-XL**](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/plato-xl) ,配合⚡**FasterGeneration**⚡快速实现高性能GPU并行推理加速。
+* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
+
+ * 围绕深度学习技术在金融行业的产业实践与发展趋势,邀请行业内专家分享产业实践。探讨科技金融的未来发展;
+
+ * PaddleNLP配套课程发布产业实践范例:基于UIE的金融文件信息抽取;基于Pipelines的FAQ问答系统;
+
+ * **9月6日起每周二、周四19点直播**,扫码免费加入微信群获取直播链接,与行业专家深度交流:
+
+
+

+
+
+* 📝 **2022.8.1 发布[PaddleNLP v2.3.5](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.5)**
+
+ * 新增 [**CodeGen**](./examples/code_generation/codegen) 对话式程序生成大模型,支持 Taskflow 一键调用,自动补全、生成代码;
+ * UIE 英文模型 [**UIE-en**](./model_zoo/uie) 正式发布,支持英文文本信息抽取;
+ * 集成 Findings of NAACL 2022 前沿 Prompt-based tuning 小样本学习算法 [**RGL**](./examples/few_shot/RGL)。
+
+* 🍭 **2022.6.29 发布 [PaddleNLP v2.3.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.4)**
+
+ * [**ERNIE Tiny**](./model_zoo/ernie-3.0) 全系列中文预训练小模型发布,又准又快;
+ * 通用信息抽取技术[**UIE Tiny**](./model_zoo/uie) 系列模型全新升级,支持速度更快效果更好的UIE小模型。
+
+* 🔥 **2022.5.16 发布 [PaddleNLP v2.3](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.0)**
+ * 💎 发布通用信息抽取技术 [**UIE**](./model_zoo/uie),单模型支持实体识别、关系和事件抽取、情感分析等多种开放域信息抽取任务,不限领域和抽取目标,支持**零样本抽取**与全流程**小样本**高效定制开发;
+ * 😊 发布文心大模型 [**ERNIE 3.0**](./model_zoo/ernie-3.0) 轻量级模型,在 [CLUE ](https://www.cluebenchmarks.com/)上实现同规模结构效果最佳,并提供**🗜️无损压缩**和**⚙️全场景部署**方案;
+ * 🏥 发布中文医疗领域预训练模型 [**ERNIE-Health**](./model_zoo/ernie-health),[CBLUE](https://github.com/CBLUEbenchmark/CBLUE) 中文医疗信息处理评测冠军模型;
+ * 💬 发布大规模百亿开放域对话预训练模型 [**PLATO-XL**](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/plato-xl) ,配合⚡**FasterGeneration**⚡快速实现高性能GPU并行推理加速。
## 社区交流
From 4f10afa9147ca343add50d2d11b655615c6d79a6 Mon Sep 17 00:00:00 2001
From: chenxiaozeng
Date: Tue, 6 Sep 2022 21:31:53 +0800
Subject: [PATCH 016/159] Update README_cn.md (#3212)
add v2.4 features description.
---
README_cn.md | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/README_cn.md b/README_cn.md
index 004637fda899..4e718c330ea4 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -30,6 +30,14 @@
**PaddleNLP**是一款**简单易用**且**功能强大**的自然语言处理开发库。聚合业界**优质预训练模型**并提供**开箱即用**的开发体验,覆盖NLP多场景的模型库搭配**产业实践范例**可满足开发者**灵活定制**的需求。
## News 📢
+
+* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
+ * 💎 **[NLP 流水线系统 Pipelines](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
+ * 😊 新增文本分类**多分类、多标签、层次分类**的全流程方案,基于多样的数据增强策略、前沿的 TrustAI 可信计算等,解决分类任务调优难题。
+ * 🍭 新增**文图生成、代码生成、文本摘要**功能,支持 Taskflow 一键调用,打通 FasterGeneration 高性能推理;
+ * 💪 新增多语言模型 **UIE-M**,支持中英文混合抽取;新增基于封闭域模型 GlobalPointer 的 **UIE 数据蒸馏**方案,推理速度提升100倍以上!
+
+
* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
* 围绕深度学习技术在金融行业的产业实践与发展趋势,邀请行业内专家分享产业实践。探讨科技金融的未来发展;
From c6abe76ab9e38c8ed16a9129ebc3f97ab4626792 Mon Sep 17 00:00:00 2001
From: bruce0210 <100854336+bruce0210@users.noreply.github.com>
Date: Wed, 7 Sep 2022 07:03:28 +0800
Subject: [PATCH 017/159] Update README.md (#3209)
Improve and fix the text content of case 1.
Co-authored-by: tianxin
---
pipelines/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pipelines/README.md b/pipelines/README.md
index 1125e89070a0..4c12af295dad 100644
--- a/pipelines/README.md
+++ b/pipelines/README.md
@@ -178,7 +178,7 @@ GPU 镜像下载大概耗时 15 分钟左右,容器启动成功后,等待1
#### 查询精度大幅提升
-市面已有的工程规范查询系统解决方案一直延续着传统关键字词匹配的方式,依赖用户对对查询结果进行自行排序、筛选,甚至要再次人工查阅工程规范文件后,才能最终确认是否为想要查询的规范条款。传统规范查询系统至少需要进行 3~5 次查询才能找到用户想要的规范条款,而寻规系统是基于强大预训练模型构建起来的语义检索系统,针对 80% 的规范查询需求仅 **1 次查询** 就能精确命中查询意图,并返回查询条款的结果!
+市面上现有的工程规范查询系统解决方案一直沿用传统的关键字词匹配查询方式,依赖用户自行对查询结果进行排序、筛选、鉴别,有时甚至还需要工程设计人员再耗费一定的时间和精力人工查阅工程规范文件,才能最终确认是否为想要查询的规范条款。传统规范查询系统至少需要进行 3~5 次查询才能找到用户想要的规范条款,而寻规系统是基于强大预训练模型构建起来的语义检索系统,针对 80% 的规范查询需求仅 **1 次查询** 就能精确命中查询意图,并返回真正符合工程设计人员查询意图的结果!
## :mortar_board: Tutorials
- Tutorial 1 - 语义检索 Pipeline: [AIStudio notebook](https://aistudio.baidu.com/aistudio/projectdetail/4442670) | [Python](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/pipelines/examples/semantic-search/semantic_search_example.py)
From 0ed8de72bc92a6504aa411250c4cb6d148d363b1 Mon Sep 17 00:00:00 2001
From: wuhuachaocoding <77733235+wuhuachaocoding@users.noreply.github.com>
Date: Wed, 7 Sep 2022 11:22:24 +0800
Subject: [PATCH 018/159] [Recompute] Update recompute for hybrid parallel
interface. (#3211)
Co-authored-by: Zhong Hui
---
examples/language_model/gpt-3/dygraph/modeling.py | 8 +++++++-
examples/language_model/moe/dygraph/modeling.py | 8 ++++++--
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/examples/language_model/gpt-3/dygraph/modeling.py b/examples/language_model/gpt-3/dygraph/modeling.py
index 112b0b63b058..28fadcefe812 100644
--- a/examples/language_model/gpt-3/dygraph/modeling.py
+++ b/examples/language_model/gpt-3/dygraph/modeling.py
@@ -1178,4 +1178,10 @@ def _logits_helper(embedding, output):
loss_fn=GPTPretrainingCriterionPipe(),
topology=topology,
seg_method="layer:TransformerDecoderLayer",
- recompute_interval=1 if use_recompute else 0)
+ recompute_interval=1 if use_recompute else 0,
+ recompute_ctx={
+ "mp_group":
+ fleet.fleet._hcg.get_model_parallel_group(),
+ "offload": False,
+ "partition": False
+ })
diff --git a/examples/language_model/moe/dygraph/modeling.py b/examples/language_model/moe/dygraph/modeling.py
index 66d9743e328c..64c1f220ca1d 100644
--- a/examples/language_model/moe/dygraph/modeling.py
+++ b/examples/language_model/moe/dygraph/modeling.py
@@ -1165,5 +1165,9 @@ def _logits_helper(embedding, output):
topology=topology,
seg_method="layer:TransformerDecoderLayer",
recompute_interval=recompute_interval,
- recompute_partition=False,
- recompute_offload=False)
+ recompute_ctx={
+ "mp_group":
+ fleet.fleet._hcg.get_model_parallel_group(),
+ "offload": False,
+ "partition": False
+ })
From a6ab9c896b03250a98ee08b89aafc7de6d6371ef Mon Sep 17 00:00:00 2001
From: wawltor
Date: Wed, 7 Sep 2022 12:15:53 +0800
Subject: [PATCH 019/159] Update README_cn.md
---
README_cn.md | 19 ++++---------------
1 file changed, 4 insertions(+), 15 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index 4e718c330ea4..c585f49841d0 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -32,10 +32,10 @@
## News 📢
* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
- * 💎 **[NLP 流水线系统 Pipelines](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
- * 😊 新增文本分类**多分类、多标签、层次分类**的全流程方案,基于多样的数据增强策略、前沿的 TrustAI 可信计算等,解决分类任务调优难题。
- * 🍭 新增**文图生成、代码生成、文本摘要**功能,支持 Taskflow 一键调用,打通 FasterGeneration 高性能推理;
- * 💪 新增多语言模型 **UIE-M**,支持中英文混合抽取;新增基于封闭域模型 GlobalPointer 的 **UIE 数据蒸馏**方案,推理速度提升100倍以上!
+ * 💎 NLP工具:**[NLP 流水线系统 Pipelines](./pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
+ * 😊 产业应用:新增[文本分类应用](./applications/text_classification)**多分类、多标签、层次分类**的全流程方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,推理速度提升100倍以上;
+ * 🍭 AIGC内容生成: 新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL-E-mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩应用等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
+ * 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅提升模型部署效率;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot), 支持PET、P-Tuning、RGL等经典模型的快速实现;
* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
@@ -50,17 +50,6 @@
-* 📝 **2022.8.1 发布[PaddleNLP v2.3.5](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.5)**
-
- * 新增 [**CodeGen**](./examples/code_generation/codegen) 对话式程序生成大模型,支持 Taskflow 一键调用,自动补全、生成代码;
- * UIE 英文模型 [**UIE-en**](./model_zoo/uie) 正式发布,支持英文文本信息抽取;
- * 集成 Findings of NAACL 2022 前沿 Prompt-based tuning 小样本学习算法 [**RGL**](./examples/few_shot/RGL)。
-
-* 🍭 **2022.6.29 发布 [PaddleNLP v2.3.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.4)**
-
- * [**ERNIE Tiny**](./model_zoo/ernie-3.0) 全系列中文预训练小模型发布,又准又快;
- * 通用信息抽取技术[**UIE Tiny**](./model_zoo/uie) 系列模型全新升级,支持速度更快效果更好的UIE小模型。
-
* 🔥 **2022.5.16 发布 [PaddleNLP v2.3](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.3.0)**
* 💎 发布通用信息抽取技术 [**UIE**](./model_zoo/uie),单模型支持实体识别、关系和事件抽取、情感分析等多种开放域信息抽取任务,不限领域和抽取目标,支持**零样本抽取**与全流程**小样本**高效定制开发;
* 😊 发布文心大模型 [**ERNIE 3.0**](./model_zoo/ernie-3.0) 轻量级模型,在 [CLUE ](https://www.cluebenchmarks.com/)上实现同规模结构效果最佳,并提供**🗜️无损压缩**和**⚙️全场景部署**方案;
From 60a475e157d2e9617a83b7aea3c8221dc1a880bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com>
Date: Wed, 7 Sep 2022 13:32:51 +0800
Subject: [PATCH 020/159] [ModelingOutput]update roformer unittest (#3159)
* add roformer unittest
* add roformer unittest
* update test_modeling
* use relative import
* reduce model config to accelerate testing
* remove input_embedding from pretrained model
* revert slow tag
* update local branch
* update get_vocab method
* update get_vocab method
* update test_chinese method
* change absolute import
* update unittest
* update chinese test case
* add roformer more output testing
Co-authored-by: Guo Sheng
Co-authored-by: liu zhengxi <380185688@qq.com>
---
paddlenlp/transformers/roformer/modeling.py | 6 +
tests/transformers/roformer/test_modeling.py | 205 ++++++++++++-------
2 files changed, 133 insertions(+), 78 deletions(-)
diff --git a/paddlenlp/transformers/roformer/modeling.py b/paddlenlp/transformers/roformer/modeling.py
index d4aa86629372..68d0b9e8522c 100644
--- a/paddlenlp/transformers/roformer/modeling.py
+++ b/paddlenlp/transformers/roformer/modeling.py
@@ -709,6 +709,12 @@ def get_input_embeddings(self) -> nn.Embedding:
def set_input_embeddings(self, embedding: nn.Embedding):
self.embeddings.word_embeddings = embedding
+ def get_input_embeddings(self) -> nn.Embedding:
+ return self.embeddings.word_embeddings
+
+ def set_input_embeddings(self, embedding: nn.Embedding):
+ self.embeddings.word_embeddings = embedding
+
class RoFormerForQuestionAnswering(RoFormerPretrainedModel):
r"""
diff --git a/tests/transformers/roformer/test_modeling.py b/tests/transformers/roformer/test_modeling.py
index 23a46ddc3c93..bc9c7e3945e5 100644
--- a/tests/transformers/roformer/test_modeling.py
+++ b/tests/transformers/roformer/test_modeling.py
@@ -16,14 +16,17 @@
import unittest
from typing import Optional, Tuple
from dataclasses import dataclass, fields, Field
+from parameterized import parameterized_class
import paddle
+from paddle import Tensor
-from paddlenlp.transformers import (
- RoFormerModel, RoFormerPretrainedModel, RoFormerForPretraining,
- RoFormerForSequenceClassification, RoFormerForTokenClassification,
- RoFormerForQuestionAnswering, RoFormerForMultipleChoice,
- RoFormerForMaskedLM)
+from paddlenlp.transformers import (RoFormerModel, RoFormerPretrainedModel,
+ RoFormerForSequenceClassification,
+ RoFormerForTokenClassification,
+ RoFormerForQuestionAnswering,
+ RoFormerForMultipleChoice,
+ RoFormerForMaskedLM)
from ..test_modeling_common import ids_tensor, floats_tensor, random_attention_mask, ModelTesterMixin
from ...testing_utils import slow
@@ -67,6 +70,7 @@ class RoFormerModelTestConfig(RoFormerModelTestModelConfig):
is_training: bool = False
use_input_mask: bool = False
use_token_type_ids: bool = True
+ type_sequence_label_size = 3
# used for sequence classification
num_classes: int = 3
@@ -102,27 +106,43 @@ def prepare_config_and_inputs(self):
if self.config.use_token_type_ids:
token_type_ids = ids_tensor([config.batch_size, config.seq_length],
config.type_vocab_size)
+ sequence_labels = None
+ token_labels = None
+ choice_labels = None
+
+ if self.parent.use_labels:
+ sequence_labels = ids_tensor([self.batch_size],
+ self.type_sequence_label_size)
+ token_labels = ids_tensor([self.batch_size, self.seq_length],
+ self.num_classes)
+ choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = self.get_config()
- return config, input_ids, token_type_ids, input_mask
+ return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self) -> dict:
return self.config.model_kwargs
- def create_and_check_model(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ def __getattr__(self, key: str):
+ if not hasattr(self.config, key):
+ raise AttributeError(f'attribute <{key}> not exist')
+ return getattr(self.config, key)
+
+ def create_and_check_model(self, config, input_ids: Tensor,
+ token_type_ids: Tensor, input_mask: Tensor,
+ sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = RoFormerModel(**config)
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- result = model(input_ids, token_type_ids=token_type_ids)
- result = model(input_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict)
+ result = model(input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict)
+ result = model(input_ids, return_dict=self.parent.return_dict)
+
self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.hidden_size
@@ -130,13 +150,12 @@ def create_and_check_model(
self.parent.assertEqual(
result[1].shape, [self.config.batch_size, self.config.hidden_size])
- def create_and_check_for_multiple_choice(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ def create_and_check_for_multiple_choice(self, config, input_ids: Tensor,
+ token_type_ids: Tensor,
+ input_mask: Tensor,
+ sequence_labels: Tensor,
+ token_labels: Tensor,
+ choice_labels: Tensor):
model = RoFormerForMultipleChoice(RoFormerModel(**config),
num_choices=self.config.num_choices)
model.eval()
@@ -151,89 +170,113 @@ def create_and_check_for_multiple_choice(
input_mask = input_mask.unsqueeze(1).expand(
[-1, self.config.num_choices, -1])
- result = model(
- multiple_choice_inputs_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
- self.parent.assertEqual(
- result.shape, [self.config.batch_size, self.config.num_choices])
+ result = model(multiple_choice_inputs_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ labels=choice_labels,
+ return_dict=self.parent.return_dict)
+
+ if paddle.is_tensor(result):
+ result = [result]
+ elif choice_labels is not None:
+ result = result[1:]
- def create_and_check_for_question_answering(self, config, input_ids,
- token_type_ids, input_mask):
+ self.parent.assertEqual(
+ result[0].shape, [self.config.batch_size, self.config.num_choices])
+
+ def create_and_check_for_question_answering(self, config, input_ids: Tensor,
+ token_type_ids: Tensor,
+ input_mask: Tensor,
+ sequence_labels: Tensor,
+ token_labels: Tensor,
+ choice_labels: Tensor):
model = RoFormerForQuestionAnswering(RoFormerModel(**config))
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ start_positions=sequence_labels,
+ end_positions=sequence_labels,
+ return_dict=self.parent.return_dict)
+
+ if paddle.is_tensor(result):
+ result = [result]
+ elif choice_labels is not None:
+ result = result[1:]
+
self.parent.assertEqual(
result[0].shape, [self.config.batch_size, self.config.seq_length])
self.parent.assertEqual(
result[1].shape, [self.config.batch_size, self.config.seq_length])
def create_and_check_for_token_classification(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ self, config, input_ids: Tensor, token_type_ids: Tensor,
+ input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = RoFormerForTokenClassification(RoFormerModel(**config),
num_classes=self.num_classes)
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- self.parent.assertEqual(result.shape, [
+ token_type_ids=token_type_ids,
+ labels=token_labels,
+ return_dict=self.parent.return_dict)
+ if paddle.is_tensor(result):
+ result = [result]
+ elif choice_labels is not None:
+ result = result[1:]
+
+ self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.num_classes
])
- def create_and_check_for_masked_lm(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ def create_and_check_for_masked_lm(self, config, input_ids: Tensor,
+ token_type_ids: Tensor,
+ input_mask: Tensor,
+ sequence_labels: Tensor,
+ token_labels: Tensor,
+ choice_labels: Tensor):
model = RoFormerForMaskedLM(RoFormerModel(**config))
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- self.parent.assertEqual(result.shape, [
+ token_type_ids=token_type_ids,
+ labels=token_labels,
+ return_dict=self.parent.return_dict)
+ if paddle.is_tensor(result):
+ result = [result]
+ elif choice_labels is not None:
+ result = result[1:]
+
+ self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.vocab_size
])
def create_and_check_for_sequence_classification(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ self, config, input_ids: Tensor, token_type_ids: Tensor,
+ input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = RoFormerForSequenceClassification(
RoFormerModel(**config), num_classes=self.config.num_classes)
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ labels=sequence_labels,
+ return_dict=self.parent.return_dict)
+ if paddle.is_tensor(result):
+ result = [result]
+ elif choice_labels is not None:
+ result = result[1:]
self.parent.assertEqual(
- result.shape, [self.config.batch_size, self.config.num_classes])
+ result[0].shape, [self.config.batch_size, self.config.num_classes])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
- (
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ) = config_and_inputs
+ (config, input_ids, token_type_ids, input_mask, _, _,
+ _) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
@@ -242,15 +285,21 @@ def prepare_config_and_inputs_for_common(self):
return config, inputs_dict
+@parameterized_class(("return_dict", "use_labels"), [
+ [False, False],
+ [False, True],
+ [True, False],
+ [True, True],
+])
class RoFormerModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = RoFormerModel
+ use_labels = False
+ return_dict = False
- all_model_classes = (
- RoFormerModel,
- RoFormerForMultipleChoice,
- RoFormerForPretraining,
- RoFormerForSequenceClassification,
- )
+ all_model_classes = (RoFormerModel, RoFormerForSequenceClassification,
+ RoFormerForTokenClassification,
+ RoFormerForQuestionAnswering,
+ RoFormerForMultipleChoice, RoFormerForMaskedLM)
def setUp(self):
self.model_tester = RoFormerModelTester(self)
From 9b51a640692e33585bfcd6cdcbd2a90fa10b731f Mon Sep 17 00:00:00 2001
From: wawltor
Date: Wed, 7 Sep 2022 14:30:04 +0800
Subject: [PATCH 021/159] Update README_cn.md
---
README_cn.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index c585f49841d0..ffcd879f125c 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -32,10 +32,10 @@
## News 📢
* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
- * 💎 NLP工具:**[NLP 流水线系统 Pipelines](./pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
- * 😊 产业应用:新增[文本分类应用](./applications/text_classification)**多分类、多标签、层次分类**的全流程方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,推理速度提升100倍以上;
- * 🍭 AIGC内容生成: 新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL-E-mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩应用等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
- * 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅提升模型部署效率;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot), 支持PET、P-Tuning、RGL等经典模型的快速实现;
+ * 💎 NLP工具: **[NLP 流水线系统 Pipelines](./pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
+ * 😊 产业应用: 新增[文本分类应用](./applications/text_classification)**多分类、多标签、层次分类**的全流程方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,推理速度提升100倍以上;
+ * 🍭 AIGC内容生成: 新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL-E-mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩模型等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
+ * 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅提升模型部署效率;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
From 12c605af45f0699a0aad2dc4fa1fe0a88fa0b955 Mon Sep 17 00:00:00 2001
From: kztao
Date: Wed, 7 Sep 2022 14:44:14 +0800
Subject: [PATCH 022/159] Fix windows dtype bug of neural search (#3182)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Fix windows dtype bug of neural search
* Fix windows dtype bug of neural search
Co-authored-by: 吴高升
---
applications/neural_search/recall/simcse/inference.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/applications/neural_search/recall/simcse/inference.py b/applications/neural_search/recall/simcse/inference.py
index 0e11c6ad65e4..097c348c736f 100644
--- a/applications/neural_search/recall/simcse/inference.py
+++ b/applications/neural_search/recall/simcse/inference.py
@@ -66,8 +66,10 @@ def convert_example(example, tokenizer, max_seq_length=512, do_evalute=False):
max_seq_length=max_seq_length)
batchify_fn = lambda samples, fn=Tuple(
- Pad(axis=0, pad_val=tokenizer.pad_token_id), # text_input
- Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # text_segment
+ Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"
+ ), # text_input
+ Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"
+ ), # text_segment
): [data for data in fn(samples)]
pretrained_model = AutoModel.from_pretrained(model_name_or_path)
From 9ab5a91df668b675c6c8dd3946eca013e527e450 Mon Sep 17 00:00:00 2001
From: wawltor
Date: Wed, 7 Sep 2022 14:49:13 +0800
Subject: [PATCH 023/159] Update README_cn.md
---
README_cn.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README_cn.md b/README_cn.md
index ffcd879f125c..7a1d05c31582 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -33,7 +33,7 @@
* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
* 💎 NLP工具: **[NLP 流水线系统 Pipelines](./pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
- * 😊 产业应用: 新增[文本分类应用](./applications/text_classification)**多分类、多标签、层次分类**的全流程方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,推理速度提升100倍以上;
+ * 😊 产业应用: 新增[文本分类应用](./applications/text_classification)**多分类、多标签、层次分类**的全流程方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破UIE推理瓶颈,推理速度提升100倍以上;
* 🍭 AIGC内容生成: 新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL-E-mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩模型等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
* 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅提升模型部署效率;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
From ead43829e3dc2ab2ae9efdc06bbed8c475ff145a Mon Sep 17 00:00:00 2001
From: wawltor
Date: Wed, 7 Sep 2022 15:05:48 +0800
Subject: [PATCH 024/159] Update README_cn.md
---
README_cn.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index 7a1d05c31582..3ae456e29138 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -33,9 +33,9 @@
* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
* 💎 NLP工具: **[NLP 流水线系统 Pipelines](./pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
- * 😊 产业应用: 新增[文本分类应用](./applications/text_classification)**多分类、多标签、层次分类**的全流程方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破UIE推理瓶颈,推理速度提升100倍以上;
+ * 😊 产业应用: 新增[文本分类](./applications/text_classification)**多分类、多标签、层次分类**的全流程应用方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破UIE推理瓶颈,推理速度提升100倍以上;
* 🍭 AIGC内容生成: 新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL-E-mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩模型等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
- * 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅提升模型部署效率;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
+ * 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅降低模型压缩技术使用成本;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
From 824982dadee18534baa44210283b49f4245022a7 Mon Sep 17 00:00:00 2001
From: wawltor
Date: Wed, 7 Sep 2022 15:18:12 +0800
Subject: [PATCH 025/159] Update README_cn.md
---
README_cn.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index 3ae456e29138..f29184f96c86 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -32,10 +32,10 @@
## News 📢
* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
- * 💎 NLP工具: **[NLP 流水线系统 Pipelines](./pipelines)** 发布,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
- * 😊 产业应用: 新增[文本分类](./applications/text_classification)**多分类、多标签、层次分类**的全流程应用方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破UIE推理瓶颈,推理速度提升100倍以上;
- * 🍭 AIGC内容生成: 新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL-E-mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩模型等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
- * 💪 框架升级: [**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅降低模型压缩技术使用成本;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
+ * 💎 NLP工具:**[NLP 流水线系统 Pipelines](./pipelines)** 发布,支持快速搭建搜索引擎、问答系统,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
+ * 😊 产业应用:新增[文本分类](./applications/text_classification)**多分类、多标签、层次分类**的全流程应用方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破UIE推理瓶颈,推理速度提升100倍以上;
+ * 🍭 AIGC内容生成:新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL·E Mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩模型等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
+ * 💪 框架升级:[**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅降低模型压缩技术使用成本;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
From 447c0146f509c2e5301e573556387ff0cadcc956 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com>
Date: Wed, 7 Sep 2022 17:59:09 +0800
Subject: [PATCH 026/159] [ModelingOutput]add more output for skep model
(#3146)
* update return_dict/label in skep model
* complete skep add-more-output
* refactor simple code
Co-authored-by: Zhong Hui
Co-authored-by: Guo Sheng
Co-authored-by: liu zhengxi <380185688@qq.com>
---
paddlenlp/transformers/skep/modeling.py | 227 +++++++++++++++++++----
tests/transformers/skep/test_modeling.py | 145 ++++++++++-----
2 files changed, 290 insertions(+), 82 deletions(-)
diff --git a/paddlenlp/transformers/skep/modeling.py b/paddlenlp/transformers/skep/modeling.py
index a65da0af5acc..9b1ddd71e5e2 100644
--- a/paddlenlp/transformers/skep/modeling.py
+++ b/paddlenlp/transformers/skep/modeling.py
@@ -25,6 +25,15 @@
else:
from paddlenlp.layers.crf import ViterbiDecoder
+from ..model_outputs import (
+ BaseModelOutputWithPoolingAndCrossAttentions,
+ SequenceClassifierOutput,
+ TokenClassifierOutput,
+ QuestionAnsweringModelOutput,
+ MultipleChoiceModelOutput,
+ MaskedLMOutput,
+ CausalLMOutputWithCrossAttentions,
+)
from .. import PretrainedModel, register_base_model
__all__ = [
@@ -284,7 +293,10 @@ def forward(self,
input_ids,
token_type_ids=None,
position_ids=None,
- attention_mask=None):
+ attention_mask=None,
+ output_hidden_states=False,
+ output_attentions=False,
+ return_dict=False):
r"""
The SkepModel forward method, overrides the `__call__()` special method.
@@ -319,9 +331,23 @@ def forward(self,
For example, its shape can be [batch_size, sequence_length], [batch_size, sequence_length, sequence_length],
[batch_size, num_attention_heads, sequence_length, sequence_length].
Defaults to `None`, which means nothing needed to be prevented attention to.
+ output_hidden_states (bool, optional):
+ Whether to return the hidden states of all layers.
+ Defaults to `False`.
+ output_attentions (bool, optional):
+ Whether to return the attentions tensors of all attention layers.
+ Defaults to `False`.
+ return_dict (bool, optional):
+ Whether to return a :class:`~paddlenlp.transformers.model_outputs.ModelOutput` object. If `False`, the output
+ will be a tuple of tensors. Defaults to `False`.
Returns:
- tuple: Returns tuple (`sequence_output`, `pooled_output`).
+ An instance of :class:`~paddlenlp.transformers.model_outputs.BaseModelOutputWithPoolingAndCrossAttentions` if
+ `return_dict=True`. Otherwise it returns a tuple of tensors corresponding
+ to ordered and not None (depending on the input arguments) fields of
+ :class:`~paddlenlp.transformers.model_outputs.BaseModelOutputWithPoolingAndCrossAttentions`.
+
+ If the result is a tuple: returns tuple (`sequence_output`, `pooled_output`).
With the fields:
@@ -356,10 +382,26 @@ def forward(self,
embedding_output = self.embeddings(input_ids=input_ids,
position_ids=position_ids,
token_type_ids=token_type_ids)
- encoder_outputs = self.encoder(embedding_output, attention_mask)
- sequence_output = encoder_outputs
+ encoder_outputs = self.encoder(
+ embedding_output,
+ attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict)
+
+ if paddle.is_tensor(encoder_outputs):
+ encoder_outputs = (encoder_outputs, )
+
+ sequence_output = encoder_outputs[0]
pooled_output = self.pooler(sequence_output)
- return sequence_output, pooled_output
+ if not return_dict:
+ return (sequence_output, pooled_output) + encoder_outputs[1:]
+ return BaseModelOutputWithPoolingAndCrossAttentions(
+ last_hidden_state=sequence_output,
+ pooler_output=pooled_output,
+ past_key_values=encoder_outputs.past_key_values,
+ hidden_states=encoder_outputs.hidden_states,
+ attentions=encoder_outputs.attentions)
def get_input_embeddings(self) -> nn.Embedding:
"""get skep input word embedding
@@ -409,7 +451,11 @@ def forward(self,
input_ids,
token_type_ids=None,
position_ids=None,
- attention_mask=None):
+ attention_mask=None,
+ labels=None,
+ output_hidden_states=False,
+ output_attentions=False,
+ return_dict=False):
r"""
The SkepForSequenceClassification forward method, overrides the __call__() special method.
@@ -422,10 +468,25 @@ def forward(self,
See :class:`SkepModel`.
attention_mask (Tensor, optional):
See :class:`SkepModel`.
+ labels (Tensor of shape `(batch_size,)`, optional):
+ Labels for computing the sequence classification/regression loss.
+ Indices should be in `[0, ..., num_classes - 1]`. If `num_classes == 1`
+ a regression loss is computed (Mean-Square loss), If `num_classes > 1`
+ a classification loss is computed (Cross-Entropy).
+ output_hidden_states (bool, optional):
+ Whether to return the hidden states of all layers.
+ Defaults to `False`.
+ output_attentions (bool, optional):
+ Whether to return the attentions tensors of all attention layers.
+ Defaults to `False`.
+ return_dict (bool, optional):
+ Whether to return a :class:`~paddlenlp.transformers.model_outputs.SequenceClassifierOutput` object. If
+ `False`, the output will be a tuple of tensors. Defaults to `False`.
Returns:
- Tensor: Returns tensor `logits`, a tensor of the input text classification logits.
- Shape as `[batch_size, num_classes]` and dtype as float32.
+ An instance of :class:`~paddlenlp.transformers.model_outputs.SequenceClassifierOutput` if `return_dict=True`.
+ Otherwise it returns a tuple of tensors corresponding to ordered and
+ not None (depending on the input arguments) fields of :class:`~paddlenlp.transformers.model_outputs.SequenceClassifierOutput`.
Example:
.. code-block::
@@ -441,14 +502,46 @@ def forward(self,
logits = model(**inputs)
"""
- _, pooled_output = self.skep(input_ids,
- token_type_ids=token_type_ids,
- position_ids=position_ids,
- attention_mask=attention_mask)
+ outputs = self.skep(input_ids,
+ token_type_ids=token_type_ids,
+ position_ids=position_ids,
+ attention_mask=attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict)
+
+ pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output)
logits = self.classifier(pooled_output)
- return logits
+
+ loss = None
+ if labels is not None:
+ if self.num_classes == 1:
+ loss_fct = paddle.nn.MSELoss()
+ loss = loss_fct(logits, labels)
+ elif labels.dtype == paddle.int64 or labels.dtype == paddle.int32:
+ loss_fct = paddle.nn.CrossEntropyLoss()
+ loss = loss_fct(logits.reshape((-1, self.num_classes)),
+ labels.reshape((-1, )))
+ else:
+ loss_fct = paddle.nn.BCEWithLogitsLoss()
+ loss = loss_fct(logits, labels)
+
+ if not return_dict:
+ output = (logits, ) + outputs[2:]
+ if loss is not None:
+ return (loss, ) + output
+ if len(output) == 1:
+ return output[0]
+ return output
+
+ return SequenceClassifierOutput(
+ loss=loss,
+ logits=logits,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
+ )
class SkepForTokenClassification(SkepPretrainedModel):
@@ -482,7 +575,11 @@ def forward(self,
input_ids,
token_type_ids=None,
position_ids=None,
- attention_mask=None):
+ attention_mask=None,
+ labels=None,
+ output_hidden_states=False,
+ output_attentions=False,
+ return_dict=False):
r"""
The SkepForTokenClassification forward method, overrides the __call__() special method.
@@ -495,10 +592,22 @@ def forward(self,
See :class:`SkepModel`.
attention_mask (Tensor, optional):
See :class:`SkepModel`.
+ labels (Tensor of shape `(batch_size, sequence_length)`, optional):
+ Labels for computing the token classification loss. Indices should be in `[0, ..., num_classes - 1]`.
+ output_hidden_states (bool, optional):
+ Whether to return the hidden states of all layers.
+ Defaults to `False`.
+ output_attentions (bool, optional):
+ Whether to return the attentions tensors of all attention layers.
+ Defaults to `False`.
+ return_dict (bool, optional):
+ Whether to return a :class:`~paddlenlp.transformers.model_outputs.TokenClassifierOutput` object. If
+ `False`, the output will be a tuple of tensors. Defaults to `False`.
Returns:
- Tensor: Returns tensor `logits`, a tensor of the input token classification logits.
- Shape as `[batch_size, sequence_length, num_classes]` and dtype as `float32`.
+ An instance of :class:`~paddlenlp.transformers.model_outputs.TokenClassifierOutput` if `return_dict=True`.
+ Otherwise it returns a tuple of tensors corresponding to ordered and
+ not None (depending on the input arguments) fields of :class:`~paddlenlp.transformers.model_outputs.TokenClassifierOutput`.
Example:
.. code-block::
@@ -514,14 +623,39 @@ def forward(self,
logits = model(**inputs)
"""
- sequence_output, _ = self.skep(input_ids,
- token_type_ids=token_type_ids,
- position_ids=position_ids,
- attention_mask=attention_mask)
+ outputs = self.skep(input_ids,
+ token_type_ids=token_type_ids,
+ position_ids=position_ids,
+ attention_mask=attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict)
+
+ sequence_output = outputs[0]
sequence_output = self.dropout(sequence_output)
logits = self.classifier(sequence_output)
- return logits
+
+ loss = None
+ if labels is not None:
+ loss_fct = paddle.nn.CrossEntropyLoss()
+ loss = loss_fct(logits.reshape((-1, self.num_classes)),
+ labels.reshape((-1, )))
+
+ if not return_dict:
+ output = (logits, ) + outputs[2:]
+ if loss is not None:
+ return (loss, ) + output
+ if len(output) == 1:
+ return output[0]
+ return output
+
+ return TokenClassifierOutput(
+ loss=loss,
+ logits=logits,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
+ )
class SkepCrfForTokenClassification(SkepPretrainedModel):
@@ -564,7 +698,10 @@ def forward(self,
position_ids=None,
attention_mask=None,
seq_lens=None,
- labels=None):
+ labels=None,
+ output_hidden_states=False,
+ output_attentions=False,
+ return_dict=False):
r"""
The SkepCrfForTokenClassification forward method, overrides the __call__() special method.
@@ -584,9 +721,22 @@ def forward(self,
labels (Tensor, optional):
The input label tensor.
Its data type should be int64 and its shape is `[batch_size, sequence_length]`.
+ output_hidden_states (bool, optional):
+ Whether to return the hidden states of all layers.
+ Defaults to `False`.
+ output_attentions (bool, optional):
+ Whether to return the attentions tensors of all attention layers.
+ Defaults to `False`.
+ return_dict (bool, optional):
+ Whether to return a :class:`~paddlenlp.transformers.model_outputs.TokenClassifierOutput` object. If
+ `False`, the output will be a tuple of tensors. Defaults to `False`.
Returns:
- Tensor: Returns tensor `loss` if `labels` is not None. Otherwise, returns tensor `prediction`.
+ An instance of :class:`~paddlenlp.transformers.model_outputs.TokenClassifierOutput` if `return_dict=True`.
+ Otherwise it returns a tuple of tensors corresponding to ordered and
+ not None (depending on the input arguments) fields of :class:`~paddlenlp.transformers.model_outputs.TokenClassifierOutput`.
+
+ If `return_dict` is False, returns tensor `loss` if `labels` is not None. Otherwise, returns tensor `prediction`.
- `loss` (Tensor):
The crf loss. Its data type is float32 and its shape is `[batch_size]`.
@@ -596,13 +746,15 @@ def forward(self,
Its data type is int64 and its shape is `[batch_size, sequence_length]`.
"""
- sequence_output, _ = self.skep(input_ids,
- token_type_ids=token_type_ids,
- position_ids=position_ids,
- attention_mask=attention_mask)
-
- bigru_output, _ = self.gru(
- sequence_output) #, sequence_length=seq_lens)
+ outputs = self.skep(input_ids,
+ token_type_ids=token_type_ids,
+ position_ids=position_ids,
+ attention_mask=attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict)
+
+ bigru_output, _ = self.gru(outputs[0]) #, sequence_length=seq_lens)
emission = self.fc(bigru_output)
if seq_lens is None:
@@ -616,9 +768,22 @@ def forward(self,
seq_lens = paddle.ones(shape=[input_ids_shape[0]],
dtype=paddle.int64) * input_ids_shape[1]
+ loss, prediction = None, None
if labels is not None:
loss = self.crf_loss(emission, seq_lens, labels)
- return loss
else:
_, prediction = self.viterbi_decoder(emission, seq_lens)
+
+ # FIXME(wj-Mcat): the old version of this model returns a single tensor when return_dict is False
+ if not return_dict:
+ # when loss is None, return prediction
+ if labels is not None:
+ return loss
return prediction
+
+ return TokenClassifierOutput(
+ loss=loss,
+ logits=prediction,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
+ )
diff --git a/tests/transformers/skep/test_modeling.py b/tests/transformers/skep/test_modeling.py
index 03e2ed87cefe..b3016eaf2c58 100644
--- a/tests/transformers/skep/test_modeling.py
+++ b/tests/transformers/skep/test_modeling.py
@@ -17,6 +17,7 @@
from typing import Optional, Tuple, Dict, Any
import paddle
from paddle import Tensor
+from parameterized import parameterized_class
from dataclasses import dataclass, asdict, fields, Field
from paddlenlp.transformers import (
@@ -70,6 +71,8 @@ class SkepTestConfig(SkepTestModelConfig):
# used for sequence classification
num_classes: int = 3
+ num_choices: int = 3
+ type_sequence_label_size: int = 3
class SkepModelTester:
@@ -82,6 +85,11 @@ def __init__(self, parent, config: Optional[SkepTestConfig] = None):
self.is_training = self.config.is_training
+ def __getattr__(self, key: str):
+ if not hasattr(self.config, key):
+ raise AttributeError(f'attribute <{key}> not exist')
+ return getattr(self.config, key)
+
def prepare_config_and_inputs(
self) -> Tuple[Dict[str, Any], Tensor, Tensor, Tensor]:
config = self.config
@@ -98,23 +106,36 @@ def prepare_config_and_inputs(
token_type_ids = ids_tensor([config.batch_size, config.seq_length],
config.type_vocab_size)
- return config.model_kwargs, input_ids, token_type_ids, input_mask
+ sequence_labels = None
+ token_labels = None
+ choice_labels = None
+
+ if self.parent.use_labels:
+ sequence_labels = ids_tensor([self.batch_size],
+ self.type_sequence_label_size)
+ token_labels = ids_tensor([self.batch_size, self.seq_length],
+ self.num_classes)
+ choice_labels = ids_tensor([self.batch_size], self.num_choices)
- def create_and_check_model(
- self,
- config,
- input_ids: Tensor,
- token_type_ids: Tensor,
- input_mask: Tensor,
- ):
+ config = self.get_config()
+ return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+
+ def create_and_check_model(self, config, input_ids: Tensor,
+ token_type_ids: Tensor, input_mask: Tensor,
+ sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = SkepModel(**config)
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- result = model(input_ids, token_type_ids=token_type_ids)
- result = model(input_ids)
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict)
+ result = model(input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict)
+ result = model(input_ids, return_dict=self.parent.return_dict)
+
self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.hidden_size
@@ -123,60 +144,83 @@ def create_and_check_model(
result[1].shape, [self.config.batch_size, self.config.hidden_size])
def create_and_check_for_sequence_classification(
- self,
- config,
- input_ids: Tensor,
- token_type_ids: Tensor,
- input_mask: Tensor,
- ):
+ self, config, input_ids: Tensor, token_type_ids: Tensor,
+ input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = SkepForSequenceClassification(
SkepModel(**config), num_classes=self.config.num_classes)
model.eval()
- result = model(
- input_ids,
- attention_mask=input_mask,
- token_type_ids=token_type_ids,
- )
+ result = model(input_ids,
+ attention_mask=input_mask,
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict,
+ labels=sequence_labels)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
+
self.parent.assertEqual(
- result.shape, [self.config.batch_size, self.config.num_classes])
+ result[0].shape, [self.config.batch_size, self.config.num_classes])
def create_and_check_for_token_classification(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ self, config, input_ids: Tensor, token_type_ids: Tensor,
+ input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = SkepForTokenClassification(SkepModel(**config),
num_classes=self.config.num_classes)
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- self.parent.assertEqual(result.shape, [
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict,
+ labels=token_labels)
+
+ if token_labels is not None:
+ result = result[1:]
+ elif paddle.is_tensor(result):
+ result = [result]
+
+ self.parent.assertEqual(result[0].shape, [
self.config.batch_size, self.config.seq_length,
self.config.num_classes
])
def create_and_check_for_crf_token_classification(
- self,
- config,
- input_ids,
- token_type_ids,
- input_mask,
- ):
+ self, config, input_ids: Tensor, token_type_ids: Tensor,
+ input_mask: Tensor, sequence_labels: Tensor, token_labels: Tensor,
+ choice_labels: Tensor):
model = SkepCrfForTokenClassification(
SkepModel(**config), num_classes=self.config.num_classes)
model.eval()
result = model(input_ids,
attention_mask=input_mask,
- token_type_ids=token_type_ids)
- self.parent.assertEqual(
- result.shape, [self.config.batch_size, self.config.seq_length])
+ token_type_ids=token_type_ids,
+ return_dict=self.parent.return_dict,
+ labels=token_labels)
+ # TODO(wj-Mcat): the output of SkepCrfForTokenClassification is wrong
+ if paddle.is_tensor(result):
+ result = [result]
+
+ if token_labels is not None:
+ self.parent.assertEqual(result[0].shape, [self.config.batch_size])
+ else:
+ self.parent.assertEqual(
+ result[0].shape,
+ [self.config.batch_size, self.config.seq_length])
def prepare_config_and_inputs_for_common(self):
- config, input_ids, token_type_ids, input_mask = self.prepare_config_and_inputs(
- )
+ config_and_inputs = self.prepare_config_and_inputs()
+ (
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
@@ -193,12 +237,19 @@ def get_config(self) -> dict:
return self.config.model_kwargs
+@parameterized_class(("return_dict", "use_labels"), [
+ [False, False],
+ [False, True],
+ [True, False],
+ [True, True],
+])
class SkepModelTest(ModelTesterMixin, unittest.TestCase):
base_model_class = SkepModel
+ return_dict = False
+ use_labels = False
all_model_classes = (
SkepModel,
- # TODO(wj-Mcat): to activate this model later
SkepCrfForTokenClassification,
SkepForSequenceClassification,
SkepForTokenClassification,
@@ -207,9 +258,6 @@ class SkepModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = SkepModelTester(self)
- def get_config():
- pass
-
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@@ -225,11 +273,6 @@ def test_for_token_classification(self):
*config_and_inputs)
def test_for_crf_token_classification(self):
- # TODO(wj-Mcat): to activate this method later
- # self.skipTest(
- # "skip for crf token classification: there are contains something wrong in paddle.text.viterib_decode"
- # )
- # return
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_crf_token_classification(
*config_and_inputs)
From a837aeead60d0fb156b1c1bd828e33b22ad70aa1 Mon Sep 17 00:00:00 2001
From: wj-Mcat <1435130236@qq.com>
Date: Wed, 7 Sep 2022 21:00:47 +0800
Subject: [PATCH 027/159] remove model_config_file and resource_files_names
---
paddlenlp/transformers/albert/modeling.py | 2 --
paddlenlp/transformers/bart/modeling.py | 2 --
paddlenlp/transformers/bert/modeling.py | 2 --
paddlenlp/transformers/bigbird/modeling.py | 2 --
paddlenlp/transformers/blenderbot/modeling.py | 2 --
paddlenlp/transformers/blenderbot_small/modeling.py | 2 --
paddlenlp/transformers/chinesebert/modeling.py | 3 ---
paddlenlp/transformers/codegen/modeling.py | 2 --
paddlenlp/transformers/convbert/modeling.py | 2 --
paddlenlp/transformers/ctrl/modeling.py | 2 --
paddlenlp/transformers/dallebart/modeling.py | 2 --
paddlenlp/transformers/distilbert/modeling.py | 2 --
paddlenlp/transformers/electra/modeling.py | 2 --
paddlenlp/transformers/ernie/modeling.py | 1 -
paddlenlp/transformers/ernie_ctm/modeling.py | 2 --
paddlenlp/transformers/ernie_doc/modeling.py | 2 --
paddlenlp/transformers/ernie_gen/modeling.py | 2 --
paddlenlp/transformers/ernie_gram/modeling.py | 2 --
paddlenlp/transformers/ernie_m/modeling.py | 2 --
paddlenlp/transformers/fnet/modeling.py | 2 --
paddlenlp/transformers/funnel/modeling.py | 1 -
paddlenlp/transformers/gau_alpha/modeling.py | 2 --
paddlenlp/transformers/gpt/modeling.py | 2 --
paddlenlp/transformers/layoutlm/modeling.py | 2 --
paddlenlp/transformers/layoutlmv2/modeling.py | 2 --
paddlenlp/transformers/layoutxlm/modeling.py | 2 --
paddlenlp/transformers/luke/modeling.py | 2 --
paddlenlp/transformers/mbart/modeling.py | 2 --
paddlenlp/transformers/megatronbert/modeling.py | 2 --
paddlenlp/transformers/mobilebert/modeling.py | 2 --
paddlenlp/transformers/mpnet/modeling.py | 2 --
paddlenlp/transformers/nezha/modeling.py | 2 --
paddlenlp/transformers/opt/modeling.py | 2 --
paddlenlp/transformers/ppminilm/modeling.py | 1 -
paddlenlp/transformers/prophetnet/modeling.py | 2 --
paddlenlp/transformers/reformer/modeling.py | 2 --
paddlenlp/transformers/rembert/modeling.py | 2 --
paddlenlp/transformers/roberta/modeling.py | 2 --
paddlenlp/transformers/roformer/modeling.py | 2 --
paddlenlp/transformers/roformerv2/modeling.py | 2 --
paddlenlp/transformers/skep/modeling.py | 2 --
paddlenlp/transformers/squeezebert/modeling.py | 2 --
paddlenlp/transformers/t5/modeling.py | 2 --
paddlenlp/transformers/tinybert/modeling.py | 2 --
paddlenlp/transformers/unified_transformer/modeling.py | 2 --
paddlenlp/transformers/unimo/modeling.py | 2 --
paddlenlp/transformers/xlm/modeling.py | 2 --
paddlenlp/transformers/xlnet/modeling.py | 2 --
48 files changed, 94 deletions(-)
diff --git a/paddlenlp/transformers/albert/modeling.py b/paddlenlp/transformers/albert/modeling.py
index 9d7999a4b42b..79c06c27e955 100644
--- a/paddlenlp/transformers/albert/modeling.py
+++ b/paddlenlp/transformers/albert/modeling.py
@@ -432,7 +432,6 @@ class AlbertPretrainedModel(PretrainedModel):
loading pretrained models. See `PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"albert-base-v1": {
"attention_probs_dropout_prob": 0.1,
@@ -716,7 +715,6 @@ class AlbertPretrainedModel(PretrainedModel):
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"albert-base-v1":
diff --git a/paddlenlp/transformers/bart/modeling.py b/paddlenlp/transformers/bart/modeling.py
index 7e560b461379..694b41e19787 100644
--- a/paddlenlp/transformers/bart/modeling.py
+++ b/paddlenlp/transformers/bart/modeling.py
@@ -49,7 +49,6 @@ class BartPretrainedModel(PretrainedModel):
loading pretrained models.
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"bart-base": {
"vocab_size": 50265,
@@ -94,7 +93,6 @@ class BartPretrainedModel(PretrainedModel):
"init_std": 0.02,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"bart-base":
diff --git a/paddlenlp/transformers/bert/modeling.py b/paddlenlp/transformers/bert/modeling.py
index 8a0d955606f8..f2063282fe2a 100644
--- a/paddlenlp/transformers/bert/modeling.py
+++ b/paddlenlp/transformers/bert/modeling.py
@@ -124,7 +124,6 @@ class BertPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"bert-base-uncased": {
"vocab_size": 30522,
@@ -379,7 +378,6 @@ class BertPretrainedModel(PretrainedModel):
"pad_token_id": 0
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"bert-base-uncased":
diff --git a/paddlenlp/transformers/bigbird/modeling.py b/paddlenlp/transformers/bigbird/modeling.py
index 51e221156eb5..00d153f8c6d0 100644
--- a/paddlenlp/transformers/bigbird/modeling.py
+++ b/paddlenlp/transformers/bigbird/modeling.py
@@ -252,7 +252,6 @@ class BigBirdPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"bigbird-base-uncased": {
"num_layers": 12,
@@ -276,7 +275,6 @@ class BigBirdPretrainedModel(PretrainedModel):
"initializer_range": 0.02,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"bigbird-base-uncased":
diff --git a/paddlenlp/transformers/blenderbot/modeling.py b/paddlenlp/transformers/blenderbot/modeling.py
index 8fe66d6d7c59..9cebbbd3d3a6 100644
--- a/paddlenlp/transformers/blenderbot/modeling.py
+++ b/paddlenlp/transformers/blenderbot/modeling.py
@@ -51,7 +51,6 @@ class BlenderbotPretrainedModel(PretrainedModel):
Refer to :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
base_model_prefix = "blenderbot"
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"blenderbot-3B": {
@@ -121,7 +120,6 @@ class BlenderbotPretrainedModel(PretrainedModel):
"scale_embedding": True,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"blenderbot-3B":
diff --git a/paddlenlp/transformers/blenderbot_small/modeling.py b/paddlenlp/transformers/blenderbot_small/modeling.py
index 85155bed2440..c1ff4f841133 100644
--- a/paddlenlp/transformers/blenderbot_small/modeling.py
+++ b/paddlenlp/transformers/blenderbot_small/modeling.py
@@ -78,7 +78,6 @@ class BlenderbotSmallPretrainedModel(PretrainedModel):
loading pretrained models.
Refer to :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"blenderbot_small-90M": {
"vocab_size": 54944,
@@ -103,7 +102,6 @@ class BlenderbotSmallPretrainedModel(PretrainedModel):
"normalize_before": False,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"blenderbot_small-90M":
diff --git a/paddlenlp/transformers/chinesebert/modeling.py b/paddlenlp/transformers/chinesebert/modeling.py
index 3b70971bd2dd..9211c6c8f642 100644
--- a/paddlenlp/transformers/chinesebert/modeling.py
+++ b/paddlenlp/transformers/chinesebert/modeling.py
@@ -220,7 +220,6 @@ class ChineseBertPretrainedModel(PretrainedModel):
"""
base_model_prefix = "chinesebert"
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ChineseBERT-base": {
@@ -259,8 +258,6 @@ class ChineseBertPretrainedModel(PretrainedModel):
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
-
pretrained_resource_files_map = {
"model_state": {
"ChineseBERT-base":
diff --git a/paddlenlp/transformers/codegen/modeling.py b/paddlenlp/transformers/codegen/modeling.py
index 44a597ec0a8a..11cd10135215 100644
--- a/paddlenlp/transformers/codegen/modeling.py
+++ b/paddlenlp/transformers/codegen/modeling.py
@@ -279,9 +279,7 @@ class CodeGenPreTrainedModel(PretrainedModel):
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {"model_state": {}}
base_model_prefix = "transformer"
diff --git a/paddlenlp/transformers/convbert/modeling.py b/paddlenlp/transformers/convbert/modeling.py
index 9228f58a51ee..43ef53c01125 100644
--- a/paddlenlp/transformers/convbert/modeling.py
+++ b/paddlenlp/transformers/convbert/modeling.py
@@ -365,7 +365,6 @@ class ConvBertPretrainedModel(PretrainedModel):
"""
base_model_prefix = "convbert"
- model_config_file = "model_config.json"
# pretrained general configuration
gen_weight = 1.0
@@ -431,7 +430,6 @@ class ConvBertPretrainedModel(PretrainedModel):
"num_groups": 1,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"convbert-base":
diff --git a/paddlenlp/transformers/ctrl/modeling.py b/paddlenlp/transformers/ctrl/modeling.py
index 66724bdcff17..b8ebc57221e2 100755
--- a/paddlenlp/transformers/ctrl/modeling.py
+++ b/paddlenlp/transformers/ctrl/modeling.py
@@ -205,7 +205,6 @@ class CTRLPreTrainedModel(PretrainedModel):
"""
base_model_prefix = "ctrl"
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ctrl": {
@@ -237,7 +236,6 @@ class CTRLPreTrainedModel(PretrainedModel):
"pad_token_id": None
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"ctrl":
diff --git a/paddlenlp/transformers/dallebart/modeling.py b/paddlenlp/transformers/dallebart/modeling.py
index ef661acd61bd..3e860d6baaba 100644
--- a/paddlenlp/transformers/dallebart/modeling.py
+++ b/paddlenlp/transformers/dallebart/modeling.py
@@ -83,7 +83,6 @@ class DalleBartPretrainedModel(PretrainedModel):
loading pretrained models.
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"dalle-mini": {
"text_vocab_size": 50264,
@@ -178,7 +177,6 @@ class DalleBartPretrainedModel(PretrainedModel):
"init_std": 0.02
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"dalle-mini":
diff --git a/paddlenlp/transformers/distilbert/modeling.py b/paddlenlp/transformers/distilbert/modeling.py
index 2253a2aa8ca9..03bcca462b4f 100644
--- a/paddlenlp/transformers/distilbert/modeling.py
+++ b/paddlenlp/transformers/distilbert/modeling.py
@@ -72,7 +72,6 @@ class DistilBertPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"distilbert-base-uncased": {
"vocab_size": 30522,
@@ -103,7 +102,6 @@ class DistilBertPretrainedModel(PretrainedModel):
"pad_token_id": 0,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"distilbert-base-uncased":
diff --git a/paddlenlp/transformers/electra/modeling.py b/paddlenlp/transformers/electra/modeling.py
index 6b70dccb921c..839aaecb14ff 100644
--- a/paddlenlp/transformers/electra/modeling.py
+++ b/paddlenlp/transformers/electra/modeling.py
@@ -240,7 +240,6 @@ class ElectraPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
base_model_prefix = "electra"
- model_config_file = "model_config.json"
# pretrained general configuration
gen_weight = 1.0
@@ -343,7 +342,6 @@ class ElectraPretrainedModel(PretrainedModel):
"layer_norm_eps": 1e-5
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"electra-small":
diff --git a/paddlenlp/transformers/ernie/modeling.py b/paddlenlp/transformers/ernie/modeling.py
index c7f6528eb993..70563fda34f4 100644
--- a/paddlenlp/transformers/ernie/modeling.py
+++ b/paddlenlp/transformers/ernie/modeling.py
@@ -149,7 +149,6 @@ class ErniePretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
# Deprecated, alias for ernie-1.0-base-zh
"ernie-1.0": {
diff --git a/paddlenlp/transformers/ernie_ctm/modeling.py b/paddlenlp/transformers/ernie_ctm/modeling.py
index afb2b38b851e..da70502d09f3 100644
--- a/paddlenlp/transformers/ernie_ctm/modeling.py
+++ b/paddlenlp/transformers/ernie_ctm/modeling.py
@@ -111,7 +111,6 @@ class ErnieCtmPretrainedModel(PretrainedModel):
and loading pretrained models.
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ernie-ctm": {
"vocab_size": 23000,
@@ -165,7 +164,6 @@ class ErnieCtmPretrainedModel(PretrainedModel):
"cls_num": 2,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"ernie-ctm":
diff --git a/paddlenlp/transformers/ernie_doc/modeling.py b/paddlenlp/transformers/ernie_doc/modeling.py
index f3ad51da568d..d0157a1ca7ed 100644
--- a/paddlenlp/transformers/ernie_doc/modeling.py
+++ b/paddlenlp/transformers/ernie_doc/modeling.py
@@ -312,7 +312,6 @@ class ErnieDocPretrainedModel(PretrainedModel):
and loading pretrained models.
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ernie-doc-base-en": {
"attention_dropout_prob": 0.0,
@@ -347,7 +346,6 @@ class ErnieDocPretrainedModel(PretrainedModel):
"pad_token_id": 0
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"ernie-doc-base-en":
diff --git a/paddlenlp/transformers/ernie_gen/modeling.py b/paddlenlp/transformers/ernie_gen/modeling.py
index 10b1b8ce3da9..6aa6ceff3a2e 100644
--- a/paddlenlp/transformers/ernie_gen/modeling.py
+++ b/paddlenlp/transformers/ernie_gen/modeling.py
@@ -225,7 +225,6 @@ class ErnieGenPretrainedModel(object):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
ernie_gen_pretrained_init_configuration = {
"ernie-gen-base-en": {
"attention_probs_dropout_prob": 0.1,
@@ -270,7 +269,6 @@ class ErnieGenPretrainedModel(object):
"pad_token_id": 0,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
ernie_gen_pretrained_resource_files_map = {
"model_state": {
"ernie-gen-base-en":
diff --git a/paddlenlp/transformers/ernie_gram/modeling.py b/paddlenlp/transformers/ernie_gram/modeling.py
index f25a009b1cc2..72606d0e8adf 100644
--- a/paddlenlp/transformers/ernie_gram/modeling.py
+++ b/paddlenlp/transformers/ernie_gram/modeling.py
@@ -84,7 +84,6 @@ class ErnieGramPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ernie-gram-zh": {
"attention_probs_dropout_prob": 0.1,
@@ -113,7 +112,6 @@ class ErnieGramPretrainedModel(PretrainedModel):
"vocab_size": 18018
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"ernie-gram-zh":
diff --git a/paddlenlp/transformers/ernie_m/modeling.py b/paddlenlp/transformers/ernie_m/modeling.py
index fee41706ba91..92a8c94d64ab 100644
--- a/paddlenlp/transformers/ernie_m/modeling.py
+++ b/paddlenlp/transformers/ernie_m/modeling.py
@@ -83,7 +83,6 @@ class ErnieMPretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ernie-m-base": {
"attention_probs_dropout_prob": 0.1,
@@ -110,7 +109,6 @@ class ErnieMPretrainedModel(PretrainedModel):
"pad_token_id": 1
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"ernie-m-base":
diff --git a/paddlenlp/transformers/fnet/modeling.py b/paddlenlp/transformers/fnet/modeling.py
index d90c57cc5e58..52ba431e7161 100644
--- a/paddlenlp/transformers/fnet/modeling.py
+++ b/paddlenlp/transformers/fnet/modeling.py
@@ -353,7 +353,6 @@ class FNetPretrainedModel(PretrainedModel):
`pretrained_resource_files_map`, `base_model_prefix` for downloading and
loading pretrained models. See `PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"fnet-base": {
"vocab_size": 32000,
@@ -386,7 +385,6 @@ class FNetPretrainedModel(PretrainedModel):
"eos_token_id": 2,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"fnet-base":
diff --git a/paddlenlp/transformers/funnel/modeling.py b/paddlenlp/transformers/funnel/modeling.py
index 5335be5d544a..4e4ce5a3692d 100644
--- a/paddlenlp/transformers/funnel/modeling.py
+++ b/paddlenlp/transformers/funnel/modeling.py
@@ -1782,7 +1782,6 @@ class FunnelPreTrainedModel(PreTrainedModel):
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"funnel-transformer/small": {}, # B4-4-4H768
"funnel-transformer/small-base": {}, # B4-4-4H768, no decoder
diff --git a/paddlenlp/transformers/gau_alpha/modeling.py b/paddlenlp/transformers/gau_alpha/modeling.py
index 670b63e449cb..10cde30805ca 100644
--- a/paddlenlp/transformers/gau_alpha/modeling.py
+++ b/paddlenlp/transformers/gau_alpha/modeling.py
@@ -257,7 +257,6 @@ class GAUAlphaPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"chinese_GAU-alpha-char_L-24_H-768": {
"vocab_size": 12000,
@@ -277,7 +276,6 @@ class GAUAlphaPretrainedModel(PretrainedModel):
"attention_scale": True,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"chinese_GAU-alpha-char_L-24_H-768":
diff --git a/paddlenlp/transformers/gpt/modeling.py b/paddlenlp/transformers/gpt/modeling.py
index d3d38da84973..66ae46293d74 100644
--- a/paddlenlp/transformers/gpt/modeling.py
+++ b/paddlenlp/transformers/gpt/modeling.py
@@ -450,7 +450,6 @@ class GPTPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"gpt-cpm-large-cn": { # 2.6B
"vocab_size": 30000,
@@ -592,7 +591,6 @@ class GPTPretrainedModel(PretrainedModel):
"eol_token_id": 198,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"gpt-cpm-large-cn":
diff --git a/paddlenlp/transformers/layoutlm/modeling.py b/paddlenlp/transformers/layoutlm/modeling.py
index 181eb2576111..6f9f1c7ffb01 100644
--- a/paddlenlp/transformers/layoutlm/modeling.py
+++ b/paddlenlp/transformers/layoutlm/modeling.py
@@ -144,7 +144,6 @@ def forward(self,
class LayoutLMPretrainedModel(PretrainedModel):
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"layoutlm-base-uncased": {
"vocab_size": 30522,
@@ -179,7 +178,6 @@ class LayoutLMPretrainedModel(PretrainedModel):
"type_vocab_size": 2,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"layoutlm-base-uncased":
diff --git a/paddlenlp/transformers/layoutlmv2/modeling.py b/paddlenlp/transformers/layoutlmv2/modeling.py
index 30b5f6c7686a..dba104d7663f 100644
--- a/paddlenlp/transformers/layoutlmv2/modeling.py
+++ b/paddlenlp/transformers/layoutlmv2/modeling.py
@@ -196,7 +196,6 @@ def forward(self,
class LayoutLMv2PretrainedModel(PretrainedModel):
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"layoutlmv2-base-uncased": {
"attention_probs_dropout_prob": 0.1,
@@ -290,7 +289,6 @@ class LayoutLMv2PretrainedModel(PretrainedModel):
"use_visual_backbone": False,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"layoutlmv2-base-uncased":
diff --git a/paddlenlp/transformers/layoutxlm/modeling.py b/paddlenlp/transformers/layoutxlm/modeling.py
index 7084fce78134..3483ef575428 100644
--- a/paddlenlp/transformers/layoutxlm/modeling.py
+++ b/paddlenlp/transformers/layoutxlm/modeling.py
@@ -223,7 +223,6 @@ def forward(self,
class LayoutXLMPretrainedModel(PretrainedModel):
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"layoutxlm-base-uncased": {
"attention_probs_dropout_prob": 0.1,
@@ -291,7 +290,6 @@ class LayoutXLMPretrainedModel(PretrainedModel):
"vocab_size": 250002,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"layoutxlm-base-uncased":
diff --git a/paddlenlp/transformers/luke/modeling.py b/paddlenlp/transformers/luke/modeling.py
index 1cf360f3d2a4..93eb033c6422 100644
--- a/paddlenlp/transformers/luke/modeling.py
+++ b/paddlenlp/transformers/luke/modeling.py
@@ -102,7 +102,6 @@ class LukePretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"luke-base": {
"attention_probs_dropout_prob": 0.1,
@@ -133,7 +132,6 @@ class LukePretrainedModel(PretrainedModel):
"vocab_size": 50267
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"luke-base":
diff --git a/paddlenlp/transformers/mbart/modeling.py b/paddlenlp/transformers/mbart/modeling.py
index 7179032960b0..809ce8cd103e 100644
--- a/paddlenlp/transformers/mbart/modeling.py
+++ b/paddlenlp/transformers/mbart/modeling.py
@@ -54,7 +54,6 @@ class MBartPretrainedModel(PretrainedModel):
loading pretrained models.
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"mbart-large-cc25": {
"vocab_size": 250027,
@@ -157,7 +156,6 @@ class MBartPretrainedModel(PretrainedModel):
"init_std": 0.02,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"mbart-large-cc25":
diff --git a/paddlenlp/transformers/megatronbert/modeling.py b/paddlenlp/transformers/megatronbert/modeling.py
index 3521fe5256ec..8e1148328e47 100644
--- a/paddlenlp/transformers/megatronbert/modeling.py
+++ b/paddlenlp/transformers/megatronbert/modeling.py
@@ -82,7 +82,6 @@ class MegatronBertPretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"megatronbert-cased": {
"attention_probs_dropout_prob": 0.1,
@@ -113,7 +112,6 @@ class MegatronBertPretrainedModel(PretrainedModel):
"pad_token_id": 0
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"megatronbert-cased":
diff --git a/paddlenlp/transformers/mobilebert/modeling.py b/paddlenlp/transformers/mobilebert/modeling.py
index b2acc076a78f..526902baa0f3 100644
--- a/paddlenlp/transformers/mobilebert/modeling.py
+++ b/paddlenlp/transformers/mobilebert/modeling.py
@@ -784,7 +784,6 @@ class MobileBertPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"mobilebert-uncased": {
"attention_probs_dropout_prob": 0.1,
@@ -812,7 +811,6 @@ class MobileBertPretrainedModel(PretrainedModel):
"vocab_size": 30522
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
diff --git a/paddlenlp/transformers/mpnet/modeling.py b/paddlenlp/transformers/mpnet/modeling.py
index bfb4064ad5d7..8af7773b4dd5 100644
--- a/paddlenlp/transformers/mpnet/modeling.py
+++ b/paddlenlp/transformers/mpnet/modeling.py
@@ -316,7 +316,6 @@ class MPNetPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"mpnet-base": {
"vocab_size": 30527,
@@ -334,7 +333,6 @@ class MPNetPretrainedModel(PretrainedModel):
"pad_token_id": 1,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"mpnet-base":
diff --git a/paddlenlp/transformers/nezha/modeling.py b/paddlenlp/transformers/nezha/modeling.py
index eb0d4dc8f696..b2c09539e390 100644
--- a/paddlenlp/transformers/nezha/modeling.py
+++ b/paddlenlp/transformers/nezha/modeling.py
@@ -364,7 +364,6 @@ class NeZhaPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"nezha-base-chinese": {
"vocab_size": 21128,
@@ -427,7 +426,6 @@ class NeZhaPretrainedModel(PretrainedModel):
"use_relative_position": True
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"nezha-base-chinese":
diff --git a/paddlenlp/transformers/opt/modeling.py b/paddlenlp/transformers/opt/modeling.py
index 3f5a447e95c2..873544b8bf09 100644
--- a/paddlenlp/transformers/opt/modeling.py
+++ b/paddlenlp/transformers/opt/modeling.py
@@ -222,9 +222,7 @@ class OPTPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {"model_state": {}}
base_model_prefix = "opt"
diff --git a/paddlenlp/transformers/ppminilm/modeling.py b/paddlenlp/transformers/ppminilm/modeling.py
index aa9eeebc7b44..b4998751c3bc 100644
--- a/paddlenlp/transformers/ppminilm/modeling.py
+++ b/paddlenlp/transformers/ppminilm/modeling.py
@@ -107,7 +107,6 @@ class PPMiniLMPretrainedModel(FasterPretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"ppminilm-6l-768h": {
"attention_probs_dropout_prob": 0.1,
diff --git a/paddlenlp/transformers/prophetnet/modeling.py b/paddlenlp/transformers/prophetnet/modeling.py
index 52137fc4a087..2a0b552070fb 100644
--- a/paddlenlp/transformers/prophetnet/modeling.py
+++ b/paddlenlp/transformers/prophetnet/modeling.py
@@ -135,7 +135,6 @@ class ProphetNetPretrainedModel(PretrainedModel):
`pretrained_resource_files_map`, `base_model_prefix` for downloading and
loading pretrained models.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"prophetnet-large-uncased": {
"activation_dropout": 0.1,
@@ -171,7 +170,6 @@ class ProphetNetPretrainedModel(PretrainedModel):
"vocab_size": 30522
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"prophetnet-large-uncased":
diff --git a/paddlenlp/transformers/reformer/modeling.py b/paddlenlp/transformers/reformer/modeling.py
index 7e907b2e5449..63645df99a5a 100644
--- a/paddlenlp/transformers/reformer/modeling.py
+++ b/paddlenlp/transformers/reformer/modeling.py
@@ -2417,7 +2417,6 @@ class ReformerPretrainedModel(PretrainedModel):
"""
base_model_prefix = "reformer"
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"reformer-enwik8": {
@@ -2532,7 +2531,6 @@ class ReformerPretrainedModel(PretrainedModel):
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"reformer-enwik8":
diff --git a/paddlenlp/transformers/rembert/modeling.py b/paddlenlp/transformers/rembert/modeling.py
index 174685eae3e4..04329f328929 100644
--- a/paddlenlp/transformers/rembert/modeling.py
+++ b/paddlenlp/transformers/rembert/modeling.py
@@ -69,7 +69,6 @@ def gelu_new(x):
class RembertPretrainedModel(PretrainedModel):
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"rembert": {
"attention_probs_dropout_prob": 0,
@@ -88,7 +87,6 @@ class RembertPretrainedModel(PretrainedModel):
"layer_norm_eps": 1e-12
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"rembert":
diff --git a/paddlenlp/transformers/roberta/modeling.py b/paddlenlp/transformers/roberta/modeling.py
index 50584a7376d7..8812bb2e85d1 100644
--- a/paddlenlp/transformers/roberta/modeling.py
+++ b/paddlenlp/transformers/roberta/modeling.py
@@ -126,7 +126,6 @@ class RobertaPretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"hfl/roberta-wwm-ext": {
"attention_probs_dropout_prob": 0.1,
@@ -213,7 +212,6 @@ class RobertaPretrainedModel(PretrainedModel):
"pad_token_id": 0
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"hfl/roberta-wwm-ext":
diff --git a/paddlenlp/transformers/roformer/modeling.py b/paddlenlp/transformers/roformer/modeling.py
index d4aa86629372..417904ed2f3f 100644
--- a/paddlenlp/transformers/roformer/modeling.py
+++ b/paddlenlp/transformers/roformer/modeling.py
@@ -249,7 +249,6 @@ class RoFormerPretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"roformer-chinese-small": {
"vocab_size": 50000,
@@ -421,7 +420,6 @@ class RoFormerPretrainedModel(PretrainedModel):
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"roformer-chinese-small":
diff --git a/paddlenlp/transformers/roformerv2/modeling.py b/paddlenlp/transformers/roformerv2/modeling.py
index e289baafbec2..857afe2802e1 100644
--- a/paddlenlp/transformers/roformerv2/modeling.py
+++ b/paddlenlp/transformers/roformerv2/modeling.py
@@ -248,7 +248,6 @@ class RoFormerv2PretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"roformer_v2_chinese_char_small": {
"vocab_size": 12000,
@@ -297,7 +296,6 @@ class RoFormerv2PretrainedModel(PretrainedModel):
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"roformer_v2_chinese_char_small":
diff --git a/paddlenlp/transformers/skep/modeling.py b/paddlenlp/transformers/skep/modeling.py
index a65da0af5acc..89ff4e9300a1 100644
--- a/paddlenlp/transformers/skep/modeling.py
+++ b/paddlenlp/transformers/skep/modeling.py
@@ -113,7 +113,6 @@ class SkepPretrainedModel(PretrainedModel):
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"skep_ernie_1.0_large_ch": {
"attention_probs_dropout_prob": 0.1,
@@ -158,7 +157,6 @@ class SkepPretrainedModel(PretrainedModel):
"pad_token_id": 1,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"skep_ernie_1.0_large_ch":
diff --git a/paddlenlp/transformers/squeezebert/modeling.py b/paddlenlp/transformers/squeezebert/modeling.py
index fd704db8d637..93d9a32298a9 100755
--- a/paddlenlp/transformers/squeezebert/modeling.py
+++ b/paddlenlp/transformers/squeezebert/modeling.py
@@ -438,7 +438,6 @@ class SqueezeBertPreTrainedModel(PretrainedModel):
"""
base_model_prefix = "squeezebert"
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"squeezebert-uncased": {
@@ -512,7 +511,6 @@ class SqueezeBertPreTrainedModel(PretrainedModel):
'layer_norm_eps': 1e-12
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
diff --git a/paddlenlp/transformers/t5/modeling.py b/paddlenlp/transformers/t5/modeling.py
index 402bdfc95b24..efeffa66b67e 100644
--- a/paddlenlp/transformers/t5/modeling.py
+++ b/paddlenlp/transformers/t5/modeling.py
@@ -601,7 +601,6 @@ class T5PretrainedModel(PretrainedModel):
"""
base_model_prefix = "t5"
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"t5-small": {
@@ -695,7 +694,6 @@ class T5PretrainedModel(PretrainedModel):
"feed_forward_proj": "gated-gelu",
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"t5-small":
diff --git a/paddlenlp/transformers/tinybert/modeling.py b/paddlenlp/transformers/tinybert/modeling.py
index 974b8e0c56fc..455e39133cae 100644
--- a/paddlenlp/transformers/tinybert/modeling.py
+++ b/paddlenlp/transformers/tinybert/modeling.py
@@ -35,7 +35,6 @@ class TinyBertPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"tinybert-4l-312d": {
"vocab_size": 30522,
@@ -122,7 +121,6 @@ class TinyBertPretrainedModel(PretrainedModel):
"pad_token_id": 0,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"tinybert-4l-312d":
diff --git a/paddlenlp/transformers/unified_transformer/modeling.py b/paddlenlp/transformers/unified_transformer/modeling.py
index 9b827c0fdb05..d88bc82940a1 100644
--- a/paddlenlp/transformers/unified_transformer/modeling.py
+++ b/paddlenlp/transformers/unified_transformer/modeling.py
@@ -37,7 +37,6 @@ class UnifiedTransformerPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"unified_transformer-12L-cn": {
"vocab_size": 30004,
@@ -117,7 +116,6 @@ class UnifiedTransformerPretrainedModel(PretrainedModel):
"mask_token_id": 8000,
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"unified_transformer-12L-cn":
diff --git a/paddlenlp/transformers/unimo/modeling.py b/paddlenlp/transformers/unimo/modeling.py
index f9ff98b48833..7bf4642aead6 100644
--- a/paddlenlp/transformers/unimo/modeling.py
+++ b/paddlenlp/transformers/unimo/modeling.py
@@ -37,7 +37,6 @@ class UNIMOPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"unimo-text-1.0": {
"vocab_size": 18000,
@@ -97,7 +96,6 @@ class UNIMOPretrainedModel(PretrainedModel):
"mask_token_id": 3,
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"unimo-text-1.0":
diff --git a/paddlenlp/transformers/xlm/modeling.py b/paddlenlp/transformers/xlm/modeling.py
index d0be26d38413..e4d0adb9ef65 100644
--- a/paddlenlp/transformers/xlm/modeling.py
+++ b/paddlenlp/transformers/xlm/modeling.py
@@ -198,7 +198,6 @@ class XLMPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
'xlm-mlm-en-2048': {
'is_encoder': True,
@@ -578,7 +577,6 @@ class XLMPretrainedModel(PretrainedModel):
}
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
'xlm-mlm-en-2048':
diff --git a/paddlenlp/transformers/xlnet/modeling.py b/paddlenlp/transformers/xlnet/modeling.py
index 304e16be7976..c777335c3fe5 100644
--- a/paddlenlp/transformers/xlnet/modeling.py
+++ b/paddlenlp/transformers/xlnet/modeling.py
@@ -481,7 +481,6 @@ class XLNetPretrainedModel(PretrainedModel):
See :class:`~paddlenlp.transformers.model_utils.PretrainedModel` for more details.
"""
- model_config_file = "model_config.json"
pretrained_init_configuration = {
"xlnet-base-cased": {
"attn_type": "bi",
@@ -580,7 +579,6 @@ class XLNetPretrainedModel(PretrainedModel):
},
}
- resource_files_names = {"model_state": "model_state.pdparams"}
pretrained_resource_files_map = {
"model_state": {
"xlnet-base-cased":
From 6fa2df53d2487d51e9a428f510e5f7ed3120a8c2 Mon Sep 17 00:00:00 2001
From: chenxiaozeng
Date: Thu, 8 Sep 2022 00:05:22 +0800
Subject: [PATCH 028/159] Update README_cn.md (#3219)
---
README_cn.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index f29184f96c86..281186705668 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -32,10 +32,10 @@
## News 📢
* 🔥 **2022.9.6 发布 [PaddleNLP v2.4](https://github.com/PaddlePaddle/PaddleNLP/releases/tag/v2.4.0)**
- * 💎 NLP工具:**[NLP 流水线系统 Pipelines](./pipelines)** 发布,支持快速搭建搜索引擎、问答系统,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
- * 😊 产业应用:新增[文本分类](./applications/text_classification)**多分类、多标签、层次分类**的全流程应用方案,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[UIE信息抽取](./model_zoo/uie)发布**UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破UIE推理瓶颈,推理速度提升100倍以上;
- * 🍭 AIGC内容生成:新增代码生成SOTA模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[文图生成潮流模型](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) **DALL·E Mini**、**Disco Diffusion**、**Stable Diffusion**,更多趣玩模型等你来玩;新增[中文文本摘要应用](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持Taskflow一键调用;
- * 💪 框架升级:[**模型自动压缩API**](./docs/compression.md)发布,自动对模型进行裁减和量化,大幅降低模型压缩技术使用成本;[**小样本Prompt框架发布**](./applications/text_classification/multi_class/few-shot),支持PET、P-Tuning、RGL等经典模型的快速实现;
+ * 💎 NLP工具:**[NLP 流水线系统 Pipelines](./pipelines)** 发布,支持快速搭建搜索引擎、问答系统,可扩展支持各类NLP系统,让解决 NLP 任务像搭积木一样便捷、灵活、高效!
+ * 💢 产业应用:新增 **[文本分类全流程应用方案](./applications/text_classification)** ,覆盖多分类、多标签、层次分类各类场景,支持 **小样本学习** 和 **TrustAI** 可信计算模型训练与调优;[**通用信息抽取 UIE 能力升级**](./model_zoo/uie),发布 **UIE-M**,支持中英文混合抽取,新增**UIE 数据蒸馏**方案,打破 UIE 推理瓶颈,推理速度提升 100 倍以上;
+ * 🍭 AIGC 内容生成:新增代码生成 SOTA 模型[**CodeGen**](./examples/code_generation/codegen),支持多种编程语言代码生成;集成[**文图生成潮流模型**](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/docs/model_zoo/taskflow.md#%E6%96%87%E5%9B%BE%E7%94%9F%E6%88%90) DALL·E Mini、Disco Diffusion、Stable Diffusion,更多趣玩模型等你来玩;新增[**中文文本摘要应用**](./applications/text_summarization),基于大规模语料的中文摘要模型首次发布,可支持 Taskflow 一键调用和定制训练;
+ * 💪 框架升级:[**模型自动压缩 API**](./docs/compression.md) 发布,自动对模型进行裁减和量化,大幅降低模型压缩技术使用门槛;[**小样本 Prompt**](./applications/text_classification/multi_class/few-shot)能力发布,集成 PET、P-Tuning、RGL 等经典算法。
* 👀 **2022.9.6 飞桨智慧金融行业系列直播课**
From 23979e704b3d13c6bef35138506f1d557de191c3 Mon Sep 17 00:00:00 2001
From: Jack Zhou
Date: Thu, 8 Sep 2022 09:15:31 +0800
Subject: [PATCH 029/159] Remove boost library. (#3215)
* Remove boost library.
* add conditional include for gtest
* Add test, demo exclude
---
faster_tokenizer/CMakeLists.txt | 13 +-
faster_tokenizer/cmake/external/boost.cmake | 49 -
faster_tokenizer/cmake/third_party.cmake | 5 +-
.../faster_tokenizer/core/CMakeLists.txt | 1 -
.../faster_tokenizer/core/tokenizer.cc | 14 +-
.../faster_tokenizer/core/tokenizer.h | 6 +-
.../faster_tokenizer/decoders/CMakeLists.txt | 1 -
.../faster_tokenizer/models/CMakeLists.txt | 2 +-
.../postprocessors/CMakeLists.txt | 2 +-
.../postprocessors/template.cc | 34 +-
.../postprocessors/template.h | 15 +-
.../faster_tokenizer/pybind/CMakeLists.txt | 4 +-
.../faster_tokenizer/utils/variant.h | 2845 +++++++++++++++++
13 files changed, 2893 insertions(+), 98 deletions(-)
delete mode 100644 faster_tokenizer/cmake/external/boost.cmake
create mode 100644 faster_tokenizer/faster_tokenizer/utils/variant.h
diff --git a/faster_tokenizer/CMakeLists.txt b/faster_tokenizer/CMakeLists.txt
index 1e6538b80ff6..c5325955ecca 100644
--- a/faster_tokenizer/CMakeLists.txt
+++ b/faster_tokenizer/CMakeLists.txt
@@ -102,7 +102,7 @@ endforeach()
ELSE(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC")
- IF (LINUX)
+ IF (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl -lpthread")
ENDIF()
set (PUBLIC_DEPEND_LIBS ${CMAKE_DL_LIBS})
@@ -110,7 +110,7 @@ ENDIF(WIN32)
# For OpenMP
# openmp not support well for now on windows
-if (LINUX)
+if (NOT APPLE AND NOT WIN32) # Linux
find_package(OpenMP)
if (OPENMP_FOUND)
add_definitions(-DWITH_OMP)
@@ -143,7 +143,7 @@ if(WITH_PYTHON)
add_subdirectory(python)
-if(LINUX)
+if (NOT APPLE AND NOT WIN32) # Linux
add_custom_target(build_tokenizers_bdist_wheel ALL
COMMAND ${PYTHON_EXECUTABLE} setup.py bdist_wheel --plat-name=manylinux1_x86_64
COMMENT "Packing whl packages------>>>"
@@ -168,6 +168,8 @@ file(COPY ${PROJECT_SOURCE_DIR}/FasterTokenizer.cmake DESTINATION ${CPP_PACKAGE_
# copy headers
file(COPY ${PROJECT_SOURCE_DIR}/faster_tokenizer/ DESTINATION ${CPP_PACKAGE_DIR}/include/faster_tokenizer/
FILES_MATCHING PATTERN "*.h"
+ PATTERN "test" EXCLUDE
+ PATTERN "demo" EXCLUDE
PATTERN "pybind" EXCLUDE)
add_custom_target(copy_third_party_headers ALL
@@ -177,11 +179,6 @@ add_custom_target(copy_third_party_headers ALL
${CPP_PACKAGE_DIR}/third_party/include
DEPENDS build_cpp_package_dir)
-add_custom_target(copy_boost_headers ALL
- COMMAND ${CMAKE_COMMAND} -E copy_directory
- ${BOOST_INCLUDE_DIR}/boost ${CPP_PACKAGE_DIR}/third_party/include/boost
- DEPENDS build_cpp_package_dir)
-
# copy library
set(TOKENIZER_CORE_NAME "core_tokenizers")
set(TOKENIZER_CORE_PATH ${CMAKE_BINARY_DIR}/faster_tokenizer)
diff --git a/faster_tokenizer/cmake/external/boost.cmake b/faster_tokenizer/cmake/external/boost.cmake
deleted file mode 100644
index 317fab04da59..000000000000
--- a/faster_tokenizer/cmake/external/boost.cmake
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-include(ExternalProject)
-
-set(BOOST_PROJECT "extern_boost")
-set(BOOST_VER "1.79.0")
-set(BOOST_URL "https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.zip" CACHE STRING "" FORCE)
-
-MESSAGE(STATUS "BOOST_VERSION: ${BOOST_VER}, BOOST_URL: ${BOOST_URL}")
-
-set(BOOST_PREFIX_DIR ${THIRD_PARTY_PATH}/boost)
-
-set(BOOST_INCLUDE_DIR "${THIRD_PARTY_PATH}/boost/src/extern_boost" CACHE PATH "boost include directory." FORCE)
-set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
-
-include_directories(${BOOST_INCLUDE_DIR})
-
-if(WIN32 AND MSVC_VERSION GREATER_EQUAL 1600)
- add_definitions(-DBOOST_HAS_STATIC_ASSERT)
-endif()
-
-ExternalProject_Add(
- ${BOOST_PROJECT}
- ${EXTERNAL_PROJECT_LOG_ARGS}
- URL ${BOOST_URL}
- URL_HASH SHA256=3634f9a85759311f321e587eace21799c0d0c946ff933e477a2f98885c54bbff
- PREFIX ${BOOST_PREFIX_DIR}
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- INSTALL_COMMAND ""
- UPDATE_COMMAND ""
- )
-
-add_library(boost INTERFACE)
-add_definitions(-DBOOST_ERROR_CODE_HEADER_ONLY)
-add_dependencies(boost ${BOOST_PROJECT})
-set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR})
diff --git a/faster_tokenizer/cmake/third_party.cmake b/faster_tokenizer/cmake/third_party.cmake
index 51a5c338dbe0..83d2ae2a5106 100644
--- a/faster_tokenizer/cmake/third_party.cmake
+++ b/faster_tokenizer/cmake/third_party.cmake
@@ -18,11 +18,12 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
include(external/icu)
-include(external/gtest)
+if(WITH_TESTING)
+ include(external/gtest)
+endif()
include(external/gflags)
include(external/glog)
include(external/re2)
-include(external/boost)
include(external/nlohmann_json)
include(external/dart) # For trie
if (WITH_PYTHON)
diff --git a/faster_tokenizer/faster_tokenizer/core/CMakeLists.txt b/faster_tokenizer/faster_tokenizer/core/CMakeLists.txt
index 1d5f01346556..ea831123e90e 100644
--- a/faster_tokenizer/faster_tokenizer/core/CMakeLists.txt
+++ b/faster_tokenizer/faster_tokenizer/core/CMakeLists.txt
@@ -1,4 +1,3 @@
cc_library(added_vocabulary SRCS added_vocabulary.cc DEPS normalizers pretokenizers json)
cc_library(tokenizer SRCS tokenizer.cc DEPS added_vocabulary json decoders trie models postprocessors)
cc_library(core SRCS encoding.cc DEPS json)
-add_dependencies(tokenizer extern_boost)
diff --git a/faster_tokenizer/faster_tokenizer/core/tokenizer.cc b/faster_tokenizer/faster_tokenizer/core/tokenizer.cc
index 8222e5602cac..626910584486 100644
--- a/faster_tokenizer/faster_tokenizer/core/tokenizer.cc
+++ b/faster_tokenizer/faster_tokenizer/core/tokenizer.cc
@@ -163,7 +163,7 @@ bool Tokenizer::DoPreTokenize(
return true;
}
-struct InputStringVisitor : public boost::static_visitor<> {
+struct InputStringVisitor {
InputStringVisitor(const Tokenizer* tokenizer,
uint32_t type_id,
OffsetType offset_type,
@@ -190,8 +190,8 @@ void Tokenizer::EncodeSingleString(const InputString& input_string,
uint32_t type_id,
OffsetType offset_type,
Encoding* encodings) const {
- boost::apply_visitor(
- InputStringVisitor(this, type_id, offset_type, encodings), input_string);
+ paddlenlp::visit(InputStringVisitor(this, type_id, offset_type, encodings),
+ input_string);
}
void Tokenizer::PostProcess(Encoding* encoding,
@@ -234,13 +234,13 @@ void Tokenizer::EncodePairStrings(const EncodeInput& encode_input,
bool add_special_tokens) const {
Encoding encoding;
if (encode_input.type() == typeid(InputString)) {
- const auto& input_string = boost::get(encode_input);
+ const auto& input_string = paddlenlp::get(encode_input);
EncodeSingleString(input_string, 0, OffsetType::CHAR, &encoding);
PostProcess(&encoding, nullptr, add_special_tokens, encodings);
} else {
Encoding pair_encoding;
const auto& input_string_pair =
- boost::get>(encode_input);
+ paddlenlp::get>(encode_input);
EncodeSingleString(input_string_pair.first, 0, OffsetType::CHAR, &encoding);
EncodeSingleString(
input_string_pair.second, 1, OffsetType::CHAR, &pair_encoding);
@@ -273,9 +273,9 @@ void Tokenizer::EncodeBatchStrings(
void Tokenizer::EncodePairStringsCharOffsets(const EncodeInput& encode_input,
Encoding* encodings,
bool add_special_tokens) const {
- const auto& input_string = boost::get(&encode_input);
+ const auto& input_string = paddlenlp::get_if(&encode_input);
const auto& input_string_pair =
- boost::get>(&encode_input);
+ paddlenlp::get_if>(&encode_input);
Encoding encoding;
Encoding pair_encoding;
if (input_string != nullptr) {
diff --git a/faster_tokenizer/faster_tokenizer/core/tokenizer.h b/faster_tokenizer/faster_tokenizer/core/tokenizer.h
index f7e9e35e3ee1..bf317efe1b98 100644
--- a/faster_tokenizer/faster_tokenizer/core/tokenizer.h
+++ b/faster_tokenizer/faster_tokenizer/core/tokenizer.h
@@ -19,7 +19,7 @@ limitations under the License. */
#include "faster_tokenizer/core/added_vocabulary.h"
#include "faster_tokenizer/core/base.h"
#include "faster_tokenizer/utils/utils.h"
-#include "boost/variant.hpp"
+#include "faster_tokenizer/utils/variant.h"
#include "nlohmann/json.hpp"
namespace paddlenlp {
@@ -56,9 +56,9 @@ namespace core {
class AddedVocabulary;
class Encoding;
-using InputString = boost::variant>;
+using InputString = paddlenlp::variant>;
using EncodeInput =
- boost::variant>;
+ paddlenlp::variant>;
class FASTERTOKENIZER_DECL Tokenizer {
public:
diff --git a/faster_tokenizer/faster_tokenizer/decoders/CMakeLists.txt b/faster_tokenizer/faster_tokenizer/decoders/CMakeLists.txt
index 496d5eea885b..d2fffc3dac6e 100644
--- a/faster_tokenizer/faster_tokenizer/decoders/CMakeLists.txt
+++ b/faster_tokenizer/faster_tokenizer/decoders/CMakeLists.txt
@@ -1,2 +1 @@
cc_library(decoders SRCS wordpiece.cc DEPS json utils)
-add_dependencies(decoders extern_boost)
\ No newline at end of file
diff --git a/faster_tokenizer/faster_tokenizer/models/CMakeLists.txt b/faster_tokenizer/faster_tokenizer/models/CMakeLists.txt
index a8a09148a428..05c568cb9a87 100644
--- a/faster_tokenizer/faster_tokenizer/models/CMakeLists.txt
+++ b/faster_tokenizer/faster_tokenizer/models/CMakeLists.txt
@@ -1,3 +1,3 @@
cc_library(models
SRCS wordpiece.cc faster_wordpiece.cc bpe.cc unigram.cc
- DEPS core json boost trie failure icuuc icudata lattice utils)
+ DEPS core json trie failure icuuc icudata lattice utils)
diff --git a/faster_tokenizer/faster_tokenizer/postprocessors/CMakeLists.txt b/faster_tokenizer/faster_tokenizer/postprocessors/CMakeLists.txt
index b4844bb8203e..ec4a80daf73d 100644
--- a/faster_tokenizer/faster_tokenizer/postprocessors/CMakeLists.txt
+++ b/faster_tokenizer/faster_tokenizer/postprocessors/CMakeLists.txt
@@ -1 +1 @@
-cc_library(postprocessors SRCS bert.cc postprocessor.cc template.cc DEPS core json boost)
+cc_library(postprocessors SRCS bert.cc postprocessor.cc template.cc DEPS core json)
diff --git a/faster_tokenizer/faster_tokenizer/postprocessors/template.cc b/faster_tokenizer/faster_tokenizer/postprocessors/template.cc
index 2ff2d1cb0713..7bbb8a3e2bd7 100644
--- a/faster_tokenizer/faster_tokenizer/postprocessors/template.cc
+++ b/faster_tokenizer/faster_tokenizer/postprocessors/template.cc
@@ -16,8 +16,8 @@
#include
#include "faster_tokenizer/core/encoding.h"
-#include "glog/logging.h"
#include "faster_tokenizer/postprocessors/template.h"
+#include "glog/logging.h"
namespace paddlenlp {
namespace faster_tokenizer {
@@ -27,7 +27,7 @@ void ParseIdFromString(const std::string& template_id_string,
TemplatePiece* template_piece) {
if (template_id_string.find_first_of("$") == 0) {
*template_piece = TemplateSequence();
- auto& seq = boost::get(*template_piece);
+ auto& seq = paddlenlp::get(*template_piece);
std::string rest =
template_id_string.substr(template_id_string.find_first_not_of("$"));
if (rest == "" || rest == "A" || rest == "a") {
@@ -48,15 +48,16 @@ void ParseIdFromString(const std::string& template_id_string,
}
} else {
*template_piece = TemplateSpecialToken();
- boost::get(*template_piece) = {template_id_string, 0};
+ paddlenlp::get(*template_piece) = {template_id_string,
+ 0};
}
}
void SetTypeId(uint32_t type_id, TemplatePiece* template_piece) {
- if (boost::get(template_piece) != nullptr) {
- boost::get(*template_piece).second = type_id;
+ if (paddlenlp::get_if(template_piece) != nullptr) {
+ paddlenlp::get(*template_piece).second = type_id;
} else {
- boost::get(*template_piece).second = type_id;
+ paddlenlp::get(*template_piece).second = type_id;
}
}
@@ -84,8 +85,8 @@ void GetTemplatePieceFromString(const std::string& template_string,
}
void to_json(nlohmann::json& j, const TemplatePiece& template_piece) {
- if (boost::get(&template_piece) != nullptr) {
- auto& template_sequence = boost::get(template_piece);
+ if (paddlenlp::get_if(&template_piece) != nullptr) {
+ auto& template_sequence = paddlenlp::get(template_piece);
j = {
{"Sequence",
{
@@ -95,7 +96,7 @@ void to_json(nlohmann::json& j, const TemplatePiece& template_piece) {
};
} else {
auto& template_special_token =
- boost::get(template_piece);
+ paddlenlp::get(template_piece);
j = {
{"SpecialToken",
{
@@ -135,7 +136,7 @@ size_t TemplatePostProcessor::CountAdded(
size_t count = 0;
for (auto& piece : template_->pieces_) {
TemplateSpecialToken* special_token =
- boost::get(&piece);
+ paddlenlp::get_if(&piece);
if (special_token != nullptr) {
auto token_iter =
special_tokens_map.tokens_map_.find(special_token->first);
@@ -244,8 +245,8 @@ void TemplatePostProcessor::ApplyTemplate(
core::Encoding* result_encoding) const {
size_t new_size = 0;
for (auto&& piece : pieces.pieces_) {
- if (boost::get(&piece) != nullptr) {
- auto seq_type = boost::get(piece).first;
+ if (paddlenlp::get_if(&piece) != nullptr) {
+ auto seq_type = paddlenlp::get(piece).first;
if (seq_type == SequenceType::SEQ_A) {
new_size += encoding->GetLen();
} else {
@@ -257,7 +258,8 @@ void TemplatePostProcessor::ApplyTemplate(
}
} else {
if (add_special_tokens) {
- auto&& special_token = boost::get(piece).first;
+ auto&& special_token =
+ paddlenlp::get(piece).first;
if (special_tokens_map_.tokens_map_.find(special_token) !=
special_tokens_map_.tokens_map_.end()) {
new_size +=
@@ -330,8 +332,8 @@ void TemplatePostProcessor::ApplyTemplate(
}
VLOG(6) << "Template pieces num: " << pieces.pieces_.size();
for (auto& piece : pieces.pieces_) {
- if (boost::get(&piece) != nullptr) {
- auto& template_sequence = boost::get(piece);
+ if (paddlenlp::get_if(&piece) != nullptr) {
+ auto& template_sequence = paddlenlp::get(piece);
if (template_sequence.first == SequenceType::SEQ_A) {
auto seq_start = ids.size();
auto seq_end = seq_start + encoding->GetLen();
@@ -385,7 +387,7 @@ void TemplatePostProcessor::ApplyTemplate(
pair_encoding->GetAttentionMask().end());
}
} else {
- auto& special_token = boost::get(piece);
+ auto& special_token = paddlenlp::get(piece);
if (add_special_tokens) {
const std::string& id = special_token.first;
uint32_t type_id = special_token.second;
diff --git a/faster_tokenizer/faster_tokenizer/postprocessors/template.h b/faster_tokenizer/faster_tokenizer/postprocessors/template.h
index c533a8d211f1..5083cfe8b7cf 100644
--- a/faster_tokenizer/faster_tokenizer/postprocessors/template.h
+++ b/faster_tokenizer/faster_tokenizer/postprocessors/template.h
@@ -18,11 +18,11 @@ limitations under the License. */
#include
#include
-#include "boost/variant.hpp"
-#include "glog/logging.h"
-#include "nlohmann/json.hpp"
#include "faster_tokenizer/postprocessors/postprocessor.h"
#include "faster_tokenizer/utils/utils.h"
+#include "faster_tokenizer/utils/variant.h"
+#include "glog/logging.h"
+#include "nlohmann/json.hpp"
namespace paddlenlp {
namespace faster_tokenizer {
@@ -37,7 +37,8 @@ NLOHMANN_JSON_SERIALIZE_ENUM(SequenceType,
using TemplateSequence = std::pair;
using TemplateSpecialToken = std::pair;
-using TemplatePiece = boost::variant;
+using TemplatePiece =
+ paddlenlp::variant;
void to_json(nlohmann::json& j, const TemplatePiece& template_piece);
void from_json(const nlohmann::json& j, TemplatePiece& template_piece);
@@ -119,10 +120,10 @@ struct FASTERTOKENIZER_DECL Template {
for (auto&& piece : pieces) {
TemplatePiece template_piece;
GetTemplatePieceFromString(piece, &template_piece);
- if (boost::get(&template_piece)) {
- pieces_.push_back(boost::get(template_piece));
+ if (paddlenlp::get_if(&template_piece)) {
+ pieces_.push_back(paddlenlp::get(template_piece));
} else {
- pieces_.push_back(boost::get(template_piece));
+ pieces_.push_back(paddlenlp::get(template_piece));
}
}
}
diff --git a/faster_tokenizer/faster_tokenizer/pybind/CMakeLists.txt b/faster_tokenizer/faster_tokenizer/pybind/CMakeLists.txt
index 620ee1e2e20c..f267f6350174 100644
--- a/faster_tokenizer/faster_tokenizer/pybind/CMakeLists.txt
+++ b/faster_tokenizer/faster_tokenizer/pybind/CMakeLists.txt
@@ -3,8 +3,8 @@ cc_library(pybind_utils SRCS utils.cc DEPS pybind python json)
cc_library(pybind_normalizers SRCS normalizers.cc DEPS pybind python json)
cc_library(pybind_pretokenizers SRCS pretokenizers.cc DEPS pybind python json)
cc_library(pybind_models SRCS models.cc DEPS pybind python json)
-cc_library(pybind_postprocessors SRCS postprocessors.cc DEPS pybind python core json boost)
-cc_library(pybind_tokenizers SRCS tokenizers.cc DEPS pybind python pybind_utils json boost)
+cc_library(pybind_postprocessors SRCS postprocessors.cc DEPS pybind python core json)
+cc_library(pybind_tokenizers SRCS tokenizers.cc DEPS pybind python pybind_utils json)
cc_library(pybind_exception SRCS exception.cc DEPS pybind python)
cc_library(pybind_decoders SRCS decoders.cc DEPS pybind python json)
cc_library(pybind_core SRCS core.cc DEPS pybind python json)
\ No newline at end of file
diff --git a/faster_tokenizer/faster_tokenizer/utils/variant.h b/faster_tokenizer/faster_tokenizer/utils/variant.h
new file mode 100644
index 000000000000..696f8312afe4
--- /dev/null
+++ b/faster_tokenizer/faster_tokenizer/utils/variant.h
@@ -0,0 +1,2845 @@
+// Copied from
+// https://github.com/mpark/variant/blob/single-header/v1.4.0/variant.hpp
+// with the following modifications:
+// 1. Rename namespace mpark to namespace paddlenlp.
+// 2. Add a type() member function to the variant class.
+// 3. Remove the visitation implementation under the branch with
+// MPARK_CPP14_CONSTEXPR defined, since lib::cpp14::array could not be
+// converted to std::initializer_list in Paddle's compilation.
+// 4. Decorate struct value_visitor with PYBIND11_HIDDEN.
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at
+// http://boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+// gcc >= 9 has a bug that creates a false positive warning.
+// Reference:
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92145
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89381
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 9
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-copy"
+#endif
+
+/*
+ variant synopsis
+
+namespace std {
+
+ // 20.7.2, class template variant
+  template <class... Types>
+  class variant {
+  public:
+
+    // 20.7.2.1, constructors
+    constexpr variant() noexcept(see below);
+    variant(const variant&);
+    variant(variant&&) noexcept(see below);
+
+    template <class T> constexpr variant(T&&) noexcept(see below);
+
+    template <class T, class... Args>
+    constexpr explicit variant(in_place_type_t<T>, Args&&...);
+
+    template <class T, class U, class... Args>
+    constexpr explicit variant(
+        in_place_type_t<T>, initializer_list<U>, Args&&...);
+
+    template <size_t I, class... Args>
+    constexpr explicit variant(in_place_index_t<I>, Args&&...);
+
+    template <size_t I, class U, class... Args>
+    constexpr explicit variant(
+        in_place_index_t<I>, initializer_list<U>, Args&&...);
+
+ // 20.7.2.2, destructor
+ ~variant();
+
+ // 20.7.2.3, assignment
+ variant& operator=(const variant&);
+ variant& operator=(variant&&) noexcept(see below);
+
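+    template <class T> variant& operator=(T&&) noexcept(see below);
+
+    // 20.7.2.4, modifiers
+    template <class T, class... Args>
+    T& emplace(Args&&...);
+
+    template <class T, class U, class... Args>
+    T& emplace(initializer_list<U>, Args&&...);
+
+    template <size_t I, class... Args>
+    variant_alternative<I, variant>& emplace(Args&&...);
+
+    template <size_t I, class U, class... Args>
+    variant_alternative<I, variant>& emplace(initializer_list<U>, Args&&...);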
+
+ // 20.7.2.5, value status
+ constexpr bool valueless_by_exception() const noexcept;
+ constexpr size_t index() const noexcept;
+
+ // 20.7.2.6, swap
+ void swap(variant&) noexcept(see below);
+ };
+
+ // 20.7.3, variant helper classes
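+  template <class T> struct variant_size; // undefined
+
+  template <class T>
+  constexpr size_t variant_size_v = variant_size<T>::value;
+
+  template <class T> struct variant_size<const T>;
+  template <class T> struct variant_size<volatile T>;
+  template <class T> struct variant_size<const volatile T>;
+
+  template <class... Types>
+  struct variant_size<variant<Types...>>;
+
+  template <size_t I, class T> struct variant_alternative; // undefined
+
+  template <size_t I, class T>
+  using variant_alternative_t = typename variant_alternative<I, T>::type;
+
+  template <size_t I, class T> struct variant_alternative<I, const T>;
+  template <size_t I, class T> struct variant_alternative<I, volatile T>;
+  template <size_t I, class T> struct variant_alternative<I, const volatile T>;
+
+  template <size_t I, class... Types>
+  struct variant_alternative<I, variant<Types...>>;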
+
+ constexpr size_t variant_npos = -1;
+
+ // 20.7.4, value access
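+  template <class T, class... Types>
+  constexpr bool holds_alternative(const variant<Types...>&) noexcept;
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>>&
+  get(variant<Types...>&);
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>>&&
+  get(variant<Types...>&&);
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>> const&
+  get(const variant<Types...>&);
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>> const&&
+  get(const variant<Types...>&&);
+
+  template <class T, class... Types>
+  constexpr T& get(variant<Types...>&);
+
+  template <class T, class... Types>
+  constexpr T&& get(variant<Types...>&&);
+
+  template <class T, class... Types>
+  constexpr const T& get(const variant<Types...>&);
+
+  template <class T, class... Types>
+  constexpr const T&& get(const variant<Types...>&&);
+
+  template <size_t I, class... Types>
+  constexpr add_pointer_t<variant_alternative_t<I, variant<Types...>>>
+  get_if(variant<Types...>*) noexcept;
+
+  template <size_t I, class... Types>
+  constexpr add_pointer_t<const variant_alternative_t<I, variant<Types...>>>
+  get_if(const variant<Types...>*) noexcept;
+
+  template <class T, class... Types>
+  constexpr add_pointer_t<T>
+  get_if(variant<Types...>*) noexcept;
+
+  template <class T, class... Types>
+  constexpr add_pointer_t<const T>
+  get_if(const variant<Types...>*) noexcept;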
+
+ // 20.7.5, relational operators
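+  template <class... Types>
+  constexpr bool operator==(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator!=(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator<(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator>(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator<=(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator>=(const variant<Types...>&, const variant<Types...>&);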
+
+ // 20.7.6, visitation
+  template <class Visitor, class... Variants>
+  constexpr see below visit(Visitor&&, Variants&&...);
+
+ // 20.7.7, class monostate
+ struct monostate;
+
+ // 20.7.8, monostate relational operators
+ constexpr bool operator<(monostate, monostate) noexcept;
+ constexpr bool operator>(monostate, monostate) noexcept;
+ constexpr bool operator<=(monostate, monostate) noexcept;
+ constexpr bool operator>=(monostate, monostate) noexcept;
+ constexpr bool operator==(monostate, monostate) noexcept;
+ constexpr bool operator!=(monostate, monostate) noexcept;
+
+ // 20.7.9, specialized algorithms
+  template <class... Types>
+  void swap(variant<Types...>&, variant<Types...>&) noexcept(see below);
+
+ // 20.7.10, class bad_variant_access
+ class bad_variant_access;
+
+ // 20.7.11, hash support
+  template <class T> struct hash;
+  template <class... Types> struct hash<variant<Types...>>;
+  template <> struct hash<monostate>;
+
+} // namespace std
+
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at
+// http://boost.org/LICENSE_1_0.txt)
+
+#ifndef MPARK_CONFIG_HPP
+#define MPARK_CONFIG_HPP
+
+// MSVC 2015 Update 3.
+#if __cplusplus < 201103L && (!defined(_MSC_VER) || _MSC_FULL_VER < 190024210)
+#error "MPark.Variant requires C++11 support."
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if __has_attribute(always_inline) || defined(__GNUC__)
+#define MPARK_ALWAYS_INLINE __attribute__((__always_inline__)) inline
+#elif defined(_MSC_VER)
+#define MPARK_ALWAYS_INLINE __forceinline
+#else
+#define MPARK_ALWAYS_INLINE inline
+#endif
+
+#if __has_builtin(__builtin_addressof) || \
+ (defined(__GNUC__) && __GNUC__ >= 7) || defined(_MSC_VER)
+#define MPARK_BUILTIN_ADDRESSOF
+#endif
+
+#if __has_builtin(__builtin_unreachable) || defined(__GNUC__)
+#define MPARK_BUILTIN_UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define MPARK_BUILTIN_UNREACHABLE __assume(false)
+#else
+#define MPARK_BUILTIN_UNREACHABLE
+#endif
+
+#if __has_builtin(__type_pack_element)
+#define MPARK_TYPE_PACK_ELEMENT
+#endif
+
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 200704 && \
+ !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 9)
+#define MPARK_CPP11_CONSTEXPR
+#endif
+
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
+#define MPARK_CPP14_CONSTEXPR
+#endif
+
+#if __has_feature(cxx_exceptions) || defined(__cpp_exceptions) || \
+ (defined(_MSC_VER) && defined(_CPPUNWIND))
+#define MPARK_EXCEPTIONS
+#endif
+
+#if defined(__cpp_generic_lambdas) || defined(_MSC_VER)
+#define MPARK_GENERIC_LAMBDAS
+#endif
+
+#if defined(__cpp_lib_integer_sequence)
+#define MPARK_INTEGER_SEQUENCE
+#endif
+
+#if defined(__cpp_return_type_deduction) || defined(_MSC_VER)
+#define MPARK_RETURN_TYPE_DEDUCTION
+#endif
+
+#if defined(__cpp_lib_transparent_operators) || defined(_MSC_VER)
+#define MPARK_TRANSPARENT_OPERATORS
+#endif
+
+#if defined(__cpp_variable_templates) || defined(_MSC_VER)
+#define MPARK_VARIABLE_TEMPLATES
+#endif
+
+#if !defined(__GLIBCXX__) || __has_include() // >= libstdc++-5
+#define MPARK_TRIVIALITY_TYPE_TRAITS
+#define MPARK_INCOMPLETE_TYPE_TRAITS
+#endif
+
+#endif // MPARK_CONFIG_HPP
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at
+// http://boost.org/LICENSE_1_0.txt)
+
+#ifndef MPARK_IN_PLACE_HPP
+#define MPARK_IN_PLACE_HPP
+
+#include
+
+namespace paddlenlp {
+
+struct in_place_t {
+ explicit in_place_t() = default;
+};
+
+template
+struct in_place_index_t {
+ explicit in_place_index_t() = default;
+};
+
+template
+struct in_place_type_t {
+ explicit in_place_type_t() = default;
+};
+
+#ifdef MPARK_VARIABLE_TEMPLATES
+constexpr in_place_t in_place{};
+
+template
+constexpr in_place_index_t in_place_index{};
+
+template
+constexpr in_place_type_t in_place_type{};
+#endif
+
+} // namespace paddlenlp
+
+#endif // MPARK_IN_PLACE_HPP
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at
+// http://boost.org/LICENSE_1_0.txt)
+
+#ifndef MPARK_LIB_HPP
+#define MPARK_LIB_HPP
+
+#include
+#include
+#include
+#include
+
+#define MPARK_RETURN(...) \
+ noexcept(noexcept(__VA_ARGS__))->decltype(__VA_ARGS__) { return __VA_ARGS__; }
+
+namespace paddlenlp {
+namespace lib {
+template
+struct identity {
+ using type = T;
+};
+
+inline namespace cpp14 {
+template
+struct array {
+ constexpr const T &operator[](std::size_t index) const { return data[index]; }
+
+ T data[N == 0 ? 1 : N];
+};
+
+template
+using add_pointer_t = typename std::add_pointer::type;
+
+template
+using common_type_t = typename std::common_type::type;
+
+template
+using decay_t = typename std::decay::type;
+
+template
+using enable_if_t = typename std::enable_if::type;
+
+template
+using remove_const_t = typename std::remove_const::type;
+
+template
+using remove_reference_t = typename std::remove_reference::type;
+
+template
+inline constexpr T &&forward(remove_reference_t &t) noexcept {
+ return static_cast(t);
+}
+
+template
+inline constexpr T &&forward(remove_reference_t &&t) noexcept {
+ static_assert(!std::is_lvalue_reference::value,
+ "can not forward an rvalue as an lvalue");
+ return static_cast(t);
+}
+
+template
+inline constexpr remove_reference_t &&move(T &&t) noexcept {
+ return static_cast &&>(t);
+}
+
+#ifdef MPARK_INTEGER_SEQUENCE
+using std::index_sequence;
+using std::index_sequence_for;
+using std::integer_sequence;
+using std::make_index_sequence;
+#else
+template
+struct integer_sequence {
+ using value_type = T;
+ static constexpr std::size_t size() noexcept { return sizeof...(Is); }
+};
+
+template
+using index_sequence = integer_sequence;
+
+template
+struct make_index_sequence_concat;
+
+template
+struct make_index_sequence_concat,
+ index_sequence>
+ : identity> {};
+
+template
+struct make_index_sequence_impl;
+
+template
+using make_index_sequence = typename make_index_sequence_impl::type;
+
+template
+struct make_index_sequence_impl
+ : make_index_sequence_concat,
+ make_index_sequence> {};
+
+template <>
+struct make_index_sequence_impl<0> : identity> {};
+
+template <>
+struct make_index_sequence_impl<1> : identity> {};
+
+template
+using index_sequence_for = make_index_sequence;
+#endif
+
+// <functional>
+#ifdef MPARK_TRANSPARENT_OPERATORS
+using equal_to = std::equal_to<>;
+#else
+struct equal_to {
+ template
+ inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+ MPARK_RETURN(lib::forward(lhs) == lib::forward(rhs))
+};
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+using not_equal_to = std::not_equal_to<>;
+#else
+struct not_equal_to {
+ template
+ inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+ MPARK_RETURN(lib::forward(lhs) != lib::forward(rhs))
+};
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+using less = std::less<>;
+#else
+struct less {
+ template
+ inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+ MPARK_RETURN(lib::forward(lhs) < lib::forward(rhs))
+};
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+using greater = std::greater<>;
+#else
+struct greater {
+ template
+ inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+ MPARK_RETURN(lib::forward(lhs) > lib::forward(rhs))
+};
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+using less_equal = std::less_equal<>;
+#else
+struct less_equal {
+ template
+ inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+ MPARK_RETURN(lib::forward(lhs) <= lib::forward(rhs))
+};
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+using greater_equal = std::greater_equal<>;
+#else
+struct greater_equal {
+ template