Commit 99fbc41

Fix type promotion problem. (#8414)
* fix type promotion problem.
1 parent c6e5459 commit 99fbc41

12 files changed (+27, -23 lines)

paddlenlp/generation/utils.py

Lines changed: 3 additions & 1 deletion
@@ -511,7 +511,9 @@ def update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder
 def update_scores_for_generation(scores, next_scores, length, unfinished_flag):
     # update scores
 
-    unfinished_scores = (scores * length + next_scores) / (length + 1)
+    unfinished_scores = (scores * paddle.to_tensor(length, dtype=scores.dtype) + next_scores) / (
+        paddle.to_tensor(length, dtype=scores.dtype) + 1
+    )
     scores = paddle.where(unfinished_flag, unfinished_scores, scores)
     return scores

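For context, a minimal sketch of the pattern this hunk applies; the tensor names, shapes, and decoded length below are invented for illustration. Recent Paddle releases restrict implicit promotion between integer and floating dtypes in elementwise ops, so the integer length is materialized in the scores' dtype before the arithmetic:

import paddle

scores = paddle.rand([4, 1], dtype="float32")        # running average log-probs (illustrative)
next_scores = paddle.rand([4, 1], dtype="float32")   # log-probs of the newly decoded step
unfinished_flag = paddle.full([4, 1], True, dtype="bool")
length = paddle.to_tensor(8, dtype="int64")          # decoded length so far (hypothetical)

# Cast the integer length into the scores' float dtype so the whole expression
# stays in a single dtype instead of relying on int64 x float32 promotion.
length_f = paddle.to_tensor(length, dtype=scores.dtype)
unfinished_scores = (scores * length_f + next_scores) / (length_f + 1)
scores = paddle.where(unfinished_flag, unfinished_scores, scores)
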
paddlenlp/layers/crf.py

Lines changed: 1 addition & 1 deletion
@@ -165,7 +165,7 @@ def _point_score(self, inputs, labels, lengths):
     flattened_inputs = inputs.reshape([-1])
     offsets = paddle.unsqueeze(self._get_batch_index(batch_size) * seq_len * n_labels, 1)
     offsets += paddle.unsqueeze(self._get_seq_index(seq_len) * n_labels, 0)
-    flattened_tag_indices = paddle.reshape(offsets + labels, [-1])
+    flattened_tag_indices = paddle.reshape(offsets + labels.astype(offsets.dtype), [-1])
 
     scores = paddle.gather(flattened_inputs, flattened_tag_indices).reshape([batch_size, seq_len])

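A standalone sketch of the same fix, assuming int64 offsets (the paddle.arange default) and int32 labels; everything below is illustrative rather than the repo's actual _point_score inputs:

import paddle

batch_size, seq_len, n_labels = 2, 3, 4
inputs = paddle.rand([batch_size, seq_len, n_labels])
flattened_inputs = inputs.reshape([-1])

# Per-position offsets into the flattened emission scores (int64 by default).
offsets = paddle.arange(batch_size, dtype="int64").unsqueeze(1) * (seq_len * n_labels)
offsets = offsets + paddle.arange(seq_len, dtype="int64").unsqueeze(0) * n_labels
labels = paddle.zeros([batch_size, seq_len], dtype="int32")   # gold tag ids (illustrative)

# Align the label dtype with the offsets before the add, so the gather index uses one int dtype.
flattened_tag_indices = paddle.reshape(offsets + labels.astype(offsets.dtype), [-1])
scores = paddle.gather(flattened_inputs, flattened_tag_indices).reshape([batch_size, seq_len])
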
paddlenlp/metrics/perplexity.py

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ def compute(self, pred, label, seq_mask=None):
     ce = F.cross_entropy(input=pred, label=label, reduction="none", soft_label=False)
     ce = paddle.squeeze(ce, axis=[2])
     if seq_mask is not None:
-        ce = ce * seq_mask
+        ce = ce * seq_mask.astype(ce.dtype)
         word_num = paddle.sum(seq_mask)
         return ce, word_num
     return ce

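The same masking pattern in isolation; the logits, labels, and sequence mask below are made up. The point is simply that an integer (or bool) mask is cast to the loss dtype before the elementwise multiply:

import paddle
import paddle.nn.functional as F

logits = paddle.rand([2, 5, 10])                           # [batch, seq_len, vocab] (illustrative)
labels = paddle.randint(0, 10, [2, 5, 1], dtype="int64")
seq_mask = paddle.to_tensor([[1, 1, 1, 0, 0],
                             [1, 1, 0, 0, 0]], dtype="int64")

ce = F.cross_entropy(input=logits, label=labels, reduction="none", soft_label=False)
ce = paddle.squeeze(ce, axis=[2])

# Cast the mask to the loss dtype so the product stays float instead of mixing int64 and float32.
ce = ce * seq_mask.astype(ce.dtype)
word_num = paddle.sum(seq_mask)
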
paddlenlp/prompt/verbalizer.py

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@ def aggregate(self, outputs: Tensor, mask: Tensor, atype: str):
     Aggregate multiple tokens/words for each word/label.
     """
     if atype == "mean":
-        outputs = outputs * mask
+        outputs = outputs * mask.astype(outputs.dtype)
         outputs = outputs.sum(axis=-1) / (mask.sum(axis=-1) + 1e-15)
     elif atype == "max":
         outputs = (outputs - 1e4 * (1 - mask)).max(axis=-1)

paddlenlp/transformers/convbert/modeling.py

Lines changed: 3 additions & 1 deletion
@@ -1137,7 +1137,9 @@ def update_inputs(self, sequence, updates, positions):
     N = positions.shape[1]
     assert N == L, "the dimension of inputs and mask should be same as [batch_size, sequence_length]"
 
-    updated_sequence = ((paddle.ones_like(sequence) - positions) * sequence) + (positions * updates)
+    updated_sequence = ((paddle.ones_like(sequence) - positions) * sequence) + (
+        positions * updates.astype(positions.dtype)
+    )
 
     return updated_sequence

paddlenlp/transformers/electra/modeling.py

Lines changed: 6 additions & 2 deletions
@@ -1051,7 +1051,9 @@ def get_discriminator_inputs(self, inputs, raw_inputs, generator_logits, generat
     mask_positions = paddle.where(generator_labels == -100, umask_positions, mask_positions)
     updated_inputs = self.update_inputs(inputs, sampled_tokids, mask_positions)
     # use inputs and updated_input to get discriminator labels
-    labels = mask_positions * (paddle.ones_like(inputs) - paddle.equal(updated_inputs, raw_inputs).astype("int64"))
+    labels = mask_positions * (
+        paddle.ones_like(inputs) - paddle.equal(updated_inputs, raw_inputs).astype(raw_inputs.dtype)
+    )
     return updated_inputs, labels, sampled_tokids
 
 def sample_from_softmax(self, logits, use_softmax_sample=True):
@@ -1073,7 +1075,9 @@ def update_inputs(self, sequence, updates, positions):
     N = positions.shape[1]
     assert N == L, "the dimension of inputs and mask should be same as [B, L]"
 
-    updated_sequence = ((paddle.ones_like(sequence) - positions) * sequence) + (positions * updates)
+    updated_sequence = ((paddle.ones_like(sequence) - positions) * sequence) + (
+        positions * updates.astype(positions.dtype)
+    )
 
     return updated_sequence

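Both ELECTRA hunks (and the ConvBERT one above) follow the same recipe; here is a minimal, self-contained sketch with invented token ids and mask positions:

import paddle

sequence = paddle.to_tensor([[5, 6, 7, 8]], dtype="int64")    # original token ids (illustrative)
updates = paddle.to_tensor([[0, 9, 0, 3]], dtype="int32")     # generator-sampled replacements
positions = paddle.to_tensor([[0, 1, 0, 1]], dtype="int64")   # 1 marks a masked position

# Cast the sampled ids to the positions'/sequence's dtype so every term of the blend is int64.
updated_sequence = ((paddle.ones_like(sequence) - positions) * sequence) + (
    positions * updates.astype(positions.dtype)
)

# Discriminator labels: paddle.equal returns bool, so cast it back to the id dtype before
# multiplying with the integer position mask.
raw_inputs = sequence
labels = positions * (
    paddle.ones_like(raw_inputs) - paddle.equal(updated_sequence, raw_inputs).astype(raw_inputs.dtype)
)
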
paddlenlp/transformers/funnel/modeling.py

Lines changed: 2 additions & 2 deletions
@@ -519,7 +519,7 @@ def relative_positional_attention(self, position_embeds, q_head, context_len, cl
     positional_attn = _relative_shift_gather(positional_attn, context_len, shift)
 
     if cls_mask is not None:
-        positional_attn *= cls_mask
+        positional_attn *= cls_mask.astype(positional_attn.dtype)
     return positional_attn
 
 def relative_token_type_attention(self, token_type_mat, q_head, cls_mask=None):
@@ -547,7 +547,7 @@ def relative_token_type_attention(self, token_type_mat, q_head, cls_mask=None):
     )
 
     if cls_mask is not None:
-        token_type_attn *= cls_mask
+        token_type_attn *= cls_mask.astype(token_type_attn.dtype)
     return token_type_attn
 
 def forward(self, query, key, value, attention_inputs, output_attentions=False):

paddlenlp/transformers/gptj/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -158,7 +158,7 @@ def _attn(
 
     if attention_mask is not None:
         # Apply the attention mask
-        attn_weights = attn_weights + attention_mask
+        attn_weights = attn_weights + attention_mask.astype(attn_weights.dtype)
 
     attn_weights = paddle.nn.functional.softmax(attn_weights, axis=-1)
     attn_weights = attn_weights.astype(value.dtype)

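The GPT-J, MegatronBERT, and RemBERT hunks all add an attention mask whose dtype can differ from the attention scores (for example a float32 mask under fp16 autocast). A minimal sketch, assuming an environment where float16 kernels are available (typically GPU); the shapes and mask values are illustrative:

import paddle

attn_weights = paddle.rand([1, 2, 4, 4]).astype("float16")   # [batch, heads, query, key]
attention_mask = paddle.to_tensor([[[[0.0, 0.0, -1e4, -1e4]]]], dtype="float32")

# Cast the additive mask to the weights' dtype so the add happens in half precision
# rather than promoting the fp16 weights to float32.
attn_weights = attn_weights + attention_mask.astype(attn_weights.dtype)
attn_weights = paddle.nn.functional.softmax(attn_weights, axis=-1)
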
paddlenlp/transformers/mbart/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ def shift_tokens_right(input_ids, pad_token_id):
     batch_size, seq_length = shifted_input_ids.shape
     index = paddle.arange(0, batch_size, 1, dtype="int32") * seq_length
     index_of_eos = paddle.cast(shifted_input_ids != pad_token_id, dtype="int32").sum(axis=-1) - 1
-    decoder_start_tokens = paddle.gather(input_flat, index + index_of_eos)
+    decoder_start_tokens = paddle.gather(input_flat, index + index_of_eos.astype(index.dtype))
     shifted_input_ids[:, 1:] = shifted_input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_tokens
     return shifted_input_ids

paddlenlp/transformers/megatronbert/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -171,7 +171,7 @@ def forward(self, hidden_states, attention_mask=None):
     attention_scores = attention_scores / math.sqrt(self.attention_head_size)
     if attention_mask is not None:
         # Apply the attention mask is (precomputed for all layers in MegatronBertModel forward() function)
-        attention_scores = attention_scores + attention_mask
+        attention_scores = attention_scores + attention_mask.astype(attention_scores.dtype)
 
     # Normalize the attention scores to probabilities.
     attention_probs = nn.functional.softmax(attention_scores, axis=-1)

paddlenlp/transformers/prophetnet/modeling.py

Lines changed: 6 additions & 10 deletions
@@ -71,12 +71,9 @@ def compute_relative_buckets(num_buckets, max_distance, relative_positions, is_b
     )
     inv_relative_positions = paddle.abs(inv_relative_positions)
 else:
-    inv_relative_positions = (
-        paddle.cast(
-            paddle.less_than(paddle.zeros_like(inv_relative_positions), inv_relative_positions), dtype=paddle.int32
-        )
-        * inv_relative_positions
-    )
+    inv_relative_positions = paddle.cast(
+        paddle.less_than(paddle.zeros_like(inv_relative_positions), inv_relative_positions), dtype=paddle.int32
+    ) * inv_relative_positions.astype(paddle.int32)
 
 max_exact = num_buckets // 2
 is_small = paddle.less_than(inv_relative_positions, paddle.to_tensor(max_exact).cast(dtype=paddle.int32))
@@ -85,10 +82,9 @@ def compute_relative_buckets(num_buckets, max_distance, relative_positions, is_b
 ) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
 val_if_large_num_buckets = paddle.ones_like(val_if_large) * (num_buckets - 1)
 val_if_large_lt = paddle.cast(paddle.less_than(val_if_large, val_if_large_num_buckets), dtype=paddle.int32)
-val_if_large = (
-    paddle.cast(val_if_large_lt * val_if_large, dtype=paddle.int32)
-    + (1 - val_if_large_lt) * val_if_large_num_buckets
-)
+val_if_large = val_if_large_lt * val_if_large.astype(val_if_large_lt.dtype) + (
+    1 - val_if_large_lt
+) * val_if_large_num_buckets.astype(val_if_large_lt.dtype)
 rel_positions_bucket = rel_positions_bucket + paddle.where(
     is_small, paddle.cast(inv_relative_positions, dtype=paddle.int32), val_if_large
 )

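The bucket arithmetic boils down to the same idea: paddle.less_than yields a bool tensor, and after casting it to int32 the other operand is cast to the same integer dtype before the elementwise multiply. A small sketch with made-up relative positions:

import paddle

inv_relative_positions = paddle.to_tensor([[-2, 0, 3]], dtype="int64")   # illustrative

# Indicator of strictly positive positions, as int32.
is_positive = paddle.cast(
    paddle.less_than(paddle.zeros_like(inv_relative_positions), inv_relative_positions),
    dtype=paddle.int32,
)

# Cast the positions to the indicator's dtype so the product stays int32 throughout.
inv_relative_positions = is_positive * inv_relative_positions.astype(paddle.int32)
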
paddlenlp/transformers/rembert/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ def forward(self, hidden_states, attention_mask=None):
     attention_scores = attention_scores / math.sqrt(self.attention_head_size)
     if attention_mask is not None:
         # Apply the attention mask is (precomputed for all layers in RemBertModel forward() function)
-        attention_scores = attention_scores + attention_mask
+        attention_scores = attention_scores + attention_mask.astype(attention_scores.dtype)
 
     # Normalize the attention scores to probabilities.
     attention_probs = F.softmax(attention_scores, axis=-1)
