
Commit 9250d64

Merge commit '525eef76f0513f205f5d6e6122cdb53f52505386' into xpullama2
2 parents: 81d7e07 + 525eef7

File tree: 6 files changed (+13 / -21 lines)

paddlenlp/transformers/qwen2/modeling.py

Lines changed: 6 additions & 13 deletions
```diff
@@ -18,6 +18,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Paddle Qwen2 model."""
+from __future__ import annotations
 
 import math
 import warnings
```
```diff
@@ -187,11 +188,11 @@ def scaled_dot_product_attention(
     else:
         # [ bz, seqlen, nhead, head_dim] -> [bs, nhead, seq_len, head_dim]
         query_states = paddle.transpose(query_states, [0, 2, 1, 3])
-        # merge with the next tranpose
+        # merge with the next transpose
         key_states = paddle.transpose(key_states, [0, 2, 1, 3])
         value_states = paddle.transpose(value_states, [0, 2, 1, 3])
 
-        # matmul and devide by sqrt(head_dim)
+        # matmul and divide by sqrt(head_dim)
         attn_weights = paddle.matmul(query_states / math.sqrt(head_dim), key_states.transpose([0, 1, 3, 2]))
 
         if attn_weights.shape != [bsz, num_heads, q_len, kv_seq_len]:
```
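For reference, this branch computes plain scaled dot-product attention scores. A minimal standalone sketch of the same transpose-then-matmul shape flow (tensor shapes invented for illustration, not taken from the commit):

```python
import math

import paddle

# Illustrative shapes only: [batch, seq_len, num_heads, head_dim]
bsz, q_len, num_heads, head_dim = 2, 8, 4, 16
query_states = paddle.rand([bsz, q_len, num_heads, head_dim])
key_states = paddle.rand([bsz, q_len, num_heads, head_dim])

# [bs, seq_len, nhead, head_dim] -> [bs, nhead, seq_len, head_dim]
query_states = paddle.transpose(query_states, [0, 2, 1, 3])
key_states = paddle.transpose(key_states, [0, 2, 1, 3])

# matmul and divide by sqrt(head_dim); scores land at [bs, nhead, q_len, kv_seq_len]
attn_weights = paddle.matmul(query_states / math.sqrt(head_dim), key_states.transpose([0, 1, 3, 2]))
print(attn_weights.shape)  # [2, 4, 8, 8]
```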
```diff
@@ -1127,7 +1128,7 @@ def forward(self, prediction_scores, masked_lm_labels):
         if self.enable_parallel_cross_entropy:
             if prediction_scores.shape[-1] == self.config.vocab_size:
                 warnings.warn(
-                    f"enable_parallel_cross_entropy, the vocab_size should be splited: {prediction_scores.shape[-1]}, {self.config.vocab_size}"
+                    f"enable_parallel_cross_entropy, the vocab_size should be splitted: {prediction_scores.shape[-1]}, {self.config.vocab_size}"
                 )
                 self.loss_func = paddle.nn.CrossEntropyLoss(reduction="none", ignore_index=self.ignore_index)
 
```
```diff
@@ -1202,14 +1203,7 @@ def get_decoder(self):
         return self.qwen2
 
     def prepare_inputs_for_generation(
-        self,
-        input_ids,
-        use_cache=False,
-        past_key_values=None,
-        attention_mask=None,
-        inputs_embeds=None,
-        output_router_logits=False,
-        **kwargs
+        self, input_ids, use_cache=False, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
     ):
         batch_size, seq_length = input_ids.shape
         position_ids = kwargs.get("position_ids", paddle.arange(seq_length).expand((batch_size, seq_length)))
@@ -1230,7 +1224,6 @@ def prepare_inputs_for_generation(
                 "past_key_values": past_key_values,
                 "use_cache": use_cache,
                 "attention_mask": attention_mask,
-                "output_router_logits": output_router_logits,
             }
         )
         return model_inputs
```
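The default `position_ids` built in this method broadcasts a single `arange` across the batch. A quick standalone check of that expression (toy sizes, illustration only):

```python
import paddle

batch_size, seq_length = 2, 5
position_ids = paddle.arange(seq_length).expand((batch_size, seq_length))
print(position_ids)
# [[0, 1, 2, 3, 4],
#  [0, 1, 2, 3, 4]]
```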
```diff
@@ -1325,7 +1318,7 @@ def forward(
         hidden_states = outputs[0]
 
         # if labels is None,means we need full output, instead of tensor_parallel_output
-        # tensor_parallel_output is togather with ParallelCrossEntropy
+        # tensor_parallel_output is together with ParallelCrossEntropy
         tensor_parallel_output = (
             self.config.tensor_parallel_output and labels is not None and self.config.tensor_parallel_degree > 1
         )
```

tests/test_tipc/auto_tuner/autoconfig/check_mem_usage.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -19,7 +19,7 @@ auto_log_file=./autoconfig/${model_name}_auto_tuner.log
 
 log="./llama7b_pretrain_auto_tuner.log"
 launch_best_cfg=$(sed -n "s/.*Launch best cfg: \(.*\)}/\1/p" "$auto_log_file")
-cfg_max_mem_usage=$(echo "$launch_best_cfg" | awk -F"max_mem_usage': " '{print $2}' | awk -F, '{print $1}')
+cfg_max_mem_usage=$(echo "$launch_best_cfg" | awk -F"\"max_mem_usage\":" '{print $2}' | awk -F, '{print $1}')
 
 buffer=$(sed -n 's/.*"buffer":\([^,}]*\).*/\1/p' $autoconfig_json_file | awk '{print $1}')
 max_mem_usage=$(sed -n 's/.*"max_mem_usage":\([^,}]*\).*/\1/p' $autoconfig_json_file | awk '{print $1}')
```
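The new `awk` field separator assumes the "Launch best cfg" payload is serialized with JSON-style quoting (`"max_mem_usage":`) rather than a Python dict repr (`'max_mem_usage': `). A minimal sketch of the same extraction in Python, with a made-up log line (format assumed, not taken from the commit):

```python
import json
import re

# Hypothetical log line in the JSON-style format the updated awk expects.
line = 'INFO Launch best cfg: {"mp_degree": 4, "max_mem_usage": 26130, "use_recompute": true}'

match = re.search(r"Launch best cfg: (\{.*\})", line)
if match:
    cfg = json.loads(match.group(1))
    print(cfg["max_mem_usage"])  # 26130
```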
Lines changed: 3 additions & 4 deletions
```diff
@@ -1,6 +1,5 @@
 job_id,mp_degree,sharding_degree,pp_degree,dp_degree,sharding_stage,micro_batch_size,vpp_degree,use_recompute,recompute_granularity,acc_steps,global_batch_size,exec_time,interval_samples_per_second,max_mem_usage,error_info
-5,4,2,1,1,2,4,1,True,full,1,8,72.38,4.74138,26130,
-2,4,1,2,1,1,8,1,True,full,1,8,57.35,2.58279,27722,
+4,4,2,1,1,2,4,1,True,full,1,8,72.38,4.74138,26130,
+2,4,1,2,1,2,4,1,True,full,2,8,40.17,3.3612,24590,
 1,8,1,1,1,2,1,1,True,full,8,8,55.32,2.3064,19050,
-3,4,1,2,1,1,8,1,True,full_attn,1,8,45.29,,OOM,['Out of memory']
-4,4,1,2,1,1,4,1,True,full_attn,2,8,55.15,,OOM,['Out of memory']
+3,4,1,2,1,2,4,1,True,full_attn,2,8,39.38,,OOM,['Out of memory']
```
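The auto-tuner history above is plain CSV, so it is easy to post-process. An illustrative parse that picks the fastest non-OOM job (column meanings read off the header; the file name is a placeholder, not from the commit):

```python
import csv

# "history.csv" is a placeholder name for the auto-tuner history file shown above.
with open("history.csv", newline="") as f:
    rows = list(csv.DictReader(f))

# Skip failed runs, then take the job with the smallest execution time.
ok = [r for r in rows if r["max_mem_usage"] != "OOM"]
best = min(ok, key=lambda r: float(r["exec_time"]))
print(best["job_id"], best["exec_time"], best["max_mem_usage"])
```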

tests/test_tipc/auto_tuner/llama_finetune/benchmark_common/prepare.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -34,7 +34,7 @@ else
     rank=$PADDLE_TRAINER_ID
     echo $master_ip $rank
     if [ $rank == 0 ]; then
-        net=$(netstat -anp | grep 2379 | grep "LISTEN")
+        net=$(netstat -anp | grep :2379 | grep "LISTEN")
         if [ ${#net} == 0 ]; then
             apt-get install -y --allow-downgrades etcd
             nohup etcd -data-dir ~/data.etcd -advertise-client-urls http://0.0.0.0:2379 -listen-client-urls http://0.0.0.0:2379 &
```
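The tightened pattern matters because a bare `2379` matches those digits anywhere in a `netstat` line, including inside a PID, while `:2379` pins them to a port position. A toy demonstration (sample lines invented for illustration):

```python
# Two invented netstat-style lines: only the first is an etcd listener on port 2379.
lines = [
    "tcp  0  0 0.0.0.0:2379  0.0.0.0:*  LISTEN  999/etcd",
    "tcp  0  0 0.0.0.0:8080  0.0.0.0:*  LISTEN  12379/python",  # PID happens to contain 2379
]
print([l for l in lines if "2379" in l and "LISTEN" in l])   # matches both lines
print([l for l in lines if ":2379" in l and "LISTEN" in l])  # matches only the etcd line
```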

tests/test_tipc/auto_tuner/llama_pretrain/benchmark_common/prepare.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -44,7 +44,7 @@ else
     rank=$PADDLE_TRAINER_ID
     echo $master_ip $rank
     if [ $rank == 0 ]; then
-        net=$(netstat -anp | grep 2379 | grep "LISTEN")
+        net=$(netstat -anp | grep :2379 | grep "LISTEN")
         if [ ${#net} == 0 ]; then
             apt-get install -y --allow-downgrades etcd
             nohup etcd -data-dir ~/data.etcd -advertise-client-urls http://0.0.0.0:2379 -listen-client-urls http://0.0.0.0:2379 &
```
Lines changed: 1 addition & 1 deletion
```diff
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 param="model_item=meta-llama-Llama-2-7b_pretrain_dy2st "
-param+="run_mode=Sharding_Stage2 "
+param+="run_mode=Sharding32_Stage2 "
 param+="device_num=N4C32 "
 param+="global_batch_size=32 "
 param+="nnodes=4 "
```
