Skip to content

Commit 4c4632f

Browse files
Birdylx, LokeZhou, nemonameless
authored
[NPU] InternVL2 supports npu training (PaddlePaddle#714)
Co-authored-by: LokeZhou <aishenghuoaiqq@163.com>
Co-authored-by: nifeng <nemonameless@qq.com>
1 parent 164ad73 commit 4c4632f

File tree

7 files changed

+55
-10
lines changed

7 files changed

+55
-10
lines changed

paddlemix/datasets/internvl_dataset.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ def preprocess_internlm(
532532
).input_ids
533533
targets = input_ids.clone()
534534

535+
new_targets = []
535536
# print('tokenizer.pad_token_id:\n', tokenizer.pad_token_id) # 151643
536537
# print('targets', targets, targets.shape, targets.sum().item())
537538
# [[151644, 8948 , 198 , ..., 103978, 1773 , 151645]] [1, 1918] 281157253
@@ -569,10 +570,14 @@ def preprocess_internlm(
569570
target[:] = IGNORE_TOKEN_ID
570571
print(f'WARNING: tokenization mismatch: {cur_len} vs. {total_len}. This dataset is {ds_name}.')
571572
sys.stdout.flush()
573+
574+
new_targets.append(target)
575+
576+
new_targets = paddle.stack(new_targets, axis=0)
572577

573578
return dict(
574579
input_ids=input_ids,
575-
labels=targets,
580+
labels=new_targets,
576581
attention_mask=input_ids.not_equal(paddle.to_tensor(tokenizer.pad_token_id)),
577582
)
578583

paddlemix/examples/internvl2/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ sh paddlemix/examples/internvl2/shell/internvl2.0/2nd_finetune/internvl2_2b_inte
5353
sh paddlemix/examples/internvl2/shell/internvl2.0/2nd_finetune/internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_full.sh
5454
```
5555

56+
## 5 NPU硬件训练
57+
请参照[tools](../../tools/README.md)进行NPU硬件Paddle安装和环境变量设置,配置完成后可直接执行微调命令进行训练或预测。
5658

5759
### 参考文献
5860
```BibTeX

paddlemix/examples/llava/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ python paddlemix/tools/supervised_finetune.py paddlemix/config/llava/v1_5/lora_s
104104
python paddlemix/tools/supervised_finetune.py paddlemix/config/llava/v1_5/sft_argument.json
105105
```
106106

107+
## 5 NPU硬件训练
108+
请参照[tools](../../tools/README.md)进行NPU硬件Paddle安装和环境变量设置,配置完成后可直接执行微调命令进行训练或预测。
109+
107110
### 参考文献
108111
```BibTeX
109112
@misc{liu2024llavanext,

paddlemix/models/internvl2/internvl_chat/modeling_intern_vit.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# --------------------------------------------------------
2020

2121
from typing import Optional, Tuple, Union
22+
import numpy as np
2223

2324
import paddle
2425
import paddle.nn as nn
@@ -63,7 +64,7 @@ def drop_path(x, drop_prob: float = 0.0, training: bool = False, scale_by_keep:
6364
return x
6465
keep_prob = 1 - drop_prob
6566
shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
66-
random_tensor = paddle.bernoulli(paddle.full(shape, keep_prob, dtype=x.dtype))
67+
random_tensor = paddle.bernoulli(paddle.full(shape, keep_prob, dtype='float32')).cast(x.dtype)
6768
if keep_prob > 0.0 and scale_by_keep:
6869
random_tensor = paddle.divide(random_tensor, paddle.to_tensor(keep_prob))
6970
return x * random_tensor
@@ -386,9 +387,9 @@ def forward(
386387
hidden_states = hidden_states.cast(original_dtype)
387388
return hidden_states
388389
else:
389-
hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states.cast("bfloat16"))) * self.ls1)
390-
hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states.cast("bfloat16"))) * self.ls2)
391-
return hidden_states.cast("bfloat16")
390+
hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states)) * self.ls1)
391+
hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states)) * self.ls2)
392+
return hidden_states
392393

393394

394395
class InternVisionEncoder(nn.Layer):
@@ -405,7 +406,7 @@ def __init__(self, config: InternVisionConfig):
405406
super().__init__()
406407
self.config = config
407408
# stochastic depth decay rule
408-
dpr = [x.item() for x in paddle.linspace(0, config.drop_path_rate, config.num_hidden_layers)]
409+
dpr = [x.item() for x in np.linspace(0, config.drop_path_rate, config.num_hidden_layers)]
409410
self.layers = nn.LayerList([
410411
InternVisionEncoderLayer(config, dpr[idx]) for idx in range(config.num_hidden_layers)])
411412
self.gradient_checkpointing = True

paddlemix/models/internvl2/internvl_chat/modeling_internvl_chat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from paddlenlp.generation import GenerationConfig
3232
from paddlenlp.transformers import LlamaForCausalLM, Qwen2ForCausalLM
3333
from paddlenlp.transformers.model_outputs import CausalLMOutputWithPast
34-
from paddlemix.models.model_utils import MixPretrainedModel
34+
from paddlemix.models.model_utils import MixPretrainedModel, NPUCrossEntropyLoss
3535
from paddlenlp.transformers.model_utils import PretrainedModel
3636
from .configuration_internvl_chat import InternVLChatConfig
3737
from .modeling_intern_vit import InternVisionModel
@@ -201,7 +201,7 @@ def forward(
201201
shift_logits = logits[..., :-1, :]
202202
shift_labels = labels[..., 1:]
203203
# Flatten the tokens
204-
loss_fct = CrossEntropyLoss()
204+
loss_fct = NPUCrossEntropyLoss() if "npu" in paddle.get_device() else CrossEntropyLoss()
205205
shift_logits = shift_logits.reshape([-1, self.language_model.config.vocab_size])
206206
shift_labels = shift_labels.reshape([-1])
207207
# Enable model parallelism

paddlemix/models/model_utils.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from paddlemix.utils.env import MODEL_HOME
2828
from paddlemix.utils.log import logger
2929

30-
__all__ = ["MixPretrainedModel"]
30+
__all__ = ["MixPretrainedModel", "NPUCrossEntropyLoss"]
3131

3232

3333
def resolve_cache_dir(pretrained_model_name_or_path: str, cache_dir: Optional[str] = None) -> str:
@@ -489,3 +489,26 @@ def _find_mismatched_keys(
489489
)
490490

491491
return missing_keys, unexpected_keys, mismatched_keys
492+
493+
494+
class NPUCrossEntropyLoss(paddle.nn.Layer):
495+
"""
496+
Make cross_entropy_loss compatible with npu device
497+
"""
498+
def __init__(self, **kwargs):
499+
super().__init__()
500+
self.reduction = kwargs.get('reduction', 'mean')
501+
kwargs['reduction'] = 'none'
502+
self.nll_loss = paddle.nn.NLLLoss(**kwargs)
503+
self.log_softmax = paddle.nn.functional.log_softmax
504+
505+
def forward(self, logits, labels):
506+
loss = self.nll_loss(self.log_softmax(logits, axis=-1), labels)
507+
if self.reduction == 'mean':
508+
return loss.mean()
509+
elif self.reduction == 'sum':
510+
return loss.sum()
511+
elif self.reduction == 'none':
512+
return loss
513+
else:
514+
raise ValueError(f"Unexcepted reduction method: {self.reduction}")

paddlemix/tools/README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,9 @@ python paddlemix/tools/merge_lora_params.py \
193193

194194
**NPU硬件训练**
195195

196-
PaddleMIX支持在NPU硬件上训练,只需要在config配置文件中增加`device`字段制定设备即可:
196+
PaddleMIX支持在NPU硬件上进行训练:
197+
1. 请先参照[PaddleCustomDevice](https://github.com/PaddlePaddle/PaddleCustomDevice/blob/develop/backends/npu/README_cn.md)安装NPU硬件Paddle
198+
2. 在config配置文件中增加`device`字段指定设备:
197199
```json
198200
{
199201
...
@@ -203,4 +205,13 @@ PaddleMIX支持在NPU硬件上训练,只需要在config配置文件中增加`d
203205
...
204206
}
205207
```
208+
3. 启动训练前请设置如下环境变量用于性能加速和精度对齐
209+
```shell
210+
export FLAGS_use_stride_kernel=0
211+
export FLAGS_npu_storage_format=0 # 关闭私有格式
212+
export FLAGS_npu_jit_compile=0 # 关闭即时编译
213+
export FLAGS_npu_scale_aclnn=True # aclnn加速
214+
export FLAGS_npu_split_aclnn=True # aclnn加速
215+
export CUSTOM_DEVICE_BLACK_LIST=set_value,set_value_with_tensor # set_value加入黑名单
216+
```
206217
目前支持NPU训练的模型可以参考此[文档](../examples/README.md)

0 commit comments

Comments (0)