File tree Expand file tree Collapse file tree 6 files changed +17
-13
lines changed
paddlenlp/experimental/transformers Expand file tree Collapse file tree 6 files changed +17
-13
lines changed Original file line number Diff line number Diff line change 1
1
# cpu-custom-ops
2
2
3
3
## 快速开始
4
-
5
- ### 1.环境准备
4
+ ### 1. 详细 cpu 推理教程
5
+ [cpu](../../llm/docs/cpu_install.md)
6
+ ###
7
+ ### 2.环境准备
6
8
``` shell
7
9
# 查询机器是否支持 avx512指令
8
10
lscpu | grep avx512*
9
11
```
10
12
11
- ### 2 .安装 cpu 自定义算子和第三方库
13
+ ### 3. 安装 cpu 自定义算子和第三方库
12
14
``` shell
13
15
# 建议在 gcc 9.4.0 下安装第三方库
14
16
bash setup.sh
Original file line number Diff line number Diff line change 17
17
# apt-get install numactl
18
18
19
19
# 1. download XFT
20
- if [ ! -d xFasterTransformer]; then
20
+ if [ ! -d xFasterTransformer ]; then
21
21
git clone https://github.com/intel/xFasterTransformer.git
22
22
fi
23
23
@@ -55,12 +55,12 @@ rm -rf build
55
55
mkdir build && cd build
56
56
cmake ..
57
57
make -j
58
+ cd ..
58
59
59
60
# xft
60
61
export XFT_HEADER_DIR=$PWD
61
62
export XFT_LIB_DIR=$XFT_HEADER_DIR/build
62
63
export LD_LIBRARY_PATH=$XFT_LIB_DIR:$LD_LIBRARY_PATH
63
-
64
64
# setup cpu paddle_nlp ops
65
65
cd ..
66
- python ./src/setup_cpu.py install
66
+ python ./src/setup_cpu.py install --user
Original file line number Diff line number Diff line change @@ -112,7 +112,7 @@ def check_avx512_bf16__support():
112
112
113
113
custom_kernel_dot_module = CppExtension (
114
114
sources = [
115
- "../generation /save_with_output.cc" ,
115
+ "../gpu/save_with_output.cc",
116
116
"./src/token_penalty_multi_scores.cc" ,
117
117
"./src/stop_generation_multi_ends.cc" ,
118
118
"./src/set_value_by_flags.cc" ,
@@ -129,6 +129,6 @@ def check_avx512_bf16__support():
129
129
setup (
130
130
name = "paddlenlp_ops" ,
131
131
version = "1.0" ,
132
- description = "custom kernel fot compiling" ,
132
+ description = "custom kernel for compiling" ,
133
133
ext_modules = [custom_kernel_dot_module ],
134
134
)
Original file line number Diff line number Diff line change @@ -3,9 +3,9 @@ PaddleNLP 在支持 AVX 指令的 CPU 上对 llama 系列模型进行了深度
3
3
4
4
### 检查硬件:
5
5
6
- | 芯片类型 | GCC 版本 |
7
- | --- | --- |
8
- | Intel(R) Xeon(R) Platinum 8463B | 9.4.0|
6
+ | 芯片类型 | GCC 版本 | cmake 版本 |
7
+ | --- | --- | --- |
8
+ | Intel(R) Xeon(R) Platinum 8463B | 9.4.0| >=3.18 |
9
9
10
10
**注:如果要验证您的机器是否支持 AVX 指令,只需在系统环境下输入命令,看是否有输出:**
11
11
```
Original file line number Diff line number Diff line change 40
40
"The paddlenlp_ops package is not installed. you can read the docs and install it by hand, "
41
41
"you can refer to: https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
42
42
)
43
- from paddlenlp_ops import rebuild_padding_v2
43
+ if (
44
+ paddle .device .get_all_custom_device_type () is not None and len (paddle .device .get_all_custom_device_type ()) > 0
45
+ ) or core .is_compiled_with_cuda ():
46
+ from paddlenlp_ops import rebuild_padding_v2
44
47
45
48
if core .is_compiled_with_cuda ():
46
49
if os .getenv ("FLAGS_CUTLASS_FP8_GEMM" , "False" ) == "True" :
Original file line number Diff line number Diff line change @@ -291,7 +291,6 @@ def forward(
291
291
@paddle .no_grad ()
292
292
# avx
293
293
def set_state_dict (self , state_dict ):
294
- self .transformer_block .init_weight ()
295
294
unfused_state_dict = {}
296
295
head_size = self .hidden_size // self .num_attention_heads
297
296
split_fn = split_param_func ()
You can’t perform that action at this time.
0 commit comments