Fix CPU custom-op build script, docs, and CPU-only import/weight-loading bugs

bukejiyu · bukejiyu · commit 18d789a86312 · 2024-10-23T15:33:11.000+08:00
diff --git a/csrc/cpu/README.md b/csrc/cpu/README.md
@@ -1,14 +1,16 @@
 # cpu-custom-ops
 
 ## 快速开始
-
-### 1.环境准备
+### 1. 详细 cpu 推理教程
+[cpu](../../llm/docs/cpu_install.md)
+
+### 2.环境准备
 ```shell
 # 查询机器是否支持 avx512指令
 lscpu | grep avx512*
 ```
 
-### 2.安装 cpu 自定义算子和第三方库
+### 3.安装 cpu 自定义算子和第三方库
 ```shell
 #建议在 gcc 9.4.0 下安装第三方库
 bash setup.sh
diff --git a/csrc/cpu/setup.sh b/csrc/cpu/setup.sh
@@ -17,7 +17,7 @@
 # apt-get install numactl
 
 # 1. download XFT
-if [ ! -d xFasterTransformer]; then
+if [ ! -d xFasterTransformer ]; then
     git clone https://github.com/intel/xFasterTransformer.git
 fi
 
@@ -55,12 +55,12 @@ rm -rf build
 mkdir build && cd build
 cmake ..
 make -j
+cd ..
 
 #xft
 export XFT_HEADER_DIR=$PWD
 export XFT_LIB_DIR=$XFT_HEADER_DIR/build
 export LD_LIBRARY_PATH=$XFT_LIB_DIR:$LD_LIBRARY_PATH
-
 #setup cpu paddle_nlp ops
 cd ..
-python ./src/setup_cpu.py install
+python ./src/setup_cpu.py install --user
diff --git a/csrc/cpu/src/setup_cpu.py b/csrc/cpu/src/setup_cpu.py
@@ -112,7 +112,7 @@ def check_avx512_bf16__support():
 
 custom_kernel_dot_module = CppExtension(
     sources=[
-        "../generation/save_with_output.cc",
+        "../gpu/save_with_output.cc",
         "./src/token_penalty_multi_scores.cc",
         "./src/stop_generation_multi_ends.cc",
         "./src/set_value_by_flags.cc",
@@ -129,6 +129,6 @@ def check_avx512_bf16__support():
 setup(
     name="paddlenlp_ops",
     version="1.0",
-    description="custom kernel fot compiling",
+    description="custom kernel for compiling",
     ext_modules=[custom_kernel_dot_module],
 )
diff --git a/llm/docs/cpu_install.md b/llm/docs/cpu_install.md
@@ -3,9 +3,9 @@ PaddleNLP 在支持 AVX 指令的 CPU 上对 llama 系列模型进行了深度
 
 ### 检查硬件：
 
- | 芯片类型 | GCC 版本 |
- | --- | --- |
- | Intel(R) Xeon(R) Platinum 8463B | 9.4.0|
+ | 芯片类型 | GCC 版本 | CMake 版本 |
+ | --- | --- | --- |
+ | Intel(R) Xeon(R) Platinum 8463B | 9.4.0| >=3.18 |
 
 **注：如果要验证您的机器是否支持 AVX 指令，只需系统环境下输入命令，看是否有输出：**
 ```
diff --git a/paddlenlp/experimental/transformers/fused_transformer_layers.py b/paddlenlp/experimental/transformers/fused_transformer_layers.py
@@ -40,7 +40,10 @@
         "The paddlenlp_ops package is not installed. you can read the docs and install it by hand, "
         "you can refer to: https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
     )
-from paddlenlp_ops import rebuild_padding_v2
+if (
+    paddle.device.get_all_custom_device_type() is not None and len(paddle.device.get_all_custom_device_type()) > 0
+) or core.is_compiled_with_cuda():
+    from paddlenlp_ops import rebuild_padding_v2
 
 if core.is_compiled_with_cuda():
     if os.getenv("FLAGS_CUTLASS_FP8_GEMM", "False") == "True":
diff --git a/paddlenlp/experimental/transformers/llama/modeling.py b/paddlenlp/experimental/transformers/llama/modeling.py
@@ -291,7 +291,6 @@ def forward(
     @paddle.no_grad()
     # avx
     def set_state_dict(self, state_dict):
-        self.transformer_block.init_weight()
         unfused_state_dict = {}
         head_size = self.hidden_size // self.num_attention_heads
         split_fn = split_param_func()