Skip to content

Commit b791375

Browse files
committed
support llama avx model inference
1 parent 007b653 commit b791375

15 files changed

+2141
-77
lines changed

csrc/cpu/0001-patch-fp16-and-bf16.patch

Lines changed: 280 additions & 0 deletions
Large diffs are not rendered by default.

csrc/cpu/0001-patch-fp32.patch

Lines changed: 302 additions & 0 deletions
Large diffs are not rendered by default.

csrc/cpu/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# cpu-custom-ops
2+
3+
## 快速开始
4+
# 构建 cpu 自定义算子库
5+
```
6+
# 前提条件:机器支持 avx 指令
7+
$ bash setup.sh
8+
```

csrc/cpu/setup.sh

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds the patched xFasterTransformer runtime and installs the CPU custom ops.

# 1. download XFT (skip when the checkout already exists)
# BUGFIX: the original test was `[ ! -d xFasterTransformer]`; the missing
# space before `]` makes the `[` builtin fail with "missing `]'".
if [ ! -d xFasterTransformer ]; then
    git clone --branch v1.7.2 https://github.com/intel/xFasterTransformer.git
fi

# 2. reset the working tree, then select the patch matching the CPU features
cd xFasterTransformer
git checkout .
cd ..

if lscpu | grep -q "avx512_bf16"; then
    echo "apply bf16 and fp16."
    if [ ! -f 0001-patch-fp16-and-bf16.patch ]; then
        echo "Error: 0001-patch-fp16-and-bf16.patch does not exist."
        exit 1
    fi
    # apply patch
    cp ./0001-patch-fp16-and-bf16.patch ./xFasterTransformer/paddle.patch
else
    echo "apply fp32"
    if [ ! -f 0001-patch-fp32.patch ]; then
        # BUGFIX: message read "Error: does 0001-patch-fp32.patch not exist."
        echo "Error: 0001-patch-fp32.patch does not exist."
        exit 1
    fi
    cp ./0001-patch-fp32.patch ./xFasterTransformer/paddle.patch
fi

# 3. apply patch
cd xFasterTransformer
git apply paddle.patch

# 4. build xFasterTransformer (oneCCL first, then the library itself)
sh ./3rdparty/prepare_oneccl.sh
source ./3rdparty/oneccl/build/_install/env/setvars.sh

rm -rf build
mkdir build && cd build
cmake ..
make -j

# xft: locations consumed by src/setup_cpu.py.
# NOTE(review): $PWD here is .../xFasterTransformer/build, so
# XFT_HEADER_DIR points at the build tree and XFT_LIB_DIR at build/build.
# setup_cpu.py joins XFT_HEADER_DIR with src/common, 3rdparty/... which live
# in the repo ROOT — these exports look like they should be set from the
# xFasterTransformer root instead. Kept as-is; confirm against a working run.
export XFT_HEADER_DIR=$PWD
export XFT_LIB_DIR=$XFT_HEADER_DIR/build
export LD_LIBRARY_PATH=$XFT_LIB_DIR:$LD_LIBRARY_PATH

# setup cpu paddle_nlp ops
cd ..
python ./src/setup_cpu.py install

csrc/cpu/src/set_value_by_flags.cc

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/extension.h"
16+
17+
// Scatter the freshly sampled token of each still-active sequence into its
// per-sequence history buffer.
//
// Args:
//   stop_flags:  [bs]          true -> sequence finished, history untouched
//   pre_ids_all: [bs, length]  token history, row `bi` updated in place
//   pre_ids:     [bs]          token generated at the current step
//   step_idx:    [bs]          write position inside the history row
//   bs:          batch size
//   length:      row stride / capacity of pre_ids_all
void set_value_by_flag_and_id(const bool *stop_flags,
                              int64_t *pre_ids_all,
                              const int64_t *pre_ids,
                              const int64_t *step_idx,
                              int bs,
                              int length) {
  for (int bi = 0; bi < bs; bi++) {
    if (stop_flags[bi]) {
      continue;  // finished sequences keep their history as-is
    }
    int64_t *row = pre_ids_all + static_cast<int64_t>(bi) * length;
    const int64_t idx = step_idx[bi];
    // BUGFIX: the original only checked idx >= 0; a step index at or past
    // `length` was an out-of-bounds write into the next row (or worse).
    if (idx >= 0 && idx < length) {
      row[idx] = pre_ids[bi];
    }
  }
}
27+
28+
// Forward for set_value_by_flags_and_idx: records each live sequence's newest
// token into `pre_ids_all` (an input tensor mutated in place) and returns a
// copy of `stop_flags` for the op's declared output slot.
std::vector<paddle::Tensor> SetValueByFlagsAndIdx(const paddle::Tensor& pre_ids_all,
                                                  const paddle::Tensor& pre_ids_now,
                                                  const paddle::Tensor& step_idx,
                                                  const paddle::Tensor& stop_flags) {
  const int batch_size = stop_flags.shape()[0];
  const int history_len = pre_ids_all.shape()[1];

  // The op returns stop_flags unchanged; clone it for the output.
  auto stop_flags_out = stop_flags.copy_to(stop_flags.place(), false);

  // pre_ids_all is updated in place, hence the const_cast on its buffer.
  set_value_by_flag_and_id(stop_flags.data<bool>(),
                           const_cast<int64_t*>(pre_ids_all.data<int64_t>()),
                           pre_ids_now.data<int64_t>(),
                           step_idx.data<int64_t>(),
                           batch_size,
                           history_len);

  return {stop_flags_out};
}
38+
39+
// InferShape: the op's single output mirrors the shape of `stop_flags`.
std::vector<std::vector<int64_t>> SetValueByFlagsAndIdxInferShape(
    const std::vector<int64_t>& pre_ids_all_shape,
    const std::vector<int64_t>& pre_ids_now_shape,
    const std::vector<int64_t>& step_idx_shape,
    const std::vector<int64_t>& stop_flags_shape) {
  return {stop_flags_shape};
}
43+
44+
// InferDtype: the output dtype follows `stop_flags` (expected BOOL).
std::vector<paddle::DataType> SetValueByFlagsAndIdxInferDtype(
    const paddle::DataType& pre_ids_all_dtype,
    const paddle::DataType& pre_ids_now_dtype,
    const paddle::DataType& step_idx_dtype,
    const paddle::DataType& stop_flags_dtype) {
  return {stop_flags_dtype};
}
50+
51+
// Register the custom op with Paddle. Note the kernel mutates the
// `pre_ids_all` INPUT in place (via const_cast); the only declared output is
// the pass-through copy of `stop_flags`.
PD_BUILD_OP(set_value_by_flags_and_idx)
    .Inputs({"pre_ids_all", "pre_ids_now", "step_idx", "stop_flags"})
    .Outputs({"stop_flags_out"})
    .SetKernelFn(PD_KERNEL(SetValueByFlagsAndIdx))
    .SetInferShapeFn(PD_INFER_SHAPE(SetValueByFlagsAndIdxInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(SetValueByFlagsAndIdxInferDtype));

csrc/cpu/src/setup_cpu.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import site
17+
import subprocess
18+
19+
from paddle.utils.cpp_extension import CppExtension, setup
20+
21+
# from setuptools import Extension, setup
22+
from setuptools.command.build_ext import build_ext
23+
24+
25+
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
26+
# Avoid a gcc warning below:
27+
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
28+
# for C/ObjC but not for C++
29+
class BuildExt(build_ext):
    """build_ext variant that strips '-Wstrict-prototypes' from the compile
    command, since that flag is only valid for C and gcc warns on C++."""

    def build_extensions(self):
        compile_flags = self.compiler.compiler_so
        if "-Wstrict-prototypes" in compile_flags:
            compile_flags.remove("-Wstrict-prototypes")
        super().build_extensions()
34+
35+
36+
def check_avx512_bf16__support():
    """Return True when the host CPU advertises the ``avx512_bf16`` flag.

    BUGFIX: the original passed a list together with ``shell=True`` to
    ``subprocess.run``; in that mode only the first element (``lscpu``) is the
    shell command and the ``|``/``grep`` elements become unused shell
    arguments, so the pipe never ran.  Run ``lscpu`` alone and search its
    output in Python instead.
    """
    try:
        result = subprocess.run(
            ["lscpu"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        return "avx512_bf16" in result.stdout.lower()
    except Exception as e:
        # Best-effort probe: a missing lscpu / non-Linux host falls back to
        # the conservative fp32 build path.
        print(f"Error checking AVX512 support: {e}")
        return False
54+
55+
56+
# cc flags shared by every translation unit of the extension.
paddle_extra_compile_args = [
    "-std=c++17",
    "-shared",
    "-fPIC",
    "-Wno-parentheses",
    "-DPADDLE_WITH_CUSTOM_KERNEL",
]

# Pick weight-only kernel defines matching the CPU: bf16 path when
# avx512_bf16 is available, fp32 fallback otherwise.
if check_avx512_bf16__support():
    # BUGFIX: the original listed -DAVX512_BF16_WEIGHT_ONLY_BF16=true TWICE
    # (copy-paste duplicate); the repeat was a no-op and is removed.
    # NOTE(review): the second entry may have been intended as an FP16
    # variant -- confirm against the xFasterTransformer build options.
    paddle_extra_compile_args += [
        "-DAVX512_BF16_WEIGHT_ONLY_BF16=true",
    ]
else:
    paddle_extra_compile_args += [
        "-DAVX512_FP32_WEIGHT_ONLY_FP16=true",
        "-DAVX512_FP32_WEIGHT_ONLY_INT8=true",
    ]

# include path: paddle headers may live in any site-packages candidate.
site_packages_path = site.getsitepackages()
paddle_custom_kernel_include = [os.path.join(path, "paddle", "include") for path in site_packages_path]

# XFT locations are exported by setup.sh before this script runs; a missing
# variable raises KeyError with the variable name, which is informative enough.
XFT_INCLUDE_DIR = os.environ["XFT_HEADER_DIR"]
XFT_LIBRARY_DIR = os.environ["XFT_LIB_DIR"]

# include path third_party: XFT sources plus its bundled oneDNN/xDNN headers.
paddle_custom_kernel_include += [
    os.path.join(XFT_INCLUDE_DIR, "include"),  # glog
    os.path.join(XFT_INCLUDE_DIR, "src/common"),
    os.path.join(XFT_INCLUDE_DIR, "src/kernel"),
    os.path.join(XFT_INCLUDE_DIR, "src/layers"),
    os.path.join(XFT_INCLUDE_DIR, "src/models"),
    os.path.join(XFT_INCLUDE_DIR, "src/utils"),
    os.path.join(XFT_INCLUDE_DIR, "3rdparty/onednn/include"),
    os.path.join(XFT_INCLUDE_DIR, "3rdparty/onednn/build/include"),
    os.path.join(XFT_INCLUDE_DIR, "3rdparty/xdnn"),
]

# libs path: paddle's bundled shared objects plus the freshly built XFT ones.
paddle_custom_kernel_library_dir = [os.path.join(path, "paddle", "base") for path in site_packages_path]
paddle_custom_kernel_library_dir += [XFT_LIBRARY_DIR]

# ':' prefix tells the linker to match the exact file name (-l:libfoo.so).
libs = [":libxfastertransformer.so", ":libxft_comm_helper.so"]

custom_kernel_dot_module = CppExtension(
    sources=[
        "./src/xft_llama_layer.cc",
        "../generation/save_with_output.cc",
        "./src/token_penalty_multi_scores.cc",
        "./src/stop_generation_multi_ends.cc",
        "./src/set_value_by_flags.cc",
    ],
    include_dirs=paddle_custom_kernel_include,
    library_dirs=paddle_custom_kernel_library_dir,
    libraries=libs,
    extra_compile_args=paddle_extra_compile_args,
)

setup(
    name="paddlenlp_ops",
    version="1.0",
    # BUGFIX: typo "fot" -> "for".
    description="custom kernel for compiling",
    ext_modules=[custom_kernel_dot_module],
)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <stdlib.h>
16+
#include <string.h>
17+
18+
#include "paddle/extension.h"
19+
#include <stdio.h>
20+
21+
22+
// Return true when `id` appears among the first `length` entries of
// `end_ids`.  BUGFIX(idiom): the original kept a dead `flag` local that was
// always false and only ever returned via `return flag;` — removed.
bool is_in_end(const int64_t id, const int64_t* end_ids, int length) {
  for (int i = 0; i < length; i++) {
    if (id == end_ids[i]) {
      return true;
    }
  }
  return false;
}

// Per-batch post-processing of sampled tokens:
//  * a sequence already flagged as stopped keeps emitting end_ids[0]
//    (well-defined pad/end token for downstream consumers);
//  * any sequence whose current token is one of the end ids gets its flag
//    raised in `stop_flags_out`.
//
// Args:
//   stop_flags:     [bs]          input flags (read-only)
//   end_ids:        [end_length]  terminator ids; end_ids[0] doubles as the
//                                 pad token (assumes end_length >= 1 -- TODO confirm)
//   topk_ids:       [bs]          sampled tokens, overwritten in place for
//                                 already-stopped sequences
//   stop_flags_out: [bs]          entries are only ever set to true, never cleared
void set_value_by_flags(const bool* stop_flags,
                        const int64_t* end_ids,
                        int64_t* topk_ids,
                        bool* stop_flags_out,
                        const int bs,
                        int end_length) {
  for (int bi = 0; bi < bs; bi++) {
    if (stop_flags[bi]) {
      topk_ids[bi] = end_ids[0];
    }
    if (is_in_end(topk_ids[bi], end_ids, end_length)) {
      stop_flags_out[bi] = true;
    }
  }
}
45+
46+
47+
// Forward for set_stop_value_multi_ends: clones topk_ids/stop_flags, then
// rewrites the clones so finished sequences emit end_ids[0] and sequences
// that just produced an end token are marked stopped.
std::vector<paddle::Tensor> GetStopFlagsMulti(const paddle::Tensor& topk_ids,
                                              const paddle::Tensor& stop_flags,
                                              const paddle::Tensor& end_ids) {
  PD_CHECK(topk_ids.dtype() == paddle::DataType::INT64);
  PD_CHECK(stop_flags.dtype() == paddle::DataType::BOOL);

  const int64_t batch = topk_ids.shape()[0];
  const int64_t num_end_ids = end_ids.shape()[0];

  // Work on copies: the op's inputs themselves stay untouched.
  auto topk_ids_out = topk_ids.copy_to(topk_ids.place(), false);
  auto stop_flags_out = stop_flags.copy_to(stop_flags.place(), false);

  set_value_by_flags(stop_flags.data<bool>(),
                     end_ids.data<int64_t>(),
                     topk_ids_out.data<int64_t>(),
                     stop_flags_out.data<bool>(),
                     batch,
                     num_end_ids);

  return {topk_ids_out, stop_flags_out};
}
67+
68+
// InferShape: the two outputs mirror the shapes of topk_ids and stop_flags.
std::vector<std::vector<int64_t>> GetStopFlagsMultiInferShape(
    const std::vector<int64_t>& topk_ids_shape,
    const std::vector<int64_t>& stop_flags_shape,
    const std::vector<int64_t>& end_ids_shape) {
  return {topk_ids_shape, stop_flags_shape};
}
74+
75+
std::vector<paddle::DataType> GetStopFlagsMultiInferDtype(
76+
const paddle::DataType& topk_ids_dtype,
77+
const paddle::DataType& stop_flags_dtype,
78+
const paddle::DataType& end_ids_dtype) {
79+
return {topk_ids_dtype, stop_flags_dtype};
80+
}
81+
82+
// Register the custom op: consumes sampled tokens, the current stop flags and
// the terminator-id list, and produces the adjusted tokens plus updated flags.
PD_BUILD_OP(set_stop_value_multi_ends)
    .Inputs({"topk_ids", "stop_flags", "end_ids"})
    .Outputs({"topk_ids_out", "stop_flags_out"})
    .SetKernelFn(PD_KERNEL(GetStopFlagsMulti))
    .SetInferShapeFn(PD_INFER_SHAPE(GetStopFlagsMultiInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(GetStopFlagsMultiInferDtype));

0 commit comments

Comments
 (0)