Skip to content

[xpu] add xpu custom ops support for llama2-7b #8515

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions csrc/xpu/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# ernie-bot-custom-ops
ernie bot 昆仑自定义算子库。

## 快速开始
### 构建 XDNN plugin 和 Paddle 自定义算子库
```
$ cd src
$ wget https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev/20240429/xdnn-ubuntu_x86_64.tar.gz
$ wget https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev/20240429/xre-ubuntu_x86_64.tar.gz
$ wget -q --no-check-certificate https://klx-sdk-release-public.su.bcebos.com/xtdk_llvm15/dev/2.7.98.2/xtdk-llvm15-ubuntu1604_x86_64.tar.gz
$ tar -xf xdnn-ubuntu_x86_64.tar.gz
$ tar -xf xre-ubuntu_x86_64.tar.gz
$ tar -xf xtdk-llvm15-ubuntu1604_x86_64.tar.gz
$ export WORKSPACE=$(pwd)
$ export XDNN_PATH=${WORKSPACE}/xdnn-ubuntu_x86_64/
$ export XRE_PATH=${WORKSPACE}/xre-ubuntu_x86_64/
$ export CLANG_PATH=${WORKSPACE}/xtdk-llvm15-ubuntu1604_x86_64/
$ bash ./cmake_build.sh
```

## 测试
### 运行 get_padding_offset_v2 单测
```
$ cd test/python
$ python test_get_padding_offset_v2.py
```

## 如何贡献
```
$ pip install pre-commit==2.17.0
$ pre-commit install
```
29 changes: 29 additions & 0 deletions csrc/xpu/src/cmake_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort the whole build on the first command that fails.
set -e

# Toolchain locations consumed by the plugin build — export these before
# running this script (see csrc/xpu/README.md for where to download them):
# export XDNN_PATH=Paddle/build/third_party/xpu/src/extern_xpu/xdnn-ubuntu_x86_64/ # <path_to_xdnn>
# export XRE_PATH=Paddle/build/third_party/xpu/src/extern_xpu/xre-ubuntu_x86_64/ # <path_to_xre>
# export CLANG_PATH=xtdk-ubuntu_1604_x86_64 # <path_to_xtdk>
# export HOST_SYSROOT=/opt/compiler/gcc-8.2/bin/gcc # <path_to_gcc>

# Build the XDNN plugin first; the custom-op extension links against it.
cd plugin
./cmake_build.sh
cd -

# Replace any previously installed paddlenlp_ops with the fresh build.
python -m pip uninstall paddlenlp_ops -y
python setup.py install
94 changes: 94 additions & 0 deletions csrc/xpu/src/get_padding_offset_v2.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <paddle/phi/backends/xpu/xpu_context.h>
#include "paddle/extension.h"
#include "xpu/plugin.h"

// Builds the bookkeeping tensors for running on a padding-free token stream:
// the flattened input ids (x_remove_padding), per-token padding offsets, a
// device-side copy of cum_offsets, and the cumulative sequence-length arrays
// cu_seqlens_q / cu_seqlens_k (each of length batch + 1).
std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor& input_ids,
                                             const paddle::Tensor& cum_offsets,
                                             const paddle::Tensor& token_num,
                                             const paddle::Tensor& seq_len) {
  // Fetch the XPU device context for the currently selected device.
  phi::XPUPlace device_place(phi::backends::xpu::GetXPUCurrentDeviceId());
  auto* pool_ctx =
      paddle::experimental::DeviceContextPool::Instance().Get(device_place);
  auto* xpu_ctx = static_cast<const phi::XPUContext*>(pool_ctx);

  const int batch_size = seq_len.shape()[0];
  const int max_seq_len = input_ids.shape()[1];

  // Work on a device-side copy of cum_offsets; token_num must be read on the
  // host because it sizes the outputs allocated below.
  auto cum_offsets_out = cum_offsets.copy_to(cum_offsets.place(), false);
  auto token_num_cpu = token_num.copy_to(paddle::CPUPlace(), false);
  const int total_tokens = token_num_cpu.data<int64_t>()[0];

  // All outputs live on the same device as the input ids.
  const auto& out_place = input_ids.place();
  auto x_remove_padding =
      paddle::full({total_tokens}, 0, paddle::DataType::INT64, out_place);
  auto padding_offset =
      paddle::full({total_tokens}, 0, paddle::DataType::INT32, out_place);
  auto cu_seqlens_q =
      paddle::full({batch_size + 1}, 0, paddle::DataType::INT32, out_place);
  auto cu_seqlens_k =
      paddle::full({batch_size + 1}, 0, paddle::DataType::INT32, out_place);

  const int ret = baidu::xpu::api::plugin::get_padding_offset(
      xpu_ctx->x_context(),
      padding_offset.data<int>(),
      cum_offsets_out.data<int>(),
      cu_seqlens_q.data<int>(),
      cu_seqlens_k.data<int>(),
      x_remove_padding.data<int64_t>(),
      input_ids.data<int64_t>(),
      cum_offsets.data<int>(),
      seq_len.data<int>(),
      max_seq_len,
      batch_size);
  PD_CHECK(ret == 0, "baidu::xpu::api::plugin::get_padding_offset failed.");

  return {x_remove_padding,
          cum_offsets_out,
          padding_offset,
          cu_seqlens_q,
          cu_seqlens_k};
}

// Infers output shapes for get_padding_offset_v2.
//
// Outputs, in registration order: x_remove_padding and padding_offset have a
// run-time length (the total token count, unknown at compile time, hence -1);
// cum_offsets_out has one entry per sequence; cu_seqlens_q and cu_seqlens_k
// each hold bsz + 1 cumulative entries.
//
// Fix: the original computed an unused local `seq_len` from
// input_ids_shape[1]; it has been removed (unused-variable warning).
std::vector<std::vector<int64_t>> GetPaddingOffsetInferShape(
    const std::vector<int64_t>& input_ids_shape,
    const std::vector<int64_t>& cum_offsets_shape,
    const std::vector<int64_t>& token_num_shape,
    const std::vector<int64_t>& seq_len_shape) {
  const int64_t bsz = seq_len_shape[0];
  return {{-1}, {bsz}, {-1}, {bsz + 1}, {bsz + 1}};
}

// Infers output dtypes for get_padding_offset_v2: x_remove_padding keeps the
// input-id dtype, and the four remaining outputs all share seq_len's dtype.
std::vector<paddle::DataType> GetPaddingOffsetInferDtype(
    const paddle::DataType& input_ids_dtype,
    const paddle::DataType& cum_offsets_dtype,
    const paddle::DataType& token_num_dtype,
    const paddle::DataType& seq_len_dtype) {
  std::vector<paddle::DataType> out_dtypes(5, seq_len_dtype);
  out_dtypes[0] = input_ids_dtype;
  return out_dtypes;
}

// Registers the custom op `get_padding_offset_v2` with Paddle, wiring the
// kernel, shape-inference, and dtype-inference functions defined above.
// NOTE(review): the registered Inputs order places "token_num" before
// "cum_offsets", while the kernel signature GetPaddingOffset(input_ids,
// cum_offsets, token_num, seq_len) lists cum_offsets first — custom-op
// inputs are bound positionally, so confirm the Python call site passes
// arguments in this registered order.
PD_BUILD_OP(get_padding_offset_v2)
    .Inputs({"input_ids", "token_num", "cum_offsets", "seq_len"})
    .Outputs({"x_remove_padding",
              "cum_offsets_out",
              "padding_offset",
              "cu_seqlens_q",
              "cu_seqlens_k"})
    .SetKernelFn(PD_KERNEL(GetPaddingOffset))
    .SetInferShapeFn(PD_INFER_SHAPE(GetPaddingOffsetInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(GetPaddingOffsetInferDtype));
108 changes: 108 additions & 0 deletions csrc/xpu/src/get_token_penalty_multi_scores_v2.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <paddle/phi/backends/xpu/xpu_context.h>
#include "paddle/extension.h"
#include "paddle/phi/core/enforce.h"
#include "xpu/plugin.h"

// Adjusts next-token `logits` in place by dispatching to the XPU plugin
// kernel `token_penalty_multi_scores`, which consumes the per-sequence
// penalty/frequency/presence scores, temperatures, the bad-token list, and
// the cur_len/min_len/eos_token_id tensors. Supports float16 and float32
// logits (score tensors share the logits dtype; ids and lengths are int64,
// temperatures are float32).
//
// Fix: the enforce message previously claimed "bsz <= 1024" while the check
// enforces bs <= 640; the message now matches the enforced limit.
void TokenPenaltyMultiScores(const paddle::Tensor& pre_ids,
                             const paddle::Tensor& logits,
                             const paddle::Tensor& penalty_scores,
                             const paddle::Tensor& frequency_scores,
                             const paddle::Tensor& presence_scores,
                             const paddle::Tensor& temperatures,
                             const paddle::Tensor& bad_tokens,
                             const paddle::Tensor& cur_len,
                             const paddle::Tensor& min_len,
                             const paddle::Tensor& eos_token_id) {
  // Fetch the XPU device context for the currently selected device.
  phi::XPUPlace place(phi::backends::xpu::GetXPUCurrentDeviceId());
  auto dev_ctx = paddle::experimental::DeviceContextPool::Instance().Get(place);
  auto xpu_ctx = static_cast<const phi::XPUContext*>(dev_ctx);
  int64_t bs = logits.shape()[0];
  // Upper bound enforced by the plugin call path; message kept in sync with
  // the enforced limit (640).
  PADDLE_ENFORCE_LE(
      bs,
      640,
      phi::errors::InvalidArgument(
          "Only support bsz <= 640, but received bsz is %d", bs));
  int64_t length = logits.shape()[1];             // vocabulary size
  int64_t length_id = pre_ids.shape()[1];         // generated-id history length
  int64_t length_bad_words = bad_tokens.shape()[0];
  int64_t end_length = eos_token_id.shape()[0];
  // Dispatch on the logits dtype; the plugin is templated on the XPU type.
  switch (logits.type()) {
    case paddle::DataType::FLOAT16: {
      using XPUType = typename XPUTypeTrait<float16>::Type;
      typedef paddle::float16 data_t;
      int r = baidu::xpu::api::plugin::token_penalty_multi_scores(
          xpu_ctx->x_context(),
          pre_ids.data<int64_t>(),
          // logits is the op's in-place output, hence the const_cast.
          reinterpret_cast<XPUType*>(
              const_cast<data_t*>(logits.data<data_t>())),
          reinterpret_cast<const XPUType*>(penalty_scores.data<data_t>()),
          reinterpret_cast<const XPUType*>(frequency_scores.data<data_t>()),
          reinterpret_cast<const XPUType*>(presence_scores.data<data_t>()),
          temperatures.data<float>(),
          cur_len.data<int64_t>(),
          min_len.data<int64_t>(),
          eos_token_id.data<int64_t>(),
          bad_tokens.data<int64_t>(),
          bs,
          length,
          length_id,
          end_length,
          length_bad_words);
      PD_CHECK(r == 0, "xpu::plugin::token_penalty_multi_scores failed.");
    } break;
    case paddle::DataType::FLOAT32: {
      int r = baidu::xpu::api::plugin::token_penalty_multi_scores(
          xpu_ctx->x_context(),
          pre_ids.data<int64_t>(),
          // logits is the op's in-place output, hence the const_cast.
          const_cast<float*>(logits.data<float>()),
          penalty_scores.data<float>(),
          frequency_scores.data<float>(),
          presence_scores.data<float>(),
          temperatures.data<float>(),
          cur_len.data<int64_t>(),
          min_len.data<int64_t>(),
          eos_token_id.data<int64_t>(),
          bad_tokens.data<int64_t>(),
          bs,
          length,
          length_id,
          end_length,
          length_bad_words);
      PD_CHECK(r == 0, "xpu::plugin::token_penalty_multi_scores failed.");
    } break;
    default:
      PD_THROW(
          "NOT supported data type. "
          "Only float16 and float32 are supported. ");
      break;
  }
}

// Registers the custom op `get_token_penalty_multi_scores_v2` with Paddle.
// The op has no separately allocated output: "logits" is modified in place
// and exposed to callers under the alias "logits_out" via SetInplaceMap.
PD_BUILD_OP(get_token_penalty_multi_scores_v2)
    .Inputs({"pre_ids",
             "logits",
             "penalty_scores",
             "frequency_scores",
             "presence_scores",
             "temperatures",
             "bad_tokens",
             "cur_len",
             "min_len",
             "eos_token_id"})
    .Outputs({"logits_out"})
    .SetInplaceMap({{"logits", "logits_out"}})
    .SetKernelFn(PD_KERNEL(TokenPenaltyMultiScores));
Loading
Loading