
Fix uie predictor #4081

Merged · 4 commits · Dec 16, 2022
17 changes: 16 additions & 1 deletion model_zoo/uie/README.md
@@ -915,19 +915,33 @@ paddlenlp server server:app --host 0.0.0.0 --port 8989
python deploy/python/infer_cpu.py --model_path_prefix ${finetuned_model}/model
```

Deploy the UIE-M model:

```shell
python deploy/python/infer_cpu.py --model_path_prefix ${finetuned_model}/model --multilingual
```


Description of configurable parameters:

- `model_path_prefix`: Path to the Paddle model files used for inference, including the file-name prefix. For example, if the model file path is `./export/model.pdiparams`, pass `./export/model`.
- `position_prob`: Probability threshold, between 0 and 1, for a span's start/end positions; results scoring below it are dropped from the output. Defaults to 0.5. A span's final probability is the product of its start-position and end-position probabilities (see the sketch after this list).
- `max_seq_len`: Maximum split length of the input text; inputs longer than this are split automatically. Defaults to 512.
- `batch_size`: Batch size; adjust it according to your machine. Defaults to 4.
- `multilingual`: Whether the model is multilingual. Models fine-tuned from "uie-m-base", "uie-m-large", etc. are multilingual and require this to be set to True. Defaults to False.
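
A minimal sketch of the `position_prob` rule (made-up per-token probabilities, not model output; the repo's real implementation uses `get_bool_ids_greater_than` and `get_span` from `paddlenlp.utils.tools`, with a more careful start/end pairing than shown here):

```python
# Hypothetical per-token probabilities for one input text.
position_prob = 0.5
start_probs = [0.1, 0.2, 0.9, 0.3, 0.1, 0.2, 0.1, 0.4]
end_probs = [0.1, 0.1, 0.2, 0.3, 0.1, 0.8, 0.1, 0.4]

# Keep only positions whose probability clears the threshold ...
starts = [(i, p) for i, p in enumerate(start_probs) if p >= position_prob]
ends = [(i, p) for i, p in enumerate(end_probs) if p >= position_prob]

# ... then pair each start with a later end; the span's final probability
# is the product of its start- and end-position probabilities.
spans = [((s, e), sp * ep) for s, sp in starts for e, ep in ends if e >= s]
print(spans)  # [((2, 5), 0.72)]  (0.9 * 0.8, up to floating-point noise)
```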

- GPU inference example

On GPU, deploy with the following command:

```shell
python deploy/python/infer_gpu.py --model_path_prefix export/model --use_fp16 --device_id 0
python deploy/python/infer_gpu.py --model_path_prefix ${finetuned_model}/model --use_fp16 --device_id 0
```

Deploy the UIE-M model:

```shell
python deploy/python/infer_gpu.py --model_path_prefix ${finetuned_model}/model --use_fp16 --device_id 0 --multilingual
```

Description of configurable parameters:
@@ -938,6 +952,7 @@ paddlenlp server server:app --host 0.0.0.0 --port 8989
- `max_seq_len`: Maximum split length of the input text; inputs longer than this are split automatically. Defaults to 512.
- `batch_size`: Batch size; adjust it according to your machine. Defaults to 4.
- `device_id`: GPU device ID. Defaults to 0.
- `multilingual`: Whether the model is multilingual. Models fine-tuned from "uie-m-base", "uie-m-large", etc. are multilingual and require this to be set to True. Defaults to False. (A Python-level usage sketch follows this list.)
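
For reference, both infer scripts are thin wrappers around the `UIEPredictor` class changed in this PR. Below is a hedged sketch of driving it directly from Python inside `deploy/python/`; the `Namespace` fields mirror the CLI flags above, while the `schema` value and the `predict` entry point are assumptions taken from the UIE examples, not an excerpt of this diff:

```python
from argparse import Namespace

from uie_predictor import UIEPredictor  # local module in deploy/python/

# Fields mirror the CLI flags documented above; the values are illustrative.
args = Namespace(
    model_path_prefix="export/model",
    device="gpu",          # or "cpu"
    device_id=0,
    use_fp16=True,
    multilingual=True,     # required for models fine-tuned from uie-m-base / uie-m-large
    position_prob=0.5,
    max_seq_len=512,
    batch_size=4,
    schema=["时间", "选手", "赛事名称"],  # hypothetical extraction schema
)

predictor = UIEPredictor(args)
print(predictor.predict(["2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中中国选手谷爱凌以188.25分获得金牌!"]))  # assumed entry point
```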

<a name="CCKS比赛"></a>

Expand Down
3 changes: 1 addition & 2 deletions model_zoo/uie/deploy/python/infer_cpu.py
@@ -13,10 +13,8 @@
# limitations under the License.

import argparse
import math
from pprint import pprint

import paddle
from uie_predictor import UIEPredictor


@@ -35,6 +33,7 @@ def parse_args():
type=float,
help="Probability threshold for start/end index probabiliry.",
)
parser.add_argument("--multilingual", action="store_true", help="Whether is the multilingual model.")
parser.add_argument(
"--max_seq_len",
default=512,
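
Both scripts register the new flag with `action="store_true"`, so `args.multilingual` is False unless the flag is passed on the command line. A tiny self-contained illustration (not repo code):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--multilingual", action="store_true", help="Whether the model is a multilingual model.")

print(parser.parse_args([]).multilingual)                  # False: flag omitted
print(parser.parse_args(["--multilingual"]).multilingual)  # True: flag present
```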
2 changes: 1 addition & 1 deletion model_zoo/uie/deploy/python/infer_gpu.py
@@ -15,7 +15,6 @@
import argparse
from pprint import pprint

import paddle
from uie_predictor import UIEPredictor


@@ -39,6 +38,7 @@ def parse_args():
action="store_true",
help="Whether to use fp16 inference, only takes effect when deploying on gpu.",
)
parser.add_argument("--multilingual", action="store_true", help="Whether is the multilingual model.")
parser.add_argument(
"--max_seq_len",
default=512,
32 changes: 19 additions & 13 deletions model_zoo/uie/deploy/python/uie_predictor.py
@@ -12,14 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import six
import os
import math
import numpy as np
import onnxruntime as ort
import os
import re

import paddle
import onnxruntime as ort
import paddle2onnx
import six

from paddlenlp.transformers import AutoTokenizer
from paddlenlp.utils.tools import get_bool_ids_greater_than, get_span
@@ -45,8 +44,8 @@ def __init__(self, model_path_prefix, device="cpu", use_fp16=False, device_id=0)
print(">>> [InferBackend] Use GPU to inference ...")
if use_fp16:
print(">>> [InferBackend] Use FP16 to inference ...")
from onnxconverter_common import float16
import onnx
from onnxconverter_common import float16

fp16_model_file = os.path.join(infer_model_dir, "fp16_model.onnx")
onnx_model = onnx.load_model(float_onnx_file)
@@ -62,7 +61,7 @@ def __init__(self, model_path_prefix, device="cpu", use_fp16=False, device_id=0)
self.predictor = ort.InferenceSession(onnx_model, sess_options=sess_options, providers=providers)
if device == "gpu":
assert "CUDAExecutionProvider" in self.predictor.get_providers(), (
f"The environment for GPU inference is not set properly. "
"The environment for GPU inference is not set properly. "
"A possible cause is that you had installed both onnxruntime and onnxruntime-gpu. "
"Please run the following commands to reinstall: \n "
"1) pip uninstall -y onnxruntime onnxruntime-gpu \n 2) pip install onnxruntime-gpu"
@@ -87,6 +86,7 @@ def __init__(self, args):
self._position_prob = args.position_prob
self._max_seq_len = args.max_seq_len
self._batch_size = args.batch_size
self._multilingual = args.multilingual
self._schema_tree = None
self.set_schema(args.schema)
if args.device == "cpu":
@@ -167,12 +167,18 @@ def _single_stage_predict(self, inputs):
end_probs = []
for idx in range(0, len(texts), self._batch_size):
l, r = idx, idx + self._batch_size
input_dict = {
"input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
"token_type_ids": encoded_inputs["token_type_ids"][l:r].astype("int64"),
"pos_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
"att_mask": encoded_inputs["attention_mask"][l:r].astype("int64"),
}
if self._multilingual:
input_dict = {
"input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
"position_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
}
else:
input_dict = {
"input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
"token_type_ids": encoded_inputs["token_type_ids"][l:r].astype("int64"),
"position_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
"attention_mask": encoded_inputs["attention_mask"][l:r].astype("int64"),
}
start_prob, end_prob = self._infer(input_dict)
start_prob = start_prob.tolist()
end_prob = end_prob.tolist()
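
The branch exists because the UIE-M export declares only `input_ids` and `position_ids` as graph inputs (the multilingual backbone does not use `token_type_ids`), and ONNX Runtime rejects feeds whose names the graph does not declare. As a design note, the feed could also be derived from the session itself; a hedged sketch with placeholder paths and shapes, not the approach this PR takes:

```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx")       # placeholder export path
declared = {i.name for i in sess.get_inputs()}  # e.g. {"input_ids", "position_ids"} for UIE-M

encoded_inputs = {
    "input_ids": np.zeros((4, 512), dtype="int64"),
    "token_type_ids": np.zeros((4, 512), dtype="int64"),
    "position_ids": np.zeros((4, 512), dtype="int64"),
    "attention_mask": np.ones((4, 512), dtype="int64"),
}

# Feed only what the graph declares; extra tensors would be rejected.
input_dict = {k: v for k, v in encoded_inputs.items() if k in declared}
start_prob, end_prob = sess.run(None, input_dict)  # assumes the two UIE output heads
```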