[ppdiffusers] Add FastDeploy+PaddleLite more devices support #4042

Merged: 3 commits, Dec 9, 2022
3 changes: 2 additions & 1 deletion ppdiffusers/deploy/README.md
@@ -42,7 +42,8 @@ python text_to_img_infer.py --model_dir stable-diffusion-v1-5/ --scheduler "eule
|----------|--------------|
| --model_dir | Directory of the exported model. |
| --model_format | Model format. Defaults to `'paddle'`; options: `['paddle', 'onnx']`. |
| --backend | Inference engine backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
| --backend | Inference engine backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddlelite']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
| --device | Device to run on. Defaults to `gpu`; options: `['cpu', 'gpu', 'huawei_ascend_npu', 'kunlunxin_xpu']`. |
| --scheduler | Scheduler of the StableDiffusion model. Defaults to `'pndm'`; options: `['pndm', 'euler_ancestral']`. The scheduler matching each StableDiffusion model is listed in the [ppdiffusers model list](https://github.com/PaddlePaddle/PaddleNLP/tree/main/ppdiffusers#ppdiffusers%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E7%9A%84%E6%9D%83%E9%87%8D). |
| --unet_model_prefix | Prefix of the UNet model. Defaults to `unet`. |
| --vae_model_prefix | Prefix of the VAE model. Defaults to `vae_decoder`. |
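For reference, an invocation that exercises the two new flags might look like this; the model directory is illustrative, and `text_to_img_infer.py` is the script documented above:

```shell
python text_to_img_infer.py --model_dir stable-diffusion-v1-5/ --backend paddlelite --device huawei_ascend_npu
```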
75 changes: 61 additions & 14 deletions ppdiffusers/deploy/img_to_img_infer.py
@@ -56,11 +56,21 @@ def parse_arguments():
type=str,
default="paddle",
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
choices=["onnx_runtime", "paddle", "paddlelite"],
help="The inference runtime backend of unet model and text encoder model.",
)
parser.add_argument(
"--device",
type=str,
default="gpu",
# Note(shentanyue): Will support more devices.
choices=[
"cpu",
"gpu",
"huawei_ascend_npu",
"kunlunxin_xpu",
],
help="The inference runtime device of models.",
)
parser.add_argument(
"--image_path", default="fd_astronaut_rides_horse.png", help="The model directory of diffusion_model."
@@ -123,6 +133,25 @@ def create_paddle_inference_runtime(
return fd.Runtime(option)


def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
option = fd.RuntimeOption()
option.use_lite_backend()
if device == "huawei_ascend_npu":
option.use_cann()
option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
elif device == "kunlunxin_xpu":
# TODO(shentanyue): Add kunlunxin_xpu code
pass
else:
pass
model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
option.set_model_path(model_file, params_file)
return fd.Runtime(option)
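As a quick orientation, a minimal sketch of how this new helper is used later in this script; the model directory value is illustrative, and `"unet"` must name a subdirectory of `model_dir` holding the exported `inference.pdmodel`/`inference.pdiparams` pair:

```python
# Build a FastDeploy runtime for the UNet submodel on an Ascend NPU.
unet_runtime = create_paddle_lite_runtime(
    model_dir="stable-diffusion-v1-5/",  # illustrative export directory
    model_prefix="unet",                 # subdirectory with inference.pdmodel/.pdiparams
    device="huawei_ascend_npu",
    device_id=0,  # forwarded as HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS
)
```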


def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
option = fd.RuntimeOption()
option.use_trt_backend()
@@ -210,42 +239,45 @@ def get_scheduler(args):
}

# 4. Init runtime
device_id = args.device_id
if args.device == "cpu":
device_id = -1
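# Note: -1 is presumably consumed by the create_*_runtime helpers below as "run on CPU".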
if args.backend == "onnx_runtime":
text_encoder_runtime = create_ort_runtime(
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_ort_runtime(
args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
)
vae_encoder_runtime = create_ort_runtime(
args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=device_id
)
start = time.time()
unet_runtime = create_ort_runtime(
args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
use_trt = True if args.backend == "paddle-tensorrt" else False
# Note(zhoushunjie): Will change to paddle runtime later
text_encoder_runtime = create_ort_runtime(
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_decoder_model_prefix,
use_trt,
vae_decoder_dynamic_shape,
use_fp16=args.use_fp16,
device_id=args.device_id,
device_id=device_id,
)
vae_encoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
use_trt,
vae_encoder_dynamic_shape,
use_fp16=args.use_fp16,
device_id=args.device_id,
device_id=device_id,
)
start = time.time()
unet_runtime = create_paddle_inference_runtime(
@@ -254,7 +286,7 @@ def get_scheduler(args):
use_trt,
unet_dynamic_shape,
use_fp16=args.use_fp16,
device_id=args.device_id,
device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "tensorrt":
Expand All @@ -265,23 +297,38 @@ def get_scheduler(args):
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_decoder_dynamic_shape,
device_id=args.device_id,
device_id=device_id,
)
vae_encoder_runtime = create_trt_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_encoder_dynamic_shape,
device_id=args.device_id,
device_id=device_id,
)
start = time.time()
unet_runtime = create_trt_runtime(
args.model_dir,
args.unet_model_prefix,
args.model_format,
dynamic_shape=unet_dynamic_shape,
device_id=args.device_id,
device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddlelite":
text_encoder_runtime = create_paddle_lite_runtime(
args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
)
vae_decoder_runtime = create_paddle_lite_runtime(
args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
)
vae_encoder_runtime = create_paddle_lite_runtime(
args.model_dir, args.vae_encoder_model_prefix, device=args.device, device_id=device_id
)
start = time.time()
unet_runtime = create_paddle_lite_runtime(
args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")

75 changes: 61 additions & 14 deletions ppdiffusers/deploy/inpaint_legacy_infer.py
@@ -56,11 +56,21 @@ def parse_arguments():
type=str,
default="paddle",
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
choices=["onnx_runtime", "paddle", "paddlelite"],
help="The inference runtime backend of unet model and text encoder model.",
)
parser.add_argument(
"--device",
type=str,
default="gpu",
# Note(shentanyue): Will support more devices.
choices=[
"cpu",
"gpu",
"huawei_ascend_npu",
"kunlunxin_xpu",
],
help="The inference runtime device of models.",
)
parser.add_argument(
"--image_path", default="fd_astronaut_rides_horse.png", help="The model directory of diffusion_model."
@@ -123,6 +133,25 @@ def create_paddle_inference_runtime(
return fd.Runtime(option)


def create_paddle_lite_runtime(model_dir, model_prefix, device="cpu", device_id=0):
option = fd.RuntimeOption()
option.use_lite_backend()
if device == "huawei_ascend_npu":
option.use_cann()
option.set_lite_nnadapter_device_names(["huawei_ascend_npu"])
option.set_lite_nnadapter_model_cache_dir(os.path.join(model_dir, model_prefix))
option.set_lite_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(device_id))
elif device == "kunlunxin_xpu":
# TODO(shentanyue): Add kunlunxin_xpu code
pass
else:
pass
model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
option.set_model_path(model_file, params_file)
return fd.Runtime(option)
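The helper locates each submodel by convention: for every prefix it expects `inference.pdmodel` (graph) and `inference.pdiparams` (weights) under `model_dir`. A sanity check along those lines; the directory and prefix names are illustrative, since the actual prefixes come from the CLI flags:

```python
import os

# Verify the exported layout this helper assumes before building runtimes.
for prefix in ("text_encoder", "vae_encoder", "vae_decoder", "unet"):
    model_file = os.path.join("stable-diffusion-v1-5", prefix, "inference.pdmodel")
    params_file = os.path.join("stable-diffusion-v1-5", prefix, "inference.pdiparams")
    assert os.path.exists(model_file), f"missing model graph: {model_file}"
    assert os.path.exists(params_file), f"missing weights: {params_file}"
```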


def create_trt_runtime(model_dir, model_prefix, model_format, workspace=(1 << 31), dynamic_shape=None, device_id=0):
option = fd.RuntimeOption()
option.use_trt_backend()
@@ -209,42 +238,45 @@ def get_scheduler(args):
}

# 4. Init runtime
device_id = args.device_id
if args.device == "cpu":
device_id = -1
if args.backend == "onnx_runtime":
text_encoder_runtime = create_ort_runtime(
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_ort_runtime(
args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.vae_decoder_model_prefix, args.model_format, device_id=device_id
)
vae_encoder_runtime = create_ort_runtime(
args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.vae_encoder_model_prefix, args.model_format, device_id=device_id
)
start = time.time()
unet_runtime = create_ort_runtime(
args.model_dir, args.unet_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.unet_model_prefix, args.model_format, device_id=device_id
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddle" or args.backend == "paddle-tensorrt":
use_trt = True if args.backend == "paddle-tensorrt" else False
# Note(zhoushunjie): Will change to paddle runtime later
text_encoder_runtime = create_ort_runtime(
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=args.device_id
args.model_dir, args.text_encoder_model_prefix, args.model_format, device_id=device_id
)
vae_decoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_decoder_model_prefix,
use_trt,
vae_decoder_dynamic_shape,
use_fp16=args.use_fp16,
device_id=args.device_id,
device_id=device_id,
)
vae_encoder_runtime = create_paddle_inference_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
use_trt,
vae_encoder_dynamic_shape,
use_fp16=args.use_fp16,
device_id=args.device_id,
device_id=device_id,
)
start = time.time()
unet_runtime = create_paddle_inference_runtime(
@@ -253,7 +285,7 @@ def get_scheduler(args):
use_trt,
unet_dynamic_shape,
use_fp16=args.use_fp16,
device_id=args.device_id,
device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "tensorrt":
Expand All @@ -264,23 +296,38 @@ def get_scheduler(args):
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_decoder_dynamic_shape,
device_id=args.device_id,
device_id=device_id,
)
vae_encoder_runtime = create_trt_runtime(
args.model_dir,
args.vae_encoder_model_prefix,
args.model_format,
workspace=(1 << 30),
dynamic_shape=vae_encoder_dynamic_shape,
device_id=args.device_id,
device_id=device_id,
)
start = time.time()
unet_runtime = create_trt_runtime(
args.model_dir,
args.unet_model_prefix,
args.model_format,
dynamic_shape=unet_dynamic_shape,
device_id=args.device_id,
device_id=device_id,
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddlelite":
text_encoder_runtime = create_paddle_lite_runtime(
args.model_dir, args.text_encoder_model_prefix, device=args.device, device_id=device_id
)
vae_decoder_runtime = create_paddle_lite_runtime(
args.model_dir, args.vae_decoder_model_prefix, device=args.device, device_id=device_id
)
vae_encoder_runtime = create_paddle_lite_runtime(
args.model_dir, args.vae_encoder_model_prefix, device=args.device, device_id=device_id
)
start = time.time()
unet_runtime = create_paddle_lite_runtime(
args.model_dir, args.unet_model_prefix, device=args.device, device_id=device_id
)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
