 import pytest
 from parameterized import parameterized_class

+from paddlenlp.experimental.transformers import QWenForQWenVLInferenceModel
 from paddlenlp.transformers import (  # ChatGLMForCausalLM,
+    AutoConfig,
     AutoTokenizer,
     BloomForCausalLM,
     ChatGLMForCausalLM,
     ChatGLMv2ForCausalLM,
     LlamaForCausalLM,
+    QWenForCausalLM,
 )
 from paddlenlp.utils.downloader import (
     COMMUNITY_MODEL_PREFIX,
@@ -283,3 +286,110 @@ def test_cachekv_int8(self):
             full_match += int(inference_item[:min_length] == no_inference_item[:min_length])

         self.assertGreaterEqual(count / len(result_0), 0.2)
+
+
+class QWenVLTest(LLMTest, unittest.TestCase):
+    config_path: str = "./tests/fixtures/llm/predictor.yaml"
+    model_name_or_path: str = "__internal_testing__/tiny-fused-qwen"
+    model_class = QWenForCausalLM
+
+    def setUp(self) -> None:
+        super().setUp()
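+        # Save a float16 copy of the tiny model and tokenizer to self.output_dir so
+        # the inference model below can load from a local checkpoint.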
+        paddle.set_default_dtype("float32")
+        self.model_class.from_pretrained(self.model_name_or_path, dtype="float16").save_pretrained(self.output_dir)
+        AutoTokenizer.from_pretrained(self.model_name_or_path).save_pretrained(self.output_dir)
+
+    def test_forward(self):
+        self.disable_static()
+        config = AutoConfig.from_pretrained(self.output_dir)
+        config.quant_type = None
+        config.weight_only_quant_bits = None
+
+        print(config)
+
+        paddle.set_default_dtype("float16")
+        model = QWenForQWenVLInferenceModel.from_pretrained(self.output_dir, config=config, dtype="float16")
+
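+        # Dummy decoding state: a 31-token prompt, a 50-token generation budget,
+        # and 16 image-feature vectors to splice into the prompt.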
+        batch = 1
+        seq = 31
+        max_len = 50
+        dtype = "float16"
+        input_ids = paddle.randint(0, 100, [batch, seq], dtype="int64")
+        image_features = paddle.randn([batch, 16, config.hidden_size], dtype="float16")
+        tgt_generation_mask = paddle.full([batch, 1, 1, max_len], 1, dtype=dtype)
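+        # img_pos is assumed to hold (batch_idx, start, end) of the image span in input_ids.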
+        img_pos = paddle.to_tensor([[0, 4, 21]], dtype="int64")
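+        # Causal (lower-triangular) attention over the 31 prompt tokens; the rest stays masked.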
+        attention_mask = paddle.full([batch, 1, max_len, max_len], 0, dtype=dtype)
+        attention_mask[:, 0, :seq, :seq] = paddle.tril(paddle.ones(shape=(seq, seq), dtype=dtype))
+        position_ids = paddle.full([batch, seq], 0, dtype="int64")
+        for i in range(batch):
+            position_ids[i, :] = paddle.arange(seq, dtype="int64")
+
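+        # Each entry is labeled with the keyword argument it is passed as below.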
+        inputs = [
+            input_ids,  # input_ids
+            image_features,  # image_features
+            img_pos,  # img_pos
+            attention_mask,  # attention_mask
+            position_ids,  # position_ids
+            paddle.full([batch, 1], 1.0, dtype="float32"),  # penalty_score
+            paddle.full([batch, 1], 0.0, dtype="float32"),  # frequency_score
+            paddle.full([batch, 1], 0.0, dtype="float32"),  # presence_score
+            paddle.full([batch, 1], 1, dtype="int64"),  # min_length
+            paddle.full([batch, 1], max_len - seq, dtype="int64"),  # max_length
+            paddle.full([batch, 1], 1.0, dtype="float32"),  # temperature
+            paddle.full([batch, 1], 0.0, dtype="float32"),  # top_p
+            paddle.full([1], 151643, dtype="int64"),  # eos_token_id
+            paddle.full([batch, 1], seq, dtype="int32"),  # seq_len_encoder
+            paddle.full([batch, 1], seq, dtype="int32"),  # seq_len_decoder
+            paddle.full([batch, 1], 0, dtype="int64"),  # step_idx
+            paddle.full([batch, 1], False, dtype="bool"),  # stop_flags
+            paddle.full([batch, 1], -123, dtype="int64"),  # tgt_ids, can be initialized arbitrarily
+            paddle.full([batch, 1], seq - 1, dtype="int64"),  # tgt_pos
+            tgt_generation_mask,  # tgt_generation_mask
+            paddle.full([batch, max_len], -100, dtype="int64"),  # pre_ids, can be initialized arbitrarily
+            paddle.full([1], batch, dtype="int64"),  # stop_nums, should equal the batch size
+        ]
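+        # One KV-cache tensor per layer: [2 (k/v), batch, num_heads, max_len, head_dim];
+        # num_heads=1 and head_dim=64 are assumed shapes for the tiny test model.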
+        for _ in range(config.num_hidden_layers):
+            tmp = paddle.rand(shape=[2, batch, 1, max_len, 64], dtype=dtype)
+            inputs.append(tmp)
+
+        model.eval()
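+        # Smoke test: the fused generation pass should run end to end without error.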
+        model.generate_text_with_image_features(
+            input_ids=inputs[0],
+            image_features=inputs[1],
+            img_pos=inputs[2],
+            attention_mask=inputs[3],
+            position_ids=inputs[4],
+            penalty_score=inputs[5],
+            frequency_score=inputs[6],
+            presence_score=inputs[7],
+            min_length=inputs[8],
+            max_length=inputs[9],
+            temperature=inputs[10],
+            top_p=inputs[11],
+            eos_token_id=inputs[12],
+            seq_len_encoder=inputs[13],
+            seq_len_decoder=inputs[14],
+            step_idx=inputs[15],
+            stop_flags=inputs[16],
+            tgt_ids=inputs[17],
+            tgt_pos=inputs[18],
+            tgt_generation_mask=inputs[19],
+            pre_ids=inputs[20],
+            stop_nums=inputs[21],
+            cache_kvs=inputs[22:],
+        )
+
+    def test_export(self):
+        self.disable_static()
+        config = load_test_config(self.config_path, "inference-to-static")
+        config["model_name_or_path"] = self.model_name_or_path
+        config["output_path"] = self.output_dir
+        config["dtype"] = "float16"
+        config["inference_model"] = True
+        config["model_prefix"] = "qwen"
+        config["model_type"] = "qwen-img2txt"
+
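+        # argv_context_guard is assumed to feed the config dict to export_model.main
+        # as CLI arguments, exporting the model to a static graph under output_path.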
+        with argv_context_guard(config):
+            from export_model import main
+
+            main()