From 72847b8800d7ede6deb9d6f8cb429c8097370fca Mon Sep 17 00:00:00 2001
From: will-jl944
Date: Thu, 9 Jan 2025 18:46:08 +0800
Subject: [PATCH] [NPU] Adapt to new flash_attention_npu api

---
 paddlenlp/transformers/llama/fusion_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddlenlp/transformers/llama/fusion_ops.py b/paddlenlp/transformers/llama/fusion_ops.py
index 4cb9101cc730..c2a95541fbbd 100644
--- a/paddlenlp/transformers/llama/fusion_ops.py
+++ b/paddlenlp/transformers/llama/fusion_ops.py
@@ -206,8 +206,8 @@ def fusion_flash_attention(
                 value_states,
                 None,
                 attention_mask,
-                [],
-                [],
+                None,
+                None,
                 0.0,
                 attention_mask is None,
                 True,
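
Reviewer note: the hunk replaces two positional arguments of the NPU flash-attention dispatch, passing None instead of empty lists to match the new flash_attention_npu API. Below is a minimal sketch of how the updated call site could look; the dispatcher name core.eager._run_custom_op, the wrapper function name, the comments on each positional slot, and the omission of any flags past those visible in the hunk are assumptions for illustration, not taken from this patch.

# Hypothetical sketch of the call site after this change; argument meanings
# are inferred from the hunk context and are not authoritative.
from paddle.base import core


def npu_fusion_flash_attention(query_states, key_states, value_states, attention_mask):
    outputs = core.eager._run_custom_op(
        "flash_attention_npu",
        query_states,
        key_states,
        value_states,
        None,                    # optional input left unset (unchanged by this patch)
        attention_mask,
        None,                    # was [] before this patch; the new API expects None
        None,                    # was [] before this patch; the new API expects None
        0.0,                     # dropout probability
        attention_mask is None,  # causal behavior when no explicit mask is given
        True,
        # ...any remaining flags lie outside the hunk and are unchanged by this patch
    )
    return outputs[0]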