1 parent 1bde9b8 commit ac29bc4
paddlenlp/experimental/transformers/fused_transformer_layers.py
@@ -43,7 +43,7 @@
     from paddlenlp_ops import rebuild_padding_v2
 
 if core.is_compiled_with_cuda():
-    if os.getenv("FLAGS_CUTLASS_FP8_GEMM", "False") == "True":
+    if os.getenv("FLAGS_CUTLASS_FP8_GEMM", "True") == "True":
         logger.info("cutlass fp8 gemm is used. you can turn it off by setting FLAGS_CUTLASS_FP8_GEMM to False.")
         from paddlenlp_ops import (
             cutlass_fp8_fp8_fp8_dual_gemm_fused as fp8_dual_gemm_fused,
@@ -76,7 +76,7 @@
 
 
 def use_cutlass_fp8_gemm():
-    return os.getenv("FLAGS_CUTLASS_FP8_GEMM", "False") == "True"
+    return os.getenv("FLAGS_CUTLASS_FP8_GEMM", "True") == "True"
 
 
 # for distributed tensor model parallel
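For context, here is a minimal sketch of what this default flip means for callers. The helper mirrors `use_cutlass_fp8_gemm` from the diff above; the surrounding demo code is illustrative, not part of the commit.

```python
import os

def use_cutlass_fp8_gemm():
    # After this commit the flag defaults to "True", so the cutlass fp8 gemm
    # path is taken unless the variable is explicitly set to something else.
    return os.getenv("FLAGS_CUTLASS_FP8_GEMM", "True") == "True"

print(use_cutlass_fp8_gemm())  # True when FLAGS_CUTLASS_FP8_GEMM is unset

# Opting out, per the log message in the diff. Note that the import-time
# branch in fused_transformer_layers.py is evaluated once, so in practice
# the variable must be set before the module is imported.
os.environ["FLAGS_CUTLASS_FP8_GEMM"] = "False"
print(use_cutlass_fp8_gemm())  # False
```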