1 parent 1bde9b8 commit ac29bc4
paddlenlp/experimental/transformers/fused_transformer_layers.py
@@ -43,7 +43,7 @@
     from paddlenlp_ops import rebuild_padding_v2
 
 if core.is_compiled_with_cuda():
-    if os.getenv("FLAGS_CUTLASS_FP8_GEMM", "False") == "True":
+    if os.getenv("FLAGS_CUTLASS_FP8_GEMM", "True") == "True":
         logger.info("cutlass fp8 gemm is used. you can turn it off by setting FLAGS_CUTLASS_FP8_GEMM to False.")
         from paddlenlp_ops import (
             cutlass_fp8_fp8_fp8_dual_gemm_fused as fp8_dual_gemm_fused,
@@ -76,7 +76,7 @@
 
 
 def use_cutlass_fp8_gemm():
-    return os.getenv("FLAGS_CUTLASS_FP8_GEMM", "False") == "True"
+    return os.getenv("FLAGS_CUTLASS_FP8_GEMM", "True") == "True"
 
 
 # for distributed tensor model parallel
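For context, here is a minimal sketch of what this default flip means for callers. The helper mirrors `use_cutlass_fp8_gemm` from the diff above; the surrounding demo code is illustrative, not part of the commit.

```python
import os

def use_cutlass_fp8_gemm():
    # After this commit the flag defaults to "True", so the cutlass fp8 gemm
    # path is taken unless the variable is explicitly set to something else.
    return os.getenv("FLAGS_CUTLASS_FP8_GEMM", "True") == "True"

print(use_cutlass_fp8_gemm())  # True when FLAGS_CUTLASS_FP8_GEMM is unset

# Opting out, per the log message in the diff. Note that the import-time
# branch in fused_transformer_layers.py is evaluated once, so in practice
# the variable must be set before the module is imported.
os.environ["FLAGS_CUTLASS_FP8_GEMM"] = "False"
print(use_cutlass_fp8_gemm())  # False
```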