[LLM] disable part of MC2 in lora #8505

Merged · 1 commit · May 29, 2024
paddlenlp/peft/lora/lora_layers.py: 6 changes (4 additions, 2 deletions)
@@ -409,7 +409,8 @@
         if not self.merged:
             input_mp = self.lora_dropout(input_mp)
-            if MC2RowSeqParallelCoreLinear is None:
+            # TODO(@gexiao): temporary workaround for deterministic calculation
+            if True or MC2RowSeqParallelCoreLinear is None:
                 input_mp = input_mp @ self.lora_A
                 input_mp = ReduceScatterOp.apply(input_mp)
             else:
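
Because `True or <anything>` short-circuits to `True`, the fused MC2 branch in this hunk becomes unreachable and the row-parallel LoRA delta is always computed as an explicit matmul followed by a reduce-scatter; per the TODO, this is a temporary workaround so the calculation stays deterministic. A minimal single-process sketch of the patched control flow (`row_lora_delta` is a hypothetical helper, not the PaddleNLP code, and the distributed `ReduceScatterOp` collective is only noted in a comment since it needs a process group):

# Sketch of the patched row-parallel branch (hypothetical helper).
import paddle

def row_lora_delta(input_mp, lora_A, MC2RowSeqParallelCoreLinear=None):
    # `True or x` always short-circuits, so the fused MC2 path below
    # is dead code while the workaround is in place.
    if True or MC2RowSeqParallelCoreLinear is None:
        input_mp = input_mp @ lora_A  # explicit, deterministic matmul
        # The real layer then calls ReduceScatterOp.apply(input_mp) to
        # scatter the result across the sequence-parallel group.
        return input_mp
    # Fused matmul + reduce-scatter (MC2); unreachable with the workaround.
    return MC2RowSeqParallelCoreLinear.apply(input_mp, lora_A)

x = paddle.randn([4, 8])
A = paddle.randn([8, 2])
print(row_lora_delta(x, A).shape)  # [4, 2]
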
@@ -651,7 +652,8 @@
         if not self.merged:
             input_a = self.lora_dropout(x) @ self.lora_A
-            if MC2ColumnSeqParallelCoreLinear is None:
+            # TODO(@gexiao): temporary workaround for deterministic calculation
+            if True or MC2ColumnSeqParallelCoreLinear is None:
                 input_a = AllGatherOp.apply(input_a)
                 delta_mp = (input_a @ self.lora_B) * self.scaling
             else:
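
The column-parallel branch is patched the same way: the fused path is skipped, and the layer all-gathers the LoRA activation before the `lora_B` matmul and scaling. A sketch under the same assumptions (`column_lora_delta` is hypothetical; the `AllGatherOp` collective is stubbed out for a single process):

# Sketch of the patched column-parallel branch (hypothetical helper).
import paddle

def column_lora_delta(x, lora_A, lora_B, scaling,
                      lora_dropout=lambda t: t,
                      MC2ColumnSeqParallelCoreLinear=None):
    input_a = lora_dropout(x) @ lora_A
    if True or MC2ColumnSeqParallelCoreLinear is None:
        # The real layer first calls AllGatherOp.apply(input_a) to collect
        # the full sequence before the second matmul; omitted here.
        return (input_a @ lora_B) * scaling
    # Fused all-gather + matmul (MC2); unreachable with the workaround.
    return MC2ColumnSeqParallelCoreLinear.apply(input_a, lora_B) * scaling
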