@@ -140,7 +140,7 @@ def main():
140
140
if not training_args .autotuner_benchmark :
141
141
model = AutoModelForCausalLMPipe .from_pretrained (
142
142
model_args .model_name_or_path ,
143
- tensor_parallel_output = False ,
143
+ tensor_parallel_output = True ,
144
144
tensor_parallel_degree = training_args .tensor_parallel_degree ,
145
145
tensor_parallel_rank = training_args .tensor_parallel_rank ,
146
146
use_flash_attention = model_args .use_flash_attention ,
@@ -152,7 +152,7 @@ def main():
152
152
# NOTE(gongenlei): new add autotuner_benchmark
153
153
model_config = AutoConfig .from_pretrained (
154
154
model_args .model_name_or_path ,
155
- tensor_parallel_output = False ,
155
+ tensor_parallel_output = True ,
156
156
tensor_parallel_degree = training_args .tensor_parallel_degree ,
157
157
tensor_parallel_rank = training_args .tensor_parallel_rank ,
158
158
dtype = dtype ,
@@ -163,7 +163,7 @@ def main():
163
163
else :
164
164
model_config = AutoConfig .from_pretrained (
165
165
model_args .model_name_or_path ,
166
- tensor_parallel_output = False ,
166
+ tensor_parallel_output = True ,
167
167
tensor_parallel_degree = training_args .tensor_parallel_degree ,
168
168
tensor_parallel_rank = training_args .tensor_parallel_rank ,
169
169
dtype = dtype ,
0 commit comments