
Commit 9806293

remove ceval from run_finetune (#9100)
1 parent 33fc7ff commit 9806293

File tree

6 files changed: +3, -138 lines


llm/config/llama/ceval/ceval_w8a8_ptq_argument.json

Lines changed: 0 additions & 37 deletions
This file was deleted.

llm/config/llama/ceval/cevel_wfp8afp8_ptq_argument.json

Lines changed: 0 additions & 25 deletions
This file was deleted.

llm/config/qwen/ceval/ceval_w8a8_ptq_argument.json

Lines changed: 0 additions & 28 deletions
This file was deleted.

llm/docs/quantization.md

Lines changed: 0 additions & 9 deletions
````diff
@@ -94,13 +94,6 @@ python run_finetune.py ./config/llama/ptq_c8_argument.json
 python run_finetune.py ./config/llama/fp8_ptq_argument.json
 ```
 
-### 2.8 Evaluate the quantized model's C-Eval score
-
-```shell
-python run_finetune.py ./config/llama/ceval_quant_argument.json
-```
-
-
 ### 2.9 Quantization parameters
 
 <summary>&emsp; Quantization arguments (QuantArgument)</summary>
@@ -135,8 +128,6 @@ python run_finetune.py ./config/llama/ceval_quant_argument.json
 - `do_gptq`: Whether to apply GPTQ quantization, which quantizes the model to WINT4; it is more accurate than plain PTQ but takes longer. Defaults to False.
 - `gptq_step`: Number of GPTQ steps, i.e. model forward passes. Defaults to 8.
 - `do_awq`: Whether to apply AWQ quantization, which quantizes the model to WINT4; it is more accurate than plain PTQ. Defaults to False.
-- `do_ceval`: Whether to run the C-Eval benchmark. Defaults to False.
-- `ceval_data_path`: Path to the C-Eval dataset. Defaults to "../dataset/ceval".
 - `auto_clip`: Whether, during AWQ, to automatically search for clipping thresholds and clip the model weights; clipping helps quantized-model accuracy, but the search is slow. Defaults to False.
 - `autoclip_step`: Number of AutoClip steps, i.e. model forward passes; by default each round's data is concatenated when sampling to search for the clipping thresholds. Defaults to 8.
 
````
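With the two C-Eval keys gone, the remaining QuantArgument fields documented above are still driven entirely by the JSON configs passed to `run_finetune.py`. A minimal sketch of assembling such a config, using only the fields documented above; the file name and flag values are illustrative assumptions, and a real config would also carry model and data fields beyond QuantArgument:

```python
# Illustrative sketch: write a quantization config limited to the QuantArgument
# fields documented above. File name and values are assumptions for the example.
import json

quant_config = {
    "do_gptq": False,    # True selects WINT4 GPTQ: higher accuracy, longer runtime
    "gptq_step": 8,      # GPTQ forward passes
    "do_awq": False,     # True selects WINT4 AWQ
    "auto_clip": False,  # search clipping thresholds during AWQ (slow but more accurate)
    "autoclip_step": 8,  # AutoClip forward passes
}

with open("my_ptq_argument.json", "w") as f:
    json.dump(quant_config, f, indent=2)
# Then: python run_finetune.py my_ptq_argument.json
```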

llm/run_finetune.py

Lines changed: 3 additions & 21 deletions
```diff
@@ -18,7 +18,6 @@
 
 import paddle
 from utils.argument import (
-    CEvalArgument,
     DataArgument,
     GenerateArgument,
     ModelArgument,
@@ -73,13 +72,11 @@
 
 
 def main():
-    parser = PdArgumentParser(
-        (GenerateArgument, QuantArgument, ModelArgument, DataArgument, TrainingArguments, CEvalArgument)
-    )
+    parser = PdArgumentParser((GenerateArgument, QuantArgument, ModelArgument, DataArgument, TrainingArguments))
     if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
-        gen_args, quant_args, model_args, data_args, training_args, ceval_args = parser.parse_json_file_and_cmd_lines()
+        gen_args, quant_args, model_args, data_args, training_args = parser.parse_json_file_and_cmd_lines()
     else:
-        gen_args, quant_args, model_args, data_args, training_args, ceval_args = parser.parse_args_into_dataclasses()
+        gen_args, quant_args, model_args, data_args, training_args = parser.parse_args_into_dataclasses()
 
     training_args.print_config(model_args, "Model")
     training_args.print_config(data_args, "Data")
@@ -561,10 +558,6 @@ def compute_metrics_do_generation(eval_preds):
         data_args=data_args,
     )
 
-    # Evaluation dev set
-    if training_args.do_eval:
-        before_eval_result = trainer.evaluate(dev_ds)
-
     # Train
     if training_args.do_train:
         checkpoint = None
@@ -726,21 +719,10 @@ def compute_metrics_do_generation(eval_preds):
     # Evaluation dev set
     if training_args.do_eval:
 
-        logger.info("*** Evaluate result before train/ptq/qat/ etc.***")
-        trainer.log_metrics("eval", before_eval_result)
-
         logger.info("*** Evaluate result after train/ptq/qat/ etc.***")
         eval_result = trainer.evaluate(dev_ds)
         trainer.log_metrics("eval", eval_result)
 
-    # C-Eval after qat/ptq/train
-    if ceval_args.do_ceval:
-        logger.info("*** Evaluate on C-Eval ***")
-        ceval_args.output_dir = training_args.output_dir
-        from experimental.ceval.default.eval import run_eval
-
-        run_eval(tokenizer, trainer.model, ceval_args)
-
 
 if __name__ == "__main__":
     main()
```
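The three `+` lines above drop `ceval_args` from both parse paths; each remaining dataclass in the tuple still maps one-to-one onto a returned argument object. A minimal self-contained sketch of that pattern follows; the `FooArgument`/`BarArgument` classes are illustrative stand-ins, and the `PdArgumentParser` import path is an assumption about where PaddleNLP exposes it:

```python
# Minimal sketch of the dataclass-tuple parsing pattern kept in run_finetune.py.
# FooArgument/BarArgument are illustrative; the import path is an assumption.
import sys
from dataclasses import dataclass, field

from paddlenlp.trainer import PdArgumentParser


@dataclass
class FooArgument:
    foo_steps: int = field(default=8, metadata={"help": "An illustrative option"})


@dataclass
class BarArgument:
    bar_path: str = field(default="./data", metadata={"help": "Another illustrative option"})


parser = PdArgumentParser((FooArgument, BarArgument))
if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
    # A JSON file passed as argv[1] is merged with the remaining command-line flags.
    foo_args, bar_args = parser.parse_json_file_and_cmd_lines()
else:
    foo_args, bar_args = parser.parse_args_into_dataclasses()
```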

llm/utils/argument.py

Lines changed: 0 additions & 18 deletions
```diff
@@ -348,21 +348,3 @@ class GenerateArgument:
     top_p: float = field(
         default=1.0, metadata={"help": "The cumulative probability for top-p-filtering in the sampling strategy."}
     )
-
-
-@dataclass
-class CEvalArgument:
-    do_ceval: bool = field(
-        default=False,
-        metadata={"help": "Whether to run C-Eval"},
-    )
-    cot: bool = field(default=False, metadata={"help": "Whether to use chain of thought"})
-    few_shot: bool = field(default=False, metadata={"help": "Whether to use few shot"})
-    ntrain: int = field(default=5, metadata={"help": "Number of few shot"})
-    with_prompt: bool = field(default=False, metadata={"help": "Whether to use prompt"})
-    constrained_decoding: bool = field(default=True, metadata={"help": "Whether to use constrained decoding"})
-    temperature: float = field(default=0.2, metadata={"help": "Temperature for decoding"})
-    n_times: int = field(default=1, metadata={"help": "Number of times to run"})
-    do_save_csv: bool = field(default=False, metadata={"help": "Whether to save csv"})
-    do_test: bool = field(default=False, metadata={"help": "Whether to run test"})
-    ceval_data_path: str = field(default="../dataset/ceval", metadata={"help": "Path to the data for ceval"})
```
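This deletion removes the last in-repo declaration of these options, but the removed code shows how C-Eval was wired up, so the benchmark can still be reconstructed outside `run_finetune.py`. A hedged sketch of a standalone runner, assuming the experimental harness imported by the deleted `run_finetune.py` block still exists at `experimental.ceval.default.eval` and reads these same fields:

```python
# Hedged sketch: running C-Eval standalone after this commit, reusing the pieces
# removed above. Assumes experimental.ceval.default.eval.run_eval still exists.
from dataclasses import dataclass, field

from experimental.ceval.default.eval import run_eval  # import path from the removed code


@dataclass
class CEvalArgument:  # re-declared locally; fields copied from the removed dataclass
    do_ceval: bool = field(default=True, metadata={"help": "Whether to run C-Eval"})
    cot: bool = field(default=False, metadata={"help": "Whether to use chain of thought"})
    few_shot: bool = field(default=False, metadata={"help": "Whether to use few shot"})
    ntrain: int = field(default=5, metadata={"help": "Number of few shot"})
    with_prompt: bool = field(default=False, metadata={"help": "Whether to use prompt"})
    constrained_decoding: bool = field(default=True, metadata={"help": "Whether to use constrained decoding"})
    temperature: float = field(default=0.2, metadata={"help": "Temperature for decoding"})
    n_times: int = field(default=1, metadata={"help": "Number of times to run"})
    do_save_csv: bool = field(default=False, metadata={"help": "Whether to save csv"})
    do_test: bool = field(default=False, metadata={"help": "Whether to run test"})
    ceval_data_path: str = field(default="../dataset/ceval", metadata={"help": "Path to the data for ceval"})
    output_dir: str = "./ceval_output"  # set dynamically in the removed code; declared here instead


def run_ceval_standalone(tokenizer, model):
    # tokenizer/model are assumed to be loaded by the caller, as the trainer did.
    run_eval(tokenizer, model, CEvalArgument())
```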
