From 2a1b00d1423c38fd40f52d2470d4420e3e855cea Mon Sep 17 00:00:00 2001 From: Ting Date: Wed, 15 Jan 2025 19:21:12 +0800 Subject: [PATCH] fix dpo pp criterion --- llm/alignment/dpo/run_dpo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llm/alignment/dpo/run_dpo.py b/llm/alignment/dpo/run_dpo.py index f39ae6db1999..8588a9a1bad6 100644 --- a/llm/alignment/dpo/run_dpo.py +++ b/llm/alignment/dpo/run_dpo.py @@ -127,6 +127,8 @@ def main(): if training_args.pipeline_parallel_degree > 1: model_class = AutoModelForCausalLMPipe + if not dpo_config.reference_free and not dpo_config.lora: + ref_model_config.dpo_config = dpo_config model_config.dpo_config = dpo_config else: model_class = AutoModelForCausalLM