diff --git a/paddlenlp/trainer/trainer.py b/paddlenlp/trainer/trainer.py
index 8993a276eb56..1582fa3fd605 100644
--- a/paddlenlp/trainer/trainer.py
+++ b/paddlenlp/trainer/trainer.py
@@ -1013,6 +1013,7 @@ def _inner_training_loop(
                 self.timers and self.timers("optimizer-step").start()

                 if self.args.gradient_accumulation_steps > 1 and self._enable_delay_scale_loss():
+                    paddle.device.synchronize()
                     for p in model._layers.parameters():
                         with paddle.no_grad():
                             if hasattr(p, "main_grad") and p.main_grad is not None:
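
A minimal sketch of the pattern this hunk touches, for reviewers who want to reproduce it outside the trainer: with delayed loss scaling, the accumulated gradients are rescaled in place after several backward passes, and the added `paddle.device.synchronize()` presumably makes sure any asynchronously launched gradient work has finished before those buffers are read. The `linear` module, the `accumulation_steps` value, and the fallback to `p.grad` below are illustrative assumptions, not part of the PR (in the trainer, `main_grad` is only present with fused/mixed-precision parameter setups).

```python
import paddle

accumulation_steps = 4  # illustrative value, not the trainer's argument
linear = paddle.nn.Linear(8, 8)

# Simulate gradient accumulation: several backward passes without
# clearing gradients in between (Paddle accumulates by default).
for _ in range(accumulation_steps):
    out = linear(paddle.randn([2, 8]))
    out.mean().backward()

# Block until all queued device work (backward kernels, async comm)
# has completed, so the in-place scaling below reads fully written
# gradient buffers -- this mirrors the synchronize added in the diff.
paddle.device.synchronize()

with paddle.no_grad():
    for p in linear.parameters():
        # Prefer `main_grad` when it exists (mixed-precision master grads),
        # otherwise fall back to the regular grad for this plain example.
        grad = p.main_grad if hasattr(p, "main_grad") and p.main_grad is not None else p.grad
        if grad is not None:
            grad.scale_(1.0 / accumulation_steps)
```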