
[AutoNLP]optimize log #5021


Merged
8 commits merged on Feb 28, 2023
35 changes: 31 additions & 4 deletions paddlenlp/experimental/autonlp/auto_trainer_base.py
@@ -13,7 +13,10 @@
# limitations under the License.
import copy
import datetime
import logging
import os
import shutil
import sys
from abc import ABCMeta, abstractmethod
from typing import Any, Callable, Dict, List, Optional, Union

@@ -122,12 +125,36 @@ def _data_checks_and_inference(self, train_dataset: Dataset, eval_dataset: Dataset):
Performs different data checks and inferences on the training and eval datasets
"""

@abstractmethod
def _construct_trainable(self, train_dataset: Dataset, eval_dataset: Dataset) -> Callable:
def _construct_trainable(self) -> Callable:
"""
Returns the Trainable functions that contains the main preprocessing and training logic
"""

def trainable(model_config):
# import is required for proper pickling
from paddlenlp.utils.log import logger

stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(logger.format)
logger.logger.addHandler(stdout_handler)
Comment on lines +137 to +139
Collaborator:
Can this surface all of the logs? If so, that would be great.

Contributor Author:

Yes, and it still distinguishes info, warning, and error levels.

Collaborator:
Please also confirm the following cases:

  1. When verbosity > 0, logs appear in the notebook and in the log file.
  2. When verbosity = 0, there are no logs in the notebook, but the log file still contains them.

Contributor Author:

Confirmed, both cases behave as described.
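
For reference, a minimal standalone sketch of the behavior discussed in this thread. It uses only what the diff already shows (`logger.format` as the formatter and `logger.logger` as the underlying `logging.Logger`); the `verbosity` switch mentioned above is assumed to be handled elsewhere and is not part of the snippet.

```python
# Sketch: attach a stdout handler to the paddlenlp logger so that its
# messages reach stdout (where Ray's per-trial log capture can record them)
# while keeping the info/warning/error levels distinguishable.
import logging
import sys

from paddlenlp.utils.log import logger

stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(logger.format)
logger.logger.addHandler(stdout_handler)

logger.info("now visible on stdout and in the trial log file")
logger.logger.warning("warnings and errors keep their own levels")
```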


# construct trainer
model_config = model_config["candidates"]
trainer = self._construct_trainer(model_config)
# train
trainer.train()
# evaluate
eval_metrics = trainer.evaluate()
# save dygraph model
trainer.save_model(self.save_path)

if os.path.exists(self.training_path):
logger.info("Removing training checkpoints to conserve disk space")
shutil.rmtree(self.training_path)
return eval_metrics

return trainable

@abstractmethod
def _compute_metrics(self, eval_preds: EvalPrediction) -> Dict[str, float]:
"""
@@ -325,9 +352,9 @@ def train(
tune_config=tune_config,
run_config=RunConfig(
name=experiment_name,
log_to_file=True,
log_to_file="train.log",
local_dir=self.output_dir if self.output_dir else None,
callbacks=[tune.logger.CSVLoggerCallback(), tune.logger.JsonLoggerCallback()],
callbacks=[tune.logger.CSVLoggerCallback()],
),
)
self.training_results = self.tuner.fit()
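For context, a minimal sketch of how the adjusted `RunConfig` behaves, assuming the Ray 2.x APIs the surrounding code appears to use (`ray.air.RunConfig`, `tune.Tuner`); `toy_trainable` and the experiment name are placeholders, not part of the PR. Passing a filename to `log_to_file` routes each trial's stdout and stderr into that single file in the trial directory, and dropping `JsonLoggerCallback` leaves only the CSV result logs.

```python
# Sketch: per-trial logging into a single train.log file plus CSV-only
# result logging, mirroring the RunConfig change above.
from ray import tune
from ray.air import RunConfig  # assumed Ray 2.x import path


def toy_trainable(config):
    # Anything printed here (including logger output forwarded to stdout)
    # should end up in <trial_dir>/train.log because log_to_file is a filename.
    print("hello from a trial")
    return {"eval_accuracy": 0.9}


tuner = tune.Tuner(
    toy_trainable,
    run_config=RunConfig(
        name="toy_experiment",
        log_to_file="train.log",
        callbacks=[tune.logger.CSVLoggerCallback()],
    ),
)
results = tuner.fit()
```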
28 changes: 1 addition & 27 deletions paddlenlp/experimental/autonlp/text_classification.py
@@ -16,7 +16,7 @@
import json
import os
import shutil
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Dict, List, Optional

import numpy as np
import paddle
@@ -372,32 +372,6 @@ def _construct_trainer(self, model_config) -> Trainer:
raise NotImplementedError("'trainer_type' can only be one of ['Trainer', 'PromptTrainer']")
return trainer

def _construct_trainable(self) -> Callable:
"""
Returns the Trainable functions that contains the main preprocessing and training logic
"""

def trainable(model_config):
# import is required for proper pickling
from paddlenlp.utils.log import logger

# construct trainer
model_config = model_config["candidates"]
trainer = self._construct_trainer(model_config)
# train
trainer.train()
# evaluate
eval_metrics = trainer.evaluate()
# save dygraph model
trainer.save_model(self.save_path)

if os.path.exists(self.training_path):
logger.info("Removing training checkpoints to conserve disk space")
shutil.rmtree(self.training_path)
return eval_metrics

return trainable

def evaluate(self, eval_dataset: Optional[Dataset] = None, trial_id: Optional[str] = None):
"""
Run evaluation and returns metrics from a certain `trial_id` on the given dataset.
19 changes: 0 additions & 19 deletions paddlenlp/trainer/integrations.py
@@ -156,25 +156,6 @@ def on_evaluate(self, args, state, control, **kwargs):
if self.tune.is_session_enabled() and metrics is not None and isinstance(metrics, dict):
self.session.report(metrics)

# report session metrics to Ray to track trial progress
def on_epoch_end(self, args, state, control, **kwargs):
Contributor Author:
The input here does not include metrics.

Collaborator:

on_epoch_end should surface some training-side information, e.g. training metrics like loss and iter/s; please take another look.

Contributor Author:

It looks like on_epoch_end in trainer.py is not passed any metrics, so this hook cannot report anything:
https://github.com/PaddlePaddle/PaddleNLP/blob/develop/paddlenlp/trainer/trainer.py#L731

Collaborator:

That information is not very useful anyway, so dropping it is fine; feel free to delete it.

if not state.is_world_process_zero:
return

metrics = kwargs.get("metrics", None)
if self.tune.is_session_enabled() and metrics is not None and isinstance(metrics, dict):
self.session.report(metrics)

# forward trainer logs
def on_log(self, args, state, control, logs=None, **kwargs):
if not state.is_world_process_zero:
return

if logs is not None:
# In AutoNLP's Ray setup, we pipe stdout to a stdout file for logging purposes
# TODO: find a better way for this
print(logs)


INTEGRATION_TO_CALLBACK = {
"visualdl": VisualDLCallback,
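After this deletion, the only path that reports metrics to Ray is the on_evaluate hook shown above. A minimal standalone sketch of that pattern follows; the callback name is hypothetical and the `ray.air.session` import path is an assumption (Ray 2.x), not something this PR adds.

```python
# Sketch: forward evaluation metrics from a paddlenlp TrainerCallback to the
# active Ray Tune session, mirroring the retained on_evaluate hook.
from paddlenlp.trainer import TrainerCallback
from ray import tune
from ray.air import session  # assumed Ray 2.x import path


class RayEvalReporter(TrainerCallback):
    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        # Only the main process reports, as in the callback above.
        if not state.is_world_process_zero:
            return
        if tune.is_session_enabled() and metrics is not None and isinstance(metrics, dict):
            session.report(metrics)
```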
15 changes: 6 additions & 9 deletions tests/experimental/autonlp/test_text_classification.py
@@ -135,13 +135,12 @@ def test_multiclass(self, custom_model_candidate, hp_overrides):
self.assertEqual(len(results_df), num_models)

# test hp override
model_result = auto_trainer._get_model_result()
if hp_overrides is not None:
for hp_key, hp_value in hp_overrides.items():
result_hp_key = f"config/candidates/{hp_key}"
self.assertEqual(results_df[result_hp_key][0], hp_value)
self.assertEqual(model_result.metrics["config"]["candidates"][hp_key], hp_value)

# test save
model_result = auto_trainer._get_model_result()
trainer_type = model_result.metrics["config"]["candidates"]["trainer_type"]
save_path = os.path.join(model_result.log_dir, auto_trainer.save_path)
self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
@@ -247,13 +246,12 @@ def test_multilabel(self, custom_model_candidate, hp_overrides):
self.assertEqual(len(results_df), num_models)

# test hp override
model_result = auto_trainer._get_model_result()
if hp_overrides is not None:
for hp_key, hp_value in hp_overrides.items():
result_hp_key = f"config/candidates/{hp_key}"
self.assertEqual(results_df[result_hp_key][0], hp_value)
self.assertEqual(model_result.metrics["config"]["candidates"][hp_key], hp_value)

# test save
model_result = auto_trainer._get_model_result()
trainer_type = model_result.metrics["config"]["candidates"]["trainer_type"]
save_path = os.path.join(model_result.log_dir, auto_trainer.save_path)
self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
@@ -358,13 +356,12 @@ def test_default_model_candidate(self, language, hp_overrides):
self.assertEqual(len(results_df), num_models)

# test hp override
model_result = auto_trainer._get_model_result()
if hp_overrides is not None:
for hp_key, hp_value in hp_overrides.items():
result_hp_key = f"config/candidates/{hp_key}"
self.assertEqual(results_df[result_hp_key][0], hp_value)
self.assertEqual(model_result.metrics["config"]["candidates"][hp_key], hp_value)

# test save
model_result = auto_trainer._get_model_result()
trainer_type = model_result.metrics["config"]["candidates"]["trainer_type"]
save_path = os.path.join(model_result.log_dir, auto_trainer.save_path)
self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))