Skip to content

[Feature] Optimize config saving. #8490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits on Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 3 additions & 42 deletions llm/argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass, field
from typing import List, Optional
from typing import Optional

from paddlenlp.trainer import TrainingArguments
from paddlenlp.trainer.trainer_utils import IntervalStrategy
Expand Down Expand Up @@ -105,30 +105,17 @@ class ModelArgument:
model_name_or_path: str = field(
default=None, metadata={"help": "Build-in pretrained model name or the path to local model."}
)
use_flash_attention: bool = field(default=False, metadata={"help": "Whether to use flash attention"})
tokenizer_name_or_path: Optional[str] = field(
default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
)
use_fused_rms_norm: bool = field(
default=False,
metadata={"help": "llama or other model, use_fused_rms_norm"},
)
fuse_attention_qkv: bool = field(
default=False,
default=None,
metadata={"help": "whether to fuse attention qkv"},
)
fuse_attention_ffn: bool = field(
default=False,
default=None,
metadata={"help": "whether to fuse first up and gate proj in mlp block"},
)
recompute_granularity: str = field(
default="full",
metadata={"help": "Choose among ['full', 'core_attn', 'full_attn']"},
)
virtual_pp_degree: int = field(
default=1,
metadata={"help": "virtual_pp_degree"},
)
hidden_dropout_prob: float = field(default=0.1, metadata={"help": "The hidden dropout prob."})
attention_probs_dropout_prob: float = field(default=0.1, metadata={"help": "The attention hidden dropout prob."})

Expand All @@ -138,32 +125,6 @@ class ModelArgument:
"help": "Pre-training from existing paddlenlp model weights. Default False and model will train from scratch. If set True, the model_name_or_path argument must exist in the paddlenlp models."
},
)
sequence_parallel: bool = field(
default=False,
metadata={"help": "whether to use sequence parallel"},
)
fuse_sequence_parallel_allreduce: bool = field(
default=False,
metadata={"help": "whether to use fuse sequence parallel allreduce"},
)
use_fused_rope: Optional[bool] = field(
default=False,
metadata={"help": "Enable rope fusion or not."},
)
no_recompute_layers: Optional[List[int]] = field(
default=None,
metadata={"help": "Specify the full transformer layers that should not be recomputed."},
)
pp_recompute_interval: int = field(
default=1,
metadata={
"help": "The interval for the number of layers at which recomputation occurs. A value of 0 indicates no recomputation. Default is 0."
},
)
recompute_use_reentrant: bool = field(
default=False,
metadata={"help": "recompute_use_reentrant"},
)
weight_quantize_algo: str = field(
default=None,
metadata={
Expand Down
73 changes: 28 additions & 45 deletions llm/finetune_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@
from paddlenlp.transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoModelForCausalLMPipe,
AutoTokenizer,
Llama3Tokenizer,
LlamaTokenizer,
)
from paddlenlp.transformers.configuration_utils import LlmMetaConfig, llmmetaclass
from paddlenlp.utils.log import logger

# Fine-tune Environment Variables to support sharding stage1 overlap optimization.
Expand All @@ -68,6 +70,7 @@ def docstring_decorator(fn):


@dataclass
@llmmetaclass
@add_start_docstrings(TrainingArguments.__doc__)
class FinetuneArguments(TrainingArguments):
decay_steps: int = field(
Expand Down Expand Up @@ -146,65 +149,45 @@ def main():

model_config = AutoConfig.from_pretrained(
model_args.model_name_or_path,
tensor_parallel_output=training_args.tensor_parallel_output,
tensor_parallel_degree=training_args.tensor_parallel_degree,
tensor_parallel_rank=training_args.tensor_parallel_rank,
dtype=dtype,
from_aistudio=model_args.from_aistudio,
quantization_config=quantization_config,
)
if hasattr(model_config, "use_flash_attention"):
model_config.use_flash_attention = model_args.use_flash_attention

model_config.use_fused_rms_norm = model_args.use_fused_rms_norm
model_config.fuse_attention_qkv = model_args.fuse_attention_qkv
model_config.fuse_attention_ffn = model_args.fuse_attention_ffn
model_config.recompute_granularity = model_args.recompute_granularity
model_config.virtual_pp_degree = model_args.virtual_pp_degree
model_config.sequence_parallel = model_args.sequence_parallel
model_config.fuse_sequence_parallel_allreduce = model_args.fuse_sequence_parallel_allreduce
model_config.use_fused_rope = model_args.use_fused_rope

model_config.no_recompute_layers = model_args.no_recompute_layers
model_config.pp_recompute_interval = model_args.pp_recompute_interval
model_config.recompute_use_reentrant = model_args.recompute_use_reentrant
model_config.use_recompute = training_args.recompute

model_config.tensor_parallel_degree = training_args.tensor_parallel_degree
model_config.tensor_parallel_rank = training_args.tensor_parallel_rank

LlmMetaConfig.set_llm_config(model_config, training_args)

# Config for model using dropout, such as GPT.
model_config.hidden_dropout_prob = model_args.hidden_dropout_prob
model_config.attention_probs_dropout_prob = model_args.attention_probs_dropout_prob
if hasattr(model_config, "hidden_dropout_prob"):
model_config.hidden_dropout_prob = model_args.hidden_dropout_prob
if hasattr(model_config, "attention_probs_dropout_prob"):
model_config.attention_probs_dropout_prob = model_args.attention_probs_dropout_prob

if model_args.fuse_attention_qkv is not None:
model_config.fuse_attention_qkv = model_args.fuse_attention_qkv
if model_args.fuse_attention_ffn is not None:
model_config.fuse_attention_ffn = model_args.fuse_attention_ffn

model_config.sep_parallel_degree = training_args.sep_parallel_degree
model_config.tensor_parallel_output = training_args.tensor_parallel_output
model_config.seq_length = data_args.max_length

print("Final model config:", model_config)

model_class = AutoModelForCausalLM
if training_args.pipeline_parallel_degree > 1:
if data_args.eval_with_do_generation and training_args.do_eval:
raise ValueError("Plese set eval_with_do_generation to false in pipeline parallel mode.")
from paddlenlp.transformers import AutoModelForCausalLMPipe

if not training_args.autotuner_benchmark:
model = AutoModelForCausalLMPipe.from_pretrained(
model_args.model_name_or_path,
config=model_config,
from_aistudio=model_args.from_aistudio,
)
else:
# NOTE(gongenlei): new add autotuner_benchmark
model = AutoModelForCausalLMPipe.from_config(model_config, dtype=dtype)
model_class = AutoModelForCausalLMPipe

if not training_args.autotuner_benchmark:
model = model_class.from_pretrained(
model_args.model_name_or_path,
config=model_config,
from_aistudio=model_args.from_aistudio,
)
else:
if not training_args.autotuner_benchmark:
model = AutoModelForCausalLM.from_pretrained(
model_args.model_name_or_path,
config=model_config,
from_aistudio=model_args.from_aistudio,
)
else:
# NOTE(gongenlei): new add autotuner_benchmark
model = AutoModelForCausalLM.from_config(model_config, dtype=dtype)
# NOTE(gongenlei): new add autotuner_benchmark
model = model_class.from_config(model_config, dtype=dtype)

if training_args.do_train and model_args.neftune:
# Inspired by https://github.com/neelsjain/NEFTune
if hasattr(model, "get_input_embeddings"):
Expand Down
1 change: 0 additions & 1 deletion llm/llama/tests/unified-ckpt-llama-500m/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
"num_attention_heads": 8,
"num_hidden_layers": 8,
"pad_token_id": 0,
"paddlenlp_version": null,
"rms_norm_eps": 1e-06,
"vocab_size": 32000
}
Loading
Loading