Skip to content

Commit c2c6748

Browse files
committed
add llama3.1 config
1 parent d455181 commit c2c6748

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"model_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
3+
"tokenizer_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
4+
"input_dir": "./data",
5+
"output_dir": "./checkpoints/llama3.1_pretrain_ckpts",
6+
"per_device_train_batch_size": 1,
7+
"gradient_accumulation_steps": 1,
8+
"per_device_eval_batch_size": 2,
9+
"tensor_parallel_degree": 1,
10+
"pipeline_parallel_degree": 1,
11+
"sharding": "stage1",
12+
"data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
13+
"sharding_parallel_config": "enable_stage1_overlap",
14+
"tensor_parallel_config": "enable_mp_async_allreduce",
15+
"pipeline_parallel_config": "",
16+
"virtual_pp_degree": 1,
17+
"sequence_parallel": 0,
18+
"use_flash_attention": true,
19+
"use_fused_rms_norm": true,
20+
"fuse_attention_ffn": true,
21+
"fuse_attention_qkv": true,
22+
"use_fused_rope": true,
23+
"fused_linear_param_grad_add": true,
24+
"max_seq_length": 4096,
25+
"learning_rate": 3e-05,
26+
"min_learning_rate": 3e-06,
27+
"warmup_steps": 30,
28+
"logging_steps": 10,
29+
"max_steps": 500,
30+
"save_steps": 5000,
31+
"eval_steps": 1000,
32+
"weight_decay": 0.01,
33+
"bf16": true,
34+
"fp16_opt_level": "O2",
35+
"amp_master_grad": true,
36+
"amp_custom_black_list": ["reduce_sum", "c_softmax_with_cross_entropy"],
37+
"amp_custom_white_list": ["lookup_table", "lookup_table_v2"],
38+
"warmup_ratio": 0.01,
39+
"max_grad_norm": 1.0,
40+
"dataloader_num_workers": 1,
41+
"continue_training": 0,
42+
"do_train": true,
43+
"do_eval": false,
44+
"do_predict": false,
45+
"disable_tqdm": true,
46+
"skip_profile_timer": true,
47+
"recompute": false,
48+
"recompute_use_reentrant": true,
49+
"distributed_dataloader": 0,
50+
"recompute_granularity": "full",
51+
"save_total_limit": 2,
52+
"device": "gpu",
53+
"to_static": true,
54+
"enable_auto_parallel": true
55+
}
56+
57+

0 commit comments

Comments (0)