Skip to content

Commit 0a618b0

Browse files
committed
update
1 parent d815fce commit 0a618b0

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

llm/config/qwen/emb_argument.json

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
22
"model_name_or_path": "Qwen/Qwen2-0.5B",
3-
"dataset_name_or_path": "./data",
3+
"dataset_name_or_path": "./dureader_data",
44
"output_dir": "./checkpoints/sft_ckpts",
55
"per_device_train_batch_size": 1,
6-
"gradient_accumulation_steps": 128,
6+
"gradient_accumulation_steps": 4,
77
"per_device_eval_batch_size": 1,
88
"eval_accumulation_steps": 1,
99
"max_steps": 2000,
@@ -12,10 +12,10 @@
1212
"logging_steps": 1,
1313
"evaluation_strategy": "no",
1414
"save_strategy": "epoch",
15-
"max_query_len": 1024,
16-
"max_passage_len": 2048,
15+
"max_query_len": 512,
16+
"max_passage_len": 512,
1717
"group_size": 4,
18-
"bp16": true,
18+
"bf16": true,
1919
"fp16_opt_level": "O2",
2020
"do_train": true,
2121
"do_eval": false,
@@ -27,8 +27,10 @@
2727
"save_total_limit": 1,
2828
"tensor_parallel_degree": 1,
2929
"pipeline_parallel_degree": 1,
30-
"sharding": "stage2",
30+
"sharding": "stage1",
3131
"zero_padding": false,
32-
"unified_checkpoint": false,
33-
"use_flash_attention": false
32+
"unified_checkpoint": true,
33+
"use_flash_attention": true,
34+
"amp_custom_black_list": "elementwise_div",
35+
"release_grads": true
3436
}

llm/utils/argument.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
from dataclasses import dataclass, field
15+
from typing import List, Optional
1516

1617

1718
@dataclass
@@ -83,3 +84,7 @@ class EmbeddingArgument:
8384
default=True,
8485
metadata={"help": "Whether to share the negatives across all GPUs."},
8586
)
87+
embedding_matryoshka_dims: Optional[List[int]] = field(
88+
default=None,
89+
metadata={"help": "The dims for matryoshka training."},
90+
)

0 commit comments

Comments
 (0)