@@ -456,7 +456,7 @@ def layer_prefix(key):
456
456
457
457
def shard_checkpoint (
458
458
state_dict : Dict [str , paddle .Tensor ],
459
- max_shard_size : Union [int , str ] = "10GB " ,
459
+ max_shard_size : Union [int , str ] = "1024GB " ,
460
460
weights_name : str = PADDLE_WEIGHTS_NAME ,
461
461
shard_format = "naive" ,
462
462
):
@@ -466,8 +466,8 @@ def shard_checkpoint(
466
466
467
467
The sub-checkpoints are determined by iterating through the `state_dict` in the order of its keys, so there is no
468
468
optimization made to make each sub-checkpoint as close as possible to the maximum size passed. For example, if the
469
- limit is 10GB and we have weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB ] they will get sharded as [6GB ], [6+2GB ],
470
- [6+2+2GB ] and not [6+2+2GB ], [6+2GB ], [6GB ].
469
+ limit is 1024GB and we have weights of sizes [600GB, 600GB, 200GB, 600GB, 200GB, 200GB], they will get sharded as [600GB], [600+200GB],
470
+ [600+200+200GB] and not [600+200+200GB], [600+200GB], [600GB].
471
471
472
472
<Tip warning={true}>
473
473
@@ -478,7 +478,7 @@ def shard_checkpoint(
478
478
479
479
Args:
480
480
state_dict (`Dict[str, paddle.Tensor]`): The state dictionary of a model to save.
481
- max_shard_size (`int` or `str`, *optional*, defaults to `"10GB "`):
481
+ max_shard_size (`int` or `str`, *optional*, defaults to `"1024GB"`):
482
482
The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit
483
483
(like `"5MB"`).
484
484
weights_name (`str`, *optional*, defaults to `"model_state.pdparams"`):
@@ -2122,7 +2122,7 @@ def save_pretrained(
2122
2122
is_main_process : bool = True ,
2123
2123
state_dict : Optional [dict ] = None ,
2124
2124
save_function : Callable = paddle .save ,
2125
- max_shard_size : Union [int , str ] = "10GB " ,
2125
+ max_shard_size : Union [int , str ] = "1024GB " ,
2126
2126
safe_serialization : bool = False ,
2127
2127
variant : Optional [str ] = None ,
2128
2128
* args ,
0 commit comments