@@ -75,7 +75,7 @@ def make_sft_dataset(path, dataclass, skip_warmup=False, impl="mmap"):
75
75
76
76
print_rank_0 (" > building dataset index ..." )
77
77
start_time = time .time ()
78
- sft_indexed_dataset = SftMMapIndexedDataset (path , dataclass , skip_warmup )
78
+ sft_indexed_dataset = SFTMMapIndexedDataset (path , dataclass , skip_warmup )
79
79
print_rank_0 (" > finished creating SFT indexed dataset in {:4f} " "seconds" .format (time .time () - start_time ))
80
80
print_rank_0 (" number of samples: {}" .format (len (sft_indexed_dataset .doc_idx ) - 1 ))
81
81
@@ -574,7 +574,7 @@ def exists(path):
574
574
return os .path .exists (index_file_path (path )) and os .path .exists (data_file_path (path ))
575
575
576
576
577
- class SftMMapIndexedDataset (paddle .io .Dataset ):
577
+ class SFTMMapIndexedDataset (paddle .io .Dataset ):
578
578
class Index (object ):
579
579
_HDR_MAGIC = b"MMIDIDX\x00 \x00 "
580
580
@@ -798,7 +798,7 @@ def make_builder(out_file, impl, save_dtype, loss_mask_file=None):
798
798
return IndexedDatasetBuilder (out_file , dtype = save_dtype )
799
799
800
800
801
- class SftMMapIndexedDatasetBuilder (object ):
801
+ class SFTMMapIndexedDatasetBuilder (object ):
802
802
def __init__ (self , output_file_dict , dtype ):
803
803
self ._data_file_dict = {}
804
804
for key , filename in output_file_dict .items ():
@@ -823,7 +823,7 @@ def end_document(self):
823
823
def finalize (self , index_file ):
824
824
for key , filename in self ._data_file_dict .items ():
825
825
filename .close ()
826
- with SftMMapIndexedDataset .Index .writer (index_file , self ._dtype ) as index :
826
+ with SFTMMapIndexedDataset .Index .writer (index_file , self ._dtype ) as index :
827
827
index .write (self ._sizes , self ._doc_idx )
828
828
829
829
0 commit comments