From e07fa8383c7fad550351df14eadc81bcd2d867f8 Mon Sep 17 00:00:00 2001
From: patil-suraj
Date: Mon, 5 Sep 2022 12:35:54 +0530
Subject: [PATCH] use add_tokens

---
 examples/textual_inversion/textual_inversion.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/examples/textual_inversion/textual_inversion.py b/examples/textual_inversion/textual_inversion.py
index 69309ea5e1e3..fe19e0ea87f0 100644
--- a/examples/textual_inversion/textual_inversion.py
+++ b/examples/textual_inversion/textual_inversion.py
@@ -357,15 +357,18 @@ def main():
 
     # Load the tokenizer and add the placeholder token as a additional special token
     if args.tokenizer_name:
-        tokenizer = CLIPTokenizer.from_pretrained(
-            args.tokenizer_name, additional_special_tokens=[args.placeholder_token]
-        )
+        tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
     elif args.pretrained_model_name_or_path:
         tokenizer = CLIPTokenizer.from_pretrained(
-            args.pretrained_model_name_or_path,
-            additional_special_tokens=[args.placeholder_token],
-            subfolder="tokenizer",
-            use_auth_token=args.use_auth_token,
+            args.pretrained_model_name_or_path, subfolder="tokenizer", use_auth_token=args.use_auth_token
+        )
+
+    # Add the placeholder token in tokenizer
+    num_added_tokens = tokenizer.add_tokens(args.placeholder_token)
+    if num_added_tokens == 0:
+        raise ValueError(
+            f"The tokenizer already contains the token {args.placeholder_token}. Please pass a different"
+            " `placeholder_token` that is not already in the tokenizer."
         )
 
     # Convert the initializer_token, placeholder_token to ids
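
For reviewers, a minimal sketch of the tokenizer flow this patch produces, written against the public `transformers` API. The `openai/clip-vit-large-patch14` checkpoint and the `<cat-toy>` placeholder are illustrative stand-ins (the script takes these from `args`), and the embedding-resize step happens elsewhere in textual_inversion.py:

    # Sketch only: checkpoint name and placeholder token are hypothetical stand-ins.
    from transformers import CLIPTextModel, CLIPTokenizer

    placeholder_token = "<cat-toy>"  # illustrative placeholder, normally args.placeholder_token

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")

    # add_tokens() returns the number of tokens actually added; 0 means the
    # token was already in the vocabulary, which the patch treats as an error.
    num_added_tokens = tokenizer.add_tokens(placeholder_token)
    if num_added_tokens == 0:
        raise ValueError(
            f"The tokenizer already contains the token {placeholder_token}. Please pass a different"
            " `placeholder_token` that is not already in the tokenizer."
        )

    # The text encoder's embedding matrix must grow to cover the new token
    # before its id can be used (done later in the training script).
    text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
    text_encoder.resize_token_embeddings(len(tokenizer))
    placeholder_token_id = tokenizer.convert_tokens_to_ids(placeholder_token)

One practical difference from the old `additional_special_tokens` approach: `add_tokens()` registers the placeholder as an ordinary token rather than a special token, so it behaves like regular vocabulary and is not stripped when decoding with `skip_special_tokens=True`; it also lets the script fail loudly if the chosen placeholder already exists in the vocabulary.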