Skip to content

Commit 85a7344

Browse files
zucchini-nlp authored and ArthurZucker committed
[paligemma] fix processor with suffix (#38365)
fix pg processor
1 parent fca9caa commit 85a7344

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

src/transformers/models/paligemma/processing_paligemma.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,8 @@ def __call__(
310310
return_data = {**inputs, "pixel_values": pixel_values}
311311

312312
if return_token_type_ids:
313-
labels = inputs["input_ids"].masked_fill(inputs["token_type_ids"] == 0, -100)
313+
labels = np.array(inputs["input_ids"])
314+
labels[np.array(inputs["token_type_ids"]) == 0] = -100
314315
return_data.update({"labels": labels})
315316
return BatchFeature(data=return_data, tensor_type=return_tensors)
316317

tests/models/paligemma/test_processor_paligemma.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,20 @@ def test_image_seq_length(self):
6262
)
6363
self.assertEqual(len(inputs["input_ids"][0]), 112)
6464

65+
@require_torch
66+
def test_call_with_suffix(self):
67+
input_str = "lower newer"
68+
suffix = "upper older longer string"
69+
image_input = self.prepare_image_inputs()
70+
processor = self.get_processor()
71+
inputs = processor(text=input_str, images=image_input, suffix=suffix)
72+
self.assertTrue("labels" in inputs)
73+
self.assertEqual(len(inputs["labels"][0]), len(inputs["input_ids"][0]))
74+
75+
inputs = processor(text=input_str, images=image_input, suffix=suffix, return_tensors="pt")
76+
self.assertTrue("labels" in inputs)
77+
self.assertEqual(len(inputs["labels"][0]), len(inputs["input_ids"][0]))
78+
6579
def test_text_with_image_tokens(self):
6680
image_processor = self.get_component("image_processor")
6781
tokenizer = self.get_component("tokenizer")

0 commit comments

Comments
 (0)