update
Browse files
- tokenization_qwen.py +4 -0
tokenization_qwen.py
CHANGED
@@ -366,9 +366,13 @@ class QWenTokenizer(PreTrainedTokenizer):
|
|
366 |
|
367 |
def from_list_format(self, list_format: List[Dict]):
|
368 |
text = ''
|
|
|
369 |
for ele in list_format:
|
370 |
if 'image' in ele:
|
|
|
|
|
371 |
text += self.image_start_tag + ele['image'] + self.image_end_tag
|
|
|
372 |
elif 'text' in ele:
|
373 |
text += ele['text']
|
374 |
elif 'box' in ele:
|
|
|
366 |
|
367 |
def from_list_format(self, list_format: List[Dict]):
|
368 |
text = ''
|
369 |
+
num_images = 0
|
370 |
for ele in list_format:
|
371 |
if 'image' in ele:
|
372 |
+
num_images += 1
|
373 |
+
text += f'Picture {num_images}:'
|
374 |
text += self.image_start_tag + ele['image'] + self.image_end_tag
|
375 |
+
text += '\n'
|
376 |
elif 'text' in ele:
|
377 |
text += ele['text']
|
378 |
elif 'box' in ele:
|