VictorSanh committed
Commit 5011842
1 Parent(s): ec50e73

looots of fixes

Files changed (1)
  1. app_dialogue.py +103 -121
app_dialogue.py CHANGED
@@ -1,6 +1,5 @@
 import copy
 import hashlib
-import logging
 import os
 import re
 import torch
@@ -15,7 +14,7 @@ from PIL import Image
 import gradio as gr
 from gradio import processing_utils
 from gradio_client.client import DEFAULT_TEMP_DIR
-from transformers import AutoProcessor, AutoModelForCausalLM, TextIteratorStreamer
+from transformers import AutoProcessor, AutoModelForCausalLM, TextIteratorStreamer, logging
 
 from utils import create_model_inputs
 
@@ -27,18 +26,16 @@ MODELS = {
         trust_remote_code=True,
         torch_dtype=torch.bfloat16,
         token=os.environ["HF_AUTH_TOKEN"],
-        # revision="365283baaef60c2b1131fffdee13a0da909728c8",
+        revision="1e05755c1c5cb2077a0f60b83ea1368c22a17282",
     ).to(DEVICE),
     "HuggingFaceM4/idefics2": AutoModelForCausalLM.from_pretrained(
         "HuggingFaceM4/idefics2",
         trust_remote_code=True,
         torch_dtype=torch.bfloat16,
         token=os.environ["HF_AUTH_TOKEN"],
-        # revision="365283baaef60c2b1131fffdee13a0da909728c8",
+        revision="5cd3c3a3eb5e0ea664f5ac09e73c9ef42da93a86",
     ).to(DEVICE),
 }
-
-
 PROCESSOR = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2",
     token=os.environ["HF_AUTH_TOKEN"],
@@ -75,24 +72,10 @@ SYSTEM_PROMPT = [
     # """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
 ]
 
-BAN_TOKENS = ( # For documentation puporse. We are not using this list, it is hardcoded inside `idefics_causal_lm.py` inside TGI.
-    "<image>;<fake_token_around_image>"
-)
-STOP_SUSPECT_LIST = []
-
 API_TOKEN = os.getenv("HF_AUTH_TOKEN")
 # IDEFICS_LOGO = "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/IDEFICS_logo.png"
-
-PROCESSOR = AutoProcessor.from_pretrained(
-    "HuggingFaceM4/idefics-9b-instruct",
-    token=API_TOKEN,
-)
-
 BOT_AVATAR = "IDEFICS_logo.png"
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
 
 # Monkey patch adapted from gradio.components.image.Image - mostly to make the `save` step optional in `pil_to_temp_file`
 def hash_bytes(bytes: bytes):
@@ -247,6 +230,25 @@ def prompt_list_to_markdown(prompt_list: List[str]) -> str:
             resulting_string += elem
     return resulting_string
 
+
+def prompt_list_to_model_input(prompt_list: List[str]) -> Tuple[str, List[Image.Image]]:
+    """
+    Create the final input string and image list to feed to the model's processor.
+    """
+    images = []
+    for idx, part in enumerate(prompt_list):
+        if is_image(part):
+            if is_url(part):
+                images.append(fetch_images([part])[0])
+            else:
+                images.append(Image.open(part))
+            prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
+    input_text = "".join(prompt_list)
+    input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
+    input_text = BOS_TOKEN + input_text.strip()
+    return input_text, images
+
+
 def remove_spaces_around_token(text: str) -> str:
     pattern = r"\s*(<fake_token_around_image>)\s*"
     replacement = r"\1"
@@ -482,17 +484,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
         )
 
         # Creating model inputs
-        images = []
-        for idx, part in enumerate(formated_prompt_list):
-            if is_image(part):
-                if is_url(part):
-                    images.append(fetch_images([part])[0])
-                else:
-                    images.append(Image.open(part))
-                formated_prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
-        input_text = "".join(formated_prompt_list)
-        input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
-        input_text = BOS_TOKEN + input_text
+        input_text, images = prompt_list_to_model_input(formated_prompt_list)
         inputs = create_model_inputs([input_text], [images])
         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
         generation_args.update(inputs)
@@ -558,17 +550,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
         )
 
         # Creating model inputs
-        images = []
-        for idx, part in enumerate(formated_prompt_list):
-            if is_image(part):
-                if is_url(part):
-                    images.append(fetch_images([part])[0])
-                else:
-                    images.append(Image.open(part))
-                formated_prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
-        input_text = "".join(formated_prompt_list)
-        input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
-        input_text = BOS_TOKEN + input_text
+        input_text, images = prompt_list_to_model_input(formated_prompt_list)
         inputs = create_model_inputs([input_text], [images])
         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
         generation_args.update(inputs)
@@ -653,85 +635,85 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
     textbox.submit(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
     clear_btn.click(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
 
-    examples_path = os.path.dirname(__file__)
-    gr.Examples(
-        examples=[
-            [
-                (
-                    "Which famous person does the person in the image look like? Could you craft an engaging narrative"
-                    " featuring this character from the image as the main protagonist?"
-                ),
-                f"{examples_path}/example_images/obama-harry-potter.jpg",
-            ],
-            [
-                "Can you describe the image? Do you think it's real?",
-                f"{examples_path}/example_images/rabbit_force.png",
-            ],
-            ["Explain this meme to me.", f"{examples_path}/example_images/meme_french.jpg"],
-            ["Give me a short and easy recipe for this dish.", f"{examples_path}/example_images/recipe_burger.webp"],
-            [
-                "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.",
-                f"{examples_path}/example_images/travel_tips.jpg",
-            ],
-            [
-                "Can you name the characters in the image and give their French names?",
-                f"{examples_path}/example_images/gaulois.png",
-            ],
-            ["Write a complete sales ad for this product.", f"{examples_path}/example_images/product_ad.jpg"],
-            # [
-            #     (
-            #         "As an art critic AI assistant, could you describe this painting in details and make a thorough"
-            #         " critic?"
-            #     ),
-            #     f"{examples_path}/example_images/art_critic.png",
-            # ],
-            # [
-            #     "Can you tell me a very short story based on this image?",
-            #     f"{examples_path}/example_images/chicken_on_money.png",
-            # ],
-            # ["Write 3 funny meme texts about this image.", f"{examples_path}/example_images/elon_smoking.jpg"],
-            # [
-            #     "Who is in this picture? Why do people find it surprising?",
-            #     f"{examples_path}/example_images/pope_doudoune.webp",
-            # ],
-            # ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
-            # ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
-            # [
-            #     "What is this object and do you think it is horrifying?",
-            #     f"{examples_path}/example_images/can_horror.png",
-            # ],
-            # [
-            #     (
-            #         "What is this sketch for? How would you make an argument to prove this sketch was made by Picasso"
-            #         " himself?"
-            #     ),
-            #     f"{examples_path}/example_images/cat_sketch.png",
-            # ],
-            # ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
-            # ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
-            # [
-            #     "Can you write an advertisement for Coca-Cola based on this image?",
-            #     f"{examples_path}/example_images/polar_bear_coke.png",
-            # ],
-            # [
-            #     "What is happening in this image? Which famous personality does this person in center looks like?",
-            #     f"{examples_path}/example_images/gandhi_selfie.jpg",
-            # ],
-            # [
-            #     "What do you think the dog is doing and is it unusual?",
-            #     f"{examples_path}/example_images/surfing_dog.jpg",
-            # ],
-        ],
-        inputs=[textbox, imagebox],
-        outputs=[textbox, imagebox, chatbot],
-        fn=process_example,
-        cache_examples=True,
-        examples_per_page=6,
-        label=(
-            "Click on any example below to get started.\nFor convenience, the model generations have been"
-            " pre-computed with `idefics-80b-instruct`."
-        ),
-    )
+    # examples_path = os.path.dirname(__file__)
+    # gr.Examples(
+    #     examples=[
+    #         [
+    #             (
+    #                 "Which famous person does the person in the image look like? Could you craft an engaging narrative"
+    #                 " featuring this character from the image as the main protagonist?"
+    #             ),
+    #             f"{examples_path}/example_images/obama-harry-potter.jpg",
+    #         ],
+    #         [
+    #             "Can you describe the image? Do you think it's real?",
+    #             f"{examples_path}/example_images/rabbit_force.png",
+    #         ],
+    #         ["Explain this meme to me.", f"{examples_path}/example_images/meme_french.jpg"],
+    #         ["Give me a short and easy recipe for this dish.", f"{examples_path}/example_images/recipe_burger.webp"],
+    #         [
+    #             "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.",
+    #             f"{examples_path}/example_images/travel_tips.jpg",
+    #         ],
+    #         [
+    #             "Can you name the characters in the image and give their French names?",
+    #             f"{examples_path}/example_images/gaulois.png",
+    #         ],
+    #         ["Write a complete sales ad for this product.", f"{examples_path}/example_images/product_ad.jpg"],
+    #         [
+    #             (
+    #                 "As an art critic AI assistant, could you describe this painting in details and make a thorough"
+    #                 " critic?"
+    #             ),
+    #             f"{examples_path}/example_images/art_critic.png",
+    #         ],
+    #         [
+    #             "Can you tell me a very short story based on this image?",
+    #             f"{examples_path}/example_images/chicken_on_money.png",
+    #         ],
+    #         ["Write 3 funny meme texts about this image.", f"{examples_path}/example_images/elon_smoking.jpg"],
+    #         [
+    #             "Who is in this picture? Why do people find it surprising?",
+    #             f"{examples_path}/example_images/pope_doudoune.webp",
+    #         ],
+    #         ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
+    #         ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
+    #         [
+    #             "What is this object and do you think it is horrifying?",
+    #             f"{examples_path}/example_images/can_horror.png",
+    #         ],
+    #         [
+    #             (
+    #                 "What is this sketch for? How would you make an argument to prove this sketch was made by Picasso"
+    #                 " himself?"
+    #             ),
+    #             f"{examples_path}/example_images/cat_sketch.png",
+    #         ],
+    #         ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
+    #         ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
+    #         [
+    #             "Can you write an advertisement for Coca-Cola based on this image?",
+    #             f"{examples_path}/example_images/polar_bear_coke.png",
+    #         ],
+    #         [
+    #             "What is happening in this image? Which famous personality does this person in center looks like?",
+    #             f"{examples_path}/example_images/gandhi_selfie.jpg",
+    #         ],
+    #         [
+    #             "What do you think the dog is doing and is it unusual?",
+    #             f"{examples_path}/example_images/surfing_dog.jpg",
+    #         ],
+    #     ],
+    #     inputs=[textbox, imagebox],
+    #     outputs=[textbox, imagebox, chatbot],
+    #     fn=process_example,
+    #     cache_examples=False,
+    #     examples_per_page=6,
+    #     label=(
+    #         "Click on any example below to get started.\nFor convenience, the model generations have been"
+    #         " pre-computed with `idefics-80b-instruct`."
+    #     ),
+    # )
 
 demo.queue(max_size=40)
 demo.launch()
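
For reference, a minimal usage sketch of the prompt_list_to_model_input helper that both generation paths above now call. This is not part of the commit; the prompt list contents below are hypothetical, and is_image, is_url, fetch_images, FAKE_TOK_AROUND_IMAGE, IMAGE_SEQ_LEN, BOS_TOKEN, DEVICE, and create_model_inputs are assumed to be defined in app_dialogue.py / utils.py as in the diff.

# Hypothetical prompt list: text parts interleaved with image paths or URLs.
formated_prompt_list = [
    "User: Describe this image.",
    "example_images/obama-harry-potter.jpg",  # assumed to be detected by is_image()
]
# The helper rewrites each image entry in place with
# <fake_token_around_image><image>...<image><fake_token_around_image> (IMAGE_SEQ_LEN copies of <image>),
# collects the corresponding PIL images, and prepends BOS_TOKEN to the joined, stripped text.
input_text, images = prompt_list_to_model_input(formated_prompt_list)
inputs = create_model_inputs([input_text], [images])
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}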