Leyo committed on
Commit
bf512c7
1 Parent(s): 4485a57

fix system prompt

Browse files
Files changed (1) hide show
  1. app_dialogue.py +28 -13
app_dialogue.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import subprocess
3
 
4
- # Install flash attention
5
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
6
 
7
 
@@ -12,8 +12,10 @@ import torch
12
 
13
  from threading import Thread
14
  from typing import List, Dict, Union
 
15
  from urllib.parse import urlparse
16
  from PIL import Image
 
17
 
18
  import gradio as gr
19
  from transformers import AutoProcessor, TextIteratorStreamer
@@ -22,13 +24,13 @@ from transformers import Idefics2ForConditionalGeneration
22
 
23
  DEVICE = torch.device("cuda")
24
  MODELS = {
25
- "idefics2-8b (sft)": Idefics2ForConditionalGeneration.from_pretrained(
26
- "HuggingFaceM4/idefics2-8b",
27
- torch_dtype=torch.bfloat16,
28
- _attn_implementation="flash_attention_2",
29
- trust_remote_code=True,
30
- token=os.environ["HF_AUTH_TOKEN"],
31
- ).to(DEVICE),
32
  "idefics2-8b-chatty (chat)": Idefics2ForConditionalGeneration.from_pretrained(
33
  "HuggingFaceM4/idefics2-8b-chatty",
34
  torch_dtype=torch.bfloat16,
@@ -47,11 +49,11 @@ PROCESSOR = AutoProcessor.from_pretrained(
47
 
48
  # The conversation begins:""",
49
  # """\nUser:""",
50
- # "https://i1.pickpik.com/photos/515/906/355/kittens-cats-pet-cute-preview.jpg",
51
  # "Describe this image.<end_of_utterance>",
52
  # """\nAssistant: Five kittens are standing together in the center of the photograph. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward.<end_of_utterance>""",
53
  # "\nUser:How about this image?",
54
- # "https://cdn.pixabay.com/photo/2017/09/25/13/12/puppy-2785074_1280.jpg",
55
  # "Can you describe it too?<end_of_utterance>",
56
  # """\nAssistant: A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes.<end_of_utterance>""",
57
  # "\nUser: What kind of breed is it?<end_of_utterance>",
@@ -63,7 +65,7 @@ PROCESSOR = AutoProcessor.from_pretrained(
63
  # "\nUser: Do you like Cavalier King Charles Spaniel?<end_of_utterance>",
64
  # """\nAssistant: I do not have personal opinions as I’m just a computer program. However, cavaliers are known for being excellent family pets due to their gentle and patient demeanor, making them great with children and other pets. Their affectionate nature also makes them ideal therapy dogs, as they can provide comfort and emotional support to those in need.<end_of_utterance>""",
65
  # "\nUser: How many dogs do you see in this image?",
66
- # "https://i.dailymail.co.uk/i/pix/2011/07/01/article-2010308-0CD22A8300000578-496_634x414.jpg",
67
  # "<end_of_utterance>",
68
  # """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
69
  # ]
@@ -72,7 +74,7 @@ SYSTEM_PROMPT = [
72
  {
73
  "role": "user",
74
  "content": [
75
- {"type": "image", "image": "https://i1.pickpik.com/photos/515/906/355/kittens-cats-pet-cute-preview.jpg"},
76
  {"type": "text", "text": "Describe this image."},
77
  ],
78
  },
@@ -86,7 +88,7 @@ SYSTEM_PROMPT = [
86
  "role": "user",
87
  "content": [
88
  {"type": "text", "text": "How about this image?"},
89
- {"type": "image", "image": "https://cdn.pixabay.com/photo/2017/09/25/13/12/puppy-2785074_1280.jpg"},
90
  {"type": "text", "text": "Can you describe it too?"},
91
  ],
92
  },
@@ -108,6 +110,12 @@ BOT_AVATAR = "IDEFICS_logo.png"
108
  def turn_is_pure_media(turn):
109
  return turn[1] is None
110
 
 
 
 
 
 
 
111
 
112
  def format_user_prompt_with_im_history_and_system_conditioning(
113
  user_prompt, chat_history
@@ -118,6 +126,12 @@ def format_user_prompt_with_im_history_and_system_conditioning(
118
  """
119
  resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
120
  resulting_images = []
 
 
 
 
 
 
121
 
122
  # Format history
123
  for turn in chat_history:
@@ -232,6 +246,7 @@ def model_inference(
232
  user_prompt=user_prompt,
233
  chat_history=chat_history,
234
  )
 
235
  prompt = PROCESSOR.apply_chat_template(resulting_text, add_generation_prompt=True)
236
  inputs = PROCESSOR(text=prompt, images=resulting_images if resulting_images else None, return_tensors="pt")
237
  inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
 
1
  import os
2
  import subprocess
3
 
4
+ Install flash attention
5
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
6
 
7
 
 
12
 
13
  from threading import Thread
14
  from typing import List, Dict, Union
15
+ import urllib
16
  from urllib.parse import urlparse
17
  from PIL import Image
18
+ import io
19
 
20
  import gradio as gr
21
  from transformers import AutoProcessor, TextIteratorStreamer
 
24
 
25
  DEVICE = torch.device("cuda")
26
  MODELS = {
27
+ # "idefics2-8b (sft)": Idefics2ForConditionalGeneration.from_pretrained(
28
+ # "HuggingFaceM4/idefics2-8b",
29
+ # torch_dtype=torch.bfloat16,
30
+ # _attn_implementation="flash_attention_2",
31
+ # trust_remote_code=True,
32
+ # token=os.environ["HF_AUTH_TOKEN"],
33
+ # ).to(DEVICE),
34
  "idefics2-8b-chatty (chat)": Idefics2ForConditionalGeneration.from_pretrained(
35
  "HuggingFaceM4/idefics2-8b-chatty",
36
  torch_dtype=torch.bfloat16,
 
49
 
50
  # The conversation begins:""",
51
  # """\nUser:""",
52
+ # "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/kittens-cats-pet-cute-preview.jpg?download=true",
53
  # "Describe this image.<end_of_utterance>",
54
  # """\nAssistant: Five kittens are standing together in the center of the photograph. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward.<end_of_utterance>""",
55
  # "\nUser:How about this image?",
56
+ # "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/puppy.jpg?download=true",
57
  # "Can you describe it too?<end_of_utterance>",
58
  # """\nAssistant: A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes.<end_of_utterance>""",
59
  # "\nUser: What kind of breed is it?<end_of_utterance>",
 
65
  # "\nUser: Do you like Cavalier King Charles Spaniel?<end_of_utterance>",
66
  # """\nAssistant: I do not have personal opinions as I’m just a computer program. However, cavaliers are known for being excellent family pets due to their gentle and patient demeanor, making them great with children and other pets. Their affectionate nature also makes them ideal therapy dogs, as they can provide comfort and emotional support to those in need.<end_of_utterance>""",
67
  # "\nUser: How many dogs do you see in this image?",
68
+ # "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/tennis_tsonga.jpg?download=true",
69
  # "<end_of_utterance>",
70
  # """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
71
  # ]
 
74
  {
75
  "role": "user",
76
  "content": [
77
+ {"type": "image", "image": "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/kittens-cats-pet-cute-preview.jpg?download=true"},
78
  {"type": "text", "text": "Describe this image."},
79
  ],
80
  },
 
88
  "role": "user",
89
  "content": [
90
  {"type": "text", "text": "How about this image?"},
91
+ {"type": "image", "image": "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/puppy.jpg?download=true"},
92
  {"type": "text", "text": "Can you describe it too?"},
93
  ],
94
  },
 
110
  def turn_is_pure_media(turn):
111
  return turn[1] is None
112
 
113
def load_image_from_url(url: str, timeout: float = 30.0) -> "Image.Image":
    """Download the image at ``url`` and open it with PIL.

    Args:
        url: Location of the image; passed unchanged to
            ``urllib.request.urlopen``.
        timeout: Seconds to wait on the connection before giving up, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The object returned by ``Image.open`` for the downloaded bytes. The
        bytes are fully read into memory before the connection is closed, so
        the image does not depend on the network once this function returns.

    Raises:
        urllib.error.URLError: If the URL cannot be fetched (including on
            timeout while connecting).
        OSError: If PIL cannot open the downloaded bytes as an image.
    """
    # A bare `import urllib` does not load the `request` submodule; import it
    # explicitly so this function does not rely on another library having
    # imported it as a side effect.
    import urllib.request

    with urllib.request.urlopen(url, timeout=timeout) as response:
        image_data = response.read()

    # Wrap the bytes in a seekable in-memory stream, which Image.open needs.
    return Image.open(io.BytesIO(image_data))
119
 
120
  def format_user_prompt_with_im_history_and_system_conditioning(
121
  user_prompt, chat_history
 
126
  """
127
  resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
128
  resulting_images = []
129
+ if len(resulting_messages) > 0:
130
+ for resulting_message in resulting_messages:
131
+ if resulting_message["role"] == "user":
132
+ for content in resulting_message["content"]:
133
+ if content["type"] == "image":
134
+ resulting_images.append(load_image_from_url(content["image"]))
135
 
136
  # Format history
137
  for turn in chat_history:
 
246
  user_prompt=user_prompt,
247
  chat_history=chat_history,
248
  )
249
+
250
  prompt = PROCESSOR.apply_chat_template(resulting_text, add_generation_prompt=True)
251
  inputs = PROCESSOR(text=prompt, images=resulting_images if resulting_images else None, return_tensors="pt")
252
  inputs = {k: v.to(DEVICE) for k, v in inputs.items()}