Leyo committed on
Commit
1430cb0
1 Parent(s): a2496cb

add examples

Browse files
app_dialogue.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import subprocess
3
 
4
  # Install flash attention
5
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
6
 
7
 
8
  import copy
@@ -16,6 +16,10 @@ import urllib
16
  from urllib.parse import urlparse
17
  from PIL import Image
18
  import io
 
 
 
 
19
 
20
  import gradio as gr
21
  from transformers import AutoProcessor, TextIteratorStreamer
@@ -98,6 +102,31 @@ SYSTEM_PROMPT = [
98
  {"type": "text", "text": "A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes."},
99
  ],
100
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  ]
102
 
103
 
@@ -126,12 +155,11 @@ def format_user_prompt_with_im_history_and_system_conditioning(
126
  """
127
  resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
128
  resulting_images = []
129
- if len(resulting_messages) > 0:
130
- for resulting_message in resulting_messages:
131
- if resulting_message["role"] == "user":
132
- for content in resulting_message["content"]:
133
- if content["type"] == "image":
134
- resulting_images.append(load_image_from_url(content["image"]))
135
 
136
  # Format history
137
  for turn in chat_history:
@@ -277,6 +305,59 @@ def model_inference(
277
  print("-----")
278
 
279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  # Hyper-parameters for generation
281
  max_new_tokens = gr.Slider(
282
  minimum=8,
@@ -331,6 +412,22 @@ chatbot = gr.Chatbot(
331
  height=450,
332
  )
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
  with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {height: 40px width: 40px !important;}""") as demo:
336
  # model selector should be set to `visible=False` ultimately
@@ -377,5 +474,69 @@ with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {hei
377
  multimodal=True,
378
  additional_inputs=[model_selector, decoding_strategy, temperature, max_new_tokens, repetition_penalty, top_p],
379
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
  demo.launch()
 
2
  import subprocess
3
 
4
  # Install flash attention
5
+ # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
6
 
7
 
8
  import copy
 
16
  from urllib.parse import urlparse
17
  from PIL import Image
18
  import io
19
+ import pandas as pd
20
+ import datasets
21
+ import json
22
+ import requests
23
 
24
  import gradio as gr
25
  from transformers import AutoProcessor, TextIteratorStreamer
 
102
  {"type": "text", "text": "A dog is lying on the floor, looking at the camera. It is looking directly at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes."},
103
  ],
104
  },
105
+ {
106
+ "role": "user",
107
+ "content": [
108
+ {"type": "text", "text": "What can you tell me about this breed of dogs?"},
109
+ ],
110
+ },
111
+ {
112
+ "role": "assistant",
113
+ "content": [
114
+ {"type": "text", "text": "One specific characteristic of the Cavalier King Charles Spaniel is its friendly and affectionate nature. This breed is known for being extremely sociable and forming strong bonds with their owners. They are often described as \"velcro dogs\" because they love to be close to their human companions, whether it's sitting on their laps, cuddling on the couch, or simply following them around the house."},
115
+ ],
116
+ },
117
+ {
118
+ "role": "user",
119
+ "content": [
120
+ {"type": "text", "text": "How many dogs do you see in the following image?"},
121
+ {"type": "image", "image": "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/example_images/tennis_tsonga.jpg?download=true"},
122
+ ],
123
+ },
124
+ {
125
+ "role": "assistant",
126
+ "content": [
127
+ {"type": "text", "text": "There are no dogs in this image. The picture shows a tennis player in the midst of a powerful swing."},
128
+ ],
129
+ },
130
  ]
131
 
132
 
 
155
  """
156
  resulting_messages = copy.deepcopy(SYSTEM_PROMPT)
157
  resulting_images = []
158
+ for resulting_message in resulting_messages:
159
+ if resulting_message["role"] == "user":
160
+ for content in resulting_message["content"]:
161
+ if content["type"] == "image":
162
+ resulting_images.append(load_image_from_url(content["image"]))
 
163
 
164
  # Format history
165
  for turn in chat_history:
 
305
  print("-----")
306
 
307
 
308
def csv_to_hf_dataset(csv_file):
    """Convert a Gradio CSV flagging log into a Hugging Face dataset.

    The second column of the log is expected to hold the chat history as a
    JSON list of two-element entries. An entry whose first element is a dict
    with a ``"file"`` key is treated as an image; every other list entry is
    treated as a ``[user, assistant]`` text exchange.

    Args:
        csv_file: Path (or file-like object) of the CSV log to read.

    Returns:
        A ``datasets.Dataset`` with one row per CSV row and two columns:
        ``images`` (the images loaded for that row) and ``conversation``
        (a list of ``{"user": ..., "assistant": ...}`` dicts).

    Raises:
        json.JSONDecodeError: If a chat-history cell is not valid JSON.
    """
    df = pd.read_csv(csv_file)

    features = datasets.Features(
        {
            "images": datasets.Sequence(datasets.Image(decode=True)),
            "conversation": [
                {
                    "user": datasets.Value("string"),
                    "assistant": datasets.Value("string"),
                }
            ],
        }
    )

    images_column = []
    conversation_column = []
    # Select the chat-history column by position with ``iloc``: plain
    # ``row[1]`` on a label-indexed row is deprecated positional access.
    for raw_history in df.iloc[:, 1]:
        # ``pd.read_csv`` has already un-escaped the CSV doubled quotes, so the
        # cell is valid JSON as-is. Collapsing '""' again would corrupt any
        # string that ends with an escaped quote (``\""`` -> ``\"``).
        discussion_data = json.loads(raw_history)

        images = []
        conversation = []
        for entry in discussion_data:
            if isinstance(entry[0], dict) and "file" in entry[0]:
                # Image message.
                # NOTE(review): the logged "file" values may be local paths
                # rather than URLs; confirm load_image_from_url accepts them.
                images.append(load_image_from_url(entry[0]["file"]))
            elif isinstance(entry, list):
                # Text exchange.
                conversation.append({"user": entry[0], "assistant": entry[1]})

        images_column.append(images)
        conversation_column.append(conversation)

    # Plain lists (rather than pandas Series) keep this working for an empty
    # log and match what ``Dataset.from_dict`` documents as input.
    return datasets.Dataset.from_dict(
        {"images": images_column, "conversation": conversation_column},
        features=features,
    )
352
+
353
+
354
def update_dope_problematic_dataset_fn():
    """Rebuild both flagged-example datasets from their CSV logs and push them.

    The "dope" log is converted and pushed first, then the "problematic" one;
    each is pushed to its Hub repository as a private dataset.
    """
    log_to_repo = (
        ("gradio_dope_data_points/log.csv", "HuggingFaceM4/dope_chatty_dataset"),
        ("gradio_problematic_data_points/log.csv", "HuggingFaceM4/problematic_chatty_dataset"),
    )
    for log_path, repo_id in log_to_repo:
        flagged_dataset = csv_to_hf_dataset(log_path)
        flagged_dataset.push_to_hub(repo_id, private=True)
359
+
360
+
361
  # Hyper-parameters for generation
362
  max_new_tokens = gr.Slider(
363
  minimum=8,
 
412
  height=450,
413
  )
414
 
415
+ dope_callback = gr.CSVLogger()
416
+ problematic_callback = gr.CSVLogger()
417
+
418
+
419
+ # Using Flagging for saving dope and problematic examples
420
+ # Dope examples flagging
421
+
422
+
423
+ # gr.Markdown("""## How to use?
424
+
425
+ # There are two ways to provide image inputs:
426
+ # - Using the image box on the left panel
427
+ # - Using the inline syntax: `text<fake_token_around_image><image:URL_IMAGE><fake_token_around_image>text`
428
+
429
+ # The second syntax allows inputting an arbitrary number of images.""")
430
+
431
 
432
  with gr.Blocks(fill_height=True, css=""".gradio-container .avatar-container {height: 40px width: 40px !important;}""") as demo:
433
  # model selector should be set to `visible=False` ultimately
 
474
  multimodal=True,
475
  additional_inputs=[model_selector, decoding_strategy, temperature, max_new_tokens, repetition_penalty, top_p],
476
  )
477
+ with gr.Group():
478
+ with gr.Row():
479
+ with gr.Column(scale=1, min_width=50):
480
+ dope_bttn = gr.Button("Dope🔥")
481
+ with gr.Column(scale=1, min_width=50):
482
+ problematic_bttn = gr.Button("Problematic😬")
483
+ with gr.Row():
484
+ update_dope_problematic_dataset = gr.Button("Update Dope/Problematic dataset📚")
485
+ dope_callback.setup(
486
+ [
487
+ model_selector,
488
+ chatbot,
489
+ decoding_strategy,
490
+ temperature,
491
+ max_new_tokens,
492
+ repetition_penalty,
493
+ top_p,
494
+ ],
495
+ "gradio_dope_data_points",
496
+ )
497
+ dope_bttn.click(
498
+ lambda *args: dope_callback.flag(args),
499
+ [
500
+ model_selector,
501
+ chatbot,
502
+ decoding_strategy,
503
+ temperature,
504
+ max_new_tokens,
505
+ repetition_penalty,
506
+ top_p,
507
+ ],
508
+ None,
509
+ preprocess=False,
510
+ )
511
+ # Problematic examples flagging
512
+ problematic_callback.setup(
513
+ [
514
+ model_selector,
515
+ chatbot,
516
+ decoding_strategy,
517
+ temperature,
518
+ max_new_tokens,
519
+ repetition_penalty,
520
+ top_p,
521
+ ],
522
+ "gradio_problematic_data_points",
523
+ )
524
+ problematic_bttn.click(
525
+ lambda *args: problematic_callback.flag(args),
526
+ [
527
+ model_selector,
528
+ chatbot,
529
+ decoding_strategy,
530
+ temperature,
531
+ max_new_tokens,
532
+ repetition_penalty,
533
+ top_p,
534
+ ],
535
+ None,
536
+ preprocess=False,
537
+ )
538
+ update_dope_problematic_dataset.click(
539
+ fn=update_dope_problematic_dataset_fn,
540
+ )
541
 
542
  demo.launch()
gradio_dope_data_points/Idefics2/3c521493856dddb7c914/1-Figure1-1.png ADDED

Git LFS Details

  • SHA256: d46f2070b499928f1cb3f2b51d9c2af1c665ab4fe8f5ab2d7f4a01d4321838e5
  • Pointer size: 131 Bytes
  • Size of remote file: 117 kB
gradio_dope_data_points/Idefics2/621831e337f23a0cfa20/5_differences.jpeg ADDED
gradio_dope_data_points/Idefics2/bfbb7b61084cfb7ddfdf/1-Figure1-1.png ADDED

Git LFS Details

  • SHA256: d46f2070b499928f1cb3f2b51d9c2af1c665ab4fe8f5ab2d7f4a01d4321838e5
  • Pointer size: 131 Bytes
  • Size of remote file: 117 kB
gradio_dope_data_points/Idefics2/c9d5e002601021786000/5_differences.jpeg ADDED
gradio_dope_data_points/log.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Idefics2,Decoding strategy,Sampling temperature,Maximum number of new tokens to generate,Repetition penalty,Top P,flag,username,timestamp
2
+ idefics2-8b-chatty (chat),"[[{""file"": ""gradio_dope_data_points/Idefics2/621831e337f23a0cfa20/5_differences.jpeg"", ""alt_text"": null}, null], [""find the 5 differences"", ""In the image, there are five differences between the two bears:\n\n1. The bear on the left is holding a fish in its mouth.\n2. The bear on the right is holding a bug in its mouth.\n3. The bear on the left is wearing a hat.\n4. The bear on the right is wearing glasses.\n5. The bear on the left is holding a stick.\n\nThese differences make the two bears appear distinct from each other.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 15:06:23.515619
3
+ idefics2-8b-chatty (chat),"[[{""file"": ""gradio_dope_data_points/Idefics2/3c521493856dddb7c914/1-Figure1-1.png"", ""alt_text"": null}, null], [""read the text"", ""The first image depicts a lightbulb surrounded by plants, while the second image shows a hanging lightbulb with a plant inside it. Both images feature a lightbulb surrounded by greenery, creating a unique and visually appealing scene.""], [""what text is written exactly?"", ""The text reads: \""(a) some plants surrounding a lightbulb (b) a lightbulb surrounded by some plants.\""""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 15:13:57.547009
4
+ idefics2-8b-chatty (chat),"[[{""file"": ""gradio_dope_data_points/Idefics2/bfbb7b61084cfb7ddfdf/1-Figure1-1.png"", ""alt_text"": null}, null], [{""file"": ""gradio_dope_data_points/Idefics2/c9d5e002601021786000/5_differences.jpeg"", ""alt_text"": null}, null], [""how do these 2 images differ?"", ""The first image features a group of kittens standing in the grass, while the second image is a coloring book page with a bear and her cub.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 15:59:57.962236
gradio_problematic_data_points/Idefics2/4e1cacac141902de6e7e/5_differences.jpeg ADDED
gradio_problematic_data_points/Idefics2/4f57685fb80ffdb08c0b/5_differences.jpeg ADDED
gradio_problematic_data_points/log.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model,Idefics2,Decoding strategy,Sampling temperature,Maximum number of new tokens to generate,Repetition penalty,Top P,flag,username,timestamp
2
+ idefics2-8b-chatty (chat),"[[{""file"": ""gradio_problematic_data_points/Idefics2/4e1cacac141902de6e7e/5_differences.jpeg"", ""alt_text"": null}, null], [""list all the differences"", ""In the first image, the bear family consists of three bears - a mother bear and her two cubs. In the second image, there are only two bears - a mother bear and her single cub. Additionally, the background scenery in the second image includes trees and flowers, while the first image does not have any such elements.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 18:47:44.337779
3
+ idefics2-8b-chatty (chat),"[[{""file"": ""gradio_problematic_data_points/Idefics2/4f57685fb80ffdb08c0b/5_differences.jpeg"", ""alt_text"": null}, null], [""what differences are there ?"", ""In the first image, the bears are standing next to each other under a tree, while in the second image, they are walking down a path. Additionally, the bears have different expressions in both images.""]]",Greedy,0.4,512,1.1,0.8,,,2024-04-18 18:50:24.100295