Leyo committed on
Commit
23c98b1
1 Parent(s): 65a7de2

better saving data

Browse files
Files changed (1) hide show
  1. app_dialogue.py +51 -13
app_dialogue.py CHANGED
@@ -77,6 +77,7 @@ def load_image_from_url(url):
77
  image = Image.open(image_stream)
78
  return image
79
 
 
80
  def img_to_bytes(image_path):
81
  image = Image.open(image_path)
82
  buffer = io.BytesIO()
@@ -85,6 +86,7 @@ def img_to_bytes(image_path):
85
  image.close()
86
  return img_bytes
87
 
 
88
  def format_user_prompt_with_im_history_and_system_conditioning(
89
  user_prompt, chat_history
90
  ) -> List[Dict[str, Union[List, str]]]:
@@ -248,6 +250,37 @@ def model_inference(
248
  print("-----")
249
 
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  # Hyper-parameters for generation
252
  max_new_tokens = gr.Slider(
253
  minimum=8,
@@ -302,11 +335,12 @@ chatbot = gr.Chatbot(
302
  height=450,
303
  )
304
 
305
- dope_callback = gr.CSVLogger()
306
- problematic_callback = gr.CSVLogger()
307
- dope_dataset_writer = gr.HuggingFaceDatasetSaver(HF_WRITE_TOKEN, "HuggingFaceM4/dope-dataset")
308
- problematic_dataset_writer = gr.HuggingFaceDatasetSaver(HF_WRITE_TOKEN, "HuggingFaceM4/problematic-dataset")
309
-
 
310
  # Using Flagging for saving dope and problematic examples
311
  # Dope examples flagging
312
 
@@ -319,6 +353,8 @@ problematic_dataset_writer = gr.HuggingFaceDatasetSaver(HF_WRITE_TOKEN, "Hugging
319
 
320
  # The second syntax allows inputting an arbitrary number of images.""")
321
 
 
 
322
 
323
  with gr.Blocks(
324
  fill_height=True,
@@ -397,7 +433,8 @@ with gr.Blocks(
397
  dope_dataset_writer.setup(
398
  [
399
  model_selector,
400
- chatbot,
 
401
  decoding_strategy,
402
  temperature,
403
  max_new_tokens,
@@ -407,8 +444,8 @@ with gr.Blocks(
407
  "gradio_dope_data_points",
408
  )
409
  dope_bttn.click(
410
- lambda *args: dope_dataset_writer.flag(args),
411
- [
412
  model_selector,
413
  chatbot,
414
  decoding_strategy,
@@ -417,14 +454,15 @@ with gr.Blocks(
417
  repetition_penalty,
418
  top_p,
419
  ],
420
- None,
421
  preprocess=False,
422
  )
423
  # Problematic examples flagging
424
  problematic_dataset_writer.setup(
425
  [
426
  model_selector,
427
- chatbot,
 
428
  decoding_strategy,
429
  temperature,
430
  max_new_tokens,
@@ -434,8 +472,8 @@ with gr.Blocks(
434
  "gradio_problematic_data_points",
435
  )
436
  problematic_bttn.click(
437
- lambda *args: problematic_dataset_writer.flag(args),
438
- [
439
  model_selector,
440
  chatbot,
441
  decoding_strategy,
@@ -444,7 +482,7 @@ with gr.Blocks(
444
  repetition_penalty,
445
  top_p,
446
  ],
447
- None,
448
  preprocess=False,
449
  )
450
 
 
77
  image = Image.open(image_stream)
78
  return image
79
 
80
+
81
  def img_to_bytes(image_path):
82
  image = Image.open(image_path)
83
  buffer = io.BytesIO()
 
86
  image.close()
87
  return img_bytes
88
 
89
+
90
  def format_user_prompt_with_im_history_and_system_conditioning(
91
  user_prompt, chat_history
92
  ) -> List[Dict[str, Union[List, str]]]:
 
250
  print("-----")
251
 
252
 
253
+ def flag_chat(
254
+ model_selector,
255
+ chat_history,
256
+ decoding_strategy,
257
+ temperature,
258
+ max_new_tokens,
259
+ repetition_penalty,
260
+ top_p,
261
+ ):
262
+ images = []
263
+ conversations = []
264
+ for ex in chat_history:
265
+ if isinstance(ex[0], dict):
266
+ images.append(ex[0]["file"])
267
+ conversations.append([f"User: <image>"])
268
+ else:
269
+ conversations.append([f"User:{ex[0]}", f"\nAssistant:{ex[1]}"])
270
+ dope_dataset_writer.flag(
271
+ flag_data=[
272
+ model_selector,
273
+ images[0],
274
+ conversations,
275
+ decoding_strategy,
276
+ temperature,
277
+ max_new_tokens,
278
+ repetition_penalty,
279
+ top_p,
280
+ ]
281
+ )
282
+
283
+
284
  # Hyper-parameters for generation
285
  max_new_tokens = gr.Slider(
286
  minimum=8,
 
335
  height=450,
336
  )
337
 
338
+ dope_dataset_writer = gr.HuggingFaceDatasetSaver(
339
+ HF_WRITE_TOKEN, "HuggingFaceM4/dope-dataset", private=True
340
+ )
341
+ problematic_dataset_writer = gr.HuggingFaceDatasetSaver(
342
+ HF_WRITE_TOKEN, "HuggingFaceM4/problematic-dataset", private=True
343
+ )
344
  # Using Flagging for saving dope and problematic examples
345
  # Dope examples flagging
346
 
 
353
 
354
  # The second syntax allows inputting an arbitrary number of images.""")
355
 
356
+ image_fake = gr.Image(visible=False)
357
+ text_fake = gr.Textbox(visible=False)
358
 
359
  with gr.Blocks(
360
  fill_height=True,
 
433
  dope_dataset_writer.setup(
434
  [
435
  model_selector,
436
+ image_fake,
437
+ text_fake,
438
  decoding_strategy,
439
  temperature,
440
  max_new_tokens,
 
444
  "gradio_dope_data_points",
445
  )
446
  dope_bttn.click(
447
+ fn=flag_chat,
448
+ inputs=[
449
  model_selector,
450
  chatbot,
451
  decoding_strategy,
 
454
  repetition_penalty,
455
  top_p,
456
  ],
457
+ outputs=None,
458
  preprocess=False,
459
  )
460
  # Problematic examples flagging
461
  problematic_dataset_writer.setup(
462
  [
463
  model_selector,
464
+ image_fake,
465
+ text_fake,
466
  decoding_strategy,
467
  temperature,
468
  max_new_tokens,
 
472
  "gradio_problematic_data_points",
473
  )
474
  problematic_bttn.click(
475
+ fn=flag_chat,
476
+ inputs=[
477
  model_selector,
478
  chatbot,
479
  decoding_strategy,
 
482
  repetition_penalty,
483
  top_p,
484
  ],
485
+ outputs=None,
486
  preprocess=False,
487
  )
488