Spaces:
Running
on
Zero
Running
on
Zero
better saving data
Browse files- app_dialogue.py +51 -13
app_dialogue.py
CHANGED
@@ -77,6 +77,7 @@ def load_image_from_url(url):
|
|
77 |
image = Image.open(image_stream)
|
78 |
return image
|
79 |
|
|
|
80 |
def img_to_bytes(image_path):
|
81 |
image = Image.open(image_path)
|
82 |
buffer = io.BytesIO()
|
@@ -85,6 +86,7 @@ def img_to_bytes(image_path):
|
|
85 |
image.close()
|
86 |
return img_bytes
|
87 |
|
|
|
88 |
def format_user_prompt_with_im_history_and_system_conditioning(
|
89 |
user_prompt, chat_history
|
90 |
) -> List[Dict[str, Union[List, str]]]:
|
@@ -248,6 +250,37 @@ def model_inference(
|
|
248 |
print("-----")
|
249 |
|
250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
# Hyper-parameters for generation
|
252 |
max_new_tokens = gr.Slider(
|
253 |
minimum=8,
|
@@ -302,11 +335,12 @@ chatbot = gr.Chatbot(
|
|
302 |
height=450,
|
303 |
)
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
problematic_dataset_writer = gr.HuggingFaceDatasetSaver(
|
309 |
-
|
|
|
310 |
# Using Flagging for saving dope and problematic examples
|
311 |
# Dope examples flagging
|
312 |
|
@@ -319,6 +353,8 @@ problematic_dataset_writer = gr.HuggingFaceDatasetSaver(HF_WRITE_TOKEN, "Hugging
|
|
319 |
|
320 |
# The second syntax allows inputting an arbitrary number of images.""")
|
321 |
|
|
|
|
|
322 |
|
323 |
with gr.Blocks(
|
324 |
fill_height=True,
|
@@ -397,7 +433,8 @@ with gr.Blocks(
|
|
397 |
dope_dataset_writer.setup(
|
398 |
[
|
399 |
model_selector,
|
400 |
-
|
|
|
401 |
decoding_strategy,
|
402 |
temperature,
|
403 |
max_new_tokens,
|
@@ -407,8 +444,8 @@ with gr.Blocks(
|
|
407 |
"gradio_dope_data_points",
|
408 |
)
|
409 |
dope_bttn.click(
|
410 |
-
|
411 |
-
[
|
412 |
model_selector,
|
413 |
chatbot,
|
414 |
decoding_strategy,
|
@@ -417,14 +454,15 @@ with gr.Blocks(
|
|
417 |
repetition_penalty,
|
418 |
top_p,
|
419 |
],
|
420 |
-
None,
|
421 |
preprocess=False,
|
422 |
)
|
423 |
# Problematic examples flagging
|
424 |
problematic_dataset_writer.setup(
|
425 |
[
|
426 |
model_selector,
|
427 |
-
|
|
|
428 |
decoding_strategy,
|
429 |
temperature,
|
430 |
max_new_tokens,
|
@@ -434,8 +472,8 @@ with gr.Blocks(
|
|
434 |
"gradio_problematic_data_points",
|
435 |
)
|
436 |
problematic_bttn.click(
|
437 |
-
|
438 |
-
[
|
439 |
model_selector,
|
440 |
chatbot,
|
441 |
decoding_strategy,
|
@@ -444,7 +482,7 @@ with gr.Blocks(
|
|
444 |
repetition_penalty,
|
445 |
top_p,
|
446 |
],
|
447 |
-
None,
|
448 |
preprocess=False,
|
449 |
)
|
450 |
|
|
|
77 |
image = Image.open(image_stream)
|
78 |
return image
|
79 |
|
80 |
+
|
81 |
def img_to_bytes(image_path):
|
82 |
image = Image.open(image_path)
|
83 |
buffer = io.BytesIO()
|
|
|
86 |
image.close()
|
87 |
return img_bytes
|
88 |
|
89 |
+
|
90 |
def format_user_prompt_with_im_history_and_system_conditioning(
|
91 |
user_prompt, chat_history
|
92 |
) -> List[Dict[str, Union[List, str]]]:
|
|
|
250 |
print("-----")
|
251 |
|
252 |
|
253 |
+
def flag_chat(
    model_selector,
    chat_history,
    decoding_strategy,
    temperature,
    max_new_tokens,
    repetition_penalty,
    top_p,
    dataset_writer=None,
):
    """Flatten a chatbot history and send it to a flagging dataset writer.

    Each entry of ``chat_history`` is indexed as ``(user, assistant)``. When
    the user part is a dict, its ``"file"`` value is collected as an image and
    the turn is recorded as ``["User: <image>"]``; otherwise the turn is
    recorded as ``["User:<user>", "\\nAssistant:<assistant>"]``.

    Args:
        model_selector: Value of the model selector component.
        chat_history: Sequence of chat turns; ``None`` is treated as empty.
        decoding_strategy: Value of the decoding strategy component.
        temperature: Sampling temperature used for generation.
        max_new_tokens: Generation length limit.
        repetition_penalty: Repetition penalty used for generation.
        top_p: Nucleus sampling value used for generation.
        dataset_writer: Object exposing ``flag(flag_data=...)``. Defaults to
            the module-level ``dope_dataset_writer``, which is what the
            original implementation always used. Callers that flag into a
            different dataset (e.g. the "problematic" button) can bind their
            own writer, for instance with ``functools.partial``.

    Returns:
        None. The only effect is the call to ``dataset_writer.flag``.
    """
    if dataset_writer is None:
        # Resolved at call time so the module-level writer may be defined
        # after this function.
        dataset_writer = dope_dataset_writer

    images = []
    conversations = []
    # An untouched chatbot may hand over None instead of an empty list.
    for turn in chat_history or []:
        user_part = turn[0]
        if isinstance(user_part, dict):
            images.append(user_part["file"])
            conversations.append(["User: <image>"])
        else:
            conversations.append(
                [f"User:{user_part}", f"\nAssistant:{turn[1]}"]
            )

    # Only the first image is saved. Text-only conversations used to crash
    # here with IndexError on images[0]; they now pass None in the image slot.
    # NOTE(review): confirm the configured saver accepts None for the image
    # component.
    first_image = images[0] if images else None

    dataset_writer.flag(
        flag_data=[
            model_selector,
            first_image,
            conversations,
            decoding_strategy,
            temperature,
            max_new_tokens,
            repetition_penalty,
            top_p,
        ]
    )
|
282 |
+
|
283 |
+
|
284 |
# Hyper-parameters for generation
|
285 |
max_new_tokens = gr.Slider(
|
286 |
minimum=8,
|
|
|
335 |
height=450,
|
336 |
)
|
337 |
|
338 |
+
dope_dataset_writer = gr.HuggingFaceDatasetSaver(
|
339 |
+
HF_WRITE_TOKEN, "HuggingFaceM4/dope-dataset", private=True
|
340 |
+
)
|
341 |
+
problematic_dataset_writer = gr.HuggingFaceDatasetSaver(
|
342 |
+
HF_WRITE_TOKEN, "HuggingFaceM4/problematic-dataset", private=True
|
343 |
+
)
|
344 |
# Using Flagging for saving dope and problematic examples
|
345 |
# Dope examples flagging
|
346 |
|
|
|
353 |
|
354 |
# The second syntax allows inputting an arbitrary number of images.""")
|
355 |
|
356 |
+
image_fake = gr.Image(visible=False)
|
357 |
+
text_fake = gr.Textbox(visible=False)
|
358 |
|
359 |
with gr.Blocks(
|
360 |
fill_height=True,
|
|
|
433 |
dope_dataset_writer.setup(
|
434 |
[
|
435 |
model_selector,
|
436 |
+
image_fake,
|
437 |
+
text_fake,
|
438 |
decoding_strategy,
|
439 |
temperature,
|
440 |
max_new_tokens,
|
|
|
444 |
"gradio_dope_data_points",
|
445 |
)
|
446 |
dope_bttn.click(
|
447 |
+
fn=flag_chat,
|
448 |
+
inputs=[
|
449 |
model_selector,
|
450 |
chatbot,
|
451 |
decoding_strategy,
|
|
|
454 |
repetition_penalty,
|
455 |
top_p,
|
456 |
],
|
457 |
+
outputs=None,
|
458 |
preprocess=False,
|
459 |
)
|
460 |
# Problematic examples flagging
|
461 |
problematic_dataset_writer.setup(
|
462 |
[
|
463 |
model_selector,
|
464 |
+
image_fake,
|
465 |
+
text_fake,
|
466 |
decoding_strategy,
|
467 |
temperature,
|
468 |
max_new_tokens,
|
|
|
472 |
"gradio_problematic_data_points",
|
473 |
)
|
474 |
problematic_bttn.click(
|
475 |
+
fn=flag_chat,
|
476 |
+
inputs=[
|
477 |
model_selector,
|
478 |
chatbot,
|
479 |
decoding_strategy,
|
|
|
482 |
repetition_penalty,
|
483 |
top_p,
|
484 |
],
|
485 |
+
outputs=None,
|
486 |
preprocess=False,
|
487 |
)
|
488 |
|