|
import random |
|
import gradio as gr |
|
from datasets import load_dataset |
|
import os |
|
|
|
# Access token for the Hugging Face Hub, taken from the environment
# (None when the variable is not set).
auth_token = os.environ.get("auth_token")

# Only the 'test' split of the dataset is used by this app.
# NOTE(review): newer `datasets` releases renamed `use_auth_token` to
# `token` -- confirm against the pinned library version before upgrading.
whoops = load_dataset(
    "nlphuji/whoops",
    use_auth_token=auth_token,
)['test']

# Startup log: the first record followed by the total number of records.
print("Loaded WMTIS, first example:", whoops[0], sep="\n")

dataset_size = len(whoops)
print("all dataset size:", dataset_size)
|
|
|
# Column names of the dataset records.
IMAGE = 'image'
IMAGE_DESIGNER = 'image_designer'
DESIGNER_EXPLANATION = 'designer_explanation'
CROWD_CAPTIONS = 'crowd_captions'
CROWD_EXPLANATIONS = 'crowd_explanations'
CROWD_UNDERSPECIFIED_CAPTIONS = 'crowd_underspecified_captions'
SELECTED_CAPTION = 'selected_caption'
COMMONSENSE_CATEGORY = 'commonsense_category'
QA = 'question_answering_pairs'
IMAGE_ID = 'image_id'

# Columns rendered directly in each grid cell.
left_side_columns = [IMAGE]

# Every remaining dataset column except the QA pairs; these are rendered
# inside the collapsible details section.
right_side_columns = [
    column
    for column in whoops.features.keys()
    if column not in (*left_side_columns, QA)
]

# List-valued columns that are rendered as numbered lines.
enumerate_cols = [CROWD_CAPTIONS, CROWD_EXPLANATIONS, CROWD_UNDERSPECIFIED_CAPTIONS]

# Decorative suffix appended to each column's textbox label.
# NOTE(review): these values look like emoji that were mis-decoded at some
# point -- restore them from the original UTF-8 source if available.
emoji_to_label = {
    IMAGE_DESIGNER: 'π¨, π§βπ¨, π»',
    DESIGNER_EXPLANATION: 'π‘, π€, π§βπ¨',
    CROWD_CAPTIONS: 'π₯, π¬, π',
    CROWD_EXPLANATIONS: 'π₯, π‘, π€',
    CROWD_UNDERSPECIFIED_CAPTIONS: 'π₯, π¬, π',
    QA: 'β, π€, π‘',
    IMAGE_ID: 'π, π, πΎ',
    COMMONSENSE_CATEGORY: 'π€, π, π‘',
    SELECTED_CAPTION: 'π, π, π¬',
}

# (width, height) every image is resized to before display.
target_size = (1024, 1024)
|
|
|
def get_instance_values(example):
    """Return one display value per column for a single dataset record.

    The result follows the order ``left_side_columns + right_side_columns``.
    Columns listed in ``enumerate_cols`` are turned into a numbered,
    newline-separated string; the QA column is first formatted as
    "Q: ... A: ..." entries and then numbered the same way; every other
    column's value is passed through unchanged.
    """

    def display_value(column):
        raw = example[column]
        if column in enumerate_cols:
            return list_to_string(raw)
        if column == QA:
            # Each QA item is indexed as (question, answer).
            return list_to_string([f"Q: {pair[0]} A: {pair[1]}" for pair in raw])
        return raw

    return [display_value(column) for column in left_side_columns + right_side_columns]
|
|
|
|
|
def list_to_string(lst):
    """Format the items of ``lst`` as a numbered list, one "N. item" per line.

    Numbering starts at 1; an empty input yields an empty string.
    """
    numbered_lines = (
        f"{position}. {entry}" for position, entry in enumerate(lst, start=1)
    )
    return "\n".join(numbered_lines)
|
|
|
def _field_label(key):
    """Build a textbox label: the prettified column name plus its emoji tag."""
    label = key.capitalize().replace("_", " ")
    # right_side_columns is derived from the dataset schema while
    # emoji_to_label is a fixed dict, so a column may have no entry; fall
    # back to the bare name instead of raising KeyError during page build.
    emoji = emoji_to_label.get(key)
    return f"{label} {emoji}" if emoji else label


def plot_image(index):
    """Create the gradio components for the sampled example at ``index``.

    Left-side columns are rendered directly (the image resized to
    ``target_size`` and labelled with the example's commonsense category);
    the remaining columns are rendered as textboxes inside a collapsed
    "Click for details" accordion. The components are created in whatever
    gradio layout context is active at call time; nothing is returned.

    Args:
        index: Position of the example within ``whoops_sample``.
    """
    # Fetch the record once and reuse it; the previous version indexed
    # whoops_sample again for the image and again for its label.
    example = whoops_sample[index]
    instance_values = get_instance_values(example)

    split_at = len(left_side_columns)
    # Internal invariant: exactly one value per displayed column.
    assert len(instance_values) == split_at + len(right_side_columns)
    left_values = instance_values[:split_at]
    right_values = instance_values[split_at:]

    for key, value in zip(left_side_columns, left_values):
        if key == IMAGE:
            # `value` is example[IMAGE], passed through unchanged by
            # get_instance_values.
            gr.Image(
                value=value.resize(target_size),
                label=example[COMMONSENSE_CATEGORY],
            )
        else:
            gr.Textbox(value=value, label=_field_label(key))

    with gr.Accordion("Click for details", open=False):
        for key, value in zip(right_side_columns, right_values):
            gr.Textbox(value=value, label=_field_label(key))
|
|
|
|
|
# Grid dimensions: one sampled example is shown per cell.
columns_number = 3
rows_number = 25

# Shuffle the dataset and keep just enough examples to fill the grid.
whoops_sample = whoops.shuffle().select(range(columns_number * rows_number))

# Running position in whoops_sample of the next example to render.
index = 0

with gr.Blocks() as demo:
    gr.Markdown("# WHOOPS! Dataset Explorer")
    for row_idx in range(rows_number):
        with gr.Row():
            for col_idx in range(columns_number):
                with gr.Column():
                    plot_image(index)
                index += 1

demo.launch()
|
|