Sebastian Hofstätter
add arxiv link
2853348
import json
import gradio as gr
import os
import random
# load data
# Directory holding one JSON example file per (task, result split) pair.
folder_path = "./data/fid-light-xl/"

# (task identifier used in the example file names, label shown on the UI tab).
_TASK_TABLE = (
    ("fever", "FEVER"),
    ("hotpotqa", "HotpotQA"),
    ("nq", "NQ"),
    ("trivia_qa", "TriviaQA"),
    ("structured_zeroshot", "zsRE"),
    ("trex", "T-REx"),
)
tasks = [task_id for task_id, _ in _TASK_TABLE]
labels = [tab_label for _, tab_label in _TASK_TABLE]

# (human-readable description of a result split, suffix used in its file name).
_MODE_TABLE = (
    ("Perfect (Both text & provenance are correct)", "perfect"),
    ("Double Failure (Both text & provenance are wrong)", "double_failure"),
    ("Correct Text, Wrong Provenance (R-Precision < 1)", "wrong_passage"),
    ("Wrong Text, Correct Provenance (R-Precision == 1)", "wrong_text"),
)
modes = [description for description, _ in _MODE_TABLE]
# Maps each split description to the file-name suffix of that split.
modes_map = dict(_MODE_TABLE)
# Read every (task, result split) example file once at start-up.
#   data[task][mode]         -> parsed JSON content of that split's file
#   total_num_per_task[task] -> number of examples summed over all splits of
#                               the task (the denominator of the percentage
#                               shown by render_examples)
data = {}
total_num_per_task = {}
for task in tasks:
    data[task] = {}
    total_num_per_task[task] = 0
    for mode in modes:
        file_name = "examples_" + task + "_" + modes_map[mode] + ".json"
        # JSON is UTF-8; decode explicitly so that loading does not depend on
        # the platform's locale-specific default encoding.
        with open(os.path.join(folder_path, file_name), encoding="utf-8") as f:
            data[task][mode] = json.load(f)
        total_num_per_task[task] += len(data[task][mode])
def _render_provenance(provenance):
    """Format provenance passages as a numbered Markdown list.

    Each entry of *provenance* is indexed with ``["text"]``. An empty
    sequence yields a placeholder line instead of a list.
    """
    if not provenance:
        return "<< No provenance returned >>\n\n"
    return "".join(
        f"**{rank})** {passage['text']}\n\n"
        for rank, passage in enumerate(provenance, start=1)
    )


def _render_example(example):
    """Format one example (query, target, model output) as Markdown."""
    parts = [
        "#### Query \n" + example["query"] + "\n",
        "#### Target Text" + "\n" + "\n\n".join(example["target_text"]) + "\n",
    ]
    # The target provenance section is optional; it is only rendered when the
    # key is present in the example.
    if "target_provenance" in example:
        parts.append(
            "#### Target Provenance" + "\n"
            + _render_provenance(example["target_provenance"]) + "\n"
        )
    parts.append("#### Output Text" + "\n" + str(example["output_text"]) + "\n")
    parts.append(
        "#### Output Provenance" + "\n"
        + _render_provenance(example["output_provenance"]) + "\n"
    )
    parts.append("\n----\n")
    return "".join(parts)


def render_examples(selected_mode, num_examples=10):
    """Render a random sample of examples of one result split for every task.

    Args:
        selected_mode: Key of the result split; used to index
            ``data[task][selected_mode]``.
        num_examples: Maximum number of examples sampled per task. Fewer are
            rendered when the split holds fewer examples.

    Returns:
        A list with one Markdown string per entry of ``tasks`` (same order).
        Each string starts with the size of the split and its share of all
        examples of the task, followed by the sampled examples.
    """
    all_rendered = []
    for task in tasks:
        pool = data[task][selected_mode]
        total = total_num_per_task[task]
        # A task without any examples would otherwise divide by zero.
        share = round(len(pool) / total * 100, 2) if total else 0.0
        header = (
            "## Statistics\nNumber of examples in this category: **"
            + str(len(pool)) + "** (" + str(share) + "%)\n\n----\n"
        )
        # random.sample raises ValueError when asked for more items than the
        # population holds, so clamp the sample size to the split size.
        sample_size = max(0, min(num_examples, len(pool)))
        sampled = random.sample(pool, sample_size)
        all_rendered.append(
            header + "".join(_render_example(example) for example in sampled)
        )
    return all_rendered
# Build the UI: an intro text, a radio selector for the result split and one
# tab per task showing the rendered examples of the selected split.
with gr.Blocks() as interface:
    gr.Markdown(
        "# FiD-Light Output Explorer \n"
        "This is a random data output explorer for the retrieval augmented generation model FiD-Light on six KILT tasks (showing static dev set results).\n\n"
        "*FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation \nSebastian Hofstätter, Jiecao Chen, Karthik Raman, Hamed Zamani* \n[https://arxiv.org/abs/2209.14290](https://arxiv.org/abs/2209.14290) \n\n"
        "*Every time you click on a result split we load up a new set of 10 random examples from this split for all the tasks.*"
    )

    selected = gr.Radio(modes, value=modes[0], label="Result Split", interactive=True)

    # Render the initially selected split once so every tab starts populated.
    init_data = render_examples(selected.value)
    text_fields = []
    for tab_label, tab_markdown in zip(labels, init_data):
        with gr.Tab(tab_label):
            text_fields.append(gr.Markdown(tab_markdown))

    # Changing the split re-samples and re-renders the content of all tabs;
    # render_examples returns one Markdown string per tab, in tab order.
    selected.change(fn=render_examples, inputs=selected, outputs=text_fields)

interface.launch()