|
import secrets |
|
from functools import lru_cache |
|
|
|
import gradio as gr |
|
|
|
from llmdataparser import ParserRegistry |
|
from llmdataparser.base_parser import ParseEntry |
|
|
|
|
|
@lru_cache(maxsize=32) |
|
def get_parser_instance(parser_name: str): |
|
"""Get a cached parser instance by name.""" |
|
return ParserRegistry.get_parser(parser_name) |
|
|
|
|
|
def get_available_splits(parser) -> list[str] | None: |
|
"""Get available splits for the selected parser after loading.""" |
|
if not hasattr(parser, "split_names") or not parser.split_names: |
|
return None |
|
return parser.split_names |
|
|
|
|
|
def get_available_tasks(parser) -> list[str]: |
|
"""Get available tasks for the selected parser.""" |
|
if not hasattr(parser, "task_names"): |
|
return ["default"] |
|
return parser.task_names |
|
|
|
|
|
def format_entry_attributes(entry: ParseEntry) -> str: |
|
"""Format all attributes of a ParseEntry except prompt and answer.""" |
|
from dataclasses import fields |
|
|
|
|
|
field_names = [field.name for field in fields(entry)] |
|
|
|
filtered_fields = [name for name in field_names if name not in ["prompt", "answer"]] |
|
|
|
return "\n".join(f"{name}: {getattr(entry, name)}" for name in filtered_fields) |
|
|
|
|
|
def load_and_parse( |
|
parser_name: str, task_name: str | None, split_name: str | None |
|
) -> tuple: |
|
"""Load and parse the dataset, return the first entry and available splits.""" |
|
try: |
|
parser = get_parser_instance(parser_name) |
|
|
|
|
|
parser.load( |
|
task_name=task_name if task_name != "default" else None, |
|
split=split_name, |
|
trust_remote_code=True, |
|
) |
|
|
|
|
|
available_splits = get_available_splits(parser) |
|
|
|
|
|
parser.parse(split_names=split_name, force=True) |
|
|
|
|
|
parsed_data = parser.get_parsed_data |
|
|
|
split_dropdown = gr.Dropdown( |
|
choices=available_splits, |
|
label="Select Split", |
|
interactive=True, |
|
value=None, |
|
allow_custom_value=True, |
|
) |
|
|
|
info = parser.__repr__() |
|
if not parsed_data: |
|
return 0, "No entries found", "", "", split_dropdown, info |
|
|
|
|
|
first_entry = parsed_data[0] |
|
|
|
return ( |
|
0, |
|
first_entry.prompt, |
|
first_entry.raw_question, |
|
first_entry.answer, |
|
format_entry_attributes(first_entry), |
|
split_dropdown, |
|
info, |
|
) |
|
except Exception as e: |
|
|
|
error_msg = f"Failed to load dataset: {str(e)}\nParser: {parser_name}\nTask: {task_name}\nSplit: {split_name}" |
|
return 0, error_msg, "", "", "", [], "" |
|
|
|
|
|
def update_entry(parsed_data_index: int | None, parser_name: str): |
|
"""Update the displayed entry based on the selected index.""" |
|
try: |
|
if not parser_name: |
|
return "Please select a parser first", "", "", "" |
|
|
|
parser = get_parser_instance(parser_name) |
|
parsed_data = parser.get_parsed_data |
|
|
|
if not parsed_data: |
|
return "No data available", "", "", "" |
|
|
|
if parsed_data_index is None: |
|
|
|
random_index = secrets.randbelow(len(parsed_data)) |
|
entry = parsed_data[random_index] |
|
else: |
|
|
|
index = max(0, min(parsed_data_index, len(parsed_data) - 1)) |
|
entry = parsed_data[index] |
|
|
|
return ( |
|
entry.prompt, |
|
entry.raw_question, |
|
entry.answer, |
|
format_entry_attributes(entry), |
|
) |
|
except Exception as e: |
|
return f"Error: {str(e)}", "", "" |
|
|
|
|
|
def update_parser_options(parser_name: str) -> tuple[gr.Dropdown, gr.Dropdown, str]: |
|
"""Update available tasks and splits for the selected parser.""" |
|
try: |
|
parser = get_parser_instance(parser_name) |
|
tasks = get_available_tasks(parser) |
|
default_task = getattr(parser, "_default_task", "default") |
|
|
|
|
|
task_dropdown = gr.Dropdown( |
|
choices=tasks, |
|
value=default_task, |
|
label="Select Task", |
|
interactive=True, |
|
allow_custom_value=True, |
|
) |
|
|
|
|
|
splits = get_available_splits(parser) |
|
split_dropdown = gr.Dropdown( |
|
choices=splits, |
|
label="Select Split", |
|
interactive=True, |
|
value=None, |
|
allow_custom_value=True, |
|
) |
|
|
|
info = parser.__repr__() |
|
return task_dropdown, split_dropdown, info |
|
except Exception as e: |
|
return ( |
|
gr.Dropdown(choices=["default"], value="default"), |
|
gr.Dropdown(choices=[]), |
|
f"Error: {str(e)}", |
|
) |
|
|
|
|
|
def clear_parser_cache(): |
|
"""Clear the parser cache.""" |
|
get_parser_instance.cache_clear() |
|
|
|
|
|
def create_interface(): |
|
with gr.Blocks() as demo: |
|
gr.Markdown("# LLM Evaluation Dataset Parser") |
|
|
|
|
|
parser_state = gr.State("") |
|
dataset_info = gr.Textbox(label="Dataset Info", interactive=False) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
|
|
available_parsers = ParserRegistry.list_parsers() |
|
parser_dropdown = gr.Dropdown( |
|
choices=available_parsers, |
|
label="Select Parser", |
|
value=available_parsers[0] if available_parsers else None, |
|
interactive=True, |
|
allow_custom_value=True, |
|
) |
|
task_dropdown = gr.Dropdown( |
|
choices=["default"], |
|
label="Select Task", |
|
value="default", |
|
interactive=True, |
|
allow_custom_value=True, |
|
) |
|
split_dropdown = gr.Dropdown( |
|
choices=[], |
|
label="Select Split", |
|
interactive=True, |
|
value=None, |
|
allow_custom_value=True, |
|
) |
|
load_button = gr.Button("Load and Parse Dataset", variant="primary") |
|
|
|
|
|
entry_index = gr.Number( |
|
label="Select Entry Index (empty for random)", |
|
precision=0, |
|
interactive=True, |
|
) |
|
update_button = gr.Button("Update/Random Entry", variant="secondary") |
|
|
|
|
|
|
|
|
|
with gr.Column(scale=2): |
|
|
|
prompt_output = gr.Textbox( |
|
label="Prompt", lines=5, show_copy_button=True |
|
) |
|
raw_question_output = gr.Textbox( |
|
label="Raw Question", lines=5, show_copy_button=True |
|
) |
|
answer_output = gr.Textbox( |
|
label="Answer", lines=5, show_copy_button=True |
|
) |
|
attributes_output = gr.Textbox( |
|
label="Other Attributes", lines=5, show_copy_button=True |
|
) |
|
|
|
|
|
parser_dropdown.change( |
|
fn=update_parser_options, |
|
inputs=parser_dropdown, |
|
outputs=[ |
|
task_dropdown, |
|
split_dropdown, |
|
dataset_info, |
|
], |
|
).then(lambda x: x, inputs=parser_dropdown, outputs=parser_state) |
|
|
|
load_button.click( |
|
fn=load_and_parse, |
|
inputs=[parser_dropdown, task_dropdown, split_dropdown], |
|
outputs=[ |
|
entry_index, |
|
prompt_output, |
|
raw_question_output, |
|
answer_output, |
|
attributes_output, |
|
split_dropdown, |
|
dataset_info, |
|
], |
|
api_name="load_and_parse", |
|
show_progress="full", |
|
) |
|
|
|
update_button.click( |
|
fn=update_entry, |
|
inputs=[entry_index, parser_state], |
|
outputs=[ |
|
prompt_output, |
|
raw_question_output, |
|
answer_output, |
|
attributes_output, |
|
], |
|
api_name="update_entry", |
|
) |
|
|
|
return demo |
|
|
|
|
|
if __name__ == "__main__": |
|
demo = create_interface() |
|
demo.launch(share=False) |
|
|