JeffYang52415 committed on
Commit
e924b16
·
unverified ·
1 Parent(s): 952a3b5

feat: add gradio app

Browse files
Files changed (2) hide show
  1. .pre-commit-config.yaml +4 -0
  2. app.py +272 -0
.pre-commit-config.yaml CHANGED
@@ -65,10 +65,14 @@ repos:
65
  - id: prettier
66
  types_or: [markdown, yaml]
67
  - repo: https://github.com/astral-sh/ruff-pre-commit
 
68
  rev: v0.4.4
69
  hooks:
 
70
  - id: ruff
71
  args: [--fix]
 
 
72
  - repo: https://github.com/kynan/nbstripout
73
  rev: 0.5.0 # use the latest version
74
  hooks:
 
65
  - id: prettier
66
  types_or: [markdown, yaml]
67
  - repo: https://github.com/astral-sh/ruff-pre-commit
68
+ # Ruff version.
69
  rev: v0.4.4
70
  hooks:
71
+ # Run the linter.
72
  - id: ruff
73
  args: [--fix]
74
+ # Run the formatter.
75
+ - id: ruff-format
76
  - repo: https://github.com/kynan/nbstripout
77
  rev: 0.5.0 # use the latest version
78
  hooks:
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import secrets
2
+ from functools import lru_cache
3
+
4
+ import gradio as gr
5
+
6
+ from llmdataparser import ParserRegistry
7
+ from llmdataparser.base_parser import ParseEntry
8
+
9
+
10
@lru_cache(maxsize=32)
def get_parser_instance(parser_name: str):
    """Return the parser that ``ParserRegistry`` provides for ``parser_name``.

    The result is memoized per name (at most 32 names are kept), so repeated
    calls with the same name return the same object instead of asking the
    registry again.
    """
    parser = ParserRegistry.get_parser(parser_name)
    return parser
14
+
15
+
16
+ def get_available_splits(parser) -> list[str] | None:
17
+ """Get available splits for the selected parser after loading."""
18
+ if not hasattr(parser, "split_names") or not parser.split_names:
19
+ return None
20
+ return parser.split_names
21
+
22
+
23
def get_available_tasks(parser) -> list[str]:
    """Return the task names to offer for ``parser``.

    Falls back to the single placeholder ``["default"]`` when the parser has
    no ``task_names`` attribute, or when that attribute is ``None`` or empty,
    so the caller always receives a non-empty list of choices.
    """
    tasks = getattr(parser, "task_names", None)
    if not tasks:
        # Treat a missing, None or empty task list the same way, mirroring how
        # get_available_splits handles falsy split names.
        return ["default"]
    return tasks
28
+
29
+
30
def format_entry_attributes(entry: ParseEntry) -> str:
    """Render the dataclass fields of ``entry`` other than prompt and answer.

    Each remaining field becomes one ``name: value`` line, in the order the
    dataclass declares its fields; the lines are joined with newlines.
    """
    from dataclasses import fields

    hidden = ("prompt", "answer")
    lines = []
    for spec in fields(entry):
        if spec.name in hidden:
            continue
        lines.append(f"{spec.name}: {getattr(entry, spec.name)}")
    return "\n".join(lines)
40
+
41
+
42
def load_and_parse(
    parser_name: str, task_name: str | None, split_name: str | None
) -> tuple:
    """Load and parse a dataset and return values for the seven UI outputs.

    Every return path yields exactly seven items, in the order expected by the
    load button's ``outputs`` list: entry index, prompt, raw question, answer,
    other attributes, split dropdown, dataset info.

    Args:
        parser_name: Name used to obtain the parser via ``get_parser_instance``.
        task_name: Task to load; the placeholder ``"default"`` is passed on as
            ``None``.
        split_name: Split to load and parse; forwarded unchanged.

    Returns:
        A 7-tuple. On success it describes the first parsed entry. When parsing
        yields nothing, the prompt slot carries ``"No entries found"``. On any
        exception the prompt slot carries the error message and the split
        dropdown is left as it was.
    """
    try:
        parser = get_parser_instance(parser_name)

        # NOTE(review): trust_remote_code=True is presumably forwarded to the
        # underlying dataset loader and would allow dataset-supplied code to
        # run; confirm every registered parser points at a trusted source.
        parser.load(
            task_name=task_name if task_name != "default" else None,
            split=split_name,
            trust_remote_code=True,
        )

        # Split names are only read after load() has run.
        available_splits = get_available_splits(parser)

        parser.parse(split_names=split_name, force=True)
        parsed_data = parser.get_parsed_data

        split_dropdown = gr.Dropdown(
            choices=available_splits,
            label="Select Split",
            interactive=True,
            value=None,
            allow_custom_value=True,
        )
        info = repr(parser)

        if not parsed_data:
            # Seven values: the attributes slot must be filled as well,
            # otherwise the handler returns fewer values than it has outputs.
            return 0, "No entries found", "", "", "", split_dropdown, info

        first_entry = parsed_data[0]
        return (
            0,  # index of the entry being shown
            first_entry.prompt,
            first_entry.raw_question,
            first_entry.answer,
            format_entry_attributes(first_entry),
            split_dropdown,
            info,
        )
    except Exception as e:
        # UI boundary: surface the failure in the prompt box instead of raising.
        error_msg = f"Failed to load dataset: {str(e)}\nParser: {parser_name}\nTask: {task_name}\nSplit: {split_name}"
        # gr.update() with no arguments leaves the split dropdown unchanged
        # rather than pushing a bare list into it as a value.
        return 0, error_msg, "", "", "", gr.update(), ""
93
+
94
+
95
+ def update_entry(parsed_data_index: int | None, parser_name: str):
96
+ """Update the displayed entry based on the selected index."""
97
+ try:
98
+ if not parser_name:
99
+ return "Please select a parser first", "", "", ""
100
+
101
+ parser = get_parser_instance(parser_name)
102
+ parsed_data = parser.get_parsed_data
103
+
104
+ if not parsed_data:
105
+ return "No data available", "", "", ""
106
+
107
+ if parsed_data_index is None:
108
+ # Random selection using secrets instead of random
109
+ random_index = secrets.randbelow(len(parsed_data))
110
+ entry = parsed_data[random_index]
111
+ else:
112
+ # Ensure index is within bounds
113
+ index = max(0, min(parsed_data_index, len(parsed_data) - 1))
114
+ entry = parsed_data[index]
115
+
116
+ return (
117
+ entry.prompt,
118
+ entry.raw_question,
119
+ entry.answer,
120
+ format_entry_attributes(entry),
121
+ )
122
+ except Exception as e:
123
+ return f"Error: {str(e)}", "", ""
124
+
125
+
126
def update_parser_options(parser_name: str) -> tuple[gr.Dropdown, gr.Dropdown, str]:
    """Build the task dropdown, split dropdown and info text for a parser.

    On any exception, returns a task dropdown holding only ``"default"``, a
    split dropdown with no choices, and the error message as the info text.
    """
    # Settings shared by both dropdowns.
    common = {"interactive": True, "allow_custom_value": True}
    try:
        parser = get_parser_instance(parser_name)

        task_choices = get_available_tasks(parser)
        preselected_task = getattr(parser, "_default_task", "default")
        task_dropdown = gr.Dropdown(
            choices=task_choices,
            value=preselected_task,
            label="Select Task",
            **common,
        )

        # The split selection is reset explicitly (value=None).
        split_dropdown = gr.Dropdown(
            choices=get_available_splits(parser),
            label="Select Split",
            value=None,
            **common,
        )

        return task_dropdown, split_dropdown, parser.__repr__()
    except Exception as e:
        fallback_tasks = gr.Dropdown(choices=["default"], value="default")
        fallback_splits = gr.Dropdown(choices=[])
        return fallback_tasks, fallback_splits, f"Error: {str(e)}"
160
+
161
+
162
def clear_parser_cache():
    """Drop every parser memoized by ``get_parser_instance``."""
    get_parser_instance.cache_clear()
165
+
166
+
167
def create_interface():
    """Build the Gradio Blocks UI and wire up its event handlers.

    Layout: a dataset-info textbox on top, then a row with a control column
    (parser, task and split dropdowns, load button, entry index, update
    button) and an output column (prompt, raw question, answer, other
    attributes).

    Returns:
        The ``gr.Blocks`` instance; it is not launched here.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# LLM Evaluation Dataset Parser")

        available_parsers = ParserRegistry.list_parsers()
        default_parser = available_parsers[0] if available_parsers else None

        # State management.
        # The state mirrors the selected parser name. It is seeded with the
        # dropdown's initial value because the copy below only runs on a
        # change event; starting from "" would make update_entry report that
        # no parser is selected when the pre-selected parser is used.
        parser_state = gr.State(default_parser or "")
        dataset_info = gr.Textbox(label="Dataset Info", interactive=False)

        with gr.Row():
            with gr.Column(scale=1):
                # Parser selection and controls
                parser_dropdown = gr.Dropdown(
                    choices=available_parsers,
                    label="Select Parser",
                    value=default_parser,
                    interactive=True,
                    allow_custom_value=True,
                )
                task_dropdown = gr.Dropdown(
                    choices=["default"],
                    label="Select Task",
                    value="default",
                    interactive=True,
                    allow_custom_value=True,
                )
                split_dropdown = gr.Dropdown(
                    choices=[],
                    label="Select Split",
                    interactive=True,
                    value=None,
                    allow_custom_value=True,
                )
                load_button = gr.Button("Load and Parse Dataset", variant="primary")

                # Entry selection
                entry_index = gr.Number(
                    label="Select Entry Index (empty for random)",
                    precision=0,
                    interactive=True,
                )
                update_button = gr.Button("Update/Random Entry", variant="secondary")

                # NOTE: clear_parser_cache is not wired to any control yet.

            with gr.Column(scale=2):
                # Output displays
                prompt_output = gr.Textbox(
                    label="Prompt", lines=5, show_copy_button=True
                )
                raw_question_output = gr.Textbox(
                    label="Raw Question", lines=5, show_copy_button=True
                )
                answer_output = gr.Textbox(
                    label="Answer", lines=5, show_copy_button=True
                )
                attributes_output = gr.Textbox(
                    label="Other Attributes", lines=5, show_copy_button=True
                )

        # Event handlers
        # Changing the parser refreshes the task/split choices and the info
        # text, then copies the selected name into parser_state.
        parser_dropdown.change(
            fn=update_parser_options,
            inputs=parser_dropdown,
            outputs=[
                task_dropdown,
                split_dropdown,
                dataset_info,
            ],
        ).then(lambda x: x, inputs=parser_dropdown, outputs=parser_state)

        load_button.click(
            fn=load_and_parse,
            inputs=[parser_dropdown, task_dropdown, split_dropdown],
            outputs=[
                entry_index,
                prompt_output,
                raw_question_output,
                answer_output,
                attributes_output,
                split_dropdown,
                dataset_info,
            ],
            api_name="load_and_parse",
            show_progress="full",
        )

        update_button.click(
            fn=update_entry,
            inputs=[entry_index, parser_state],
            outputs=[
                prompt_output,
                raw_question_output,
                answer_output,
                attributes_output,
            ],
            api_name="update_entry",
        )

    return demo
268
+
269
+
270
if __name__ == "__main__":
    demo = create_interface()
    # share=False: no public share link is requested; pass share=True to
    # enable sharing for remote access.
    demo.launch(share=False)