Spaces:
Sleeping
Sleeping
add task reset button
Browse files
app.ipynb
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
-
"execution_count":
|
14 |
"id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
|
15 |
"metadata": {},
|
16 |
"outputs": [
|
@@ -18,7 +18,7 @@
|
|
18 |
"name": "stdout",
|
19 |
"output_type": "stream",
|
20 |
"text": [
|
21 |
-
"Running on local URL: http://127.0.0.1:
|
22 |
"\n",
|
23 |
"To create a public link, set `share=True` in `launch()`.\n"
|
24 |
]
|
@@ -26,7 +26,7 @@
|
|
26 |
{
|
27 |
"data": {
|
28 |
"text/html": [
|
29 |
-
"<div><iframe src=\"http://127.0.0.1:
|
30 |
],
|
31 |
"text/plain": [
|
32 |
"<IPython.core.display.HTML object>"
|
@@ -53,16 +53,20 @@
|
|
53 |
"\n",
|
54 |
" def setup_interface(self):\n",
|
55 |
" \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
|
56 |
-
" with gr.Blocks(title='AB Evaluate RAG') as self.interface:\n",
|
57 |
"\n",
|
58 |
" # protected fields\n",
|
59 |
" _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
|
60 |
"\n",
|
61 |
-
" # task id
|
62 |
" with gr.Row():\n",
|
63 |
" task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
|
64 |
" load_button = gr.Button('Load Task', scale=1)\n",
|
65 |
-
"
|
|
|
|
|
|
|
|
|
66 |
" sources = gr.Markdown()\n",
|
67 |
"\n",
|
68 |
" # model completions for answers 1 and 2\n",
|
@@ -89,9 +93,6 @@
|
|
89 |
" overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
|
90 |
" notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
|
91 |
"\n",
|
92 |
-
" # save button\n",
|
93 |
-
" save_button = gr.Button('Save Task')\n",
|
94 |
-
"\n",
|
95 |
" # input/output fields\n",
|
96 |
" answers = (answer1, answer2)\n",
|
97 |
" ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
|
@@ -101,6 +102,7 @@
|
|
101 |
" # button clicks\n",
|
102 |
" load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
|
103 |
" save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
|
|
|
104 |
"\n",
|
105 |
" def load_task(self, task_id):\n",
|
106 |
" \"\"\"Load the task and parse the info.\"\"\"\n",
|
@@ -111,26 +113,26 @@
|
|
111 |
" answers = [task['answer_1'], task['answer_2']]\n",
|
112 |
" sources = self.load_sources(task)\n",
|
113 |
" ratings = self.load_ratings(task)\n",
|
114 |
-
" gr.Info(f'Task {task_id} is loaded!')\n",
|
115 |
" return id, chat, sources, *answers, *ratings\n",
|
116 |
" except:\n",
|
117 |
-
" raise gr.Error(f'Could not load the task {task_id} :(')\n",
|
118 |
"\n",
|
119 |
" def read_task(self, task_id):\n",
|
120 |
" \"\"\"Read the json task file.\"\"\"\n",
|
121 |
" try:\n",
|
122 |
-
" with open(f'./data/{task_id}.json') as task_file:\n",
|
123 |
" task = json.load(task_file)\n",
|
124 |
" return task\n",
|
125 |
" except FileNotFoundError:\n",
|
126 |
-
" raise gr.Error(f'Task {task_id} is not found :(')\n",
|
127 |
"\n",
|
128 |
" def load_sources(self, task):\n",
|
129 |
" \"\"\"Parse the search results.\"\"\"\n",
|
130 |
-
" sources = ['
|
131 |
" for idx, source in enumerate(task['search_results']):\n",
|
132 |
-
" sources.append(f'
|
133 |
-
" return '\\n
|
134 |
"\n",
|
135 |
" def load_ratings(self, task):\n",
|
136 |
" \"\"\"Parse the ratings for each answer.\"\"\"\n",
|
@@ -152,7 +154,7 @@
|
|
152 |
" def save_task(self, task_id, *ratings):\n",
|
153 |
" \"\"\"Save the task into a new json file.\"\"\"\n",
|
154 |
" # load the original task\n",
|
155 |
-
" with open(f'./data/{task_id}.json') as task_file:\n",
|
156 |
" task = json.load(task_file)\n",
|
157 |
" # parse the ratings\n",
|
158 |
" groundedness1, fluency1, utility1, notes1, \\\n",
|
@@ -173,11 +175,38 @@
|
|
173 |
" task['notes'] = notes\n",
|
174 |
" # save the task to json file\n",
|
175 |
" try:\n",
|
176 |
-
" with open(f'./data/{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
" json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
|
178 |
-
" gr.
|
179 |
" except:\n",
|
180 |
-
" raise gr.Error(f'Could not
|
|
|
181 |
"\n",
|
182 |
" def launch_interface(self):\n",
|
183 |
" \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
|
@@ -199,7 +228,7 @@
|
|
199 |
},
|
200 |
{
|
201 |
"cell_type": "code",
|
202 |
-
"execution_count":
|
203 |
"id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
|
204 |
"metadata": {},
|
205 |
"outputs": [],
|
@@ -211,16 +240,16 @@
|
|
211 |
"os.makedirs('./data/', exist_ok=True)\n",
|
212 |
"for idx in range(3):\n",
|
213 |
" task = {\n",
|
214 |
-
" 'id': f'
|
215 |
" 'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
|
216 |
" 'question': 'question',\n",
|
217 |
" 'search_query': 'search query',\n",
|
218 |
" 'search_results': ['source 1', 'source 2', 'source 3'],\n",
|
219 |
" 'answer_1': 'answer 1',\n",
|
220 |
" 'answer_2': 'answer 2',\n",
|
221 |
-
" 'ratings_1': {'groundedness': '
|
222 |
-
" 'ratings_2': {'groundedness': '
|
223 |
-
" 'overall': '
|
224 |
" 'notes': ''\n",
|
225 |
" }\n",
|
226 |
" with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
+
"execution_count": 18,
|
14 |
"id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
|
15 |
"metadata": {},
|
16 |
"outputs": [
|
|
|
18 |
"name": "stdout",
|
19 |
"output_type": "stream",
|
20 |
"text": [
|
21 |
+
"Running on local URL: http://127.0.0.1:7870\n",
|
22 |
"\n",
|
23 |
"To create a public link, set `share=True` in `launch()`.\n"
|
24 |
]
|
|
|
26 |
{
|
27 |
"data": {
|
28 |
"text/html": [
|
29 |
+
"<div><iframe src=\"http://127.0.0.1:7870/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
30 |
],
|
31 |
"text/plain": [
|
32 |
"<IPython.core.display.HTML object>"
|
|
|
53 |
"\n",
|
54 |
" def setup_interface(self):\n",
|
55 |
" \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
|
56 |
+
" with gr.Blocks(title='Demo AB Evaluate RAG') as self.interface:\n",
|
57 |
"\n",
|
58 |
" # protected fields\n",
|
59 |
" _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
|
60 |
"\n",
|
61 |
+
" # task id and load/save/reset buttons\n",
|
62 |
" with gr.Row():\n",
|
63 |
" task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
|
64 |
" load_button = gr.Button('Load Task', scale=1)\n",
|
65 |
+
" save_button = gr.Button('Save Task', scale=1)\n",
|
66 |
+
" reset_button = gr.Button('Reset Task', scale=1, variant='stop')\n",
|
67 |
+
"\n",
|
68 |
+
" # chat history and search results\n",
|
69 |
+
" chat = gr.Chatbot(height=700, layout='bubble', bubble_full_width=False, label='Chat History')\n",
|
70 |
" sources = gr.Markdown()\n",
|
71 |
"\n",
|
72 |
" # model completions for answers 1 and 2\n",
|
|
|
93 |
" overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
|
94 |
" notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
|
95 |
"\n",
|
|
|
|
|
|
|
96 |
" # input/output fields\n",
|
97 |
" answers = (answer1, answer2)\n",
|
98 |
" ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
|
|
|
102 |
" # button clicks\n",
|
103 |
" load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
|
104 |
" save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
|
105 |
+
" reset_button.click(self.reset_task, inputs=[_task_id], outputs=[*ratings])\n",
|
106 |
"\n",
|
107 |
" def load_task(self, task_id):\n",
|
108 |
" \"\"\"Load the task and parse the info.\"\"\"\n",
|
|
|
113 |
" answers = [task['answer_1'], task['answer_2']]\n",
|
114 |
" sources = self.load_sources(task)\n",
|
115 |
" ratings = self.load_ratings(task)\n",
|
116 |
+
" gr.Info(f'Task demo_task_{task_id} is loaded!')\n",
|
117 |
" return id, chat, sources, *answers, *ratings\n",
|
118 |
" except:\n",
|
119 |
+
" raise gr.Error(f'Could not load the task demo_task_{task_id} :(')\n",
|
120 |
"\n",
|
121 |
" def read_task(self, task_id):\n",
|
122 |
" \"\"\"Read the json task file.\"\"\"\n",
|
123 |
" try:\n",
|
124 |
+
" with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
|
125 |
" task = json.load(task_file)\n",
|
126 |
" return task\n",
|
127 |
" except FileNotFoundError:\n",
|
128 |
+
" raise gr.Error(f'Task demo_task_{task_id} is not found :(')\n",
|
129 |
"\n",
|
130 |
" def load_sources(self, task):\n",
|
131 |
" \"\"\"Parse the search results.\"\"\"\n",
|
132 |
+
" sources = ['### Search Results']\n",
|
133 |
" for idx, source in enumerate(task['search_results']):\n",
|
134 |
+
" sources.append(f'##### {idx+1}. {source.replace(\"<\", f\"{chr(92)}<\")}')\n",
|
135 |
+
" return '\\n---\\n'.join(sources + [''])\n",
|
136 |
"\n",
|
137 |
" def load_ratings(self, task):\n",
|
138 |
" \"\"\"Parse the ratings for each answer.\"\"\"\n",
|
|
|
154 |
" def save_task(self, task_id, *ratings):\n",
|
155 |
" \"\"\"Save the task into a new json file.\"\"\"\n",
|
156 |
" # load the original task\n",
|
157 |
+
" with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
|
158 |
" task = json.load(task_file)\n",
|
159 |
" # parse the ratings\n",
|
160 |
" groundedness1, fluency1, utility1, notes1, \\\n",
|
|
|
175 |
" task['notes'] = notes\n",
|
176 |
" # save the task to json file\n",
|
177 |
" try:\n",
|
178 |
+
" with open(f'./data/demo_task_{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
|
179 |
+
" json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
|
180 |
+
" gr.Info(f'Task demo_task_{task_id} is saved!')\n",
|
181 |
+
" except:\n",
|
182 |
+
" raise gr.Error(f'Could not save the task demo_task_{task_id} :(')\n",
|
183 |
+
"\n",
|
184 |
+
" def reset_task(self, task_id):\n",
|
185 |
+
" \"\"\"Reset the task by erasing the ratings and operator notes.\"\"\"\n",
|
186 |
+
" # load the original task\n",
|
187 |
+
" with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
|
188 |
+
" task = json.load(task_file)\n",
|
189 |
+
" # erase the ratings for answer 1\n",
|
190 |
+
" task['ratings_1']['groundedness'] = ''\n",
|
191 |
+
" task['ratings_1']['fluency'] = ''\n",
|
192 |
+
" task['ratings_1']['utility'] = ''\n",
|
193 |
+
" task['ratings_1']['notes'] = ''\n",
|
194 |
+
" # erase the ratings for answer 2\n",
|
195 |
+
" task['ratings_2']['groundedness'] = ''\n",
|
196 |
+
" task['ratings_2']['fluency'] = ''\n",
|
197 |
+
" task['ratings_2']['utility'] = ''\n",
|
198 |
+
" task['ratings_2']['notes'] = ''\n",
|
199 |
+
" # erase overall ratings\n",
|
200 |
+
" task['overall'] = ''\n",
|
201 |
+
" task['notes'] = ''\n",
|
202 |
+
" # save the reset task to json file\n",
|
203 |
+
" try:\n",
|
204 |
+
" with open(f'./data/demo_task_{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
|
205 |
" json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
|
206 |
+
" gr.Warning(f'Task demo_task_{task_id} is reset!')\n",
|
207 |
" except:\n",
|
208 |
+
" raise gr.Error(f'Could not reset the task demo_task_{task_id} :(')\n",
|
209 |
+
" return '', '', '', '', '', '', '', '', '', ''\n",
|
210 |
"\n",
|
211 |
" def launch_interface(self):\n",
|
212 |
" \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
|
|
|
228 |
},
|
229 |
{
|
230 |
"cell_type": "code",
|
231 |
+
"execution_count": 19,
|
232 |
"id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
|
233 |
"metadata": {},
|
234 |
"outputs": [],
|
|
|
240 |
"os.makedirs('./data/', exist_ok=True)\n",
|
241 |
"for idx in range(3):\n",
|
242 |
" task = {\n",
|
243 |
+
" 'id': f'{idx+1}',\n",
|
244 |
" 'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
|
245 |
" 'question': 'question',\n",
|
246 |
" 'search_query': 'search query',\n",
|
247 |
" 'search_results': ['source 1', 'source 2', 'source 3'],\n",
|
248 |
" 'answer_1': 'answer 1',\n",
|
249 |
" 'answer_2': 'answer 2',\n",
|
250 |
+
" 'ratings_1': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
|
251 |
+
" 'ratings_2': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
|
252 |
+
" 'overall': '',\n",
|
253 |
" 'notes': ''\n",
|
254 |
" }\n",
|
255 |
" with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",
|