Spaces:

sukiboo
/

invisible-rag-demo

Sleeping

App Files Files Community

sukiboo committed on Mar 10, 2024

Commit

996bf1f

1 Parent(s): f5b615c

add task reset button

Browse files

Files changed (1) hide show

app.ipynb +54 -25

app.ipynb CHANGED Viewed

@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
    "metadata": {},
    "outputs": [
@@ -18,7 +18,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Running on local URL:  http://127.0.0.1:7861\n",
       "\n",
       "To create a public link, set `share=True` in `launch()`.\n"
      ]
@@ -26,7 +26,7 @@
     {
      "data": {
       "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -53,16 +53,20 @@
     "\n",
     "    def setup_interface(self):\n",
     "        \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
-    "        with gr.Blocks(title='AB Evaluate RAG') as self.interface:\n",
     "\n",
     "            # protected fields\n",
     "            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
     "\n",
-    "            # task id, load button, chat history, search results\n",
     "            with gr.Row():\n",
     "                task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
     "                load_button = gr.Button('Load Task', scale=1)\n",
-    "            chat = gr.Chatbot(height=700, layout='bubble', label='Chat History')\n",
     "            sources = gr.Markdown()\n",
     "\n",
     "            # model completions for answers 1 and 2\n",
@@ -89,9 +93,6 @@
     "            overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
     "            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
     "\n",
-    "            # save button\n",
-    "            save_button = gr.Button('Save Task')\n",
-    "\n",
     "            # input/output fields\n",
     "            answers = (answer1, answer2)\n",
     "            ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
@@ -101,6 +102,7 @@
     "            # button clicks\n",
     "            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
     "            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
     "\n",
     "    def load_task(self, task_id):\n",
     "        \"\"\"Load the task and parse the info.\"\"\"\n",
@@ -111,26 +113,26 @@
     "            answers = [task['answer_1'], task['answer_2']]\n",
     "            sources = self.load_sources(task)\n",
     "            ratings = self.load_ratings(task)\n",
-    "            gr.Info(f'Task {task_id} is loaded!')\n",
     "            return id, chat, sources, *answers, *ratings\n",
     "        except:\n",
-    "            raise gr.Error(f'Could not load the task {task_id} :(')\n",
     "\n",
     "    def read_task(self, task_id):\n",
     "        \"\"\"Read the json task file.\"\"\"\n",
     "        try:\n",
-    "            with open(f'./data/{task_id}.json') as task_file:\n",
     "                task = json.load(task_file)\n",
     "            return task\n",
     "        except FileNotFoundError:\n",
-    "            raise gr.Error(f'Task {task_id} is not found :(')\n",
     "\n",
     "    def load_sources(self, task):\n",
     "        \"\"\"Parse the search results.\"\"\"\n",
-    "        sources = ['## Search Results']\n",
     "        for idx, source in enumerate(task['search_results']):\n",
-    "            sources.append(f'### {idx+1}. {source.replace(\"<\", f\"{chr(92)}<\")}')\n",
-    "        return '\\n\\n---\\n\\n'.join(sources + [''])\n",
     "\n",
     "    def load_ratings(self, task):\n",
     "        \"\"\"Parse the ratings for each answer.\"\"\"\n",
@@ -152,7 +154,7 @@
     "    def save_task(self, task_id, *ratings):\n",
     "        \"\"\"Save the task into a new json file.\"\"\"\n",
     "        # load the original task\n",
-    "        with open(f'./data/{task_id}.json') as task_file:\n",
     "            task = json.load(task_file)\n",
     "        # parse the ratings\n",
     "        groundedness1, fluency1, utility1, notes1, \\\n",
@@ -173,11 +175,38 @@
     "        task['notes'] = notes\n",
     "        # save the task to json file\n",
     "        try:\n",
-    "            with open(f'./data/{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
     "                json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
-    "            gr.Info(f'Task {task_id} is saved!')\n",
     "        except:\n",
-    "            raise gr.Error(f'Could not save the task {task_id} :(')\n",
     "\n",
     "    def launch_interface(self):\n",
     "        \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
@@ -199,7 +228,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
    "metadata": {},
    "outputs": [],
@@ -211,16 +240,16 @@
     "os.makedirs('./data/', exist_ok=True)\n",
     "for idx in range(3):\n",
     "    task = {\n",
-    "        'id': f'demo_task_{idx+1}',\n",
     "        'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
     "        'question': 'question',\n",
     "        'search_query': 'search query',\n",
     "        'search_results': ['source 1', 'source 2', 'source 3'],\n",
     "        'answer_1': 'answer 1',\n",
     "        'answer_2': 'answer 2',\n",
-    "        'ratings_1': {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''},\n",
-    "        'ratings_2': {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''},\n",
-    "        'overall': 'null',\n",
     "        'notes': ''\n",
     "    }\n",
     "    with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",

   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "id": "2a8e18f7-cc88-4bbf-a6e1-095237ed7714",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Running on local URL:  http://127.0.0.1:7870\n",
       "\n",
       "To create a public link, set `share=True` in `launch()`.\n"
      ]
     {
      "data": {
       "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7870/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
     "\n",
     "    def setup_interface(self):\n",
     "        \"\"\"Configure the A/B Evaluation RAG task interface.\"\"\"\n",
+    "        with gr.Blocks(title='Demo AB Evaluate RAG') as self.interface:\n",
     "\n",
     "            # protected fields\n",
     "            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)\n",
     "\n",
+    "            # task id and load/save/reset buttons\n",
     "            with gr.Row():\n",
     "                task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)\n",
     "                load_button = gr.Button('Load Task', scale=1)\n",
+    "                save_button = gr.Button('Save Task', scale=1)\n",
+    "                reset_button = gr.Button('Reset Task', scale=1, variant='stop')\n",
+    "\n",
+    "            # chat history and search results\n",
+    "            chat = gr.Chatbot(height=700, layout='bubble', bubble_full_width=False, label='Chat History')\n",
     "            sources = gr.Markdown()\n",
     "\n",
     "            # model completions for answers 1 and 2\n",
     "            overall = gr.Radio(label='Overall Rating', choices=['#1 Better', 'Equally Bad', 'Equally Good', '#2 Better'])\n",
     "            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')\n",
     "\n",
     "            # input/output fields\n",
     "            answers = (answer1, answer2)\n",
     "            ratings1 = (groundedness1, fluency1, utility1, notes1)\n",
     "            # button clicks\n",
     "            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])\n",
     "            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)\n",
+    "            reset_button.click(self.reset_task, inputs=[_task_id], outputs=[*ratings])\n",
     "\n",
     "    def load_task(self, task_id):\n",
     "        \"\"\"Load the task and parse the info.\"\"\"\n",
     "            answers = [task['answer_1'], task['answer_2']]\n",
     "            sources = self.load_sources(task)\n",
     "            ratings = self.load_ratings(task)\n",
+    "            gr.Info(f'Task demo_task_{task_id} is loaded!')\n",
     "            return id, chat, sources, *answers, *ratings\n",
     "        except:\n",
+    "            raise gr.Error(f'Could not load the task demo_task_{task_id} :(')\n",
     "\n",
     "    def read_task(self, task_id):\n",
     "        \"\"\"Read the json task file.\"\"\"\n",
     "        try:\n",
+    "            with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
     "                task = json.load(task_file)\n",
     "            return task\n",
     "        except FileNotFoundError:\n",
+    "            raise gr.Error(f'Task demo_task_{task_id} is not found :(')\n",
     "\n",
     "    def load_sources(self, task):\n",
     "        \"\"\"Parse the search results.\"\"\"\n",
+    "        sources = ['### Search Results']\n",
     "        for idx, source in enumerate(task['search_results']):\n",
+    "            sources.append(f'##### {idx+1}. {source.replace(\"<\", f\"{chr(92)}<\")}')\n",
+    "        return '\\n---\\n'.join(sources + [''])\n",
     "\n",
     "    def load_ratings(self, task):\n",
     "        \"\"\"Parse the ratings for each answer.\"\"\"\n",
     "    def save_task(self, task_id, *ratings):\n",
     "        \"\"\"Save the task into a new json file.\"\"\"\n",
     "        # load the original task\n",
+    "        with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
     "            task = json.load(task_file)\n",
     "        # parse the ratings\n",
     "        groundedness1, fluency1, utility1, notes1, \\\n",
     "        task['notes'] = notes\n",
     "        # save the task to json file\n",
     "        try:\n",
+    "            with open(f'./data/demo_task_{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
+    "                json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
+    "            gr.Info(f'Task demo_task_{task_id} is saved!')\n",
+    "        except:\n",
+    "            raise gr.Error(f'Could not save the task demo_task_{task_id} :(')\n",
+    "\n",
+    "    def reset_task(self, task_id):\n",
+    "        \"\"\"Reset the task by erasing the ratings and operator notes.\"\"\"\n",
+    "        # load the original task\n",
+    "        with open(f'./data/demo_task_{task_id}.json') as task_file:\n",
+    "            task = json.load(task_file)\n",
+    "        # erase the ratings for answer 1\n",
+    "        task['ratings_1']['groundedness'] = ''\n",
+    "        task['ratings_1']['fluency'] = ''\n",
+    "        task['ratings_1']['utility'] = ''\n",
+    "        task['ratings_1']['notes'] = ''\n",
+    "        # erase the ratings for answer 2\n",
+    "        task['ratings_2']['groundedness'] = ''\n",
+    "        task['ratings_2']['fluency'] = ''\n",
+    "        task['ratings_2']['utility'] = ''\n",
+    "        task['ratings_2']['notes'] = ''\n",
+    "        # erase overall ratings\n",
+    "        task['overall'] = ''\n",
+    "        task['notes'] = ''\n",
+    "        # save the reset task to json file\n",
+    "        try:\n",
+    "            with open(f'./data/demo_task_{task_id}.json', 'w', encoding='utf-8') as task_file:\n",
     "                json.dump(task, task_file, ensure_ascii=False, indent=4)\n",
+    "            gr.Warning(f'Task demo_task_{task_id} is reset!')\n",
     "        except:\n",
+    "            raise gr.Error(f'Could not reset the task demo_task_{task_id} :(')\n",
+    "        return '', '', '', '', '', '', '', '', '', ''\n",
     "\n",
     "    def launch_interface(self):\n",
     "        \"\"\"Launch the A/B Evaluation RAG task interface.\"\"\"\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "id": "6707866e-8f1b-4bda-9b12-0008e289ab77",
    "metadata": {},
    "outputs": [],
     "os.makedirs('./data/', exist_ok=True)\n",
     "for idx in range(3):\n",
     "    task = {\n",
+    "        'id': f'{idx+1}',\n",
     "        'chat_history': [['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],\n",
     "        'question': 'question',\n",
     "        'search_query': 'search query',\n",
     "        'search_results': ['source 1', 'source 2', 'source 3'],\n",
     "        'answer_1': 'answer 1',\n",
     "        'answer_2': 'answer 2',\n",
+    "        'ratings_1': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
+    "        'ratings_2': {'groundedness': '', 'utility': '', 'fluency': '', 'notes': ''},\n",
+    "        'overall': '',\n",
     "        'notes': ''\n",
     "    }\n",
     "    with open(f'./data/demo_task_{idx+1}.json', 'w', encoding='utf-8') as task_file:\n",