Use proper prompt for Joi
- app.ipynb +156 -36
- app.py +24 -26
- prompt_templates/openassistant_joi.json +1 -0
app.ipynb
CHANGED
@@ -50,7 +50,7 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 11,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -75,7 +75,7 @@
 " if max_new_tokens_supported is True:\n",
 " payload[\"parameters\"][\"max_new_tokens\"] = 100\n",
 " payload[\"parameters\"][\"repetition_penalty\"]: 1.03\n",
-" payload[\"parameters\"][\"stop\"] = [\"
+" payload[\"parameters\"][\"stop\"] = [\"User:\"]\n",
 " else:\n",
 " payload[\"parameters\"][\"max_length\"] = 512\n",
 "\n",
@@ -95,7 +95,7 @@
 {
 "data": {
 "text/plain": [
-"{'generated_text': '\\n\\nJoi: Black holes are
+"{'generated_text': '\\n\\nJoi: Black holes are regions of spacetime where gravity is so strong that nothing, not even light, can escape from inside them. They are the result of huge amounts of mass concentrated in a small space, which causes intense gravitational force. The more massive the mass, the stronger the gravity, and the faster the force of gravity increases with increased mass. Black holes have no size or shape, as they are just a point in spacetime, the event horizon, from which light can no longer'}"
 ]
 },
 "execution_count": 5,
@@ -112,7 +112,46 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 37,
+"metadata": {},
+"outputs": [],
+"source": [
+"#|export\n",
+"def format_history(history, human=\"Human\", bot=\"Assistant\"):\n",
+" history_input = \"\"\n",
+" for idx, text in enumerate(history):\n",
+" if idx % 2 == 0:\n",
+" history_input += f\"{human}: {text}\\n\\n\"\n",
+" else:\n",
+" history_input += f\"{bot}: {text}\\n\\n\"\n",
+" history_input = history_input.rstrip(\"\\n\")\n",
+" return history_input"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 38,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Human: Hello\n",
+"\n",
+"Assistant: Hi\n",
+"\n",
+"Human: How are you?\n"
+]
+}
+],
+"source": [
+"print(format_history([\"Hello\", \"Hi\", \"How are you?\"]))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 44,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -125,20 +164,14 @@
 " history=[],\n",
 "):\n",
 " if \"joi\" in model_id:\n",
-" prompt_filename = \"
+" prompt_filename = \"openassistant_joi.json\"\n",
+" history_input = format_history(history, human=\"User\", bot=\"Joi\")\n",
 " else:\n",
 " prompt_filename = \"anthropic_hhh_single.json\"\n",
-"
+" history_input = format_history(history, human=\"Human\", bot=\"Assistant\")\n",
 " with open(f\"prompt_templates/{prompt_filename}\", \"r\") as f:\n",
 " prompt_template = json.load(f)\n",
 "\n",
-" history_input = \"\"\n",
-" for idx, text in enumerate(history):\n",
-" if idx % 2 == 0:\n",
-" history_input += f\"Human: {text}\\n\"\n",
-" else:\n",
-" history_input += f\"Assistant: {text}\\n\"\n",
-" history_input = history_input.rstrip(\"\\n\")\n",
 " inputs = prompt_template[\"prompt\"].format(human_input=text_input, history=history_input)\n",
 " history.append(text_input)\n",
 "\n",
@@ -146,9 +179,13 @@
 " print(f\"Inputs: {inputs}\")\n",
 "\n",
 " output = query_chat_api(model_id, inputs, temperature, top_p)\n",
+" print(output)\n",
 " if isinstance(output, list):\n",
 " output = output[0]\n",
-"
+" if \"joi\" in model_id:\n",
+" output = output[\"generated_text\"].rstrip(\"\\n\\nUser:\")\n",
+" else:\n",
+" output = output[\"generated_text\"].rstrip(\" Human:\")\n",
 " history.append(\" \" + output)\n",
 "\n",
 " chat = [\n",
@@ -179,7 +216,7 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 8,
 "metadata": {},
 "outputs": [
 {
@@ -217,6 +254,39 @@
 " json.dump({\"prompt\": template}, f)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 17,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"17\n"
+]
+}
+],
+"source": [
+"template = \"\"\"{history}\n",
+"\n",
+"User: {human_input}\n",
+"\n",
+"Joi:\"\"\"\n",
+"\n",
+"print(len(tokenizer(template)[\"input_ids\"]))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 18,
+"metadata": {},
+"outputs": [],
+"source": [
+"with open(\"prompt_templates/openassistant_joi.json\", \"w\") as f:\n",
+" json.dump({\"prompt\": template}, f)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 28,
@@ -772,7 +842,7 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 45,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -794,7 +864,7 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 46,
 "metadata": {},
 "outputs": [
 {
@@ -803,7 +873,7 @@
 "'So far, the following prompts are available:\\n\\n* `langchain_default`: The default prompt used in the [LangChain library](https://github.com/hwchase17/langchain/blob/bc53c928fc1b221d0038b839d111039d31729def/langchain/chains/conversation/prompt.py#L4). Around 67 tokens long.\\n* `openai_chatgpt`: The prompt used in the OpenAI ChatGPT model. Around 261 tokens long.\\n* `deepmind_Assistant`: The prompt used in the DeepMind Assistant model (Table 7 of [their paper](https://arxiv.org/abs/2209.14375)). Around 880 tokens long.\\n* `deepmind_gopher`: The prompt used in the DeepMind Assistant model (Table A30 of [their paper](https://arxiv.org/abs/2112.11446)). Around 791 tokens long.\\n* `anthropic_hhh`: The prompt used in the [Anthropic HHH models](https://gist.github.com/jareddk/2509330f8ef3d787fc5aaac67aab5f11#file-hhh_prompt-txt). A whopping 6,341 tokens long!\\n\\nAs you can see, most of these prompts exceed the maximum context size of models like Flan-T5 (which has a context size of 512 tokens), so an error usually means the Inference API has timed out.'"
 ]
 },
-"execution_count":
+"execution_count": 46,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -822,14 +892,14 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 47,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Running on local URL: http://127.0.0.1:
+"Running on local URL: http://127.0.0.1:7866\n",
 "\n",
 "To create a public link, set `share=True` in `launch()`.\n"
 ]
@@ -837,7 +907,7 @@
 {
 "data": {
 "text/html": [
-"<div><iframe src=\"http://127.0.0.1:
+"<div><iframe src=\"http://127.0.0.1:7866/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
 ],
 "text/plain": [
 "<IPython.core.display.HTML object>"
@@ -850,9 +920,60 @@
 "data": {
 "text/plain": []
 },
-"execution_count":
+"execution_count": 47,
 "metadata": {},
 "output_type": "execute_result"
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"History: ['What is 2 times 3?']\n",
+"Inputs: \n",
+"\n",
+"User: What is 2 times 3?\n",
+"\n",
+"Joi:\n",
+"{'generated_text': ' 3*2=6\\n\\nUser:'}\n",
+"History: ['What is 2 times 3?', ' 3*2=6', 'What about 4 times 3?']\n",
+"Inputs: User: What is 2 times 3?\n",
+"\n",
+"Joi: 3*2=6\n",
+"\n",
+"User: What about 4 times 3?\n",
+"\n",
+"Joi:\n",
+"{'generated_text': ' 3*4=12\\n\\nUser:'}\n",
+"History: ['What is 2 times 3?', ' 3*2=6', 'What about 4 times 3?', ' 3*4=12', 'What about -1 times -3?']\n",
+"Inputs: User: What is 2 times 3?\n",
+"\n",
+"Joi: 3*2=6\n",
+"\n",
+"User: What about 4 times 3?\n",
+"\n",
+"Joi: 3*4=12\n",
+"\n",
+"User: What about -1 times -3?\n",
+"\n",
+"Joi:\n",
+"{'generated_text': ' -3*(-1)=3\\n\\nUser:'}\n",
+"History: ['What can you tell me about llamas?']\n",
+"Inputs: \n",
+"\n",
+"User: What can you tell me about llamas?\n",
+"\n",
+"Joi:\n",
+"{'generated_text': ' Llamas are a large mammal native to South America. They are related to the camelids, which include the alpaca, vicuna, and guanaco. Llamas have a long, thick, curly coat of fur and long, sharp horns. They are very social and socialize with each other. They are also known for their amazing agility and speed. They are considered to be the fastest land animals in the world.'}\n",
+"History: ['What can you tell me about llamas?', ' Llamas are a large mammal native to South America. They are related to the camelids, which include the alpaca, vicuna, and guanaco. Llamas have a long, thick, curly coat of fur and long, sharp horns. They are very social and socialize with each other. They are also known for their amazing agility and speed. They are considered to be the fastest land animals in the world.', 'Who would win in a battle between a llama and an alpaca?']\n",
+"Inputs: User: What can you tell me about llamas?\n",
+"\n",
+"Joi: Llamas are a large mammal native to South America. They are related to the camelids, which include the alpaca, vicuna, and guanaco. Llamas have a long, thick, curly coat of fur and long, sharp horns. They are very social and socialize with each other. They are also known for their amazing agility and speed. They are considered to be the fastest land animals in the world.\n",
+"\n",
+"User: Who would win in a battle between a llama and an alpaca?\n",
+"\n",
+"Joi:\n",
+"{'generated_text': \" That depends on the alpaca. If they are of the same gender, then it depends on the alpaca's age, size, and condition. Generally speaking, the alpaca would win.\"}\n"
+]
 }
 ],
 "source": [
@@ -876,18 +997,6 @@
 " label=\"Model\",\n",
 " interactive=True,\n",
 " )\n",
-" # prompt_template = gr.Dropdown(\n",
-" # choices=[\n",
-" # \"langchain_default\",\n",
-" # \"openai_chatgpt\",\n",
-" # \"deepmind_sparrow\",\n",
-" # \"deepmind_gopher\",\n",
-" # \"anthropic_hhh\",\n",
-" # ],\n",
-" # value=\"langchain_default\",\n",
-" # label=\"Prompt Template\",\n",
-" # interactive=True,\n",
-" # )\n",
 " temperature = gr.Slider(\n",
 " minimum=0.0,\n",
 " maximum=2.0,\n",
@@ -971,9 +1080,20 @@
 },
 {
 "cell_type": "code",
-"execution_count":
+"execution_count": 48,
 "metadata": {},
-"outputs": [
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+"To disable this warning, you can either:\n",
+"\t- Avoid using `tokenizers` before the fork if possible\n",
+"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+]
+}
+],
 "source": [
 "from nbdev.export import nb_export\n",
 "nb_export('app.ipynb', lib_path='.', name='app')"
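For reference, here is the `format_history` helper added in the notebook above, rewritten as plain Python (it is exported verbatim into app.py below); the printed output matches the notebook cell output in the diff:

# format_history builds the alternating dialogue transcript used in the prompt.
def format_history(history, human="Human", bot="Assistant"):
    history_input = ""
    for idx, text in enumerate(history):
        # Even indices are user turns, odd indices are bot turns.
        if idx % 2 == 0:
            history_input += f"{human}: {text}\n\n"
        else:
            history_input += f"{bot}: {text}\n\n"
    # Drop the trailing blank line so the prompt ends cleanly.
    return history_input.rstrip("\n")

print(format_history(["Hello", "Hi", "How are you?"]))
# Human: Hello
#
# Assistant: Hi
#
# Human: How are you?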
app.py
CHANGED
@@ -1,7 +1,8 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.

 # %% auto 0
-__all__ = ['HF_TOKEN', 'ENDPOINT_URL', 'title', 'description', 'get_model_endpoint_params', 'query_chat_api', '
+__all__ = ['HF_TOKEN', 'ENDPOINT_URL', 'title', 'description', 'get_model_endpoint_params', 'query_chat_api', 'format_history',
+'inference_chat']

 # %% app.ipynb 0
 import gradio as gr
@@ -54,7 +55,7 @@ def query_chat_api(
 if max_new_tokens_supported is True:
 payload["parameters"]["max_new_tokens"] = 100
 payload["parameters"]["repetition_penalty"]: 1.03
-payload["parameters"]["stop"] = ["
+payload["parameters"]["stop"] = ["User:"]
 else:
 payload["parameters"]["max_length"] = 512

@@ -67,6 +68,17 @@ def query_chat_api(


 # %% app.ipynb 5
+def format_history(history, human="Human", bot="Assistant"):
+history_input = ""
+for idx, text in enumerate(history):
+if idx % 2 == 0:
+history_input += f"{human}: {text}\n\n"
+else:
+history_input += f"{bot}: {text}\n\n"
+history_input = history_input.rstrip("\n")
+return history_input
+
+# %% app.ipynb 7
 def inference_chat(
 model_id,
 text_input,
@@ -75,20 +87,14 @@ def inference_chat(
 history=[],
 ):
 if "joi" in model_id:
-prompt_filename = "
+prompt_filename = "openassistant_joi.json"
+history_input = format_history(history, human="User", bot="Joi")
 else:
 prompt_filename = "anthropic_hhh_single.json"
-
+history_input = format_history(history, human="Human", bot="Assistant")
 with open(f"prompt_templates/{prompt_filename}", "r") as f:
 prompt_template = json.load(f)

-history_input = ""
-for idx, text in enumerate(history):
-if idx % 2 == 0:
-history_input += f"Human: {text}\n"
-else:
-history_input += f"Assistant: {text}\n"
-history_input = history_input.rstrip("\n")
 inputs = prompt_template["prompt"].format(human_input=text_input, history=history_input)
 history.append(text_input)

@@ -96,9 +102,13 @@ def inference_chat(
 print(f"Inputs: {inputs}")

 output = query_chat_api(model_id, inputs, temperature, top_p)
+print(output)
 if isinstance(output, list):
 output = output[0]
-
+if "joi" in model_id:
+output = output["generated_text"].rstrip("\n\nUser:")
+else:
+output = output["generated_text"].rstrip(" Human:")
 history.append(" " + output)

 chat = [
@@ -108,7 +118,7 @@ def inference_chat(
 return {chatbot: chat, state: history}


-# %% app.ipynb
+# %% app.ipynb 25
 title = """<h1 align="center">Chatty Language Models</h1>"""
 description = """Pretrained language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:

@@ -123,7 +133,7 @@ Assistant: <utterance>
 In this app, you can explore the outputs of several language models conditioned on different conversational prompts. The models are trained on different datasets and have different objectives, so they will have different personalities and strengths.
 """

-# %% app.ipynb
+# %% app.ipynb 27
 with gr.Blocks(
 css="""
 .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
@@ -143,18 +153,6 @@ with gr.Blocks(
 label="Model",
 interactive=True,
 )
-# prompt_template = gr.Dropdown(
-# choices=[
-# "langchain_default",
-# "openai_chatgpt",
-# "deepmind_sparrow",
-# "deepmind_gopher",
-# "anthropic_hhh",
-# ],
-# value="langchain_default",
-# label="Prompt Template",
-# interactive=True,
-# )
 temperature = gr.Slider(
 minimum=0.0,
 maximum=2.0,
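A side note on the response post-processing added above: `str.rstrip("\n\nUser:")` strips a trailing run of those characters, not the literal suffix, so it can remove more than intended. A small sketch (not part of this commit) of a suffix-aware alternative:

# Sketch only: trim a literal trailing role marker instead of a character set.
def strip_role_suffix(text: str, suffix: str = "\n\nUser:") -> str:
    # Remove the exact suffix if present, otherwise return the text unchanged.
    if text.endswith(suffix):
        return text[: -len(suffix)]
    return text

print(strip_role_suffix(" 3*2=6\n\nUser:"))            # ' 3*2=6'
print(" 3*2=6\n\nUser:".rstrip("\n\nUser:"))            # ' 3*2=6' here, but only because '6' is not in the strip set
print("Yes, dinner\n\nUser:".rstrip("\n\nUser:"))       # 'Yes, dinn' -- rstrip over-strips trailing 'er'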
prompt_templates/openassistant_joi.json
ADDED
@@ -0,0 +1 @@
+{"prompt": "{history}\n\nUser: {human_input}\n\nJoi:"}
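For reference, a usage sketch (not part of the commit) of how `inference_chat` fills in this new Joi template; it assumes the exported app.py above is importable from the repo root:

import json

from app import format_history  # exported in app.py's __all__ by this commit

# Load the template added in this commit and build a prompt for the next turn.
with open("prompt_templates/openassistant_joi.json") as f:
    prompt_template = json.load(f)

history = ["What is 2 times 3?", "3*2=6"]  # alternating user/bot turns
history_input = format_history(history, human="User", bot="Joi")
inputs = prompt_template["prompt"].format(human_input="What about 4 times 3?", history=history_input)
print(inputs)
# User: What is 2 times 3?
#
# Joi: 3*2=6
#
# User: What about 4 times 3?
#
# Joi: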