lewtun committed
Commit b12b521
1 Parent(s): 423d96b
Files changed (2)
  1. app.ipynb +36 -17
  2. app.py +21 -9
app.ipynb CHANGED
@@ -31,7 +31,23 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 3,
+    "execution_count": 32,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# |export\n",
+     "def get_model_endpoint(model_id):\n",
+     "    if \"joi\" in model_id:\n",
+     "        headers = None\n",
+     "        return \"https://joi-20b.ngrok.io/generate\", headers\n",
+     "    else:\n",
+     "        headers = {\"Authorization\": f\"Bearer {HF_TOKEN}\", \"x-wait-for-model\": \"1\"}\n",
+     "        return f\"https://api-inference.huggingface.co/models/{model_id}\", headers\n"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 33,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -42,8 +58,7 @@
     "    temperature,\n",
     "    top_p\n",
     "):\n",
-    "    API_URL = f\"https://api-inference.huggingface.co/models/{model_id}\"\n",
-    "    headers = {\"Authorization\": f\"Bearer {HF_TOKEN}\", \"x-wait-for-model\": \"1\"}\n",
+    "    endpoint, headers = get_model_endpoint(model_id)\n",
     "\n",
     "    payload = {\n",
     "        \"inputs\": inputs,\n",
@@ -55,7 +70,7 @@
     "        },\n",
     "    }\n",
     "\n",
-    "    response = requests.post(API_URL, json=payload, headers=headers)\n",
+    "    response = requests.post(endpoint, json=payload, headers=headers)\n",
     "\n",
     "    if response.status_code == 200:\n",
     "        return response.json()\n",
@@ -65,23 +80,24 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 4,
+    "execution_count": 36,
     "metadata": {},
     "outputs": [
      {
       "data": {
        "text/plain": [
-        "[{'generated_text': 'love'}]"
+        "{'generated_text': '\\n\\nJoi: Black holes are regions of space-time where gravity is so strong that nothing'}"
        ]
       },
-      "execution_count": 4,
+      "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "model_id = \"google/flan-t5-xl\"\n",
-    "query = \"what is the answer to the universe?\"\n",
+    "# model_id = \"google/flan-t5-xl\"\n",
+    "model_id = \"Rallio67/joi_20B_instruct_alpha\"\n",
+    "query = \"What can you tell me about black holes?\"\n",
     "query_chat_api(model_id, query, 1, 0.95)"
    ]
   },
@@ -101,7 +117,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 12,
+    "execution_count": 37,
     "metadata": {},
     "outputs": [],
     "source": [
@@ -121,7 +137,10 @@
     "    inputs = prompt_template[\"prompt\"].format(human_input=text_input)\n",
     "\n",
     "    output = query_chat_api(model_id, inputs, temperature, top_p)\n",
-    "    history.append(\" \" + output[0][\"generated_text\"])\n",
+    "    # TODO: remove this hack when inference backend schema is updated\n",
+    "    if isinstance(output, list):\n",
+    "        output = output[0]\n",
+    "    history.append(\" \" + output[\"generated_text\"])\n",
     "\n",
     "    chat = [\n",
     "        (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)\n",
@@ -695,14 +714,14 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 6,
+    "execution_count": 38,
     "metadata": {},
     "outputs": [
      {
       "name": "stdout",
       "output_type": "stream",
       "text": [
-       "Running on local URL: http://127.0.0.1:7860\n",
+       "Running on local URL: http://127.0.0.1:7861\n",
        "\n",
        "To create a public link, set `share=True` in `launch()`.\n"
       ]
@@ -710,7 +729,7 @@
     {
      "data": {
       "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -723,7 +742,7 @@
      "data": {
       "text/plain": []
      },
-     "execution_count": 6,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -744,7 +763,7 @@
     "    with gr.Row():\n",
     "        with gr.Column(scale=1):\n",
     "            model_id = gr.Dropdown(\n",
-    "                choices=[\"google/flan-t5-xl\"],\n",
+    "                choices=[\"google/flan-t5-xl\", \"Rallio67/joi_20B_instruct_alpha\"],\n",
     "                value=\"google/flan-t5-xl\",\n",
     "                label=\"Model\",\n",
     "                interactive=True,\n",
@@ -846,7 +865,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 13,
+    "execution_count": 15,
     "metadata": {},
     "outputs": [],
     "source": [
app.py CHANGED
@@ -1,7 +1,7 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
 
 # %% auto 0
-__all__ = ['HF_TOKEN', 'title', 'description', 'query_chat_api', 'inference_chat']
+__all__ = ['HF_TOKEN', 'title', 'description', 'get_model_endpoint', 'query_chat_api', 'inference_chat']
 
 # %% app.ipynb 0
 import gradio as gr
@@ -21,14 +21,23 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 # %% app.ipynb 2
+def get_model_endpoint(model_id):
+    if "joi" in model_id:
+        headers = None
+        return "https://joi-20b.ngrok.io/generate", headers
+    else:
+        headers = {"Authorization": f"Bearer {HF_TOKEN}", "x-wait-for-model": "1"}
+        return f"https://api-inference.huggingface.co/models/{model_id}", headers
+
+
+# %% app.ipynb 3
 def query_chat_api(
     model_id,
     inputs,
     temperature,
     top_p
 ):
-    API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
-    headers = {"Authorization": f"Bearer {HF_TOKEN}", "x-wait-for-model": "1"}
+    endpoint, headers = get_model_endpoint(model_id)
 
     payload = {
         "inputs": inputs,
@@ -40,7 +49,7 @@ def query_chat_api(
         },
     }
 
-    response = requests.post(API_URL, json=payload, headers=headers)
+    response = requests.post(endpoint, json=payload, headers=headers)
 
     if response.status_code == 200:
         return response.json()
@@ -48,7 +57,7 @@ def query_chat_api(
         return "Error: " + response.text
 
 
-# %% app.ipynb 5
+# %% app.ipynb 6
 def inference_chat(
     model_id,
     prompt_template,
@@ -64,7 +73,10 @@ def inference_chat(
     inputs = prompt_template["prompt"].format(human_input=text_input)
 
     output = query_chat_api(model_id, inputs, temperature, top_p)
-    history.append(" " + output[0]["generated_text"])
+    # TODO: remove this hack when inference backend schema is updated
+    if isinstance(output, list):
+        output = output[0]
+    history.append(" " + output["generated_text"])
 
     chat = [
         (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
@@ -73,7 +85,7 @@ def inference_chat(
     return {chatbot: chat, state: history}
 
 
-# %% app.ipynb 15
+# %% app.ipynb 16
 title = """<h1 align="center">Chatty Language Models</h1>"""
 description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
 
@@ -98,7 +110,7 @@ So far, the following prompts are available:
 As you can see, most of these prompts exceed the maximum context size of models like Flan-T5, so an error usually means the Inference API has timed out.
 """
 
-# %% app.ipynb 16
+# %% app.ipynb 17
 with gr.Blocks(
     css="""
     .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
@@ -113,7 +125,7 @@ with gr.Blocks(
     with gr.Row():
         with gr.Column(scale=1):
             model_id = gr.Dropdown(
-                choices=["google/flan-t5-xl"],
+                choices=["google/flan-t5-xl", "Rallio67/joi_20B_instruct_alpha"],
                 value="google/flan-t5-xl",
                 label="Model",
                 interactive=True,
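
For reference, a minimal round trip through the exported helpers; this sketch assumes app.py is importable as app, HF_TOKEN is set in the environment, and the Joi ngrok tunnel is only reachable while the self-hosted server is running:

from app import get_model_endpoint, query_chat_api

# Ids containing "joi" route to the self-hosted tunnel with no auth header;
# everything else goes to the hosted Inference API with a bearer token.
endpoint, headers = get_model_endpoint("Rallio67/joi_20B_instruct_alpha")
assert endpoint == "https://joi-20b.ngrok.io/generate" and headers is None

endpoint, headers = get_model_endpoint("google/flan-t5-xl")
assert endpoint.endswith("/models/google/flan-t5-xl")

# Returns parsed JSON on HTTP 200, or an "Error: ..." string otherwise.
print(query_chat_api("google/flan-t5-xl", "What can you tell me about black holes?", 1, 0.95))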