Staticaliza committed on
Commit
c02118d
1 Parent(s): 120bd05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -12
app.py CHANGED
@@ -2,12 +2,15 @@ import gradio as gr
2
  from huggingface_hub import Repository, InferenceClient
3
  import os
4
  import json
 
5
 
6
  API_TOKEN = os.environ.get("API_TOKEN")
7
  API_ENDPOINT = os.environ.get("API_ENDPOINT")
8
 
9
  KEY = os.environ.get("KEY")
10
 
 
 
11
  API_ENDPOINTS = {
12
  "Falcon": "tiiuae/falcon-180B-chat",
13
  "Llama": "meta-llama/Llama-2-70b-chat-hf"
@@ -20,25 +23,24 @@ for model_name, model_endpoint in API_ENDPOINTS.items():
20
  CHOICES.append(model_name)
21
  CLIENTS[model_name] = InferenceClient(model_endpoint, headers = { "Authorization": f"Bearer {API_TOKEN}" })
22
 
23
- def format(input, chat_history, : str) -> str:
24
- instructions = instructions.strip(" ").strip("\n")
25
- prompt = instructions
26
- for turn in chat_history:
27
- user_message, bot_message = turn
28
- prompt = f"{prompt}\n{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
29
- prompt = f"{prompt}\n{USER_NAME}: {message}\n{BOT_NAME}:"
30
  return prompt
31
 
32
- def predict(instruction, history, input, access_key, model, temperature, top_p, top_k, rep_p, max_tokens, stop_seqs, seed):
33
 
34
  if (access_key != KEY):
35
  print(">>> MODEL FAILED: Input: " + input + ", Attempted Key: " + access_key)
36
  return ("[UNAUTHORIZED ACCESS]", input);
37
 
38
  stops = json.loads(stop_seqs)
 
 
39
 
40
  response = CLIENTS[model].text_generation(
41
- input,
42
  temperature = temperature,
43
  max_new_tokens = max_tokens,
44
  top_p = top_p,
@@ -52,9 +54,15 @@ def predict(instruction, history, input, access_key, model, temperature, top_p,
52
  return_full_text = False
53
  )
54
 
55
- print(f"---\nUSER: {input}\nBOT: {response}\n---")
 
 
 
 
 
 
56
 
57
- return (response, input)
58
 
59
  def maintain_cloud():
60
  print(">>> SPACE MAINTAINED!")
@@ -68,6 +76,7 @@ with gr.Blocks() as demo:
68
  with gr.Column():
69
  history = gr.Chatbot(elem_id = "chatbot")
70
  input = gr.Textbox(label = "Input", lines = 2)
 
71
  instruction = gr.Textbox(label = "Instruction", lines = 4)
72
  access_key = gr.Textbox(label = "Access Key", lines = 1)
73
  run = gr.Button("▶")
@@ -87,7 +96,7 @@ with gr.Blocks() as demo:
87
  with gr.Column():
88
  output = gr.Textbox(label = "Output", value = "", lines = 50)
89
 
90
- run.click(predict, inputs = [instruction, history, input, access_key, model, temperature, top_p, top_k, rep_p, max_tokens, stop_seqs, seed], outputs = [output, input])
91
  cloud.click(maintain_cloud, inputs = [], outputs = [input, output])
92
 
93
  demo.queue(concurrency_count = 500, api_open = True).launch(show_api = True)
 
2
  from huggingface_hub import Repository, InferenceClient
3
  import os
4
  import json
5
+ import re
6
 
7
  API_TOKEN = os.environ.get("API_TOKEN")
8
  API_ENDPOINT = os.environ.get("API_ENDPOINT")
9
 
10
  KEY = os.environ.get("KEY")
11
 
12
+ SPECIAL_SYMBOLS = ["‹", "›"]
13
+
14
  API_ENDPOINTS = {
15
  "Falcon": "tiiuae/falcon-180B-chat",
16
  "Llama": "meta-llama/Llama-2-70b-chat-hf"
 
23
  CHOICES.append(model_name)
24
  CLIENTS[model_name] = InferenceClient(model_endpoint, headers = { "Authorization": f"Bearer {API_TOKEN}" })
25
 
26
def format(instruction = "", history = "", input = "", preinput = ""):
    # Build the prompt string sent to the model: the instruction, then each
    # prior history entry wrapped in the SPECIAL_SYMBOLS delimiters, then the
    # new input (also delimited), then the pre-input text on the final line.
    # NOTE(review): `history` is fed from a gr.Chatbot, whose entries are
    # (user, bot) pairs — each `message` here is presumably such a pair and is
    # rendered via its str() form; confirm against the caller.
    # NOTE(review): `format` shadows the builtin; kept because callers
    # (predict) reference it by this name.
    sy_l, sy_r = SPECIAL_SYMBOLS[0], SPECIAL_SYMBOLS[1]
    # Fix: the original wrote f"{{sy_l}{message}{sy_r}" — "{{" is an escaped
    # literal brace and the lone "}" after "sy_l" is an f-string SyntaxError;
    # the delimiters must be interpolated, not escaped.
    formatted_history = '\n'.join(f"{sy_l}{message}{sy_r}" for message in history)
    task_message = f"{instruction}\n{formatted_history}\n{sy_l}{input}{sy_r}\n{preinput}"
    # Fix: the original returned the undefined name `prompt` (NameError at
    # runtime); return the string actually built above.
    return task_message
31
 
32
+ def predict(instruction, history, input, preinput, access_key, model, temperature, top_p, top_k, rep_p, max_tokens, stop_seqs, seed):
33
 
34
  if (access_key != KEY):
35
  print(">>> MODEL FAILED: Input: " + input + ", Attempted Key: " + access_key)
36
  return ("[UNAUTHORIZED ACCESS]", input);
37
 
38
  stops = json.loads(stop_seqs)
39
+
40
+ formatted_input = format(instruction, history, input, preinput)
41
 
42
  response = CLIENTS[model].text_generation(
43
+ formatted_input,
44
  temperature = temperature,
45
  max_new_tokens = max_tokens,
46
  top_p = top_p,
 
54
  return_full_text = False
55
  )
56
 
57
+ sy_l, sy_r = SPECIAL_SYMBOLS[0], SPECIAL_SYMBOLS[1]
58
+ pre_result = f"{sy_l}{response}{sy_r}{''.join(SPECIAL_SYMBOLS)}"
59
+ pattern = re.compile(f"{sy_l}(.*?){sy_r}", re.DOTALL)
60
+ match = pattern.search(pre_result)
61
+ get_result = match.group(1).strip() if match else ""
62
+
63
+ print(f"---\nUSER: {input}\nBOT: {get_result}\n---")
64
 
65
+ return (get_result, input)
66
 
67
  def maintain_cloud():
68
  print(">>> SPACE MAINTAINED!")
 
76
  with gr.Column():
77
  history = gr.Chatbot(elem_id = "chatbot")
78
  input = gr.Textbox(label = "Input", lines = 2)
79
+ preinput = gr.Textbox(label = "Pre-Input", lines = 1)
80
  instruction = gr.Textbox(label = "Instruction", lines = 4)
81
  access_key = gr.Textbox(label = "Access Key", lines = 1)
82
  run = gr.Button("▶")
 
96
  with gr.Column():
97
  output = gr.Textbox(label = "Output", value = "", lines = 50)
98
 
99
+ run.click(predict, inputs = [instruction, history, input, preinput, access_key, model, temperature, top_p, top_k, rep_p, max_tokens, stop_seqs, seed], outputs = [output, input])
100
  cloud.click(maintain_cloud, inputs = [], outputs = [input, output])
101
 
102
  demo.queue(concurrency_count = 500, api_open = True).launch(show_api = True)