Hristo ZHANG 张鹤立 committed on
Commit
8f263fc
1 Parent(s): cc4e355
Files changed (1) hide show
  1. app.py +8 -23
app.py CHANGED
@@ -10,7 +10,7 @@ DEFAULT_MODEL_PATH = model_file
10
  parser = argparse.ArgumentParser()
11
  parser.add_argument("-m", "--model", default=DEFAULT_MODEL_PATH, type=Path, help="model path")
12
  parser.add_argument("--mode", default="chat", type=str, choices=["chat", "generate"], help="inference mode")
13
- parser.add_argument("-l", "--max_length", default=2048, type=int, help="max total length including prompt and output")
14
  parser.add_argument("-c", "--max_context_length", default=512, type=int, help="max context length")
15
  parser.add_argument("--top_k", default=0, type=int, help="top-k sampling")
16
  parser.add_argument("--top_p", default=0.7, type=float, help="top-p sampling")
@@ -25,27 +25,14 @@ llm = Llama(model_path=model_file)
25
 
26
 
27
 
28
- def predict(input, system_prompt, chatbot, max_length, ctx_length, top_p, temperature, history):
29
  chatbot.append((input, ""))
30
  response = ""
31
  history.append(input)
32
 
33
- generation_kwargs = dict(
34
- max_length=max_length,
35
- max_context_length=ctx_length,
36
- do_sample=temperature > 0,
37
- top_k=40,
38
- top_p=top_p,
39
- temperature=temperature,
40
- repetition_penalty=1.1,
41
- num_threads=0,
42
- stream=True,
43
- )
44
- output = llm(input)
45
- response = output['choices'][0]['text']
46
-
47
- for response_piece in response:
48
- response += response_piece
49
  chatbot[-1] = (chatbot[-1][0], response)
50
 
51
  yield chatbot, history
@@ -63,17 +50,15 @@ def reset_state():
63
 
64
 
65
  with gr.Blocks() as demo:
66
- gr.HTML("""<h1 align="center">01-Yi 6B</h1>""")
67
 
68
  chatbot = gr.Chatbot()
69
  with gr.Row():
70
  with gr.Column(scale=4):
71
- system_prompt = gr.Textbox(show_label=False, placeholder="system prompt ...", lines=2)
72
- user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=6)
73
  submitBtn = gr.Button("Submit", variant="primary")
74
  with gr.Column(scale=1):
75
  max_length = gr.Slider(0, 32048, value=args.max_length, step=1.0, label="Maximum Length", interactive=True)
76
- ctx_length = gr.Slider(0, 4096, value=512, step=1.0, label="Maximum Context Length", interactive=True)
77
  top_p = gr.Slider(0, 1, value=args.top_p, step=0.01, label="Top P", interactive=True)
78
  temperature = gr.Slider(0, 1, value=args.temp, step=0.01, label="Temperature", interactive=True)
79
  emptyBtn = gr.Button("Clear History")
@@ -81,7 +66,7 @@ with gr.Blocks() as demo:
81
  history = gr.State([])
82
 
83
  submitBtn.click(
84
- predict, [user_input, system_prompt, chatbot, max_length, ctx_length, top_p, temperature, history], [chatbot, history], show_progress=True
85
  )
86
  submitBtn.click(reset_user_input, [], [user_input])
87
 
 
10
  parser = argparse.ArgumentParser()
11
  parser.add_argument("-m", "--model", default=DEFAULT_MODEL_PATH, type=Path, help="model path")
12
  parser.add_argument("--mode", default="chat", type=str, choices=["chat", "generate"], help="inference mode")
13
+ parser.add_argument("-l", "--max_length", default=512, type=int, help="max total length including prompt and output")
14
  parser.add_argument("-c", "--max_context_length", default=512, type=int, help="max context length")
15
  parser.add_argument("--top_k", default=0, type=int, help="top-k sampling")
16
  parser.add_argument("--top_p", default=0.7, type=float, help="top-p sampling")
 
25
 
26
 
27
 
28
+ def predict(input, chatbot, max_length, top_p, temperature, history):
29
  chatbot.append((input, ""))
30
  response = ""
31
  history.append(input)
32
 
33
+ for output in llm(input, stream=True, temperature=temperature, top_p=top_p, max_tokens=max_length, ):
34
+ piece = output['choices'][0]['text']
35
+ response += piece
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  chatbot[-1] = (chatbot[-1][0], response)
37
 
38
  yield chatbot, history
 
50
 
51
 
52
  with gr.Blocks() as demo:
53
+ gr.HTML("""<h1 align="center">Yi-6B-GGUF by llama.cpp</h1>""")
54
 
55
  chatbot = gr.Chatbot()
56
  with gr.Row():
57
  with gr.Column(scale=4):
58
+ user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=8)
 
59
  submitBtn = gr.Button("Submit", variant="primary")
60
  with gr.Column(scale=1):
61
  max_length = gr.Slider(0, 32048, value=args.max_length, step=1.0, label="Maximum Length", interactive=True)
 
62
  top_p = gr.Slider(0, 1, value=args.top_p, step=0.01, label="Top P", interactive=True)
63
  temperature = gr.Slider(0, 1, value=args.temp, step=0.01, label="Temperature", interactive=True)
64
  emptyBtn = gr.Button("Clear History")
 
66
  history = gr.State([])
67
 
68
  submitBtn.click(
69
+ predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history], show_progress=True
70
  )
71
  submitBtn.click(reset_user_input, [], [user_input])
72