KeithCu committed
Commit 651479a · 1 Parent(s): f545f1d

first test version

Files changed (1)
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
+ import gradio as gr
+ import litellm
+
+ model_name = "together_ai/teknium/OpenHermes-2p5-Mistral-7B"
+
+ def inference(message, history):
+     try:
+         flattened_history = [item for sublist in history for item in sublist]
+         full_message = " ".join(flattened_history + [message])
+         messages_litellm = [{"role": "user", "content": full_message}]  # litellm message format
+         partial_message = ""
+         for chunk in litellm.completion(model=model_name,
+                                         messages=messages_litellm,
+                                         max_new_tokens=4096,
+                                         temperature=.7,
+                                         top_k=100,
+                                         top_p=.9,
+                                         repetition_penalty=1.18,
+                                         stream=True):
+             partial_message += chunk['choices'][0]['delta']['content'] or ""  # extract text from streamed litellm chunks; the final delta may be None
+             yield partial_message
+     except Exception as e:
+         print("Exception encountered:", str(e))
+         yield "An error occurred. Please 'Clear' the error and try your question again."
+
+
+ gr.ChatInterface(
+     inference,
+     chatbot=gr.Chatbot(height=400),
+     textbox=gr.Textbox(placeholder="Enter text here...", container=False, scale=5),
+     description=f"""
+ CURRENT MODEL: {model_name}.
+ An incorrect prompt template will cause performance to suffer.
+ Check the API specifications to ensure this format matches the target LLM.""",
+     title="Simple Chatbot Test Application",
+     examples=["Define 'deep learning' in one sentence."],
+     retry_btn="Retry",
+     undo_btn="Undo",
+     clear_btn="Clear",
+     theme=gr.themes.Soft(),
+ ).queue().launch()
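
A quick way to exercise the streaming call outside the Gradio UI is a small standalone script. The sketch below is a hypothetical smoke test, not part of this commit: the file name, prompt text, and the TOGETHERAI_API_KEY environment variable are assumptions about the local setup (LiteLLM's Together AI provider typically reads its key from that variable). It repeats the same litellm.completion streaming loop used in inference() so the model and credentials can be verified before launching the chatbot.

# smoke_test.py - standalone check of the streaming call used by app.py (hypothetical helper)
import os
import litellm

# Assumption: the Together AI key is exported as TOGETHERAI_API_KEY before running.
assert os.environ.get("TOGETHERAI_API_KEY"), "export TOGETHERAI_API_KEY first"

model_name = "together_ai/teknium/OpenHermes-2p5-Mistral-7B"  # same model id as app.py

# Stream one short completion and print deltas as they arrive,
# mirroring the loop inside inference() in app.py.
for chunk in litellm.completion(model=model_name,
                                messages=[{"role": "user", "content": "Say hello in five words."}],
                                stream=True):
    print(chunk['choices'][0]['delta']['content'] or "", end="", flush=True)
print()

Note that app.py starts the Gradio server at import time via .launch() (served locally on Gradio's default http://127.0.0.1:7860); wrapping that call in an if __name__ == "__main__": guard would let inference() be imported by a test without opening the web UI.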