Spaces:
Sleeping
Sleeping
import gradio | |
import litellm | |
def inference(message, history):
    """Stream a chat completion for ``message`` given the earlier ``history``.

    Every earlier turn and the new message are joined with single spaces into
    one user-role prompt, which is sent through ``litellm.completion`` with
    ``stream=True``.

    Args:
        message: The text the user just submitted.
        history: Iterable of earlier turns, each itself an iterable of
            strings (presumably ``[user, assistant]`` pairs supplied by the
            chat UI -- confirm against the caller). ``None`` entries are
            skipped.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries a
        ``content`` value. If anything fails, the exception is printed and a
        single generic error message is yielded instead.
    """
    try:
        # Skip None entries: str.join() raises TypeError on them, which would
        # abort the request before the model is ever called.
        past_turns = [
            text for turn in history for text in turn if text is not None
        ]
        full_message = " ".join([*past_turns, message])
        messages_litellm = [{"role": "user", "content": full_message}]  # litellm message format

        stream = litellm.completion(
            model="together_ai/teknium/OpenHermes-2p5-Mistral-7B",
            messages=messages_litellm,
            max_new_tokens=4096,
            temperature=.7,
            top_k=100,
            top_p=.9,
            repetition_penalty=1.18,
            stream=True,
        )

        partial_message = ""
        for chunk in stream:
            piece = chunk["choices"][0]["delta"]["content"]  # text carried by this streamed chunk
            if piece is None:
                # A delta can arrive without text (content is None). Appending
                # None would raise TypeError and the handler below would then
                # replace the whole reply with the error message.
                continue
            partial_message += piece
            yield partial_message
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure on stdout
        # and show the user a generic message instead of a traceback.
        print("Exception encountered:", str(e))
        yield "An Error occured please 'Clear' the error and try your question again"
# Build the individual UI pieces first, then assemble the chat interface,
# enable request queuing, and start the server.
chat_window = gr.Chatbot(height=400)
input_box = gr.Textbox(placeholder="Enter text here...", container=False, scale=5)

# Shown under the title; interpolates the module-level ``model_name``.
app_description = f"""
CURRENT PROMPT TEMPLATE: {model_name}.
An incorrect prompt template will cause performance to suffer.
Check the API specifications to ensure this format matches the target LLM."""

demo = gr.ChatInterface(
    inference,
    chatbot=chat_window,
    textbox=input_box,
    description=app_description,
    title="Simple Chatbot Test Application",
    examples=["Define 'deep learning' in once sentence."],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    theme=theme,
)
demo = demo.queue()
demo.launch()