# Hugging Face Space app (extracted page banner: "Spaces: Sleeping").
import gradio as gr
from llama_cpp import Llama

# Download the 4-bit quantized Llama 3.1 8B Instruct GGUF from the Hub and
# load it with llama-cpp-python (first call downloads and caches the weights).
llm = Llama.from_pretrained(
    repo_id="PersonalAILabs/Llama-3.1-Quantized-4bit",
    filename="Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf",
)
# For local dev, load a local checkpoint instead:
# llm = Llama(
#     model_path="./checkpoints/Llama-3.1-8b-instruct/Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf",
#     verbose=False,
# )
def predict(message, history):
    """Stream an assistant reply for *message*, given the prior chat *history*.

    Yields the accumulated response text after each streamed token so the
    Gradio ChatInterface can render the reply incrementally.

    Args:
        message: The user's latest message (plain text).
        history: Prior turns. With ``type="messages"`` (as configured below),
            Gradio supplies a list of ``{"role", "content"}`` dicts; the legacy
            ``(user_message, bot_message)`` tuple format is also accepted.
    """
    messages = [
        {"role": "system", "content": "You are a knowledgeable assistant who knows everything about Personalized AI Labs (or PAIL). PAIL is a research company that creates unique virtual assistants for businesses."}]
    for item in history:
        if isinstance(item, dict):
            # type="messages" format: forward role/content as-is, skipping
            # empty turns (e.g. placeholder entries while streaming).
            if item.get("content"):
                messages.append({"role": item["role"], "content": item["content"]})
        else:
            # Legacy tuple format: (user_message, bot_message) pairs.
            user_message, bot_message = item
            if user_message:
                messages.append({"role": "user", "content": user_message})
            if bot_message:
                messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})
    response = ""
    for chunk in llm.create_chat_completion(
        stream=True,
        messages=messages,
    ):
        # Streamed deltas may omit "content" (e.g. the role-only first chunk).
        part = chunk["choices"][0]["delta"].get("content", None)
        if part:
            response += part
            yield response
# Description rendered above the chat box in the UI.
description = """
This is an interactive chat interface powered by Meta's Llama 3.1 language model quantized to 4 bits.
You can ask questions about PAIL or ask any other question you might have.
The goal of this assistant is to demonstrate how PAIL can help your business achieve a higher level of user engagement.
"""
# Example prompts shown as clickable suggestions in the chat UI.
example_messages = [
    "What does PAIL stand for?",
    "How does PAIL-UVA work?",
    "What services does PAIL offer?",
    "Do virtual assistants on websites create higher user engagement?",
]
# Build the chat UI. type="messages" makes Gradio pass history to predict()
# as a list of {"role", "content"} dicts (openai-style).
# Keep the ChatInterface in `demo` (not the return value of launch()) so the
# app object stays accessible, then launch it.
demo = gr.ChatInterface(
    predict,
    description=description,
    examples=example_messages,
    title="PAIL Unique Virtual Assistant",
    type="messages",
)
demo.launch()