import streamlit as st
from llama_cpp import Llama

st.set_page_config(page_title="Chat with AI", page_icon="🤖")
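
# Custom CSS: restyle the chat input box. Note that the .chat-message rules
# below only take effect if chat bubbles are rendered as raw HTML with
# matching classes; st.chat_message applies its own built-in styling.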
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.chat-message {
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-bottom: 1rem;
    display: flex;
}
.chat-message.user {
    background-color: #2b313e;
}
.chat-message.bot {
    background-color: #475063;
}
.chat-message .avatar {
    width: 20%;
}
.chat-message .avatar img {
    max-width: 78px;
    max-height: 78px;
    border-radius: 50%;
    object-fit: cover;
}
.chat-message .message {
    width: 80%;
    padding: 0 1.5rem;
    color: #fff;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        filename="*Q4_K_M.gguf",
        verbose=False,
        n_ctx=512,
        n_batch=256,
        n_threads=4,
    )

llm = load_model()
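
# Flatten recent history into the plain-text "Human:"/"Assistant:" transcript
# format used to prompt the model (an assumption consistent with the stop
# strings used below).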
def format_context(messages):
    context = ""
    for message in messages:
        if message["role"] == "user":
            context += f"Human: {message['content']}\n"
        else:
            context += f"Assistant: {message['content']}\n"
    return context
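
# st.session_state survives Streamlit's top-to-bottom reruns, so the message
# history persists between interactions.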
if "messages" not in st.session_state:
    st.session_state.messages = []
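
# Replay the stored conversation so earlier turns stay visible after each rerun.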
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
if prompt := st.chat_input("What is your question?"):
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # The sliced history already ends with the new "Human: ..." line, so only
    # the "Assistant:" cue is appended (avoids duplicating the prompt).
    context = format_context(st.session_state.messages[-5:])
    model_input = f"{context}Assistant:"

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # Stream the completion token by token; echo=False keeps the prompt
        # out of the streamed text, so no post-hoc splitting is needed.
        for token in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            echo=False,
            stream=True,
        ):
            full_response += token["choices"][0]["text"]
            message_placeholder.markdown(full_response + "▌")

        assistant_response = full_response.strip()
        message_placeholder.markdown(assistant_response)

    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")
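
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py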