import streamlit as st
from llama_cpp import Llama

st.set_page_config(page_title="Chat with AI", page_icon="🤖", layout="wide")

# Custom CSS for better styling
st.markdown("""
""", unsafe_allow_html=True)


@st.cache_resource
def load_model():
    # Cache the model so it is loaded only once per Streamlit session.
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        filename="*Q4_K_M.gguf",
        verbose=False,
        n_ctx=1024,
        n_batch=512,
        n_threads=8,
        use_mlock=True,
        use_mmap=True,
    )


llm = load_model()


def format_context(messages):
    # Flatten the chat history into a plain-text "Human:/Assistant:" transcript.
    context = ""
    for message in messages:
        if message["role"] == "user":
            context += f"Human: {message['content']}\n"
        else:
            context += f"Assistant: {message['content']}\n"
    return context


# Sidebar
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")

# Add useful information to the sidebar
st.sidebar.header("How to use")
st.sidebar.markdown("""
1. Type your question in the chat input box at the bottom of the screen.
2. Press Enter or click the send button to submit your question.
3. The AI will generate a response based on your input.
4. You can have a continuous conversation by asking follow-up questions.
""")

st.sidebar.header("Model Information")
st.sidebar.markdown("""
- Model: med_phi3-mini-4k-GGUF
- Context Length: 1024 tokens
- This model is specialized in medical knowledge.
""")

st.sidebar.header("Tips")
st.sidebar.markdown("""
- Be clear and specific in your questions.
- For medical queries, provide relevant details.
- Remember that this is an AI model and may not always be 100% accurate.
""")

# Main chat interface
st.title("Chat with AI")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Format the context from the last 5 messages; the latest user prompt is
    # already in the history, so it must not be appended a second time.
    context = format_context(st.session_state.messages[-5:])

    # Prepare the model input
    model_input = f"{context}Assistant:"

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for token in llm(
            model_input,
            max_tokens=None,
            stop=["Human:"],
            echo=True,
            stream=True,
        ):
            full_response += token["choices"][0]["text"]
            message_placeholder.markdown(full_response + "▌")
        # Remove the echoed context and prompt from the response
        assistant_response = full_response.split("Assistant:")[-1].strip()
        message_placeholder.markdown(assistant_response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    st.rerun()

# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")

# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")
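
# Suggested usage (assumptions, not part of the original script): save this file
# as, e.g., app.py, install the dependencies, and launch the app locally with
#
#   pip install streamlit llama-cpp-python huggingface-hub
#   streamlit run app.py
#
# Llama.from_pretrained downloads the quantized GGUF weights from the Hugging
# Face Hub on the first run, so an internet connection is required initially;
# subsequent runs reuse the cached model file.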