import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

# Page config
st.set_page_config(
    page_title="Zephyr Chat",
    page_icon="🤖",
    layout="wide"
)

# Initialize session state for the chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Load model and tokenizer once per process; st.cache_resource keeps them
# in memory across reruns instead of reloading on every interaction
@st.cache_resource
def load_model():
    model_name = "HuggingFaceH4/zephyr-7b-beta"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",  # keep the checkpoint's dtype instead of upcasting to float32
    )
    return model, tokenizer

# Main chat interface
st.title("Zephyr Chatbot 🤖")

try:
    model, tokenizer = load_model()

    # Display prior chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    if prompt := st.chat_input("What's on your mind?"):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                # Prepare input
                input_text = f"User: {prompt}\nAssistant:"
                inputs = tokenizer(input_text, return_tensors="pt")

                # Generate response; do_sample=True is required for
                # temperature to have any effect, and max_new_tokens bounds
                # the reply length without counting the prompt tokens
                outputs = model.generate(
                    inputs.input_ids,
                    attention_mask=inputs.attention_mask,
                    max_new_tokens=200,
                    num_return_sequences=1,
                    do_sample=True,
                    temperature=0.7,
                    pad_token_id=tokenizer.eos_token_id
                )

                # Decode, strip the echoed prompt, and display the response
                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
                response = response.split("Assistant:")[-1].strip()
                st.markdown(response)
                st.session_state.messages.append({"role": "assistant", "content": response})

except Exception as e:
    st.error(f"Error: {str(e)}")
    st.info("Note: This app requires significant computational resources. Consider using a smaller model or upgrading your Space's resources.")
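
# --- Optional sketch (an assumption, not part of the original app) ---
# Zephyr was fine-tuned on a specific chat format, so transformers'
# apply_chat_template typically formats prompts better than the manual
# "User: ...\nAssistant:" string above, and it lets the model see the full
# conversation history rather than only the latest message. build_chat_inputs
# is a hypothetical helper name introduced here for illustration.
def build_chat_inputs(tokenizer, messages):
    # "messages" uses the same {"role", "content"} dicts already stored in
    # st.session_state.messages; add_generation_prompt appends the tokens
    # that cue the model to reply as the assistant.
    return tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    )

# Hypothetical drop-in usage inside the generation block:
#     input_ids = build_chat_inputs(tokenizer, st.session_state.messages)
#     outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True,
#                              temperature=0.7, pad_token_id=tokenizer.eos_token_id)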