# app.py
#
# Streamlit chat front end for the Llama-2 7B chat model. The model is loaded
# once per server process (cached), the conversation is kept in Streamlit
# session state, and every submitted message triggers one generation call.

import os

import streamlit as st
import torch
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables
load_dotenv()

# Retrieve Hugging Face API token from environment variables (if accessing private models)
HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure you set this in Hugging Face Secrets

MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# Upper bound on the number of tokens generated per reply. This is a budget
# for the reply only, so it does not shrink as the conversation grows.
MAX_NEW_TOKENS = 512

# Speaker labels used in the text prompt sent to the model.
_PROMPT_LABELS = {"user": "User", "assistant": "Assistant"}
# Speaker labels used when rendering the conversation in the UI.
_DISPLAY_LABELS = {"user": "You", "assistant": "Bot"}

# Streamlit app setup
st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
st.write("This chatbot is powered by the Llama2 model. Ask me anything!")


@st.cache_resource
def load_model():
    """Load the tokenizer and model from Hugging Face.

    The function is wrapped in ``st.cache_resource`` so the weights are not
    re-loaded on every script rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # NOTE(review): newer transformers releases appear to prefer `token=`
    # over `use_auth_token=` -- confirm against the pinned version before
    # switching.
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        use_auth_token=HF_API_TOKEN,  # Remove if the model is public
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
        device_map="auto",
        use_auth_token=HF_API_TOKEN,  # Remove if the model is public
    )
    return tokenizer, model


def _build_prompt(history):
    """Return the plain-text prompt for *history*, ending with ``Assistant:``.

    Each message becomes a ``User: ...`` or ``Assistant: ...`` line; messages
    with any other role are skipped.
    """
    turns = [
        f"{_PROMPT_LABELS[message['role']]}: {message['content']}\n"
        for message in history
        if message["role"] in _PROMPT_LABELS
    ]
    return "".join(turns) + "Assistant:"


def _render_history(history):
    """Return *history* formatted as Markdown for display."""
    return "".join(
        f"**{_DISPLAY_LABELS[message['role']]}:** {message['content']}\n\n"
        for message in history
        if message["role"] in _DISPLAY_LABELS
    )


def _generate_reply(tokenizer, model, history):
    """Generate the assistant's next reply for *history*.

    Args:
        tokenizer: Tokenizer returned by ``load_model``.
        model: Causal language model returned by ``load_model``.
        history: List of ``{"role": ..., "content": ...}`` messages.

    Returns:
        The newly generated text, stripped of surrounding whitespace.
    """
    # Calling the tokenizer (rather than `encode`) also yields the attention
    # mask, which is forwarded to `generate` together with the input ids.
    inputs = tokenizer(_build_prompt(history), return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    output = model.generate(
        **inputs,
        # Budget for the reply only; `max_length` would also count the
        # prompt and leave no room once the history gets long.
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # To avoid warnings
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # token count instead of by character offset in the decoded text, which
    # is not guaranteed to reproduce the prompt exactly.
    new_tokens = output[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# Load the model and tokenizer
tokenizer, model = load_model()

# Initialize session state for conversation history
if "conversation" not in st.session_state:
    st.session_state.conversation = []

# User input
user_input = st.text_input("You:", "")

if user_input:
    st.session_state.conversation.append({"role": "user", "content": user_input})

    with st.spinner("Generating response..."):
        try:
            assistant_reply = _generate_reply(
                tokenizer, model, st.session_state.conversation
            )
        except Exception as e:
            # Drop the unanswered user turn so a failed request does not
            # leave a dangling message in the next prompt.
            st.session_state.conversation.pop()
            st.error(f"An error occurred: {e}")
        else:
            # Append the assistant's reply to the conversation history
            st.session_state.conversation.append(
                {"role": "assistant", "content": assistant_reply}
            )

            # Display the updated conversation
            st.markdown(_render_history(st.session_state.conversation))