# app.py
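# NOTE: a sketch of the assumed environment, not pinned versions.
# requirements.txt is assumed to include: streamlit, transformers, torch,
# accelerate (needed for device_map="auto" below), and python-dotenv.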
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Retrieve Hugging Face API token from environment variables
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
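# Note: meta-llama/Llama-2-7b-chat-hf is a gated model; the token must belong
# to an account that has accepted Meta's license on the Hugging Face Hub.
# On Spaces, HF_API_TOKEN would typically be set as a repository secret
# rather than in a local .env file.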
# Streamlit app setup
st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
@st.cache_resource
def load_model():
    """
    Load the tokenizer and model from Hugging Face.
    This function is cached so the model is not reloaded on every interaction.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        token=HF_API_TOKEN,  # authenticate with the secret token
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        torch_dtype=torch.float16,  # float16 halves memory usage vs. float32
        device_map="auto",          # requires accelerate; places weights automatically
        token=HF_API_TOKEN,
    )
    return tokenizer, model
# Load the model and tokenizer
tokenizer, model = load_model()
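# Rough sizing assumption: the 7B model in float16 is on the order of 13-14 GB
# of weights, so a GPU-backed Space (e.g. an A10G) is assumed here; with
# accelerate, device_map="auto" can offload layers to CPU if GPU memory is short.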
# Initialize session state for conversation history
if "conversation" not in st.session_state:
st.session_state.conversation = []
# User input
user_input = st.text_input("You:", "")
if user_input:
    st.session_state.conversation.append({"role": "user", "content": user_input})
    with st.spinner("Generating response..."):
        try:
            # Flatten the conversation history into a plain-text prompt
            conversation_text = ""
            for message in st.session_state.conversation:
                if message["role"] == "user":
                    conversation_text += f"User: {message['content']}\n"
                elif message["role"] == "assistant":
                    conversation_text += f"Assistant: {message['content']}\n"
            prompt = conversation_text + "Assistant:"

            # Encode the prompt and move it to the model's device
            inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

            # Generate a response; max_new_tokens bounds only the reply length,
            # unlike max_length, which also counts the (growing) prompt tokens
            output = model.generate(
                inputs,
                max_new_tokens=256,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,  # avoid pad-token warnings
            )

            # Decode the full sequence, then strip the prompt to keep only
            # the newly generated assistant reply
            response = tokenizer.decode(output[0], skip_special_tokens=True)
            assistant_reply = response[len(prompt):].strip()

            # Append the assistant's reply to the conversation history
            st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})

            # Display the updated conversation
            conversation_display = ""
            for message in st.session_state.conversation:
                if message["role"] == "user":
                    conversation_display += f"**You:** {message['content']}\n\n"
                elif message["role"] == "assistant":
                    conversation_display += f"**Bot:** {message['content']}\n\n"
            st.markdown(conversation_display)
        except Exception as e:
            st.error(f"An error occurred: {e}")
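# To try the app locally (assuming the dependencies above and a .env file
# containing HF_API_TOKEN), run:
#   streamlit run app.py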