# Ollam-Chabot / app.py
# Source: Waseem7711's Hugging Face Space (commit a3d7cd0, verified; file size 1.8 kB)
# app.py
"""Streamlit chat UI backed by the Llama-2-7b-chat model."""
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from dotenv import load_dotenv

# Load environment variables (e.g. a Hugging Face auth token) from .env.
load_dotenv()

# Set environment variables for Hugging Face cache locations (if needed)
# os.environ["HF_HOME"] = "/path/to/huggingface"
# os.environ["TRANSFORMERS_CACHE"] = "/path/to/transformers/cache"

MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"


@st.cache_resource(show_spinner="Loading model (first run only)...")
def _load_model():
    """Load the tokenizer and model once per process.

    Streamlit re-executes the entire script on every user interaction,
    so without caching the multi-GB model would be reloaded from disk
    for each message. ``st.cache_resource`` keeps a single shared copy.

    Returns:
        (tokenizer, model) tuple ready for generation.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,  # float16 halves memory vs. float32
        device_map="auto",          # place weights on available GPU(s)/CPU
    )
    return tokenizer, model


# Streamlit app setup
st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
st.write("This chatbot is powered by the Llama2 model. Ask me anything!")

# User input
user_input = st.text_input("You:", "")

if user_input:
    with st.spinner("Generating response..."):
        try:
            tokenizer, model = _load_model()
            # Encode the input; the eos token marks the end of the user turn.
            inputs = tokenizer.encode(
                user_input + tokenizer.eos_token, return_tensors="pt"
            ).to(model.device)
            # Generate a response. max_new_tokens bounds only the generated
            # text; the original max_length=1000 also counted prompt tokens,
            # silently shrinking the reply for long prompts.
            with torch.inference_mode():  # no autograd state during generation
                output = model.generate(
                    inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    eos_token_id=tokenizer.eos_token_id,
                )
            # generate() returns prompt + completion; decode only the new
            # tokens so the bot does not echo the user's question back.
            response = tokenizer.decode(
                output[0][inputs.shape[-1]:], skip_special_tokens=True
            )
            # Display the response
            st.text_area("Bot:", value=response, height=200)
        except Exception as e:  # surface load/auth/OOM failures in the UI
            st.error(f"An error occurred: {e}")