# app.py
import os

import streamlit as st
import torch
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables from a local .env file (Spaces exposes Secrets as environment variables)
load_dotenv()

# Retrieve the Hugging Face API token (required for gated models such as Llama 2)
HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Set this in your Space's Settings -> Secrets
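
# For local development, python-dotenv reads a .env file placed next to app.py.
# A minimal sketch (the value below is a placeholder, not a real token):
#
#   HF_API_TOKEN=hf_xxxxxxxxxxxxxxxx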

# Streamlit app setup
st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
st.write("This chatbot is powered by the Llama2 model. Ask me anything!")

@st.cache_resource
def load_model():
    """
    Load the tokenizer and model from Hugging Face.
    Cached with st.cache_resource so the weights are loaded once,
    not re-loaded on every interaction.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        token=HF_API_TOKEN,  # Remove if the model is public
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
        device_map="auto",          # Place layers on available devices (needs accelerate)
        token=HF_API_TOKEN,  # Remove if the model is public
    )
    return tokenizer, model

# Load the model and tokenizer
tokenizer, model = load_model()

# Initialize session state for conversation history
if "conversation" not in st.session_state:
    st.session_state.conversation = []

# User input
user_input = st.text_input("You:", "")

if user_input:
    st.session_state.conversation.append({"role": "user", "content": user_input})
    with st.spinner("Generating response..."):
        try:
            # Prepare the conversation history for the model
            conversation_text = ""
            for message in st.session_state.conversation:
                if message["role"] == "user":
                    conversation_text += f"User: {message['content']}\n"
                elif message["role"] == "assistant":
                    conversation_text += f"Assistant: {message['content']}\n"

            # Encode the input
            inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
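
            # Note: the plain "User:/Assistant:" prompt above works, but Llama 2
            # chat models were fine-tuned on an [INST]-tagged format. On a recent
            # transformers release (>= 4.34 is an assumption about this environment),
            # the tokenizer's built-in chat template produces that format directly:
            #
            #   inputs = tokenizer.apply_chat_template(
            #       st.session_state.conversation,
            #       add_generation_prompt=True, return_tensors="pt"
            #   ).to(model.device)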

            # Generate a response
            output = model.generate(
                inputs,
                max_new_tokens=512,  # Cap the reply length regardless of how long the prompt has grown
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,  # Llama 2 defines no pad token; reuse EOS to avoid warnings
            )

            # Decode only the newly generated tokens; slicing the decoded string
            # by prompt length is fragile, since decoding an encoded prompt does
            # not always reproduce it character for character
            new_tokens = output[0][inputs.shape[-1]:]
            assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

            # Append the assistant's reply to the conversation history
            st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})

            # Display the updated conversation
            conversation_display = ""
            for message in st.session_state.conversation:
                if message["role"] == "user":
                    conversation_display += f"**You:** {message['content']}\n\n"
                elif message["role"] == "assistant":
                    conversation_display += f"**Bot:** {message['content']}\n\n"
            st.markdown(conversation_display)
        except Exception as e:
            st.error(f"An error occurred: {e}")
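
# ---------------------------------------------------------------------------
# Deployment note: a Space running this script also needs a requirements.txt.
# A minimal sketch based on the imports above (versions left unpinned here;
# accelerate is included because device_map="auto" depends on it):
#
#   streamlit
#   transformers
#   torch
#   accelerate
#   python-dotenv
# ---------------------------------------------------------------------------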