# llama2/app.py
import streamlit as st
from transformers import AutoTokenizer, LlamaForCausalLM
import torch
# Title of the app
st.title("LLaMA 2 Chatbot")
# Load the LLaMA model and tokenizer from Hugging Face (cached across reruns)
@st.cache_resource
def load_model_and_tokenizer():
    # meta-llama/Llama-2-7b-hf is a gated repository: access must be granted
    # on Hugging Face and a token supplied (e.g. via `huggingface-cli login`).
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
    return tokenizer, model
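
# Optional (illustrative, not part of the original app): on a GPU-backed Space
# the model could be moved to CUDA after loading, e.g. `model.to("cuda")`, in
# which case the input tensors built in generate_text below would need a
# matching `.to("cuda")`. In float32 the 7B model needs roughly 28 GB of
# memory, so CPU-only hardware is a tight fit.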
# Generate a completion for the given prompt
def generate_text(prompt, tokenizer, model):
    inputs = tokenizer(prompt, return_tensors="pt")
    # max_new_tokens counts only generated tokens (max_length would include the prompt)
    with torch.no_grad():
        generate_ids = model.generate(
            inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=50
        )
    return tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
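
# Optional (illustrative): generate() uses greedy decoding by default; more
# varied responses can be obtained by passing sampling parameters such as
# do_sample=True, temperature=0.7, top_p=0.9 to model.generate above.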
# Input field for user prompt
user_input = st.text_input("Enter your prompt:", "Hey, are you conscious? Can you talk to me?")
# Load model and tokenizer
tokenizer, model = load_model_and_tokenizer()
# Generate a response when the user clicks the button
if st.button("Generate Response"):
    with st.spinner("Generating response..."):
        response = generate_text(user_input, tokenizer, model)
        st.write(f"Response: {response}")