Spaces:

Waseem7711
/

llama2

Sleeping

App Files Files Community

Waseem7711 committed on Oct 15

Commit

cdb6b83

•

1 Parent(s): 68ac025

Create app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+# app.py
+import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Retrieve Hugging Face API token from environment variables (if accessing private models)
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure you set this in Hugging Face Secrets
+# Streamlit app setup
+st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
+st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
+@st.cache_resource
+def load_model():
+    """
+    Load the tokenizer and model from Hugging Face.
+    This function is cached to prevent re-loading on every interaction.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(
+        "meta-llama/Llama-2-7b-chat-hf",
+        use_auth_token= use your api key  # Remove if the model is public
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b-chat-hf",
+        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
+        device_map="auto",
+        use_auth_token=HF_API_TOKEN  # Remove if the model is public
+    )
+    return tokenizer, model
+# Load the model and tokenizer
+tokenizer, model = load_model()
+# Initialize session state for conversation history
+if "conversation" not in st.session_state:
+    st.session_state.conversation = []
+# User input
+user_input = st.text_input("You:", "")
+if user_input:
+    st.session_state.conversation.append({"role": "user", "content": user_input})
+    with st.spinner("Generating response..."):
+        try:
+            # Prepare the conversation history for the model
+            conversation_text = ""
+            for message in st.session_state.conversation:
+                if message["role"] == "user":
+                    conversation_text += f"User: {message['content']}\n"
+                elif message["role"] == "assistant":
+                    conversation_text += f"Assistant: {message['content']}\n"
+            # Encode the input
+            inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
+            # Generate a response
+            output = model.generate(
+                inputs,
+                max_length=1000,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.eos_token_id  # To avoid warnings
+            )
+            # Decode the response
+            response = tokenizer.decode(output[0], skip_special_tokens=True)
+            # Extract the assistant's reply
+            assistant_reply = response[len(conversation_text + "Assistant: "):].strip()
+            # Append the assistant's reply to the conversation history
+            st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
+            # Display the updated conversation
+            conversation_display = ""
+            for message in st.session_state.conversation:
+                if message["role"] == "user":
+                    conversation_display += f"**You:** {message['content']}\n\n"
+                elif message["role"] == "assistant":
+                    conversation_display += f"**Bot:** {message['content']}\n\n"
+            st.markdown(conversation_display)
+        except Exception as e:
+            st.error(f"An error occurred: {e}")