Waseem771 committed
Commit b895edb · verified · Parent: c44a143

Update app.py

Files changed (1)
  1. app.py +35 -88
app.py CHANGED
@@ -1,93 +1,40 @@
- # app.py
-
  import streamlit as st
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import torch
  import os
- from dotenv import load_dotenv
-
- # Load environment variables
- load_dotenv()
-
- # Retrieve Hugging Face API token from environment variables
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-
- # Streamlit app setup
- st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
- st.write("This chatbot is powered by the Llama2 model. Ask me anything!")

- @st.cache_resource
- def load_model():
-     """
-     Load the tokenizer and model from Hugging Face.
-     This function is cached to prevent re-loading on every interaction.
-     """
-     tokenizer = AutoTokenizer.from_pretrained(
-         "meta-llama/Llama-2-7b-chat-hf",
-         use_auth_token=HF_API_TOKEN  # Use the secret token
-     )
-     model = AutoModelForCausalLM.from_pretrained(
-         "meta-llama/Llama-2-7b-chat-hf",
-         torch_dtype=torch.float16,  # Use float16 for reduced memory usage
-         device_map="auto",
-         use_auth_token=HF_API_TOKEN  # Use the secret token
-     )
-     return tokenizer, model
-
- # Load the model and tokenizer
- tokenizer, model = load_model()
-
- # Initialize session state for conversation history
- if "conversation" not in st.session_state:
-     st.session_state.conversation = []

  # User input
- user_input = st.text_input("You:", "")
-
- if user_input:
-     st.session_state.conversation.append({"role": "user", "content": user_input})
-     with st.spinner("Generating response..."):
-         try:
-             # Prepare the conversation history for the model
-             conversation_text = ""
-             for message in st.session_state.conversation:
-                 if message["role"] == "user":
-                     conversation_text += f"User: {message['content']}\n"
-                 elif message["role"] == "assistant":
-                     conversation_text += f"Assistant: {message['content']}\n"
-
-             # Encode the input
-             inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
-
-             # Generate a response
-             output = model.generate(
-                 inputs,
-                 max_length=1000,
-                 temperature=0.7,
-                 top_p=0.9,
-                 do_sample=True,
-                 eos_token_id=tokenizer.eos_token_id,
-                 pad_token_id=tokenizer.eos_token_id  # To avoid warnings
-             )
-
-             # Decode the response
-             response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-             # Extract the assistant's reply
-             assistant_reply = response[len(conversation_text + "Assistant: "):].strip()
-
-             # Append the assistant's reply to the conversation history
-             st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
-
-             # Display the updated conversation
-             conversation_display = ""
-             for message in st.session_state.conversation:
-                 if message["role"] == "user":
-                     conversation_display += f"**You:** {message['content']}\n\n"
-                 elif message["role"] == "assistant":
-                     conversation_display += f"**Bot:** {message['content']}\n\n"
-
-             st.markdown(conversation_display)
-
-         except Exception as e:
-             st.error(f"An error occurred: {e}")
  import streamlit as st
+ from langchain import LLMChain
+ from langchain.llms import HuggingFaceHub
+ from langchain.prompts import ChatPromptTemplate
  import os

+ # Initialize HuggingFaceHub LLM with access token from environment variables
+ llm = HuggingFaceHub(
+     repo_id="meta-llama/Llama-2-7b-chat-hf",
+     huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_KEY"),
+     model_kwargs={
+         "temperature": 0.7,
+         "max_new_tokens": 512,
+     }
+ )
+
+ # Define the prompt template
+ prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", "You are a helpful assistant."),
+         ("user", "Question: {question}")
+     ]
+ )
+
+ # Create the LLM chain
+ chain = LLMChain(llm=llm, prompt=prompt, output_key="response")
+
+ # Streamlit app interface
+ st.title('LangChain Demo with LLaMA 2 on Hugging Face')

  # User input
+ input_text = st.text_input("Enter your question:")
+
+ # Display the response
+ if input_text:
+     try:
+         response = chain.run({"question": input_text})
+         st.write(response)
+     except Exception as e:
+         st.error(f"Error: {e}")
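
For quick verification, the new inference path can be exercised without the Streamlit front end. The sketch below is a minimal example, not part of the commit: it assumes a classic (pre-0.1) langchain release, where HuggingFaceHub is the Inference-API LLM wrapper exported from langchain.llms, that the huggingface_hub client is installed, and that HUGGINGFACE_API_KEY holds a token with access to the gated meta-llama/Llama-2-7b-chat-hf repository. The test question is a hypothetical placeholder.

import os

from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
from langchain.prompts import ChatPromptTemplate

# Same configuration as the committed app.py, minus the Streamlit UI.
# Assumes HUGGINGFACE_API_KEY is set and authorized for the gated repo.
llm = HuggingFaceHub(
    repo_id="meta-llama/Llama-2-7b-chat-hf",
    huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_KEY"),
    model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("user", "Question: {question}"),
    ]
)

chain = LLMChain(llm=llm, prompt=prompt, output_key="response")

if __name__ == "__main__":
    # Smoke-test the chain from the command line before deploying the Space.
    print(chain.run({"question": "What is Llama 2?"}))  # hypothetical example question

Note that the commit also drops python-dotenv along with the local transformers/torch model loading: on Hugging Face Spaces, HUGGINGFACE_API_KEY would instead be stored as a repository secret, which the Space exposes to the app as an ordinary environment variable readable via os.getenv.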