Spaces:

Waseem771
/

Ollam-Chabot

Sleeping

App Files Files Community

Update.app.py

by Waseem7711 - opened Oct 15, 2024

base: refs/heads/main

←

from: refs/pr/7

Discussion Files changed

+82

-30

Files changed (1) hide show

app.py +82 -30

app.py CHANGED Viewed

@@ -1,41 +1,93 @@
-from langchain_openai import ChatOpenAI
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_community.llms import Ollama
 import streamlit as st
 import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
-# Set environment variables
-os.environ["LANGCHAIN_TRACING_V2"] = "true"
-os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
-# Prompt Template
-prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", "You are a helpful assistant. Please respond to the user queries"),
-        ("user", "Question: {question}")
-    ]
-)
 # Streamlit app setup
-st.title('Langchain Demo With LLAMA2 API')
 # User input
-input_text = st.text_input("Search the topic you want")
-# Ollama LLM (ensure the model is available, or access it through Hugging Face API)
-llm = Ollama(model="llama2")
-output_parser = StrOutputParser()
-chain = prompt | llm | output_parser
-# Display result when user inputs text
-if input_text:
-    try:
-        response = chain.invoke({"question": input_text})
-        st.write(response)
-    except Exception as e:
-        st.error(f"Error: {e}")

+# app.py
 import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
+# Retrieve Hugging Face API token from environment variables (if accessing private models)
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure you set this in Hugging Face Secrets
 # Streamlit app setup
+st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
+st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
+@st.cache_resource
+def load_model():
+    """
+    Load the Llama 2 chat tokenizer and model from Hugging Face.
+
+    The function is wrapped in ``st.cache_resource`` so the objects are
+    built once and reused, instead of being re-loaded on every interaction.
+
+    Returns:
+        tuple: ``(tokenizer, model)`` as returned by
+        ``AutoTokenizer.from_pretrained`` and
+        ``AutoModelForCausalLM.from_pretrained``.
+    """
+    # Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
+    model_id = "meta-llama/Llama-2-7b-chat-hf"
+    # `token` supersedes the deprecated `use_auth_token` keyword of from_pretrained.
+    # HF_API_TOKEN is None when the environment variable is not set.
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_id,
+        token=HF_API_TOKEN,
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
+        device_map="auto",
+        token=HF_API_TOKEN,
+    )
+    return tokenizer, model
+# Load the model and tokenizer
+tokenizer, model = load_model()
+# Initialize session state for conversation history so it survives Streamlit reruns
+if "conversation" not in st.session_state:
+    st.session_state.conversation = []
 # User input
+user_input = st.text_input("You:", "")
+if user_input:
+    st.session_state.conversation.append({"role": "user", "content": user_input})
+    with st.spinner("Generating response..."):
+        try:
+            # Prepare the conversation history for the model as a plain-text transcript;
+            # messages with any other role are skipped.
+            prompt_labels = {"user": "User", "assistant": "Assistant"}
+            conversation_text = "".join(
+                f"{prompt_labels[message['role']]}: {message['content']}\n"
+                for message in st.session_state.conversation
+                if message["role"] in prompt_labels
+            )
+            # Encode the input; the trailing "Assistant:" cues the model to write the next turn
+            inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
+            # Generate a response
+            output = model.generate(
+                inputs,
+                # Limit only the newly generated tokens: max_length would also count
+                # the prompt, leaving less and less room for the reply as the
+                # conversation history grows.
+                max_new_tokens=512,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.eos_token_id  # To avoid warnings
+            )
+            # Decode only the tokens that follow the prompt. Slicing token ids is
+            # exact, whereas slicing the decoded string assumes the decoded text
+            # reproduces the prompt character for character.
+            prompt_length = inputs.shape[-1]
+            assistant_reply = tokenizer.decode(
+                output[0][prompt_length:], skip_special_tokens=True
+            ).strip()
+            # Append the assistant's reply to the conversation history
+            st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
+            # Display the updated conversation
+            display_labels = {"user": "You", "assistant": "Bot"}
+            conversation_display = "".join(
+                f"**{display_labels[message['role']]}:** {message['content']}\n\n"
+                for message in st.session_state.conversation
+                if message["role"] in display_labels
+            )
+            st.markdown(conversation_display)
+        except Exception as e:
+            # Roll back the unanswered user turn so a failed generation does not
+            # leave a dangling message in the history used for the next prompt.
+            history = st.session_state.conversation
+            if history and history[-1]["role"] == "user":
+                history.pop()
+            st.error(f"An error occurred: {e}")