Waseem7711 committed on
Commit 939b0bb · verified · 1 Parent(s): 72e4b93

Update app.py

Files changed (1):
  1. app.py +55 -30
app.py CHANGED
@@ -1,41 +1,66 @@
-from langchain_openai import ChatOpenAI
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_community.llms import Ollama
+# app.py
+
 import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 import os
 from dotenv import load_dotenv
 
 # Load environment variables
 load_dotenv()
 
-# Set environment variables
-os.environ["LANGCHAIN_TRACING_V2"] = "true"
-os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
-
-# Prompt Template
-prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", "You are a helpful assistant. Please respond to the user queries"),
-        ("user", "Question: {question}")
-    ]
-)
+# Retrieve Hugging Face API token from environment variables (if accessing private models)
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure you set this in Hugging Face Secrets
 
 # Streamlit app setup
-st.title('Langchain Demo With LLAMA2 API')
+st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
+st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
+
+@st.cache_resource
+def load_model():
+    """
+    Load the tokenizer and model from Hugging Face.
+    This function is cached to prevent re-loading on every interaction.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(
+        "meta-llama/Llama-2-7b-chat-hf",
+        use_auth_token=HF_API_TOKEN  # Remove if the model is public
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b-chat-hf",
+        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
+        device_map="auto",
+        use_auth_token=HF_API_TOKEN  # Remove if the model is public
+    )
+    return tokenizer, model
+
+# Load the model and tokenizer
+tokenizer, model = load_model()
 
 # User input
-input_text = st.text_input("Search the topic you want")
-
-# Ollama LLM (ensure the model is available, or access it through Hugging Face API)
-llm = Ollama(model="llama2")
-output_parser = StrOutputParser()
-chain = prompt | llm | output_parser
-
-# Display result when user inputs text
-if input_text:
-    try:
-        response = chain.invoke({"question": input_text})
-        st.write(response)
-    except Exception as e:
-        st.error(f"Error: {e}")
+user_input = st.text_input("You:", "")
+
+if user_input:
+    with st.spinner("Generating response..."):
+        try:
+            # Encode the input
+            inputs = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt").to(model.device)
+
+            # Generate a response
+            output = model.generate(
+                inputs,
+                max_length=1000,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True,
+                eos_token_id=tokenizer.eos_token_id
+            )
+
+            # Decode the response
+            response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+            # Display the response
+            st.text_area("Bot:", value=response, height=200, max_chars=None, key=None)
+
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
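An editorial note on the new generation code, not part of the commit itself: Llama-2-chat models are trained on a specific [INST]-delimited prompt format, so passing raw text with an appended tokenizer.eos_token (a DialoGPT-style convention) and then decoding the whole output tends to echo the prompt back and weaken replies. A minimal sketch of an alternative, assuming the tokenizer and model returned by load_model() above and a transformers release (v4.34+) that provides apply_chat_template; the system prompt is a placeholder:

# Sketch: build the input with the model's own chat template instead of raw text.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},  # placeholder system prompt
    {"role": "user", "content": user_input},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # end the prompt where the assistant's turn begins
    return_tensors="pt",
).to(model.device)

output = model.generate(
    input_ids,
    max_new_tokens=512,  # caps only the reply; max_length=1000 counts prompt tokens too
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
)

# Decode only the newly generated tokens so the prompt is not repeated in the answer.
response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

Two related caveats, offered with the same hedge: recent transformers releases deprecate use_auth_token= in favor of token=, and a 7B checkpoint in float16 needs roughly 14 GB of memory, so it will not fit the free CPU tier of Spaces without quantization or a smaller model.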