Files changed (1)
  1. app.py +44 -30
app.py CHANGED
@@ -1,41 +1,55 @@
- from langchain_openai import ChatOpenAI
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_core.output_parsers import StrOutputParser
- from langchain_community.llms import Ollama
+ # app.py
+
  import streamlit as st
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
  import os
  from dotenv import load_dotenv
 
  # Load environment variables
  load_dotenv()
 
- # Set environment variables
- os.environ["LANGCHAIN_TRACING_V2"] = "true"
- os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
-
- # Prompt Template
- prompt = ChatPromptTemplate.from_messages(
-     [
-         ("system", "You are a helpful assistant. Please respond to the user queries"),
-         ("user", "Question: {question}")
-     ]
- )
+ # Set environment variables for Hugging Face (if needed)
+ # os.environ["HF_HOME"] = "/path/to/huggingface"
+ # os.environ["TRANSFORMERS_CACHE"] = "/path/to/transformers/cache"
 
  # Streamlit app setup
- st.title('Langchain Demo With LLAMA2 API')
+ st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
+
+ st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
 
  # User input
- input_text = st.text_input("Search the topic you want")
-
- # Ollama LLM (ensure the model is available, or access it through Hugging Face API)
- llm = Ollama(model="llama2")
- output_parser = StrOutputParser()
- chain = prompt | llm | output_parser
-
- # Display result when user inputs text
- if input_text:
-     try:
-         response = chain.invoke({"question": input_text})
-         st.write(response)
-     except Exception as e:
-         st.error(f"Error: {e}")
+ user_input = st.text_input("You:", "")
+
+ if user_input:
+     with st.spinner("Generating response..."):
+         try:
+             # Load tokenizer and model
+             tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+             model = AutoModelForCausalLM.from_pretrained(
+                 "meta-llama/Llama-2-7b-chat-hf",
+                 torch_dtype=torch.float16,  # Use float16 for reduced memory usage
+                 device_map="auto"  # Automatically map to available devices
+             )
+
+             # Encode the input
+             inputs = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt").to(model.device)
+
+             # Generate a response
+             output = model.generate(
+                 inputs,
+                 max_length=1000,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 eos_token_id=tokenizer.eos_token_id
+             )
+
+             # Decode the response
+             response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+             # Display the response
+             st.text_area("Bot:", value=response, height=200, max_chars=None, key=None)
+
+         except Exception as e:
+             st.error(f"An error occurred: {e}")
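Note: the new app.py calls `from_pretrained` inside the `if user_input:` block, so every submitted prompt reloads the 7B checkpoint. A minimal sketch of how the load could be hoisted into a cached helper, not part of this diff; it assumes a Streamlit release that provides `st.cache_resource`, and `load_llama` is a hypothetical helper name:

```python
# Hypothetical refactor (not in this diff): cache the heavy objects so the
# checkpoint is loaded once per process instead of once per user prompt.
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # Streamlit reuses the returned objects across reruns
def load_llama(model_id: str = "meta-llama/Llama-2-7b-chat-hf"):
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,  # same dtype choice as in the diff
        device_map="auto",          # same device mapping as in the diff
    )
    return tokenizer, model

tokenizer, model = load_llama()
```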