Waseem7711 committed on
Commit
cdb6b83
1 Parent(s): 68ac025

Create app.py

Files changed (1)
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
+ # app.py
+ 
+ import streamlit as st
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ import os
+ from dotenv import load_dotenv
+ 
+ # Load environment variables
+ load_dotenv()
+ 
+ 
+ 
+ 
+ # Retrieve Hugging Face API token from environment variables (if accessing private models)
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure you set this in Hugging Face Secrets
+ 
+ # Streamlit app setup
+ st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
+ st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
+ 
+ @st.cache_resource
+ def load_model():
+     """
+     Load the tokenizer and model from Hugging Face.
+     This function is cached to prevent re-loading on every interaction.
+     """
+     tokenizer = AutoTokenizer.from_pretrained(
+         "meta-llama/Llama-2-7b-chat-hf",
+         use_auth_token=HF_API_TOKEN  # Remove if the model is public
+     )
+     model = AutoModelForCausalLM.from_pretrained(
+         "meta-llama/Llama-2-7b-chat-hf",
+         torch_dtype=torch.float16,  # Use float16 for reduced memory usage
+         device_map="auto",
+         use_auth_token=HF_API_TOKEN  # Remove if the model is public
+     )
+     return tokenizer, model
+ 
+ # Load the model and tokenizer
+ tokenizer, model = load_model()
+ 
+ # Initialize session state for conversation history
+ if "conversation" not in st.session_state:
+     st.session_state.conversation = []
+ 
+ # User input
+ user_input = st.text_input("You:", "")
+ 
+ if user_input:
+     st.session_state.conversation.append({"role": "user", "content": user_input})
+     with st.spinner("Generating response..."):
+         try:
+             # Prepare the conversation history for the model
+             conversation_text = ""
+             for message in st.session_state.conversation:
+                 if message["role"] == "user":
+                     conversation_text += f"User: {message['content']}\n"
+                 elif message["role"] == "assistant":
+                     conversation_text += f"Assistant: {message['content']}\n"
+ 
+             # Encode the input
+             inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
+ 
+             # Generate a response
+             output = model.generate(
+                 inputs,
+                 max_length=1000,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 eos_token_id=tokenizer.eos_token_id,
+                 pad_token_id=tokenizer.eos_token_id  # To avoid warnings
+             )
+ 
+             # Decode the response
+             response = tokenizer.decode(output[0], skip_special_tokens=True)
+ 
+             # Extract the assistant's reply (slice by the exact prompt text, then strip whitespace)
+             assistant_reply = response[len(conversation_text + "Assistant:"):].strip()
+ 
+             # Append the assistant's reply to the conversation history
+             st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
+ 
+             # Display the updated conversation
+             conversation_display = ""
+             for message in st.session_state.conversation:
+                 if message["role"] == "user":
+                     conversation_display += f"**You:** {message['content']}\n\n"
+                 elif message["role"] == "assistant":
+                     conversation_display += f"**Bot:** {message['content']}\n\n"
+ 
+             st.markdown(conversation_display)
+ 
+         except Exception as e:
+             st.error(f"An error occurred: {e}")
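The imports above imply the Space's dependency set. A minimal requirements.txt sketch for running this app (package names follow the imports in app.py; accelerate is an assumption, since device_map="auto" in transformers typically requires it, and it is not part of this commit):

    streamlit
    transformers
    torch
    python-dotenv
    accelerate

For local testing, load_dotenv() reads the token from a .env file next to app.py, for example (the value shown is a hypothetical placeholder; never commit a real token):

    HF_API_TOKEN=hf_your_token_here

and the app starts with: streamlit run app.py. On Hugging Face Spaces, adding HF_API_TOKEN under the Space's Secrets exposes it as an environment variable, so os.getenv("HF_API_TOKEN") picks it up without any .env file.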