Update app.py
app.py (CHANGED)
@@ -1,56 +1,102 @@

Removed (the old local-pipeline version; only fragments of most removed lines survive in the diff view):

 import streamlit as st
-from
-import
-# Initialize the Phi model pipeline
 @st.cache_resource
-def
-return
-    torch_dtype=torch.bfloat16,
-    # device=device,
     )
-    response = phi_model(prompt, max_length=max_length, do_sample=True, temperature=0.7)
-    return response[0]['generated_text']
-# Generate responses
-with st.spinner("Generating responses..."):
-    traditional_prompt = f"Question: {user_question}\nAnswer:"
-    cot_prompt = f"Question: {user_question}\nLet's approach this step by step:\n1)"
-st.
-st.
-""")
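The removed lines survive only as fragments, but they indicate the earlier approach: loading a Phi model locally through a transformers pipeline (the torch_dtype=torch.bfloat16 argument and the phi_model(...) call are still visible) and generating both a direct answer and a chain-of-thought answer for the same question. A rough sketch of that pattern, for orientation only: the model id, function names, and widget layout below are assumptions, not the original code.

import streamlit as st
import torch
from transformers import pipeline

# Hypothetical reconstruction of the removed local-pipeline setup.
@st.cache_resource
def load_model():
    return pipeline(
        "text-generation",
        model="microsoft/phi-2",      # assumed model id; the original is truncated
        torch_dtype=torch.bfloat16,   # matches the surviving fragment
        # device=device,
    )

phi_model = load_model()

def generate(prompt, max_length=200):
    # Sampled generation, as in the surviving phi_model(...) fragment.
    response = phi_model(prompt, max_length=max_length, do_sample=True, temperature=0.7)
    return response[0]["generated_text"]

user_question = st.text_input("Ask a question")
if user_question:
    with st.spinner("Generating responses..."):
        traditional_prompt = f"Question: {user_question}\nAnswer:"
        cot_prompt = f"Question: {user_question}\nLet's approach this step by step:\n1)"
        st.write(generate(traditional_prompt))
        st.write(generate(cot_prompt))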
Added (the new version of app.py, which replaces the local model with the hosted Inference API):

 import streamlit as st
+from huggingface_hub import InferenceClient, HfApi
+import time
+import requests
+from requests.exceptions import RequestException

+# Set page config at the very beginning
+st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")
+
+# Add a text input for the Hugging Face API token
+hf_token = st.text_input("Enter your Hugging Face API token", type="password")

 @st.cache_resource
+def get_client(token):
+    return InferenceClient(
+        "microsoft/Phi-3.5-mini-instruct",
+        token=token
     )

+def validate_token(token):
+    try:
+        api = HfApi(token=token)
+        api.whoami()
+        return True
+    except Exception as e:
+        st.error(f"Token validation failed: {str(e)}")
+        return False
+
+def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p, max_retries=5, initial_delay=1):
+    for attempt in range(max_retries):
+        try:
+            response = client.text_generation(
+                prompt,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+            )
+            return response
+        except RequestException as e:
+            if attempt < max_retries - 1:
+                delay = initial_delay * (2 ** attempt)  # Exponential backoff
+                st.warning(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
+                time.sleep(delay)
+            else:
+                raise e
+
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    if not hf_token:
+        st.error("Please enter your Hugging Face API token.")
+        return

+    if not validate_token(hf_token):
+        return

+    client = get_client(hf_token)
+
+    # Construct the prompt
+    prompt = f"{system_message}\n\n"
+    for user_msg, assistant_msg in history:
+        prompt += f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n"
+    prompt += f"Human: {message}\nAssistant:"

+    try:
+        response = make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
+        yield response
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        yield "I'm sorry, but I encountered an error while processing your request."

+st.title("Phi-3.5 Mini Chatbot")

+if "messages" not in st.session_state:
+    st.session_state.messages = []

+system_message = st.text_input("System message", value="You are a helpful AI assistant.")
+max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
+temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
+top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)

+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])

+if prompt := st.chat_input("What is your message?"):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)

+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+        for response in respond(prompt,
+                                [(msg["content"], st.session_state.messages[i+1]["content"])
+                                 for i, msg in enumerate(st.session_state.messages[:-1:2])],
+                                system_message,
+                                max_tokens,
+                                temperature,
+                                top_p):
+            message_placeholder.markdown(response)
+            full_response = response

+    st.session_state.messages.append({"role": "assistant", "content": full_response})
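Two details of the new version are worth noting. make_request_with_retries doubles its delay on each failed attempt, so with max_retries=5 the waits are 1, 2, 4, and 8 seconds before the final failure is re-raised. The history handed to respond is built from enumerate(st.session_state.messages[:-1:2]), whose index i counts the sliced list, so messages[i+1] lines up with the matching assistant reply only for the first exchange; messages[2*i + 1] would pair each user turn with the reply that follows it.

Because generation now goes through the hosted Inference API instead of a local model, the core call can be exercised outside Streamlit. A minimal sketch, assuming a valid token in an HF_TOKEN environment variable (the variable name is an assumption; the model id and sampling parameters mirror the app):

import os
from huggingface_hub import InferenceClient

# Same model and generation parameters as the Space, without the Streamlit UI.
client = InferenceClient(
    "microsoft/Phi-3.5-mini-instruct",
    token=os.environ["HF_TOKEN"],  # HF_TOKEN is an assumed environment variable
)

prompt = "You are a helpful AI assistant.\n\nHuman: What is the capital of France?\nAssistant:"
reply = client.text_generation(
    prompt,
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
)
print(reply)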