Spaces:
Sleeping
Sleeping
two domain added
Browse files
app.py
CHANGED
@@ -20,37 +20,6 @@ from utils.helper_functions import *
|
|
20 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
21 |
|
22 |
|
23 |
-
# Load the dataset from a provided source.
|
24 |
-
dataset = load_dataset(
|
25 |
-
"eagle0504/youthless-homeless-shelter-web-scrape-dataset-qa-formatted"
|
26 |
-
)
|
27 |
-
|
28 |
-
# Initialize a new client for ChromaDB.
|
29 |
-
client = chromadb.Client()
|
30 |
-
|
31 |
-
# Generate a random number between 1 billion and 10 billion.
|
32 |
-
random_number: int = np.random.randint(low=1e9, high=1e10)
|
33 |
-
|
34 |
-
# Generate a random string consisting of 10 uppercase letters and digits.
|
35 |
-
random_string: str = "".join(
|
36 |
-
np.random.choice(list(string.ascii_uppercase + string.digits), size=10)
|
37 |
-
)
|
38 |
-
|
39 |
-
# Combine the random number and random string into one identifier.
|
40 |
-
combined_string: str = f"{random_number}{random_string}"
|
41 |
-
|
42 |
-
# Create a new collection in ChromaDB with the combined string as its name.
|
43 |
-
collection = client.create_collection(combined_string)
|
44 |
-
|
45 |
-
|
46 |
-
# Embed and store every question from the train split as a "support" document for this demo
|
47 |
-
L = len(dataset["train"]["questions"])
|
48 |
-
collection.add(
|
49 |
-
ids=[str(i) for i in range(0, L)], # IDs are just strings
|
50 |
-
documents=dataset["train"]["questions"], # Enter questions here
|
51 |
-
metadatas=[{"type": "support"} for _ in range(0, L)],
|
52 |
-
)
|
53 |
-
|
54 |
# Front-end Design
|
55 |
st.title("Youth Homelessness Chatbot")
|
56 |
|
@@ -75,6 +44,7 @@ st.sidebar.markdown(
|
|
75 |
st.sidebar.success(
|
76 |
"Please enter a distance threshold (we advise to set it around 0.2)."
|
77 |
)
|
|
|
78 |
special_threshold = st.sidebar.number_input(
|
79 |
"Insert a number", value=0.2, placeholder="Type a number..."
|
80 |
) # 0.3
|
@@ -86,6 +56,43 @@ st.sidebar.warning(
|
|
86 |
if clear_button:
|
87 |
st.session_state.messages = []
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# React to user input
|
90 |
if prompt := st.chat_input("Tell me about YSA"):
|
91 |
# Display user message in chat message container
|
@@ -119,10 +126,15 @@ if prompt := st.chat_input("Tell me about YSA"):
|
|
119 |
ref_from_db_search = ref["answers"].str.cat(sep=" ")
|
120 |
final_ref = ref
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
finetuned_llm_guess = ["from_llm", question, llm_response, 0]
|
128 |
final_ref.loc[-1] = finetuned_llm_guess
|
|
|
20 |
openai.api_key = os.environ["OPENAI_API_KEY"]
|
21 |
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
# Front-end Design
|
24 |
st.title("Youth Homelessness Chatbot")
|
25 |
|
|
|
44 |
st.sidebar.success(
|
45 |
"Please enter a distance threshold (we advise to set it around 0.2)."
|
46 |
)
|
47 |
+
option = st.selectbox("Which website do you want to ask?", ("YSA", "Larkin"))
|
48 |
special_threshold = st.sidebar.number_input(
|
49 |
"Insert a number", value=0.2, placeholder="Type a number..."
|
50 |
) # 0.3
|
|
|
56 |
if clear_button:
|
57 |
st.session_state.messages = []
|
58 |
|
59 |
+
|
60 |
+
# Load the dataset from a provided source.
|
61 |
+
if option == "YSA":
|
62 |
+
dataset = load_dataset(
|
63 |
+
"eagle0504/youthless-homeless-shelter-web-scrape-dataset-qa-formatted"
|
64 |
+
)
|
65 |
+
else:
|
66 |
+
dataset = load_dataset("eagle0504/larkin-web-scrape-dataset-qa-formatted")
|
67 |
+
|
68 |
+
|
69 |
+
# Initialize a new client for ChromaDB.
|
70 |
+
client = chromadb.Client()
|
71 |
+
|
72 |
+
# Generate a random number between 1 billion and 10 billion.
|
73 |
+
random_number: int = np.random.randint(low=1e9, high=1e10)
|
74 |
+
|
75 |
+
# Generate a random string consisting of 10 uppercase letters and digits.
|
76 |
+
random_string: str = "".join(
|
77 |
+
np.random.choice(list(string.ascii_uppercase + string.digits), size=10)
|
78 |
+
)
|
79 |
+
|
80 |
+
# Combine the random number and random string into one identifier.
|
81 |
+
combined_string: str = f"{random_number}{random_string}"
|
82 |
+
|
83 |
+
# Create a new collection in ChromaDB with the combined string as its name.
|
84 |
+
collection = client.create_collection(combined_string)
|
85 |
+
|
86 |
+
|
87 |
+
# Embed and store every question from the train split as a "support" document for this demo
|
88 |
+
L = len(dataset["train"]["questions"])
|
89 |
+
collection.add(
|
90 |
+
ids=[str(i) for i in range(0, L)], # IDs are just strings
|
91 |
+
documents=dataset["train"]["questions"], # Enter questions here
|
92 |
+
metadatas=[{"type": "support"} for _ in range(0, L)],
|
93 |
+
)
|
94 |
+
|
95 |
+
|
96 |
# React to user input
|
97 |
if prompt := st.chat_input("Tell me about YSA"):
|
98 |
# Display user message in chat message container
|
|
|
126 |
ref_from_db_search = ref["answers"].str.cat(sep=" ")
|
127 |
final_ref = ref
|
128 |
|
129 |
+
if option == "YSA":
|
130 |
+
try:
|
131 |
+
llm_response = llama2_7b_ysa(question)
|
132 |
+
except:
|
133 |
+
llm_response = "Sorry, the inference endpoint is temporarily down. π"
|
134 |
+
else:
|
135 |
+
llm_response = (
|
136 |
+
"Sorry for the delay. We are in the progress of fine-tune the model. βοΈ"
|
137 |
+
)
|
138 |
|
139 |
finetuned_llm_guess = ["from_llm", question, llm_response, 0]
|
140 |
final_ref.loc[-1] = finetuned_llm_guess
|