Upload app.py
app.py CHANGED
@@ -12,13 +12,16 @@ from transformers import (
 import streamlit as st
 import openai
 
+
 @st.experimental_singleton
 def get_data():
     data = pd.read_csv("earnings_calls_sentencewise.csv")
     return data
 
+
 # Initialize models from HuggingFace
 
+
 @st.experimental_singleton
 def get_t5_model():
     return pipeline("summarization", model="t5-small", tokenizer="t5-small")
@@ -69,6 +72,7 @@ def format_query(query_results):
     context = [result["metadata"]["Text"] for result in query_results["matches"]]
     return context
 
+
 def sentence_id_combine(data, query_results, lag=2):
     # Extract sentence IDs from query results
     ids = [result["metadata"]["Sentence_id"] for result in query_results["matches"]]
@@ -82,14 +86,16 @@ def sentence_id_combine(data, query_results, lag=2):
     ]
     # Create a list of context sentences by joining the sentences corresponding to the lookup IDs
     context_list = [
-        " ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
+        ". ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
     ]
     return context_list
 
+
 def text_lookup(data, sentence_ids):
-    context = " ".join(data.iloc[sentence_ids].to_list())
+    context = ". ".join(data.iloc[sentence_ids].to_list())
     return context
 
+
 def gpt3_summary(text):
     response = openai.Completion.create(
         model="text-davinci-003",
@@ -163,7 +169,7 @@ window = int(st.number_input("Sentence Window Size", 1, 3, value=1))
 
 data = get_data()
 
-#context_list = format_query(query_results)
+# context_list = format_query(query_results)
 context_list = sentence_id_combine(data, query_results, lag=window)
 
 
@@ -181,10 +187,10 @@ if decoder_model == "GPT3 (text_davinci)":
     output_text = []
     for context_text in context_list:
         output_text.append(gpt3_summary(context_text))
-    generated_text = " ".join(output_text)
+    generated_text = ". ".join(output_text)
     st.write(gpt3_summary(generated_text))
 
-elif decoder_model == "GPT3 - QA":
+elif decoder_model == "GPT3 (QA_davinci)":
     openai_key = st.text_input(
         "Enter OpenAI key",
         value="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",
@@ -195,7 +201,7 @@ elif decoder_model == "GPT3 - QA":
     output_text = []
     for context_text in context_list:
         output_text.append(gpt3_qa(query_text, context_text))
-    generated_text = " ".join(output_text)
+    generated_text = ". ".join(output_text)
     st.write(gpt3_qa(query_text, generated_text))
 
 elif decoder_model == "T5":
@@ -203,7 +209,7 @@ elif decoder_model == "T5":
     output_text = []
     for context_text in context_list:
         output_text.append(t5_pipeline(context_text)[0]["summary_text"])
-    generated_text = " ".join(output_text)
+    generated_text = ". ".join(output_text)
     st.write(t5_pipeline(generated_text)[0]["summary_text"])
 
 elif decoder_model == "FLAN-T5":
@@ -211,7 +217,7 @@ elif decoder_model == "FLAN-T5":
     output_text = []
     for context_text in context_list:
         output_text.append(flan_t5_pipeline(context_text)[0]["summary_text"])
-    generated_text = " ".join(output_text)
+    generated_text = ". ".join(output_text)
     st.write(flan_t5_pipeline(generated_text)[0]["summary_text"])
 
 show_retrieved_text = st.checkbox("Show Retrieved Text", value=False)
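Aside from added blank lines, the substantive change is that retrieved sentences and partial model outputs are now joined with ". " instead of " ", so sentence boundaries survive into the downstream summarizer. Below is a minimal, self-contained sketch of the sentence-window retrieval that sentence_id_combine(data, query_results, lag=window) performs. The lines that build lookup_ids are not shown in the diff, so the plus-or-minus lag expansion here is an assumption, and the toy DataFrame and query result are made up for illustration.

```python
import pandas as pd

# Toy stand-in for earnings_calls_sentencewise.csv: one row per sentence.
data = pd.DataFrame({"Text": [
    "Revenue grew 12% year over year",
    "Margins expanded on lower input costs",
    "Guidance was raised for the full year",
    "Capex will increase next quarter",
    "The buyback program continues",
]})

# Toy stand-in for a vector-search result with sentence ids in the metadata.
query_results = {"matches": [{"metadata": {"Sentence_id": 2}}]}


def sentence_id_combine(data, query_results, lag=2):
    """Expand each matched sentence id by +/- lag and join the window with '. '."""
    ids = [int(r["metadata"]["Sentence_id"]) for r in query_results["matches"]]
    # Assumed expansion step: the commit diff elides how lookup_ids is built.
    lookup_ids = [
        [i for i in range(sid - lag, sid + lag + 1) if 0 <= i < len(data)]
        for sid in ids
    ]
    # This line matches the diff: join each window's sentences with '. '.
    return [". ".join(data.Text.iloc[window].to_list()) for window in lookup_ids]


print(sentence_id_combine(data, query_results, lag=1))
# ['Margins expanded on lower input costs. Guidance was raised for the full year. Capex will increase next quarter']
```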
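Every decoder branch in the file follows the same two-stage pattern: summarize each retrieved window, join the partial summaries (now with ". "), then summarize the concatenation once more. A condensed sketch using the t5-small pipeline the file itself initializes; the sample context_list is invented for illustration.

```python
from transformers import pipeline

# Hypothetical retrieved windows; in the app these come from sentence_id_combine().
context_list = [
    "Revenue grew 12% year over year. Margins expanded on lower input costs.",
    "Guidance was raised for the full year. Capex will increase next quarter.",
]

# Same pipeline the app builds in get_t5_model().
t5_pipeline = pipeline("summarization", model="t5-small", tokenizer="t5-small")

# Map step: summarize each retrieved window independently.
output_text = [t5_pipeline(text)[0]["summary_text"] for text in context_list]

# Reduce step: join the partial summaries with '. ' and summarize the result once more.
generated_text = ". ".join(output_text)
print(t5_pipeline(generated_text)[0]["summary_text"])
```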