awinml commited on
Commit
40eb760
1 Parent(s): 27b618f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -12,13 +12,16 @@ from transformers import (
12
  import streamlit as st
13
  import openai
14
 
 
15
  @st.experimental_singleton
16
  def get_data():
17
  data = pd.read_csv("earnings_calls_sentencewise.csv")
18
  return data
19
 
 
20
  # Initialize models from HuggingFace
21
 
 
22
  @st.experimental_singleton
23
  def get_t5_model():
24
  return pipeline("summarization", model="t5-small", tokenizer="t5-small")
@@ -69,6 +72,7 @@ def format_query(query_results):
69
  context = [result["metadata"]["Text"] for result in query_results["matches"]]
70
  return context
71
 
 
72
  def sentence_id_combine(data, query_results, lag=2):
73
  # Extract sentence IDs from query results
74
  ids = [result["metadata"]["Sentence_id"] for result in query_results["matches"]]
@@ -82,14 +86,16 @@ def sentence_id_combine(data, query_results, lag=2):
82
  ]
83
  # Create a list of context sentences by joining the sentences corresponding to the lookup IDs
84
  context_list = [
85
- " ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
86
  ]
87
  return context_list
88
 
 
89
  def text_lookup(data, sentence_ids):
90
- context = " ".join(data.iloc[sentence_ids].to_list())
91
  return context
92
 
 
93
  def gpt3_summary(text):
94
  response = openai.Completion.create(
95
  model="text-davinci-003",
@@ -163,7 +169,7 @@ window = int(st.number_input("Sentence Window Size", 1, 3, value=1))
163
 
164
  data = get_data()
165
 
166
- #context_list = format_query(query_results)
167
  context_list = sentence_id_combine(data, query_results, lag=window)
168
 
169
 
@@ -181,10 +187,10 @@ if decoder_model == "GPT3 (text_davinci)":
181
  output_text = []
182
  for context_text in context_list:
183
  output_text.append(gpt3_summary(context_text))
184
- generated_text = " ".join(output_text)
185
  st.write(gpt3_summary(generated_text))
186
 
187
- elif decoder_model == "GPT3 - QA":
188
  openai_key = st.text_input(
189
  "Enter OpenAI key",
190
  value="",  # SECURITY(review): hard-coded OpenAI API key redacted — the leaked key is in the commit history and must be revoked; load it from st.secrets or an env var instead
@@ -195,7 +201,7 @@ elif decoder_model == "GPT3 - QA":
195
  output_text = []
196
  for context_text in context_list:
197
  output_text.append(gpt3_qa(query_text, context_text))
198
- generated_text = " ".join(output_text)
199
  st.write(gpt3_qa(query_text, generated_text))
200
 
201
  elif decoder_model == "T5":
@@ -203,7 +209,7 @@ elif decoder_model == "T5":
203
  output_text = []
204
  for context_text in context_list:
205
  output_text.append(t5_pipeline(context_text)[0]["summary_text"])
206
- generated_text = " ".join(output_text)
207
  st.write(t5_pipeline(generated_text)[0]["summary_text"])
208
 
209
  elif decoder_model == "FLAN-T5":
@@ -211,7 +217,7 @@ elif decoder_model == "FLAN-T5":
211
  output_text = []
212
  for context_text in context_list:
213
  output_text.append(flan_t5_pipeline(context_text)[0]["summary_text"])
214
- generated_text = " ".join(output_text)
215
  st.write(flan_t5_pipeline(generated_text)[0]["summary_text"])
216
 
217
  show_retrieved_text = st.checkbox("Show Retrieved Text", value=False)
 
12
  import streamlit as st
13
  import openai
14
 
15
+
16
  @st.experimental_singleton
17
  def get_data():
18
  data = pd.read_csv("earnings_calls_sentencewise.csv")
19
  return data
20
 
21
+
22
  # Initialize models from HuggingFace
23
 
24
+
25
  @st.experimental_singleton
26
  def get_t5_model():
27
  return pipeline("summarization", model="t5-small", tokenizer="t5-small")
 
72
  context = [result["metadata"]["Text"] for result in query_results["matches"]]
73
  return context
74
 
75
+
76
  def sentence_id_combine(data, query_results, lag=2):
77
  # Extract sentence IDs from query results
78
  ids = [result["metadata"]["Sentence_id"] for result in query_results["matches"]]
 
86
  ]
87
  # Create a list of context sentences by joining the sentences corresponding to the lookup IDs
88
  context_list = [
89
+ ". ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
90
  ]
91
  return context_list
92
 
93
+
94
  def text_lookup(data, sentence_ids):
95
+ context = ". ".join(data.iloc[sentence_ids].to_list())
96
  return context
97
 
98
+
99
  def gpt3_summary(text):
100
  response = openai.Completion.create(
101
  model="text-davinci-003",
 
169
 
170
  data = get_data()
171
 
172
+ # context_list = format_query(query_results)
173
  context_list = sentence_id_combine(data, query_results, lag=window)
174
 
175
 
 
187
  output_text = []
188
  for context_text in context_list:
189
  output_text.append(gpt3_summary(context_text))
190
+ generated_text = ". ".join(output_text)
191
  st.write(gpt3_summary(generated_text))
192
 
193
+ elif decoder_model == "GPT3 (QA_davinci)":
194
  openai_key = st.text_input(
195
  "Enter OpenAI key",
196
  value="",  # SECURITY(review): hard-coded OpenAI API key redacted — the leaked key is in the commit history and must be revoked; load it from st.secrets or an env var instead
 
201
  output_text = []
202
  for context_text in context_list:
203
  output_text.append(gpt3_qa(query_text, context_text))
204
+ generated_text = ". ".join(output_text)
205
  st.write(gpt3_qa(query_text, generated_text))
206
 
207
  elif decoder_model == "T5":
 
209
  output_text = []
210
  for context_text in context_list:
211
  output_text.append(t5_pipeline(context_text)[0]["summary_text"])
212
+ generated_text = ". ".join(output_text)
213
  st.write(t5_pipeline(generated_text)[0]["summary_text"])
214
 
215
  elif decoder_model == "FLAN-T5":
 
217
  output_text = []
218
  for context_text in context_list:
219
  output_text.append(flan_t5_pipeline(context_text)[0]["summary_text"])
220
+ generated_text = ". ".join(output_text)
221
  st.write(flan_t5_pipeline(generated_text)[0]["summary_text"])
222
 
223
  show_retrieved_text = st.checkbox("Show Retrieved Text", value=False)