awinml committed on
Commit
8d46199
·
1 Parent(s): 9975133

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +1 -1
  3. earnings_calls_cleaned_metadata.csv +3 -0
  4. utils.py +10 -8
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
36
+ earnings_calls_cleaned_metadata.csv filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -72,7 +72,7 @@ with st.sidebar:
72
 
73
  # Choose encoder model
74
 
75
- encoder_models_choice = ["SGPT", "MPNET"]
76
  with st.sidebar:
77
  encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
78
 
 
72
 
73
  # Choose encoder model
74
 
75
+ encoder_models_choice = ["MPNET", "SGPT"]
76
  with st.sidebar:
77
  encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
78
 
earnings_calls_cleaned_metadata.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6474da1f710d2a6d2ea65c475baf6821db95a5cb81dd8703eec3c04cd22cbe
3
+ size 18988194
utils.py CHANGED
@@ -17,7 +17,7 @@ import streamlit_scrollable_textbox as stx
17
 
18
  @st.experimental_singleton
19
  def get_data():
20
- data = pd.read_csv("earnings_calls_sentencewise.csv")
21
  return data
22
 
23
 
@@ -72,6 +72,7 @@ def query_pinecone(query, top_k, model, index, year, quarter, ticker, threshold=
72
  "Year": int(year),
73
  "Quarter": {"$eq": quarter},
74
  "Ticker": {"$eq": ticker},
 
75
  },
76
  include_metadata=True,
77
  )
@@ -103,7 +104,7 @@ def sentence_id_combine(data, query_results, lag=2):
103
  ]
104
  # Create a list of context sentences by joining the sentences corresponding to the lookup IDs
105
  context_list = [
106
- ". ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
107
  ]
108
  return context_list
109
 
@@ -114,11 +115,11 @@ def text_lookup(data, sentence_ids):
114
 
115
 
116
  def generate_prompt(query_text, context_list):
117
- #context = " ".join(context_list)
118
  prompt = f"""
119
  Context information is below:
120
  ---------------------
121
- {context_list}
122
  ---------------------
123
  Given the context information and prior knowledge, answer this question:
124
  {query_text}
@@ -148,15 +149,16 @@ def retrieve_transcript(data, year, quarter, ticker):
148
  (data.Year == int(year))
149
  & (data.Quarter == quarter)
150
  & (data.Ticker == ticker),
151
- ["Year", "Month", "Date", "Ticker"],
152
  ]
153
  .drop_duplicates()
154
- .iloc[0]
155
  )
 
156
  # convert row to a string and join values with "-"
157
- row_str = "-".join(row.astype(str)) + ".txt"
158
  open_file = open(
159
- f"Transcripts/{ticker}/{row_str}",
160
  "r",
161
  )
162
  file_text = open_file.read()
 
17
 
18
  @st.experimental_singleton
19
  def get_data():
20
+ data = pd.read_csv("earnings_calls_cleaned_metadata.csv")
21
  return data
22
 
23
 
 
72
  "Year": int(year),
73
  "Quarter": {"$eq": quarter},
74
  "Ticker": {"$eq": ticker},
75
+ "QA_Flag": {"$eq": "Answer"},
76
  },
77
  include_metadata=True,
78
  )
 
104
  ]
105
  # Create a list of context sentences by joining the sentences corresponding to the lookup IDs
106
  context_list = [
107
+ " ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
108
  ]
109
  return context_list
110
 
 
115
 
116
 
117
  def generate_prompt(query_text, context_list):
118
+ context = " \n".join(context_list)
119
  prompt = f"""
120
  Context information is below:
121
  ---------------------
122
+ {context}
123
  ---------------------
124
  Given the context information and prior knowledge, answer this question:
125
  {query_text}
 
149
  (data.Year == int(year))
150
  & (data.Quarter == quarter)
151
  & (data.Ticker == ticker),
152
+ ["File_Name"],
153
  ]
154
  .drop_duplicates()
155
+ .iloc[0,0]
156
  )
157
+ print(row)
158
  # convert row to a string and join values with "-"
159
+ #row_str = "-".join(row.astype(str)) + ".txt"
160
  open_file = open(
161
+ f"Transcripts/{ticker}/{row}",
162
  "r",
163
  )
164
  file_text = open_file.read()