rdose committed on
Commit
4f63778
·
1 Parent(s): 69eb1b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -113,6 +113,7 @@ def _inference_classifier(text):
113
  def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
114
  input_batch_content = []
115
  if file_in is not None:
 
116
  dft = pd.read_csv(
117
  file_in,
118
  compression=dict(method='zip')
@@ -120,13 +121,15 @@ def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companie
120
  assert file_col_name in dft.columns, "Indicated col_name not found in file"
121
  input_batch_r = dft[file_col_name].values.tolist()
122
  else:
 
123
  assert len(input_batch) > 0, "input_batch array is empty"
124
  input_batch_r = input_batch
125
 
126
- print("->Input size:",len(input_batch_r))
127
- print("+",input_batch_r)
128
 
129
  if isurl:
 
 
130
  for row_in in input_batch_r:
131
  if isinstance(row_in , list):
132
  url = row_in[0]
@@ -140,14 +143,16 @@ def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companie
140
  extracted = Extractor().extract(requests.get(url).text)
141
  input_batch_content.append(extracted['content'])
142
  else:
 
143
  if isinstance(input_batch_r[0], list):
 
144
  for row_in in input_batch_r:
145
  input_batch_content.append(row_in[0])
146
  else:
 
147
  input_batch_content = input_batch_r
148
 
149
- print("->Batch size:",len(input_batch_content))
150
- print("+",input_batch_content)
151
 
152
  prob_outs = _inference_classifier(input_batch_content)
153
  #sentiment = _inference_sentiment_model_via_api_query({"inputs": extracted['content']})
@@ -187,7 +192,7 @@ demo = gr.Interface(fn=inference,
187
  gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
188
  gr.Checkbox(label='if url parse cached in archive.org'),
189
  gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
190
- outputs=[gr.Dataframe(label='output raw', col_count=1, datatype='number', type='array', wrap=True, header=OUT_HEADERS)],
191
  #gr.Label(label='Company'),
192
  #gr.Label(label='ESG'),
193
  #gr.Label(label='Sentiment'),
 
113
  def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
114
  input_batch_content = []
115
  if file_in is not None:
116
+ print("[i] Input is file:",file_in)
117
  dft = pd.read_csv(
118
  file_in,
119
  compression=dict(method='zip')
 
121
  assert file_col_name in dft.columns, "Indicated col_name not found in file"
122
  input_batch_r = dft[file_col_name].values.tolist()
123
  else:
124
+ print("[i] Input is list")
125
  assert len(input_batch) > 0, "input_batch array is empty"
126
  input_batch_r = input_batch
127
 
128
+ print("[i] Input size:",len(input_batch_r))
 
129
 
130
  if isurl:
131
+ print("[i] Data is URL")
132
+ if use_archive: print("[i] Use cached URL from archive.org")
133
  for row_in in input_batch_r:
134
  if isinstance(row_in , list):
135
  url = row_in[0]
 
143
  extracted = Extractor().extract(requests.get(url).text)
144
  input_batch_content.append(extracted['content'])
145
  else:
146
+ print("[i] Data is news contents")
147
  if isinstance(input_batch_r[0], list):
148
+ print("[i] Data is list of lists format")
149
  for row_in in input_batch_r:
150
  input_batch_content.append(row_in[0])
151
  else:
152
+ print("[i] Data is single list format")
153
  input_batch_content = input_batch_r
154
 
155
+ print("[i] Batch size:",len(input_batch_content))
 
156
 
157
  prob_outs = _inference_classifier(input_batch_content)
158
  #sentiment = _inference_sentiment_model_via_api_query({"inputs": extracted['content']})
 
192
  gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
193
  gr.Checkbox(label='if url parse cached in archive.org'),
194
  gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
195
+ outputs=[gr.Dataframe(label='output raw', col_count=1, datatype='number', type='array', wrap=True)],#, header=OUT_HEADERS)],
196
  #gr.Label(label='Company'),
197
  #gr.Label(label='ESG'),
198
  #gr.Label(label='Sentiment'),