Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -113,6 +113,7 @@ def _inference_classifier(text):
|
|
113 |
def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
|
114 |
input_batch_content = []
|
115 |
if file_in is not None:
|
|
|
116 |
dft = pd.read_csv(
|
117 |
file_in,
|
118 |
compression=dict(method='zip')
|
@@ -120,13 +121,15 @@ def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companie
|
|
120 |
assert file_col_name in dft.columns, "Indicated col_name not found in file"
|
121 |
input_batch_r = dft[file_col_name].values.tolist()
|
122 |
else:
|
|
|
123 |
assert len(input_batch) > 0, "input_batch array is empty"
|
124 |
input_batch_r = input_batch
|
125 |
|
126 |
-
print("
|
127 |
-
print("+",input_batch_r)
|
128 |
|
129 |
if isurl:
|
|
|
|
|
130 |
for row_in in input_batch_r:
|
131 |
if isinstance(row_in , list):
|
132 |
url = row_in[0]
|
@@ -140,14 +143,16 @@ def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companie
|
|
140 |
extracted = Extractor().extract(requests.get(url).text)
|
141 |
input_batch_content.append(extracted['content'])
|
142 |
else:
|
|
|
143 |
if isinstance(input_batch_r[0], list):
|
|
|
144 |
for row_in in input_batch_r:
|
145 |
input_batch_content.append(row_in[0])
|
146 |
else:
|
|
|
147 |
input_batch_content = input_batch_r
|
148 |
|
149 |
-
print("
|
150 |
-
print("+",input_batch_content)
|
151 |
|
152 |
prob_outs = _inference_classifier(input_batch_content)
|
153 |
#sentiment = _inference_sentiment_model_via_api_query({"inputs": extracted['content']})
|
@@ -187,7 +192,7 @@ demo = gr.Interface(fn=inference,
|
|
187 |
gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
|
188 |
gr.Checkbox(label='if url parse cached in archive.org'),
|
189 |
gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
|
190 |
-
outputs=[gr.Dataframe(label='output raw', col_count=1, datatype='number', type='array', wrap=True
|
191 |
#gr.Label(label='Company'),
|
192 |
#gr.Label(label='ESG'),
|
193 |
#gr.Label(label='Sentiment'),
|
|
|
113 |
def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
|
114 |
input_batch_content = []
|
115 |
if file_in is not None:
|
116 |
+
print("[i] Input is file:",file_in)
|
117 |
dft = pd.read_csv(
|
118 |
file_in,
|
119 |
compression=dict(method='zip')
|
|
|
121 |
assert file_col_name in dft.columns, "Indicated col_name not found in file"
|
122 |
input_batch_r = dft[file_col_name].values.tolist()
|
123 |
else:
|
124 |
+
print("[i] Input is list")
|
125 |
assert len(input_batch) > 0, "input_batch array is empty"
|
126 |
input_batch_r = input_batch
|
127 |
|
128 |
+
print("[i] Input size:",len(input_batch_r))
|
|
|
129 |
|
130 |
if isurl:
|
131 |
+
print("[i] Data is URL")
|
132 |
+
# A bare `expr if cond` is not valid Python: a conditional expression needs
# an `else` branch. Emit the optional log line from an if statement instead,
# so the module parses and the message is printed only when use_archive is set.
if use_archive:
    print("[i] Use chached URL from archive.org")
|
133 |
for row_in in input_batch_r:
|
134 |
if isinstance(row_in , list):
|
135 |
url = row_in[0]
|
|
|
143 |
extracted = Extractor().extract(requests.get(url).text)
|
144 |
input_batch_content.append(extracted['content'])
|
145 |
else:
|
146 |
+
print("[i] Data is news contents")
|
147 |
if isinstance(input_batch_r[0], list):
|
148 |
+
print("[i] Data is list of lists format")
|
149 |
for row_in in input_batch_r:
|
150 |
input_batch_content.append(row_in[0])
|
151 |
else:
|
152 |
+
print("[i] Data is single list format")
|
153 |
input_batch_content = input_batch_r
|
154 |
|
155 |
+
print("[i] Batch size:",len(input_batch_content))
|
|
|
156 |
|
157 |
prob_outs = _inference_classifier(input_batch_content)
|
158 |
#sentiment = _inference_sentiment_model_via_api_query({"inputs": extracted['content']})
|
|
|
192 |
gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
|
193 |
gr.Checkbox(label='if url parse cached in archive.org'),
|
194 |
gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
|
195 |
+
outputs=[gr.Dataframe(label='output raw', col_count=1, datatype='number', type='array', wrap=True)],#, header=OUT_HEADERS)],
|
196 |
#gr.Label(label='Company'),
|
197 |
#gr.Label(label='ESG'),
|
198 |
#gr.Label(label='Sentiment'),
|