Update app.py
app.py CHANGED
@@ -110,13 +110,28 @@ def _inference_classifier(text):
 
     return sigmoid(ort_outs[0])
 
-def inference(input_batch,isurl,use_archive,limit_companies=10):
+def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
     input_batch_content = []
-
-
+    if file_in is not None:
+        dft = pd.read_csv(
+            file_in,
+            compression=dict(method='zip')
+        )
+        assert file_col_name in dft.columns, "Indicated col_name not found in file"
+        input_batch_r = dft[file_col_name].values.tolist()
+    else:
+        assert len(input_batch) > 0, "input_batch array is empty"
+        input_batch_r = input_batch
+
+    print("->Input size:",len(input_batch_r))
+    print("+",input_batch_r)
+
     if isurl:
-        for row_in in
-
+        for row_in in input_batch_r:
+            if isinstance(row_in , list):
+                url = row_in[0]
+            else:
+                url = row_in
             if use_archive:
                 archive = is_in_archive(url)
                 if archive['archived']:
@@ -125,8 +140,12 @@ def inference(input_batch,isurl,use_archive,limit_companies=10):
                 extracted = Extractor().extract(requests.get(url).text)
                 input_batch_content.append(extracted['content'])
     else:
-
-
+        if isinstance(input_batch_r[0], list):
+            for row_in in input_batch_r:
+                input_batch_content.append(row_in[0])
+        else:
+            input_batch_content = input_batch_r
+
     print("->Batch size:",len(input_batch_content))
     print("+",input_batch_content)
 
@@ -162,7 +181,9 @@ examples = [[[['https://www.bbc.com/news/uk-62732447'],
             ['https://www.bbc.com/news/business-62728621'],
             ['https://www.bbc.com/news/science-environment-62680423']],'url',False,5]]
 demo = gr.Interface(fn=inference,
-                    inputs=[gr.
+                    inputs=[gr.File(label='zipped csv file'),
+                            gr.Textbox(label='If csv, column header name that contains the relevant data:'),
+                            gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),
                             gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
                             gr.Checkbox(label='if url parse cached in archive.org'),
                             gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
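
For reviewers who want to exercise the new inputs without the rest of the Space, here is a minimal sketch of the updated flow. It assumes the upload is a zip archive containing a single CSV, that the Gradio 3.x `gr.File` component hands the callback a temp-file object exposing `.name`, and it uses `run_model` as a stand-in for the Space's actual classifier/NER pipeline, which this diff does not show. The URL/archive.org branch is omitted.

import gradio as gr
import pandas as pd


def run_model(texts, limit_companies):
    # Placeholder for the classifier/NER inference performed elsewhere in app.py.
    return [{"text": t[:80], "limit": limit_companies} for t in texts]


def inference(file_in, file_col_name, input_batch, isurl, use_archive, limit_companies=10):
    # Prefer the uploaded zipped CSV; otherwise fall back to the dataframe widget.
    if file_in is not None:
        # pandas can read a zip that wraps exactly one CSV.
        dft = pd.read_csv(file_in.name, compression="zip")
        assert file_col_name in dft.columns, "Indicated col_name not found in file"
        rows = dft[file_col_name].tolist()
    else:
        assert len(input_batch) > 0, "input_batch array is empty"
        # gr.Dataframe with one column and type='array' yields [[cell], [cell], ...]
        rows = [r[0] if isinstance(r, list) else r for r in input_batch]
    # isurl/use_archive handling (page fetching, archive.org lookup) omitted in this sketch.
    return run_model(rows, limit_companies)


demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.File(label="zipped csv file"),
        gr.Textbox(label="If csv, column header name that contains the relevant data:"),
        gr.Dataframe(label="input batch", col_count=1, datatype="str", type="array", wrap=True),
        gr.Dropdown(label="data type", choices=["text", "url"], type="index", value="url"),
        gr.Checkbox(label="if url parse cached in archive.org"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Limit NER output", value=5),
    ],
    outputs=gr.JSON(label="output"),  # assumed; the real output component is outside this hunk
)

if __name__ == "__main__":
    demo.launch()

Note that the diff passes `file_in` straight to `pd.read_csv` with `compression=dict(method='zip')`; whether the Gradio file wrapper is accepted directly or needs the `.name` path depends on the Gradio version, so the sketch uses the path form.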