hitz02 committed
Commit
e57fe3b
1 Parent(s): 37e8a32

Create app.py

Files changed (1):
  app.py  +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
import sys
import tensorflow.compat.v1 as tf
import os
import shutil
import csv
import pandas as pd
import numpy as np
import IPython
import streamlit as st
import subprocess
from itertools import islice
import random
#from transformers import pipeline
from transformers import TapasTokenizer, TapasForQuestionAnswering

tf.get_logger().setLevel('ERROR')

def install(package):
    # torch-scatter needs the extra wheel index URL, so split the spec into separate pip arguments
    subprocess.check_call([sys.executable, "-m", "pip", "install"] + package.split())

install('torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu102.html')

model_name = 'google/tapas-base-finetuned-wtq'
#model_name = "table-question-answering"
#model = pipeline(model_name)

model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
tokenizer = TapasTokenizer.from_pretrained(model_name)

st.set_option('deprecation.showfileUploaderEncoding', False)

st.title('Query your Table')
st.header('Upload CSV file')

uploaded_file = st.file_uploader("Choose your CSV file", type='csv')
placeholder = st.empty()

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    data.replace(',', '', regex=True, inplace=True)  # strip thousands separators so numeric cells parse cleanly
    if st.checkbox('Want to see the data?'):
        placeholder.dataframe(data)

    st.header('Enter your queries')
    input_queries = st.text_input('Type your queries separated by comma (,)', value='')
    input_queries = input_queries.split(',')

    # One random highlight colour per query, used to mark that query's answer cells in the table
    colors1 = ["#" + ''.join(random.choice('0123456789ABCDEF') for _ in range(6)) for _ in range(len(input_queries))]
    colors2 = ['background-color:' + str(color) + '; color: black' for color in colors1]

    def styling_specific_cell(x, tags, colors):
        df_styler = pd.DataFrame('', index=x.index, columns=x.columns)
        for idx, tag in enumerate(tags):
            for r, c in tag:
                df_styler.iloc[r, c] = colors[idx]
        return df_styler

    if st.button('Predict Answers'):
        with st.spinner('It will take approximately a minute'):
            data = data.astype(str)  # TAPAS expects every table cell as a string
            inputs = tokenizer(table=data, queries=input_queries, padding='max_length', return_tensors="pt")
            outputs = model(**inputs)
            #outputs = model(table = data, query = queries)
            predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
                inputs, outputs.logits.detach(), outputs.logits_aggregation.detach())

            id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
            aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]

            answers = []

            for coordinates in predicted_answer_coordinates:
                if len(coordinates) == 1:
                    # only a single cell
                    answers.append(data.iat[coordinates[0]])
                else:
                    # multiple cells
                    cell_values = []
                    for coordinate in coordinates:
                        cell_values.append(data.iat[coordinate])
                    answers.append(", ".join(cell_values))

            st.success('Done! Please check the answers below; their cells are highlighted in the table above')

            placeholder.dataframe(data.style.apply(styling_specific_cell, tags=predicted_answer_coordinates, colors=colors2, axis=None))

            for query, answer, predicted_agg, c in zip(input_queries, answers, aggregation_predictions_string, colors1):
                st.write('\n')
                st.markdown('<font color={} size=4>**{}**</font>'.format(c, query), unsafe_allow_html=True)
                st.write('\n')

                if predicted_agg == "NONE" or predicted_agg == 'COUNT':
                    st.markdown('**>** ' + str(answer))
                elif predicted_agg == 'SUM':
                    st.markdown('**>** ' + str(sum(float(v) for v in answer.split(','))))
                else:
                    # AVERAGE
                    st.markdown('**>** ' + str(np.round(np.mean([float(v) for v in answer.split(',')]), 2)))
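
For reference, here is a minimal standalone sketch of the same TAPAS question-answering flow outside Streamlit, assuming the same google/tapas-base-finetuned-wtq checkpoint; the toy table and queries are only illustrative, not part of the app.

import pandas as pd
from transformers import TapasTokenizer, TapasForQuestionAnswering

model_name = "google/tapas-base-finetuned-wtq"
tokenizer = TapasTokenizer.from_pretrained(model_name)
model = TapasForQuestionAnswering.from_pretrained(model_name)

# TAPAS expects every cell as a string (hypothetical example table)
table = pd.DataFrame({"City": ["Paris", "London", "Berlin"],
                      "Population": ["2141000", "8982000", "3645000"]}).astype(str)
queries = ["Which city has the largest population?"]

inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")
outputs = model(**inputs)
coordinates, aggregation_indices = tokenizer.convert_logits_to_predictions(
    inputs, outputs.logits.detach(), outputs.logits_aggregation.detach())

# coordinates holds one list of (row, column) cell positions per query
for query, cells in zip(queries, coordinates):
    print(query, "->", [table.iat[cell] for cell in cells])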