# NOTE(review): the following lines were web-scrape artifacts (Hugging Face
# page chrome, commit hashes, and a line-number gutter) — not Python source.
# They are commented out so the file parses.
# Spaces: / Build error / File size: 4,337 Bytes
# 1ce01d1 d9ea2c1 85eb3dd ... (commit-hash gutter removed)
# 1 2 3 ... 116 (line-number gutter removed)
from transformers import TapasTokenizer, TFTapasForQuestionAnswering
import pandas as pd
from transformers import TapasTokenizer, TapasForQuestionAnswering
import pandas as pd
import re
p = re.compile('\d+(\.\d+)?')
def load_model_and_tokenizer():
    """
    Instantiate the TAPAS tokenizer and model, both fine-tuned on
    WikiTable Questions (WTQ).

    Returns:
        tuple: (tokenizer, model) ready for inference.
    """
    # Both artifacts come from the same pretrained checkpoint.
    checkpoint = "google/tapas-base-finetuned-wtq"
    tokenizer = TapasTokenizer.from_pretrained(checkpoint)
    model = TapasForQuestionAnswering.from_pretrained(checkpoint)
    return tokenizer, model
def prepare_inputs(table, queries, tokenizer):
    """
    Normalize a table and tokenize it together with the queries.

    Args:
        table: pandas DataFrame holding the table to be queried.
        queries: list of natural-language question strings.
        tokenizer: TAPAS tokenizer used to encode table + queries;
            called as ``tokenizer(table=..., queries=..., padding=...,
            return_tensors=...)``.

    Returns:
        tuple: (table, inputs) — the normalized (stringified, truncated)
        DataFrame and the tokenized model inputs.
    """
    # TAPAS expects string-valued cells; cap at the first 100 rows to keep
    # the flattened table within the model's input-length budget.
    table = table.astype('str').head(100)
    inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="pt")
    return table, inputs
def generate_predictions(inputs, model, tokenizer):
    """
    Run the model on tokenized inputs and decode its logits.

    Args:
        inputs: tokenized table/query encoding produced by the tokenizer.
        model: TAPAS question-answering model (called as ``model(**inputs)``).
        tokenizer: tokenizer whose ``convert_logits_to_predictions`` decodes
            the raw logits.

    Returns:
        tuple: (predicted_table_cell_coords, predicted_aggregation_operators).
    """
    # Forward pass.
    outputs = model(**inputs)
    # Detach both logit tensors from the autograd graph before decoding
    # them into cell coordinates and aggregation-operator indices.
    cell_logits = outputs.logits.detach()
    aggregation_logits = outputs.logits_aggregation.detach()
    return tokenizer.convert_logits_to_predictions(inputs, cell_logits, aggregation_logits)
def postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table):
    """
    Map aggregation indices to operator names and collect cell answers.

    Args:
        predicted_aggregation_operators: list of int operator indices
            (0=NONE, 1=SUM, 2=AVERAGE, 3=COUNT), one per query.
        predicted_table_cell_coords: list (one per query) of lists of
            (row, col) positional coordinates into ``table``.
        table: pandas DataFrame with string cells (see prepare_inputs).

    Returns:
        tuple: (aggregation_predictions_string, answers) — the operator
        name and the comma-joined cell values for each query.
    """
    # Translate operator indices into their display names.
    aggregation_operators = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
    aggregation_predictions_string = [aggregation_operators[x] for x in predicted_aggregation_operators]
    # Resolve each query's coordinates into cell values.
    # (The operator list is not needed here — previously it was zipped in
    # as an unused loop variable.)
    answers = []
    for coordinates in predicted_table_cell_coords:
        if len(coordinates) == 1:
            # Single cell: use its value directly.
            answers.append(table.iat[coordinates[0]])
        else:
            # Multiple (or zero) cells: join their values into one string.
            answers.append(", ".join(table.iat[coordinate] for coordinate in coordinates))
    return aggregation_predictions_string, answers
def show_answers(queries, answers, aggregation_predictions_string):
    """
    Apply the predicted aggregation to each answer and format the result.

    Only the first query is processed — every branch returns inside the
    loop (callers pass a single query; see execute_query).

    Args:
        queries: list of question strings (echoed to stdout).
        answers: list of comma-joined cell-value strings, one per query.
        aggregation_predictions_string: operator names ("NONE", "SUM",
            "AVERAGE", "COUNT"), one per query.

    Returns:
        str: "Predicted answer: ..." for the first query.
    """
    # Full-match an unsigned integer/decimal. BUG FIX: the old check used
    # p.match, which only anchors the start — a cell like "3.5x" passed the
    # numeric test and then crashed float() below.
    numeric = re.compile(r'\d+(\.\d+)?')
    agg = {
        "NONE": lambda x: x,
        "SUM": lambda x: sum(x),
        "AVERAGE": lambda x: sum(x) / len(x),
        "COUNT": lambda x: len(x),
    }
    for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
        print(query)
        if predicted_agg == "NONE":
            result = "Predicted answer: " + answer
            print(result)  # keep the original console output
            # BUG FIX: this branch previously only printed, so the function
            # fell through and returned None for non-aggregated answers.
            return result
        if all(numeric.fullmatch(val) for val in answer.split(', ')):
            # All cells are numeric: apply the aggregation numerically.
            return "Predicted answer: " + str(agg[predicted_agg](list(map(float, answer.split(',')))))
        if predicted_agg == "COUNT":
            # COUNT works on non-numeric cells too.
            return "Predicted answer: " + str(agg[predicted_agg](answer.split(',')))
        # SUM/AVERAGE over non-numeric cells: report the operator and cells.
        return "Predicted answer: " + predicted_agg + " > " + answer
def execute_query(query, table):
    """
    Answer a single natural-language query against a table with TAPAS.

    Args:
        query: question string.
        table: pandas DataFrame to query.

    Returns:
        The formatted "Predicted answer: ..." string from show_answers.
    """
    question_batch = [query]
    tokenizer, model = load_model_and_tokenizer()
    # Normalize + tokenize, run the model, then decode and format.
    table, model_inputs = prepare_inputs(table, question_batch, tokenizer)
    cell_coords, operators = generate_predictions(model_inputs, model, tokenizer)
    operator_names, cell_answers = postprocess_predictions(operators, cell_coords, table)
    return show_answers(question_batch, cell_answers, operator_names)
# NOTE(review): trailing "|" scrape artifact removed.