Spaces:
Runtime error
Runtime error
File size: 3,230 Bytes
082cde2 40d15de 082cde2 40d15de 082cde2 4931940 001fb6f 4931940 082cde2 3ee2a8f 082cde2 3ee2a8f 082cde2 3ee2a8f 40d15de 082cde2 3ee2a8f f14d548 3ee2a8f 40d15de 3ee2a8f 4931940 3ee2a8f 082cde2 094ce77 f14d548 082cde2 293747f f14d548 082cde2 dc87d46 082cde2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import json

import gradio as gr
import pandas as pd
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    BartForConditionalGeneration,
    TapexTokenizer,
    pipeline,
)
# model_tapex = "microsoft/tapex-large-finetuned-wtq"
# tokenizer_tapex = AutoTokenizer.from_pretrained(model_tapex)
# model_tapex = AutoModelForSeq2SeqLM.from_pretrained(model_tapex)
# pipe_tapex = pipeline(
# "table-question-answering", model=model_tapex, tokenizer=tokenizer_tapex
# )
#new
tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
# model_tapas = "google/tapas-large-finetuned-wtq"
# tokenizer_tapas = AutoTokenizer.from_pretrained(model_tapas)
# model_tapas = AutoModelForTableQuestionAnswering.from_pretrained(model_tapas)
# pipe_tapas = pipeline(
# "table-question-answering", model=model_tapas, tokenizer=tokenizer_tapas
# )
#new
pipe_tapas = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")
def process2(query, csv_data):
csv_data={"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], "Number of movies": ["87", "53", "69"]}
table = pd.DataFrame.from_dict(csv_data)
#microsoft
encoding = tokenizer(table=table, query=query, return_tensors="pt")
outputs = model.generate(**encoding)
result_tapex=tokenizer.batch_decode(outputs, skip_special_tokens=True)
#google
result_tapas = pipe_tapas(table=table, query=query)['cells'][0]
return result_tapex, result_tapas
# Inputs
query_text = gr.Text(label="Enter a question")
# input_file = gr.File(label="Upload a CSV file", type="file")
input_data = gr.Text(label="Input table json data")
# rows_slider = gr.Slider(label="Number of rows")
# Output
answer_text_tapex = gr.Text(label="TAPEX answer")
answer_text_tapas = gr.Text(label="TAPAS answer")
description = "This Space lets you ask questions on CSV documents with Microsoft [TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). \
Both have been fine-tuned on the [WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n\
A sample file with football statistics is available in the repository: \n\n\
* Which team has the most wins? Answer: Manchester City FC\n\
* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n\
* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n\
* What is the average number of wins? Answer: 16 (rounded)\n\n\
You can also upload your own CSV file. Please note that maximum sequence length for both models is 1024 tokens, \
so you may need to limit the number of rows in your CSV file. Chunking is not implemented yet."
iface = gr.Interface(
theme="huggingface",
description=description,
layout="vertical",
fn=process2,
inputs=[query_text, input_data],
outputs=[answer_text_tapex, answer_text_tapas],
examples=[
],
allow_flagging="never",
)
iface.launch() |