import json

import gradio as gr
import pandas as pd
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    BartForConditionalGeneration,
    TapexTokenizer,
    pipeline,
)

# Earlier pipeline-based loading of TAPEX, kept for reference:
# model_tapex = "microsoft/tapex-large-finetuned-wtq"
# tokenizer_tapex = AutoTokenizer.from_pretrained(model_tapex)
# model_tapex = AutoModelForSeq2SeqLM.from_pretrained(model_tapex)
# pipe_tapex = pipeline(
#     "table-question-answering", model=model_tapex, tokenizer=tokenizer_tapex
# )

# Load TAPEX directly as a BART sequence-to-sequence model.
tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
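# TapexTokenizer flattens a pandas DataFrame together with the question into a
# single input sequence, and answers come back as free-form generated text.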


# Earlier explicit loading of TAPAS, kept for reference:
# model_tapas = "google/tapas-large-finetuned-wtq"
# tokenizer_tapas = AutoTokenizer.from_pretrained(model_tapas)
# model_tapas = AutoModelForTableQuestionAnswering.from_pretrained(model_tapas)
# pipe_tapas = pipeline(
#     "table-question-answering", model=model_tapas, tokenizer=tokenizer_tapas
# )

# TAPAS is served through the generic table-question-answering pipeline.
pipe_tapas = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")
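# The pipeline's output is a dict with 'answer', 'coordinates', 'cells', and
# 'aggregator' keys; process2 below reads the first selected cell.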




def process2(query, csv_data):
    # Parse the user-supplied JSON table; fall back to a small demo table when
    # the field is left empty.
    sample = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Number of movies": ["87", "53", "69"],
    }
    data = json.loads(csv_data) if csv_data else sample
    # TAPAS requires every cell to be a string, so cast the whole frame.
    table = pd.DataFrame.from_dict(data).astype(str)
    # Microsoft TAPEX: encode table + question, then decode the generated text.
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
    # Google TAPAS: take the first cell selected by the pipeline.
    result_tapas = pipe_tapas(table=table, query=query)["cells"][0]
    return result_tapex, result_tapas
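
# Quick sanity check (hypothetical call; an empty JSON field falls back to the
# built-in sample table):
# tapex_answer, tapas_answer = process2("How many movies does George Clooney have?", "")
# print(tapex_answer, tapas_answer)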


# Inputs
query_text = gr.Text(label="Enter a question")
# input_file = gr.File(label="Upload a CSV file", type="file")
input_data = gr.Text(label="Table data as JSON")
# rows_slider = gr.Slider(label="Number of rows")

# Output
answer_text_tapex = gr.Text(label="TAPEX answer")
answer_text_tapas = gr.Text(label="TAPAS answer")

description = "This Space lets you ask questions about tabular data with Microsoft [TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). \
Both have been fine-tuned on the [WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n\
Example questions for the sample file with football statistics available in the repository: \n\n\
* Which team has the most wins? Answer: Manchester City FC\n\
* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n\
* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n\
* What is the average number of wins? Answer: 16 (rounded)\n\n\
You can also paste your own table as JSON. Note that the maximum input length is 1,024 tokens for TAPEX and 512 for TAPAS, \
so you may need to limit the number of rows in your table. Chunking is not implemented yet."
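
# A minimal sketch of the row-limiting the description mentions; truncate_table
# is a hypothetical helper, not wired into the interface. Keeping only the
# leading rows helps keep the linearized table within each model's input limit.
def truncate_table(table: pd.DataFrame, max_rows: int = 50) -> pd.DataFrame:
    # head() keeps column names and order while dropping the remaining rows.
    return table.head(max_rows)
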

iface = gr.Interface(
    theme="huggingface",
    description=description,
    layout="vertical",
    fn=process2,
    inputs=[query_text, input_data],
    outputs=[answer_text_tapex, answer_text_tapas],
    allow_flagging="never",
)

iface.launch()