import gradio as gr

from setup_database import get_document_store, add_data
from setup_modules import create_retriever, create_readers_and_pipeline, text_reader_types, table_reader_types

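# Build the document store and index the preprocessed website text, website tables
# and course-schedule tables taken from the Bogazici University website.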
document_index = "document"
document_store = get_document_store(document_index)
filenames = ["processed_website_tables", "processed_website_text", "processed_schedule_tables"]
document_store, data = add_data(filenames, document_store, document_index)
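# Set up the dense retriever over the indexed documents, then build the extractive QA
# pipeline with a text reader (Deberta-large) and a table reader (Tapas).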
document_store, retriever = create_retriever(document_store)
text_reader_type = text_reader_types['deberta-large']
table_reader_type = table_reader_types['tapas']
pipeline = create_readers_and_pipeline(retriever, text_reader_type, table_reader_type, True, True)

title = "Welcome to BounWiki: The Question Answering Engine for Bogazici Students!"

head = '''
This engine uses information from the Bogazici University website to answer questions about different areas such as:

 - Semester dates (e.g. registration period, add/drop period, ...)
 - Campus buildings and their locations
 - General university information, like buses from campus and taxi numbers
 - Schedule information for all courses

It returns the top 3 results and assigns each a confidence score, which makes it easier to judge whether a given answer can be relied on.
You can click on one of the examples below to get started. Have fun!
'''


article = '''
# How does this work?

This app uses an "MPNet" sentence-transformer to encode information from the website into an embedding space.
When faced with a query, the semantically most similar documents are retrieved.
A text and a table language model ("Deberta-large" and "Tapas" here) then extract the answers to the original question from the retrieved documents and return them to the interface.
For a more detailed description of how this model works, please refer to the full report, which is hosted on GitHub (https://github.com/LeoGitGuy/Bounwiki/blob/main/BounWiki.pdf).
'''
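
# The retrieval + reading flow described in the article above looks roughly like the
# sketch below. This is illustrative only: the real construction lives in
# setup_modules.create_readers_and_pipeline, and the Haystack classes and model
# checkpoints named here are assumptions, not the pinned ones.
#
#   from haystack.nodes import EmbeddingRetriever, FARMReader, TableReader
#
#   retriever = EmbeddingRetriever(
#       document_store=document_store,
#       embedding_model="sentence-transformers/all-mpnet-base-v2",  # the "MPNet" encoder (assumed checkpoint)
#   )
#   text_reader = FARMReader(model_name_or_path="deepset/deberta-v3-large-squad2")    # "Deberta-large" (assumed checkpoint)
#   table_reader = TableReader(model_name_or_path="google/tapas-base-finetuned-wtq")  # "Tapas" (assumed checkpoint)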

examples = [
    ["When is the add/dropp period?"],
    ["What does it mean if instructor consent is required?"],
    ["Where is the english preparatory unit located?"],
]

label = gr.Label(num_top_classes=3)

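# Run the QA pipeline on the user's question and map each returned answer to its
# confidence score; gr.Label displays these as the top-3 ranked answers.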
def predict(query):
    prediction = pipeline.run(query=query, params={"top_k": 3})
    return {a.answer: float(a.score) for a in prediction["answers"]}

interface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=5, max_lines=6, label="Input Text"),
    outputs=label,
    title=title,
    description=head,
    article=article,
    examples=examples,
)
interface.launch()