Spaces:
Sleeping
Sleeping
import gradio as gr | |
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack.pipelines import ExtractiveQAPipeline | |
#from torch import | |
from sentence_transformers import CrossEncoder, SentenceTransformer | |
# Module-level singletons shared by the helper functions in this file.

# Store that holds the preprocessed PDF chunks for retrieval.
document_store = InMemoryDocumentStore()

# Hugging Face model id handed to the reader.
model = "Saturdays/mdeberta-v3-base-squad2_refugees_dataset_finetuned"
#model = 'codellama/CodeLlama-13b-Instruct-hf'

# Reader built once at import time from the model id above.
reader = FARMReader(model_name_or_path=model)

# Cleans converted text and splits it by word (length 100, overlap 3)
# while respecting sentence boundaries, per the keyword arguments below.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=100,
    split_respect_sentence_boundary=True,
    split_overlap=3,
)
def print_answers(results):
    """Reduce a pipeline result to a list of plain answer dictionaries.

    Despite its name, this function prints nothing; it only builds and
    returns a list.

    Args:
        results: Mapping with an ``"answers"`` entry that holds an iterable
            of objects exposing ``answer`` and ``score`` attributes.

    Returns:
        A list with one dict per answer, in the original order. Each dict
        contains the ``"answer"`` and ``"score"`` keys, except that a key is
        omitted when the corresponding attribute is ``None``.

    Raises:
        KeyError: If ``results`` has no ``"answers"`` entry.
        AttributeError: If an answer object lacks one of the fields.
    """
    fields = ("answer", "score")  # "context" is deliberately left out
    filtered_answers = []
    for ans in results["answers"]:
        filtered_ans = {}
        for field in fields:
            # Read each attribute exactly once, then drop it if it is None.
            value = getattr(ans, field)
            if value is not None:
                filtered_ans[field] = value
        filtered_answers.append(filtered_ans)
    return filtered_answers
def pdf_to_document_store(pdf_file):
    """Replace the contents of the shared document store with one PDF.

    The module-level ``document_store`` is emptied first, then the PDF is
    converted, run through the module-level ``preprocessor`` and the
    resulting chunks are written back to the store.

    Args:
        pdf_file: Path of the PDF file to convert.

    Returns:
        None.
    """
    # Drop whatever was stored before so only this PDF is searchable.
    document_store.delete_documents()

    pdf_converter = PDFToTextConverter(
        remove_numeric_tables=True,
        valid_languages=["es"],
    )
    converted = pdf_converter.convert(file_path=pdf_file, meta=None)

    # Only the first converted document is kept.
    first_document = converted[0]
    chunks = preprocessor.process([first_document])
    document_store.write_documents(chunks)
# Built on first use by _get_qa_pipeline() and reused for later questions.
_qa_pipeline = None


def _get_qa_pipeline():
    """Index ``data.pdf`` and build the QA pipeline once, then reuse it."""
    global _qa_pipeline
    if _qa_pipeline is None:
        # Same order as before: the documents are written to the store
        # first, and only then is the retriever constructed on top of it.
        pdf_to_document_store("data.pdf")
        retriever = TfidfRetriever(document_store=document_store)
        _qa_pipeline = ExtractiveQAPipeline(reader, retriever)
    return _qa_pipeline


def predict(question):
    """Answer a question against the contents of ``data.pdf``.

    The PDF conversion, the retriever and the pipeline used to be rebuilt
    on every call; they are now created lazily on the first call and kept
    for the lifetime of the process. As a consequence, changes made to
    ``data.pdf`` after the first call are not picked up until restart.

    Args:
        question: The query string passed to the pipeline.

    Returns:
        The list of answer dicts produced by ``print_answers`` for the
        pipeline result (retriever ``top_k`` of 5, reader ``top_k`` of 3).
    """
    result = _get_qa_pipeline().run(
        query=question,
        params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 3}},
    )
    return print_answers(result)
def respond(message, chat_history):
    """Handle one chat turn: answer ``message`` and extend the history.

    Args:
        message: Text typed by the user. When it is empty, ``None`` or only
            whitespace, a default question is asked instead.
        chat_history: List of ``(user_message, bot_message)`` tuples; it is
            extended in place. ``None`` is treated as an empty history.

    Returns:
        A ``("", chat_history)`` tuple: the empty string is the new textbox
        content and the list is the updated conversation.
    """
    if not message or not message.strip():
        message = "¿Dónde puedo solicitar asilo?"
    if chat_history is None:
        chat_history = []

    answers = predict(message)

    # predict() can return an empty list, and print_answers omits the
    # "answer" key when the value is None, so neither lookup is guaranteed.
    bot_message = answers[0].get("answer") if answers else None
    if not bot_message:
        # Fallback reply instead of an IndexError/KeyError in the UI.
        bot_message = "Lo siento, no he encontrado una respuesta a esa pregunta."

    chat_history.append((message, bot_message))
    return "", chat_history
# NOTE(review): this copy of the file has lost its indentation and carries a
# trailing "| |" on every line; the nesting of the `with` blocks below has to
# be restored from the original source before this section can run. The
# flattened text does not show, for example, whether gr.Image belongs to the
# right-hand column or whether both buttons share one row.

# Markdown text describing the project; passed to gr.Markdown further down.
description= "Our chatbot helps refugees arriving in Spain by providing information on key topics. \n This project is based on the article titled [Desarrollando un chatbot para refugiados: nuestra experiencia en Saturdays.AI](https://medium.com/saturdays-ai/desarrollando-un-chatbot-para-refugiados-nuestra-experiencia-en-saturdays-ai-9bf2551432c9), which outlines the process of building a chatbot for refugees. \n You can find the training script in this [github repo](https://github.com/jsr90/chatbot_refugiados_train)." | |
# Build the Gradio Blocks app and bind it to `demo`.
with gr.Blocks(theme="huggingface") as demo: | |
# Page title and a notice that the project is still in development.
gr.HTML("<h1 style='text-align: center; font-size: xx-large'>Chatbot Refugiados (spanish)</h1>") | |
gr.HTML("<h2 style='text-align: center; font-size: large'>The demo you're about to see is from a project currently in development.</h2>") | |
with gr.Row(): | |
# Column with scale=2 holding the chat transcript component.
with gr.Column(scale=2): | |
chatbot = gr.Chatbot() | |
# Column with scale=1 holding the question box, the buttons and an image.
with gr.Column(scale=1): | |
with gr.Row(): | |
# The textbox is pre-filled with a sample question.
msg = gr.Textbox(label="Write your question:", value="¿Dónde puedo solicitar asilo?") | |
with gr.Row(): | |
submit = gr.Button("Submit") | |
clear = gr.Button("Clear") | |
gr.Image("OIG.jpeg") | |
# Submitting the textbox and clicking "Submit" both call respond() with the
# textbox and chatbot values and write its two return values back to them.
msg.submit(respond, [msg, chatbot], [msg, chatbot]) | |
submit.click(respond, [msg, chatbot], [msg, chatbot]) | |
# "Clear" sets the chatbot value to None (presumably emptying the transcript).
clear.click(lambda: None, None, chatbot, queue=False) | |
gr.Markdown(description) | |
# Launch the app only when the file is executed as a script.
if __name__ == "__main__": | |
demo.launch() |