Spaces:
Sleeping
Sleeping
File size: 3,551 Bytes
3a6ebd0 8cc74da 7f509d1 3a6ebd0 a78bd5c 3a6ebd0 e5b6d59 9da8577 aa82677 3a6ebd0 ebfc928 3a6ebd0 2331097 3a6ebd0 ebfc928 2331097 ebfc928 2331097 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import ExtractiveQAPipeline
#from torch import
from sentence_transformers import CrossEncoder, SentenceTransformer
# Shared in-memory store; the functions below clear it and refill it with
# the passages extracted from the source PDF.
document_store = InMemoryDocumentStore()

# Checkpoint handed to the extractive reader.
model = "Saturdays/mdeberta-v3-base-squad2_refugees_dataset_finetuned"
reader = FARMReader(model_name_or_path=model)

# Cleaning and splitting options applied to converted documents before
# they are written to the store.
_preprocessor_options = {
    "clean_empty_lines": True,
    "clean_whitespace": True,
    "clean_header_footer": True,
    "split_by": "word",
    "split_length": 100,
    "split_respect_sentence_boundary": True,
    "split_overlap": 3,
}
preprocessor = PreProcessor(**_preprocessor_options)
def print_answers(results):
    """Reduce each answer in a pipeline result to its non-empty key fields.

    Despite the name, nothing is printed; the summaries are returned.

    Args:
        results: Mapping whose ``"answers"`` entry is an iterable of objects
            exposing ``answer`` and ``score`` attributes.

    Returns:
        A list with one dict per answer, in the same order. Each dict holds
        the ``"answer"`` and ``"score"`` values, omitting any that are
        ``None``.
    """
    wanted = ("answer", "score")  # "context" is deliberately left out
    summaries = []
    for candidate in results["answers"]:
        summary = {}
        for name in wanted:
            value = getattr(candidate, name)
            if value is not None:
                summary[name] = value
        summaries.append(summary)
    return summaries
def pdf_to_document_store(pdf_file):
    """Replace the contents of the shared document store with one PDF.

    Args:
        pdf_file: Path of the PDF to convert and index.

    Returns:
        None. The module-level ``document_store`` is modified in place.
    """
    # Start from an empty store so stale passages never mix with new ones.
    document_store.delete_documents()

    pdf_converter = PDFToTextConverter(
        remove_numeric_tables=True,
        valid_languages=["es"],
    )
    converted = pdf_converter.convert(file_path=pdf_file, meta=None)

    # Only the first converted document is preprocessed and indexed.
    first_document = converted[0]
    passages = preprocessor.process([first_document])
    document_store.write_documents(passages)
# Lazily built QA pipeline, populated by the first call to predict().
_qa_pipeline = None


def _get_pipeline():
    """Index ``data.pdf`` and build the extractive QA pipeline on first use."""
    global _qa_pipeline
    if _qa_pipeline is None:
        # The source PDF path is fixed, so converting and indexing it and
        # constructing the retriever is invariant work: do it once instead
        # of on every question.
        pdf_to_document_store("data.pdf")
        retriever = TfidfRetriever(document_store=document_store)
        _qa_pipeline = ExtractiveQAPipeline(reader, retriever)
    return _qa_pipeline


def predict(question):
    """Run extractive question answering over ``data.pdf``.

    Args:
        question: The query string passed to the pipeline.

    Returns:
        The list produced by ``print_answers``: one dict per reader answer
        (the reader is asked for its top 3, from the retriever's top 5
        passages), each with the non-``None`` ``"answer"`` / ``"score"``
        fields.
    """
    result = _get_pipeline().run(
        query=question,
        params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 3}},
    )
    return print_answers(result)
def respond(message, chat_history):
    """Answer one chat turn and append it to the conversation history.

    Args:
        message: Text entered by the user. When empty (or ``None``), the
            default question "¿Dónde puedo solicitar asilo?" is asked.
        chat_history: List of ``(user_message, bot_message)`` tuples; it is
            extended in place. ``None`` is treated as an empty history.

    Returns:
        A ``("", chat_history)`` tuple: an empty string as the new textbox
        value and the updated history.
    """
    if not message:
        message = "¿Dónde puedo solicitar asilo?"
    if chat_history is None:
        chat_history = []

    answers = predict(message)
    # predict() can return no candidates at all, and a candidate's "answer"
    # key is absent when the reader produced None; fall back to a fixed
    # reply instead of raising IndexError/KeyError inside the UI callback.
    bot_message = answers[0].get("answer") if answers else None
    if not bot_message:
        bot_message = "Lo siento, no he encontrado una respuesta a esa pregunta."

    chat_history.append((message, bot_message))
    return "", chat_history
# Markdown shown at the bottom of the page.
description = (
    "Our chatbot helps refugees arriving in Spain by providing information on key topics. \n"
    " This project is based on the article titled [Desarrollando un chatbot para refugiados: nuestra experiencia en Saturdays.AI](https://medium.com/saturdays-ai/desarrollando-un-chatbot-para-refugiados-nuestra-experiencia-en-saturdays-ai-9bf2551432c9), which outlines the process of building a chatbot for refugees. \n"
    " You can find the training script in this [github repo](https://github.com/jsr90/chatbot_refugiados_train)."
)

# NOTE(review): the source reached review with its indentation stripped, so
# the nesting of the layout below is reconstructed from statement order.
# Confirm it against the deployed app, in particular where gr.Image sits.
with gr.Blocks(theme="huggingface") as demo:
    # Page title and development-status notice.
    gr.HTML("<h1 style='text-align: center; font-size: xx-large'>Chatbot Refugiados (spanish)</h1>")
    gr.HTML("<h2 style='text-align: center; font-size: large'>The demo you're about to see is from a project currently in development.</h2>")

    with gr.Row():
        # First column (scale=2): the conversation.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot()

        # Second column (scale=1): question box, action buttons, picture.
        with gr.Column(scale=1):
            with gr.Row():
                msg = gr.Textbox(
                    label="Write your question:",
                    value="¿Dónde puedo solicitar asilo?",
                )
            with gr.Row():
                submit = gr.Button("Submit")
                clear = gr.Button("Clear")
            gr.Image("OIG.jpeg")

    # Pressing Enter in the textbox and clicking "Submit" both run respond(),
    # which returns the new textbox value and the updated history.
    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    submit.click(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

    # "Clear" resets the chatbot by sending it None.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

    gr.Markdown(description)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    demo.launch()