---
language: en
license: mit
datasets:
- covid19
---

# CoQUAD_MPNet: MPNet model for COVID-19

## Introduction

CoQUAD_MPNet is a state-of-the-art MPNet-based language model trained on a COVID-19 dataset, with a focus on post-COVID.

## How to use with deepset Haystack

```
%cd /content/drive/MyDrive
!sudo apt-get install git-lfs
!git lfs install
!git clone https://huggingface.co/shaina/CoQUAD_MPNet
GIT_LFS_SKIP_SMUDGE=1

from haystack.utils import clean_wiki_text, convert_files_to_dicts, fetch_archive_from_http, print_answers
from haystack.nodes import FARMReader, TransformersReader
from haystack.utils import launch_es
launch_es()

! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.9.2

import os
from subprocess import Popen, PIPE, STDOUT
es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
                  stdout=PIPE, stderr=STDOUT,
                  preexec_fn=lambda: os.setuid(1)  # as daemon
                  )
! sleep 30

from haystack.document_stores import ElasticsearchDocumentStore
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")

import pandas as pd
df=pd.read_excel('/content/covid.xlsx')
df.fillna(value="", inplace=True)
print(df.head())

from typing import List
import requests
import pandas as pd
from haystack import Document
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import RAGenerator, DensePassageRetriever

titles = list(df["document_identifier"].values)
texts = list(df["document_text"].values)

documents: List[Document] = []
for title, text in zip(titles, texts):
    documents.append(
        Document(
            content=text,
            meta={
                "name": title or ""
            }
        )
    )
document_store.write_documents(documents)

from haystack.nodes import ElasticsearchRetriever
retriever = ElasticsearchRetriever(document_store=document_store)
reader = FARMReader(model_name_or_path="/content/drive/MyDrive/CoQUAD_MPNet", use_gpu=True)

from haystack.pipelines import ExtractiveQAPipeline
pipe = ExtractiveQAPipeline(reader, retriever)
prediction = pipe.run(
    query="What is post-COVID?", params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
)

from pprint import pprint
pprint(prediction)
```

## Authors

Shaina Raza