shaina committed
Commit
97ba64d
1 Parent(s): 50dc97e

Update README.md

Files changed (1)
  1. README.md +0 -21
README.md CHANGED
@@ -18,20 +18,13 @@ It is a state-of-the-art language model for MPNet for Covid-19 dataset with focu
 !git lfs install

 !git clone https://huggingface.co/shaina/CoQUAD_MPNet
- # if you want to clone without large files – just their pointers
- # prepend your git clone with the following env var:
 GIT_LFS_SKIP_SMUDGE=1

-
-
-
 from haystack.utils import clean_wiki_text, convert_files_to_dicts, fetch_archive_from_http, print_answers
 from haystack.nodes import FARMReader, TransformersReader
- # Recommended: Start Elasticsearch using Docker via the Haystack utility function
 from haystack.utils import launch_es

 launch_es()
- # In Colab / No Docker environments: Start Elasticsearch from source
 ! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
 ! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
 ! chown -R daemon:daemon elasticsearch-7.9.2
@@ -42,9 +35,7 @@ es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
 stdout=PIPE, stderr=STDOUT,
 preexec_fn=lambda: os.setuid(1) # as daemon
 )
- # wait until ES has started
 ! sleep 30
- # Connect to Elasticsearch

 from haystack.document_stores import ElasticsearchDocumentStore
 document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
@@ -62,7 +53,6 @@ from haystack import Document
 from haystack.document_stores import FAISSDocumentStore
 from haystack.nodes import RAGenerator, DensePassageRetriever

- # Use data to initialize Document objects
 titles = list(df["document_identifier"].values)
 texts = list(df["document_text"].values)
 documents: List[Document] = []
@@ -75,11 +65,6 @@ for title, text in zip(titles, texts):
 }
 )
 )
-
-
-
-
- # Now, let's write the dicts containing documents to our DB.
 document_store.write_documents(documents)

 from haystack.nodes import ElasticsearchRetriever
@@ -88,15 +73,9 @@ reader = FARMReader(model_name_or_path="/content/drive/MyDrive/CoQUAD_MPNet", us

 from haystack.pipelines import ExtractiveQAPipeline
 pipe = ExtractiveQAPipeline(reader, retriever)
- # You can configure how many candidates the reader and retriever shall return
- # The higher top_k_retriever, the better (but also the slower) your answers.
-
-
 prediction = pipe.run(
 query="What is post-COVID?", params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
 )
-
- # Now you can either print the object directly...
 from pprint import pprint

 pprint(prediction)
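Note: the `es_server = Popen(...)` block in the diff relies on imports that fall outside the hunks shown. A minimal, self-contained sketch of that Colab/no-Docker startup step, assuming the Elasticsearch 7.9.2 tarball has already been downloaded, extracted, and chown'ed as in the README:

```python
import os
import time
from subprocess import Popen, PIPE, STDOUT

# Launch Elasticsearch from the extracted tarball as a background process,
# switching to the unprivileged "daemon" user (uid 1) to match the
# `chown -R daemon:daemon elasticsearch-7.9.2` step above.
es_server = Popen(
    ["elasticsearch-7.9.2/bin/elasticsearch"],
    stdout=PIPE,
    stderr=STDOUT,
    preexec_fn=lambda: os.setuid(1),
)

# Give Elasticsearch time to come up before connecting (the README sleeps 30 s).
time.sleep(30)
```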
 
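For readers reconstructing the workflow from the fragments above (the body of the `Document`-building loop and the retriever/reader construction are only partly visible in the hunk headers), the following is a consolidated sketch of the same Haystack 1.x pipeline. It assumes Elasticsearch is already running on localhost, and it substitutes a tiny in-memory DataFrame and the Hub model ID `shaina/CoQUAD_MPNet` for the README's `df` and Google Drive checkpoint path:

```python
from typing import List

import pandas as pd

from haystack import Document
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import ElasticsearchRetriever, FARMReader
from haystack.pipelines import ExtractiveQAPipeline
from haystack.utils import print_answers

# Toy stand-in for the CoQUAD dataframe; the README's `df` with its
# document_identifier / document_text columns is loaded elsewhere.
df = pd.DataFrame(
    {
        "document_identifier": ["doc-001"],
        "document_text": [
            "Post-COVID condition (long COVID) refers to symptoms that persist "
            "or develop after the acute phase of a SARS-CoV-2 infection."
        ],
    }
)

# Wrap each row in a Haystack Document; the title goes into the metadata.
titles = list(df["document_identifier"].values)
texts = list(df["document_text"].values)
documents: List[Document] = [
    Document(content=text, meta={"name": title})
    for title, text in zip(titles, texts)
]

# Index the documents into the Elasticsearch instance started earlier.
document_store = ElasticsearchDocumentStore(
    host="localhost", username="", password="", index="document"
)
document_store.write_documents(documents)

# Sparse retriever over the index + the fine-tuned MPNet extractive reader.
retriever = ElasticsearchRetriever(document_store=document_store)
reader = FARMReader(model_name_or_path="shaina/CoQUAD_MPNet", use_gpu=True)

# The retriever narrows the corpus to its top_k candidates; the reader then
# extracts answer spans from those candidates.
pipe = ExtractiveQAPipeline(reader, retriever)
prediction = pipe.run(
    query="What is post-COVID?",
    params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
)

# Compact view of the extracted answers; pprint(prediction) dumps the full object.
print_answers(prediction, details="minimum")
```

`print_answers` is already imported in the README's first import line, so it can replace the raw `pprint(prediction)` call when only the answer texts and scores are of interest.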