sengzi committed on
Commit
41c28bc
·
1 Parent(s): 521bf85

Update Sejarah.py

Browse files
Files changed (1) hide show
  1. Sejarah.py +35 -1
Sejarah.py CHANGED
@@ -1,14 +1,48 @@
1
  import langid
 
 
 
2
 
3
class Sejarah:
    """Minimal responder that reports the language detected in a question."""

    def __init__(self):
        """Set the display name of this component."""
        self.name = 'Sejarah'

    def interface(self, question):
        """Build the reply string for *question*.

        Args:
            question: The question text.

        Returns:
            The string ``"Answer of <question> is: <language code>"``.
        """
        # detect_language() hands back langid's (language, score) tuple.
        # Concatenating that tuple to a str raised TypeError, so unpack it
        # and use only the language code.
        language, _score = self.detect_language(question)
        return f"Answer of {question} is: {language}"

    def detect_language(self, content):
        """Classify *content* with langid.

        Args:
            content: The text to classify.

        Returns:
            Whatever ``langid.classify`` returns for *content*; per the
            langid documentation this is a ``(language, score)`` tuple.
        """
        return langid.classify(content)
 
1
  import langid
2
+ from haystack import Pipeline
3
+ from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
4
+ from haystack.document_stores import InMemoryDocumentStore
5
 
6
class Sejarah:
    """Question-answering front end.

    At present only language detection is active: the Haystack indexing and
    querying pipelines in ``__init__`` are commented out.
    """

    def __init__(self):
        """Create the BM25 in-memory document store.

        The pipeline setup that would consume the store is disabled below.
        """
        # Only referenced by the disabled pipeline code that follows; it is
        # not stored on the instance.
        document_store = InMemoryDocumentStore(use_bm25=True)

        # Disabled indexing / querying pipeline setup.
        # NOTE: this code calls os.listdir(), but the file does not import
        # `os`; add `import os` before re-enabling it.
        #
        # #initialize the pipeline
        # indexing_pipeline = Pipeline()
        # text_converter = TextConverter()
        # preprocessor = PreProcessor(
        #     clean_whitespace=True,
        #     clean_header_footer=True,
        #     clean_empty_lines=True,
        #     split_by="word",
        #     split_length=200,
        #     split_overlap=20,
        #     split_respect_sentence_boundary=True,
        # )

        # indexing_pipeline.add_node(component=text_converter, name="TextConverter", inputs=["File"])
        # indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["TextConverter"])
        # indexing_pipeline.add_node(component=document_store, name="DocumentStore", inputs=["PreProcessor"])

        # doc_dir = "/content/drive/Shareddrives/Natural Language Processing/Dataset/txt files"

        # files_to_index = [doc_dir + "/" + f for f in os.listdir(doc_dir)]
        # indexing_pipeline.run_batch(file_paths=files_to_index)

        # retriever = BM25Retriever(document_store=document_store)
        # reader = FARMReader(model_name_or_path="primasr/malaybert-for-eqa-finetuned", use_gpu=True)

        # self.querying_pipeline = Pipeline()
        # self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
        # self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

    def interface(self, question):
        """Build the reply string for *question*.

        Args:
            question: The question text.

        Returns:
            The string ``"Answer of <question> is: <language code>"``.
        """
        # detect_language() hands back langid's (language, score) tuple.
        # Concatenating that tuple to a str raised TypeError, so unpack it
        # and use only the language code.
        language, _score = self.detect_language(question)
        return f"Answer of {question} is: {language}"

    def detect_language(self, content):
        """Classify *content* with langid.

        Args:
            content: The text to classify.

        Returns:
            Whatever ``langid.classify`` returns for *content*; per the
            langid documentation this is a ``(language, score)`` tuple.
        """
        return langid.classify(content)