Spaces:
Runtime error
Runtime error
Update Sejarah.py
Browse files- Sejarah.py +35 -1
Sejarah.py
CHANGED
@@ -1,14 +1,48 @@
|
|
1 |
import langid
|
|
|
|
|
|
|
2 |
|
3 |
class Sejarah:
    """Placeholder question-answering front end.

    For now the "answer" only reports the language that langid detects in
    the question.
    """

    def __init__(self):
        """Initialise the instance; no state is required yet.

        The docstring doubles as the method body: an empty ``def`` does not
        parse, which prevented the module from being imported.
        """

    def interface(self, question):
        """Return the placeholder answer string for *question*.

        Args:
            question: Text of the user's question.

        Returns:
            ``"Answer of <question> is: <language>"`` where ``<language>`` is
            the language code detected for the question.
        """
        return self._compose_answer(question, self.detect_language(question))

    @staticmethod
    def _compose_answer(question, detection):
        """Build the answer string from a language-detection result.

        Args:
            question: Text of the user's question.
            detection: Either a ``(language_code, score)`` tuple, as returned
                by ``langid.classify``, or a bare language-code string.
        """
        # langid.classify yields a (language, score) tuple; concatenating the
        # tuple to a str raised TypeError, so keep only the language code.
        language = detection[0] if isinstance(detection, tuple) else detection
        return f"Answer of {question} is: {language}"

    def detect_language(self, content):
        """Classify *content* with langid and return its result unchanged."""
        return langid.classify(content)
|
|
|
1 |
import langid
|
2 |
+
from haystack import Pipeline
|
3 |
+
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
|
4 |
+
from haystack.document_stores import InMemoryDocumentStore
|
5 |
|
6 |
class Sejarah:
    """Question-answering front end backed by a Haystack document store.

    The retrieval/reader pipelines are not wired up yet (their setup code is
    still commented out in ``__init__``), so for now the "answer" only reports
    the language that langid detects in the question.
    """

    def __init__(self):
        """Create the in-memory BM25 document store.

        The store is only consumed by the disabled pipeline code below, so
        nothing is kept on the instance yet.
        """
        document_store = InMemoryDocumentStore(use_bm25=True)

        # Disabled indexing/querying pipeline setup, kept for reference.
        # NOTE(review): this block calls os.listdir, but no `import os` is
        # visible among the file's imports - add it before re-enabling.
        #
        # #initialize the pipeline
        # indexing_pipeline = Pipeline()
        # text_converter = TextConverter()
        # preprocessor = PreProcessor(
        #     clean_whitespace=True,
        #     clean_header_footer=True,
        #     clean_empty_lines=True,
        #     split_by="word",
        #     split_length=200,
        #     split_overlap=20,
        #     split_respect_sentence_boundary=True,
        # )

        # indexing_pipeline.add_node(component=text_converter, name="TextConverter", inputs=["File"])
        # indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["TextConverter"])
        # indexing_pipeline.add_node(component=document_store, name="DocumentStore", inputs=["PreProcessor"])

        # doc_dir = "/content/drive/Shareddrives/Natural Language Processing/Dataset/txt files"

        # files_to_index = [doc_dir + "/" + f for f in os.listdir(doc_dir)]
        # indexing_pipeline.run_batch(file_paths=files_to_index)

        # retriever = BM25Retriever(document_store=document_store)
        # reader = FARMReader(model_name_or_path="primasr/malaybert-for-eqa-finetuned", use_gpu=True)

        # self.querying_pipeline = Pipeline()
        # self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
        # self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

    def interface(self, question):
        """Return the placeholder answer string for *question*.

        Args:
            question: Text of the user's question.

        Returns:
            ``"Answer of <question> is: <language>"`` where ``<language>`` is
            the language code detected for the question.
        """
        return self._compose_answer(question, self.detect_language(question))

    @staticmethod
    def _compose_answer(question, detection):
        """Build the answer string from a language-detection result.

        Args:
            question: Text of the user's question.
            detection: Either a ``(language_code, score)`` tuple, as returned
                by ``langid.classify``, or a bare language-code string.
        """
        # langid.classify yields a (language, score) tuple; concatenating the
        # tuple to a str raised TypeError, so keep only the language code.
        language = detection[0] if isinstance(detection, tuple) else detection
        return f"Answer of {question} is: {language}"

    def detect_language(self, content):
        """Classify *content* with langid and return its result unchanged."""
        return langid.classify(content)
|