File size: 2,007 Bytes
d560727
c8ba482
 
 
d560727
b91ec33
213e5ae
c8ba482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b91ec33
e5ea3a0
521bf85
d560727
 
 
c8ba482
d560727
 
652370a
 
d560727
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import logging

import langid
from haystack import Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
    
class Sejarah:
    """Question front end that currently only reports the query's language.

    The constructor creates an in-memory BM25 document store. The Haystack
    indexing and extractive-QA pipelines are not wired up yet (their setup
    is kept below as a commented-out blueprint), so ``interface`` answers
    with the language code detected by ``langid`` instead of a real answer.
    """

    # Shared logger; replaces the ad-hoc ``print`` used for debugging.
    _log = logging.getLogger(__name__)

    def __init__(self) -> None:
        """Create the document store; no documents are indexed here."""
        # Keep the store on the instance. It used to live in a local that was
        # discarded as soon as __init__ returned, so nothing could use it.
        self.document_store = InMemoryDocumentStore(use_bm25=True)

        # ------------------------------------------------------------------
        # Disabled pipeline blueprint (not executed).
        # NOTE(review): this uses os.listdir, but no `import os` is visible
        # among the file's imports -- add it before re-enabling.
        # NOTE(review): doc_dir looks like a Colab/Drive mount path; confirm
        # where the text files live in the deployment environment.
        # ------------------------------------------------------------------
        # indexing_pipeline = Pipeline()
        # text_converter = TextConverter()
        # preprocessor = PreProcessor(
        #     clean_whitespace=True,
        #     clean_header_footer=True,
        #     clean_empty_lines=True,
        #     split_by="word",
        #     split_length=200,
        #     split_overlap=20,
        #     split_respect_sentence_boundary=True,
        # )
        #
        # indexing_pipeline.add_node(component=text_converter, name="TextConverter", inputs=["File"])
        # indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["TextConverter"])
        # indexing_pipeline.add_node(component=self.document_store, name="DocumentStore", inputs=["PreProcessor"])
        #
        # doc_dir = "/content/drive/Shareddrives/Natural Language Processing/Dataset/txt files"
        #
        # files_to_index = [doc_dir + "/" + f for f in os.listdir(doc_dir)]
        # indexing_pipeline.run_batch(file_paths=files_to_index)
        #
        # retriever = BM25Retriever(document_store=self.document_store)
        # reader = FARMReader(model_name_or_path="primasr/malaybert-for-eqa-finetuned", use_gpu=True)
        #
        # self.querying_pipeline = Pipeline()
        # self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
        # self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

    def interface(self, question: str) -> str:
        """Return the placeholder answer string for ``question``.

        Args:
            question: The user's question text.

        Returns:
            ``"Answer of <question> is: <language>"``, where ``<language>``
            is whatever ``detect_language`` returns for the question.
        """
        language = self.detect_language(question)
        return f"Answer of {question} is: {language}"

    def detect_language(self, content: str) -> str:
        """Classify ``content`` with ``langid`` and return the language code.

        Args:
            content: Text to classify.

        Returns:
            The first element of the tuple returned by ``langid.classify``.
        """
        result = langid.classify(content)
        # Log the full classification result at DEBUG level instead of
        # printing it to stdout on every request.
        self._log.debug("langid classification: %s", result)
        return result[0]