Spaces:
Runtime error
Runtime error
File size: 2,007 Bytes
d560727 c8ba482 d560727 b91ec33 213e5ae c8ba482 b91ec33 e5ea3a0 521bf85 d560727 c8ba482 d560727 652370a d560727 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import langid
from haystack import Pipeline
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
from haystack.document_stores import InMemoryDocumentStore
class Sejarah:
    """Front end that, for now, only reports the detected language of a question.

    The retrieval/reader pipeline setup in ``__init__`` is commented out, so
    ``interface`` answers with the language label produced by
    ``detect_language`` rather than with an extracted answer.
    """

    def __init__(self):
        """Build an in-memory document store; the remaining setup is disabled."""
        # The store is created with ``use_bm25=True`` but is not kept on the
        # instance; only the disabled code below refers to it.
        document_store = InMemoryDocumentStore(use_bm25=True)

        # ---- Disabled: indexing and querying pipeline setup ----------------
        # NOTE(review): this section calls ``os.listdir`` but no ``import os``
        # is visible among the file's imports -- add it before re-enabling.
        #
        # # initialize the pipeline
        # indexing_pipeline = Pipeline()
        # text_converter = TextConverter()
        # preprocessor = PreProcessor(
        #     clean_whitespace=True,
        #     clean_header_footer=True,
        #     clean_empty_lines=True,
        #     split_by="word",
        #     split_length=200,
        #     split_overlap=20,
        #     split_respect_sentence_boundary=True,
        # )
        # indexing_pipeline.add_node(component=text_converter, name="TextConverter", inputs=["File"])
        # indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["TextConverter"])
        # indexing_pipeline.add_node(component=document_store, name="DocumentStore", inputs=["PreProcessor"])
        # doc_dir = "/content/drive/Shareddrives/Natural Language Processing/Dataset/txt files"
        # files_to_index = [doc_dir + "/" + f for f in os.listdir(doc_dir)]
        # indexing_pipeline.run_batch(file_paths=files_to_index)
        # retriever = BM25Retriever(document_store=document_store)
        # reader = FARMReader(model_name_or_path="primasr/malaybert-for-eqa-finetuned", use_gpu=True)
        # self.querying_pipeline = Pipeline()
        # self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
        # self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

    def interface(self, question):
        """Return a reply string naming the language detected for ``question``.

        Args:
            question: The question text; it is concatenated into the reply,
                so it must be a ``str``.

        Returns:
            ``"Answer of <question> is: <language>"``, where ``<language>`` is
            whatever ``detect_language(question)`` returns.
        """
        # Detection runs first, before any part of the reply is assembled.
        detected = self.detect_language(question)
        reply = "Answer of " + question
        reply += " is: "
        reply += detected
        return reply

    def detect_language(self, content):
        """Classify ``content`` with ``langid`` and return the language label.

        The full classification result is printed to stdout before its first
        element is returned.

        Args:
            content: Text passed unchanged to ``langid.classify``.

        Returns:
            The first element of the ``langid.classify`` result.
        """
        classification = langid.classify(content)
        print(classification)
        return classification[0]
|