# Spaces status: Runtime error
import langid | |
from haystack import Pipeline | |
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader | |
from haystack.document_stores import InMemoryDocumentStore | |
class Sejarah:
    """Question front end that currently only reports a question's language.

    The Haystack indexing and querying pipelines are disabled (they are kept
    as commented-out code in ``__init__``), so ``interface`` answers with the
    language detected for the question instead of an extracted answer.
    """

    def __init__(self):
        """Build an in-memory BM25 document store.

        The store is held in a local variable only; no attribute is set on
        ``self`` while the pipeline setup below stays commented out.
        """
        document_store = InMemoryDocumentStore(use_bm25=True)

        # --- Disabled pipeline setup (kept for reference) -------------------
        # NOTE(review): the `os.listdir` line below needs `import os`, which
        # is not among the imports visible in this file -- confirm before
        # re-enabling.
        #
        # # Initialize the pipeline
        # indexing_pipeline = Pipeline()
        # text_converter = TextConverter()
        # preprocessor = PreProcessor(
        #     clean_whitespace=True,
        #     clean_header_footer=True,
        #     clean_empty_lines=True,
        #     split_by="word",
        #     split_length=200,
        #     split_overlap=20,
        #     split_respect_sentence_boundary=True,
        # )
        # indexing_pipeline.add_node(component=text_converter, name="TextConverter", inputs=["File"])
        # indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["TextConverter"])
        # indexing_pipeline.add_node(component=document_store, name="DocumentStore", inputs=["PreProcessor"])
        # doc_dir = "/content/drive/Shareddrives/Natural Language Processing/Dataset/txt files"
        # files_to_index = [doc_dir + "/" + f for f in os.listdir(doc_dir)]
        # indexing_pipeline.run_batch(file_paths=files_to_index)
        # retriever = BM25Retriever(document_store=document_store)
        # reader = FARMReader(model_name_or_path="primasr/malaybert-for-eqa-finetuned", use_gpu=True)
        # self.querying_pipeline = Pipeline()
        # self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
        # self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

    def interface(self, question):
        """Return a reply string naming the language detected for *question*.

        Args:
            question: The question text; it is passed to ``detect_language``
                and embedded verbatim in the reply.

        Returns:
            The string ``"Answer of <question> is: <language>"``.
        """
        detected = self.detect_language(question)
        # Join the fixed fragments around the question and the detected
        # language; the pieces must all be strings.
        return "".join(("Answer of ", question, " is: ", detected))

    def detect_language(self, content):
        """Classify *content* with ``langid`` and return the first result item.

        The full value returned by ``langid.classify`` is printed to stdout
        before its first element is returned (presumably the language code --
        see the langid documentation).

        Args:
            content: Text handed to ``langid.classify``.

        Returns:
            Element ``0`` of the classification result.
        """
        prediction = langid.classify(content)
        print(prediction)
        top_item = prediction[0]
        return top_item