import functools
import pickle
from pathlib import Path

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer

MODEL_NAME = 'intfloat/multilingual-e5-large-instruct'
QURAN_TEXT_PATH = 'quran-simple-clean.txt'
QURAN_SPLIT_PATH = 'quran-splitted.sav'
EMBEDDINGS_PATH = 'encoded_quran_text_split_multilingual-e5-large-instruct.sav'

# Each query must come with a one-sentence instruction that describes the task
TASK = 'Given a web search query, retrieve relevant passages that answer the query'

# Number of best-matching rows returned per query.
TOP_K = 6


def _get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the instruction-prefixed prompt that is fed to the model."""
    return f'Instruct: {task_description}\nQuery: {query}'


def _format_number(value) -> str:
    """Render a sura/aya number as text without a trailing ``.0``.

    ``str(value).rstrip('.0')`` must not be used for this: ``rstrip`` removes
    any trailing run of the characters ``.`` and ``0``, so ``"10.0"`` would
    become ``"1"``.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return str(value)
    return str(int(number)) if number.is_integer() else str(value)


def _load_or_build_embeddings(model, documents):
    """Return embeddings for *documents*, reusing the on-disk copy if usable.

    The pickled embeddings at ``EMBEDDINGS_PATH`` are reused when they have
    one row per document; otherwise the documents are encoded again and the
    file is (re)written. Delete the file to force a re-encode.
    """
    cache_file = Path(EMBEDDINGS_PATH)
    if cache_file.is_file():
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with cache_file.open('rb') as fh:
            cached = pickle.load(fh)
        if len(cached) == len(documents):
            return cached

    embeddings = model.encode(
        documents, convert_to_tensor=True, normalize_embeddings=True
    )
    with cache_file.open('wb') as fh:
        pickle.dump(embeddings, fh)
    return embeddings


@functools.lru_cache(maxsize=1)
def _load_resources():
    """Load the verse table, split table, model and document embeddings once.

    Cached so that the model and the embeddings are not rebuilt per query.

    Returns:
        Tuple ``(quran, quran_splitted, model, document_embeddings)``.
    """
    quran = pd.read_csv(QURAN_TEXT_PATH, delimiter="|")
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(QURAN_SPLIT_PATH, 'rb') as fh:
        quran_splitted = pickle.load(fh)
    model = SentenceTransformer(MODEL_NAME)
    documents = quran_splitted['text'].tolist()
    document_embeddings = _load_or_build_embeddings(model, documents)
    return quran, quran_splitted, model, document_embeddings


def find(query: str) -> str:
    """Return the verses most similar to *query*, one per line.

    The query is embedded with the instruction prefix, scored against every
    row of the split table, and the ``TOP_K`` highest-scoring rows are looked
    up by ``sura``/``aya`` in the full verse table.

    Args:
        query: Free-text search query.

    Returns:
        A string with one line per hit, formatted as
        ``"<verse text> (Q.S <sura>:<aya>)\\n"``.

    Raises:
        ValueError: If a hit does not match exactly one row of the verse
            table (raised by ``Series.item()``).
    """
    queries = [_get_detailed_instruct(TASK, query)]
    print("cekpoin0\n")

    quran, quran_splitted, model, document_embeddings = _load_resources()
    print("cekpoin1\n")

    query_embeddings = model.encode(
        queries, convert_to_tensor=True, normalize_embeddings=True
    )
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("cekpoin2\n")

    # insert the similarity value to dataframe & sort it
    # (assign() works on a copy, so the cached table is never mutated)
    ranked = (
        quran_splitted.assign(similarity=scores.tolist()[0])
        .sort_values(by='similarity', ascending=False)
        .head(TOP_K)
    )
    print("cekpoin3\n")

    lines = []
    for _, hit in ranked.iterrows():
        verse = quran.loc[
            (quran['sura'] == hit['sura']) & (quran['aya'] == hit['aya'])
        ]
        sura = _format_number(hit['sura'])
        aya = _format_number(hit['aya'])
        lines.append(verse['text'].item() + " (Q.S " + sura + ":" + aya + ")\n")
    return "".join(lines)


demo = gr.Interface(fn=find, inputs="textbox", outputs="textbox")

if __name__ == "__main__":
    demo.launch()