Spaces:
Build error
Build error
File size: 3,339 Bytes
ffbadc4 e85ca86 c4baae7 69e5f39 e45313e ffbadc4 3836430 d8758a7 3836430 fc50127 e8569d3 5f6349c b5392ae b64bcd9 5f6349c 50a6b52 5f6349c f33586f 45fde58 50a6b52 ab70a4d 45fde58 e8569d3 69e5f39 45fde58 b64bcd9 1043bb5 b5392ae 908d90a b64bcd9 7b6cfde 1043bb5 908d90a b5392ae ffbadc4 bacf5e3 17aace2 bacf5e3 6114380 3836430 1043bb5 22383c2 87c19b0 e45313e c9b1232 86ed3e1 62f1c11 d8c56f7 c9b1232 ffbadc4 bacf5e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
import pickle
from pathlib import Path
def make_clickable_both(val):
    """Render a ``'name#url'`` string as an HTML anchor tag.

    *val* is the link text and the target URL joined by a single ``'#'``.
    Splitting on the FIRST ``'#'`` only (``split('#', 1)``) keeps URLs that
    themselves contain a fragment marker intact; the original split on every
    ``'#'`` and raised ``ValueError`` for such URLs.

    Returns the ``<a href="url">name</a>`` markup as a string.
    """
    name, url = val.split('#', 1)
    return f'<a href="{url}">{name}</a>'
def find(query):
    """Semantic search over the Quran for a free-text *query*.

    Embeds the query with ``intfloat/multilingual-e5-large-instruct``,
    scores it against pre-computed embeddings of verse chunks, and returns
    the top-20 matching full verses.

    Parameters:
        query: the user's search text (any language the model supports).

    Returns:
        (results, filepath) — a ``pandas.DataFrame`` with columns
        ``sura``, ``aya``, ``text`` (text wrapped in an ``<a>`` tafsir
        link) and the ``Path`` of a CSV copy written for download.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect the task instruction prepended to the query.
        return f'Instruct: {task_description}\nQuery: {query}'

    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]

    # Full verse text, '|'-delimited, keyed by (sura, aya).
    quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")

    # Pre-split verse chunks. Loaded once with a context manager — the
    # original opened this pickle twice and never closed either handle.
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)

    model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')

    # Document embeddings were pre-computed offline with this pipeline:
    # document_embeddings = model.encode(quran_splitted['text'].tolist(),
    #                                    convert_to_tensor=True, normalize_embeddings=True)
    # pickle.dump(document_embeddings,
    #             open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'wb'))
    with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
        document_embeddings = pickle.load(f)

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    # Embeddings are normalized, so the dot product is cosine similarity;
    # scaled by 100 for readability.
    scores = (query_embeddings @ document_embeddings.T) * 100

    # Attach the similarity of the (single) query to every chunk and rank.
    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)

    # Collect the full verse for each of the 20 best chunks. head(20) is
    # safe when fewer than 20 chunks exist, unlike the original iloc loop.
    results = pd.DataFrame()
    for _, best in sorted_quran.head(20).iterrows():
        verse = quran.loc[(quran['sura'] == best['sura']) & (quran['aya'] == best['aya'])]
        results = pd.concat([results, verse])

    # Link each verse to its Ibn Kathir tafsir page on quran.com.
    url = ('https://quran.com/' + results['sura'].astype(str) + ':'
           + results['aya'].astype(str) + '/tafsirs/en-tafisr-ibn-kathir')
    results['text'] = '<a href="' + url + '">' + results['text'] + '</a>'

    # Write a downloadable CSV named after the query.
    filepath = Path(query + '.csv')
    results.to_csv(filepath, index=False)
    return results, filepath
# Gradio UI: one text query in; a results table plus a CSV download out.
# datatype "markdown" for the text column lets the embedded <a> links render.
_results_table = gr.Dataframe(
    headers=['sura', 'aya', 'text'],
    datatype=["str", "str", "markdown"],
    wrap=True,
)

demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[_results_table, gr.DownloadButton()],
    examples=[
        ["law of inheritance in islam"],
        ["tunjukilah jalan yang lurus"],
        ["سليمان"],
    ],
    title="Quran Finder",
)
#demo = gr.Interface(fn=find, inputs="textbox", outputs="textbox")
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    # Removed a stray trailing '|' (page-scrape artifact) that made this
    # line a syntax error.
    demo.launch()