File size: 3,339 Bytes
ffbadc4
e85ca86
c4baae7
69e5f39
e45313e
ffbadc4
3836430
d8758a7
 
 
 
3836430
fc50127
e8569d3
 
 
 
 
 
 
 
5f6349c
b5392ae
b64bcd9
5f6349c
 
 
 
50a6b52
 
5f6349c
f33586f
 
 
 
 
45fde58
50a6b52
ab70a4d
 
45fde58
e8569d3
 
69e5f39
 
 
 
45fde58
b64bcd9
1043bb5
 
b5392ae
908d90a
b64bcd9
7b6cfde
1043bb5
908d90a
b5392ae
ffbadc4
bacf5e3
17aace2
bacf5e3
 
6114380
3836430
1043bb5
22383c2
87c19b0
e45313e
c9b1232
86ed3e1
 
 
 
62f1c11
 
 
 
 
d8c56f7
c9b1232
ffbadc4
 
bacf5e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
import pickle
from pathlib import Path  

def make_clickable_both(val):
    """Turn a ``'name#url'`` string into an HTML anchor.

    Parameters
    ----------
    val : str
        Display text and target URL joined by the first ``'#'``.

    Returns
    -------
    str
        ``<a href="url">name</a>``.
    """
    # partition() splits on the FIRST '#' only, so URLs containing '#'
    # fragments no longer blow up the two-value unpack that split('#') did.
    name, _, url = val.partition('#')
    return f'<a href="{url}">{name}</a>'

def find(query):
    """Semantically search the Quran for verses matching *query*.

    Encodes the query with an E5 multilingual instruct model, scores it
    against precomputed embeddings of verse chunks, and returns the full
    verses behind the 20 best-scoring chunks.

    Parameters
    ----------
    query : str
        Free-text search query (any language the model supports).

    Returns
    -------
    tuple[pandas.DataFrame, Path]
        Matched verses (``sura``, ``aya``, ``text`` — text wrapped in a
        tafsir link) and the path of a CSV export named after the query.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect queries prefixed with a task description.
        return f'Instruct: {task_description}\nQuery: {query}'

    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [
        get_detailed_instruct(task, query)
    ]

    # Full verse text, one row per (sura, aya).
    quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")

    # Pre-chunked verse DataFrame; load once with a context manager so the
    # handle is closed (the original opened it twice and never closed it).
    # NOTE(review): pickle.load is only safe because these are local,
    # project-generated files — never point this at untrusted data.
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)

    model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')

    # Precomputed chunk embeddings (built offline with model.encode on the
    # chunk texts, normalized), loaded instead of recomputed per request.
    with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
        document_embeddings = pickle.load(f)

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    # Embeddings are normalized, so the dot product is cosine similarity;
    # scaled by 100 for readability.
    scores = (query_embeddings @ document_embeddings.T) * 100

    # Attach the similarity of the (single) query to every chunk and rank.
    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)

    # Collect the full verses behind the 20 best-matching chunks.
    results = pd.DataFrame()
    for _, result in sorted_quran.head(20).iterrows():
        match = quran.loc[(quran['sura'] == result['sura']) & (quran['aya'] == result['aya'])]
        results = pd.concat([results, match])

    # Link each verse to its Ibn Kathir tafsir page on quran.com.
    # NOTE(review): 'en-tafisr' looks like a typo for 'en-tafsir' — kept
    # byte-identical; confirm the slug quran.com actually expects.
    url = ('https://quran.com/' + results['sura'].astype(str) + ':'
           + results['aya'].astype(str) + '/tafsirs/en-tafisr-ibn-kathir')
    results['text'] = '<a href="' + url + '">' + results['text'] + '</a>'

    # CSV export backing the Gradio download button; named after the query.
    filepath = Path(query + '.csv')
    results.to_csv(filepath, index=False)
    return results, filepath
    
# Gradio UI: one textbox feeding `find`; results render as a table (the
# text column is markdown so the tafsir anchors are clickable) plus a
# download button for the CSV export produced by `find`.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[
        gr.Dataframe(
            headers=['sura', 'aya', 'text'],
            datatype=["str", "str", "markdown"],
            wrap=True,
        ),
        gr.DownloadButton(),
    ],
    title="Quran Finder",
    examples=[
        ["law of inheritance in islam"],
        ["tunjukilah jalan yang lurus"],
        ["سليمان"],
    ],
)
    
# Launch the Gradio server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()