Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,35 +1,29 @@
|
|
1 |
import gradio as gr
|
2 |
-
import pickle
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
import pandas as pd
|
5 |
|
6 |
def find(query):
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
# compare query to each quran verse
|
17 |
-
i = 0
|
18 |
-
text_similarity = []
|
19 |
-
for encoded_quran_ayat in encoded_quran_text:
|
20 |
-
similarity = encoded_query_text @ encoded_quran_ayat.T
|
21 |
-
text_similarity.append(similarity)
|
22 |
-
i=i+1
|
23 |
-
print(i)
|
24 |
-
|
25 |
-
# insert the similarity value to dataframe & sort it
|
26 |
-
file = open('quran-splitted.sav','rb')
|
27 |
-
quran_splitted = pickle.load(file)
|
28 |
-
quran_splitted['similarity'] = text_similarity
|
29 |
-
sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
|
30 |
|
31 |
-
# insert the similarity value to dataframe & sort it
|
32 |
quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
results = ""
|
35 |
i = 0
|
|
|
1 |
import gradio as gr
|
|
|
2 |
from sentence_transformers import SentenceTransformer
|
3 |
import pandas as pd
|
4 |
|
5 |
def find(query):
|
6 |
+
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Combine a task description and a query into one two-line prompt string.

    The first line is ``Instruct: <task_description>`` and the second line is
    ``Query: <query>``, separated by a single newline.
    """
    instruction_line = f'Instruct: {task_description}'
    query_line = f'Query: {query}'
    return f'{instruction_line}\n{query_line}'
|
8 |
+
|
9 |
+
# Each query must come with a one-sentence instruction that describes the task
|
10 |
+
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
11 |
+
queries = [
|
12 |
+
get_detailed_instruct(task, query)
|
13 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
|
|
15 |
quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")
|
16 |
+
documents = quran['text'].tolist()
|
17 |
+
input_texts = queries + documents
|
18 |
+
|
19 |
+
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
|
20 |
+
|
21 |
+
embeddings = model.encode(input_texts, convert_to_tensor=True, normalize_embeddings=True)
|
22 |
+
scores = (embeddings[:1] @ embeddings[1:].T) * 100
|
23 |
+
|
24 |
+
# store each row's similarity score in the dataframe, then sort by it in descending order
|
25 |
+
quran['similarity'] = scores.tolist()[0]
|
26 |
+
sorted_quran = quran.sort_values(by='similarity', ascending=False)
|
27 |
|
28 |
results = ""
|
29 |
i = 0
|