Bofandra committed on
Commit
e8569d3
·
verified ·
1 Parent(s): eb50629

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -25
app.py CHANGED
@@ -1,35 +1,29 @@
1
  import gradio as gr
2
- import pickle
3
  from sentence_transformers import SentenceTransformer
4
  import pandas as pd
5
 
6
  def find(query):
7
- # transform query from user
8
- model = SentenceTransformer('Bofandra/fine-tuning-use-cmlm-multilingual-quran-translation-qa')
9
- encoded_query_text = model.encode(query)
10
-
11
- # get encoded quran text
12
- file = open('encoded_quran_fine-tuning-use-cmlm-multilingual-qa-quran-splitted.sav','rb')
13
- encoded_quran_text = pickle.load(file)
14
- file.close()
15
-
16
- # compare query to each quran verse
17
- i = 0
18
- text_similarity = []
19
- for encoded_quran_ayat in encoded_quran_text:
20
- similarity = encoded_query_text @ encoded_quran_ayat.T
21
- text_similarity.append(similarity)
22
- i=i+1
23
- print(i)
24
-
25
- # insert the similarity value to dataframe & sort it
26
- file = open('quran-splitted.sav','rb')
27
- quran_splitted = pickle.load(file)
28
- quran_splitted['similarity'] = text_similarity
29
- sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
30
 
31
- # insert the similarity value to dataframe & sort it
32
  quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  results = ""
35
  i = 0
 
1
  import gradio as gr
 
2
  from sentence_transformers import SentenceTransformer
3
  import pandas as pd
4
 
5
  def find(query):
6
+ def get_detailed_instruct(task_description: str, query: str) -> str:
7
+ return f'Instruct: {task_description}\nQuery: {query}'
8
+
9
+ # Each query must come with a one-sentence instruction that describes the task
10
+ task = 'Given a web search query, retrieve relevant passages that answer the query'
11
+ queries = [
12
+ get_detailed_instruct(task, query)
13
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
15
  quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")
16
+ documents = quran['text'].tolist()
17
+ input_texts = queries + documents
18
+
19
+ model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
20
+
21
+ embeddings = model.encode(input_texts, convert_to_tensor=True, normalize_embeddings=True)
22
+ scores = (embeddings[:1] @ embeddings[1:].T) * 100
23
+
24
+ # insert the similarity value to dataframe & sort it
25
+ quran['similarity'] = scores.tolist()[0]
26
+ sorted_quran = quran.sort_values(by='similarity', ascending=False)
27
 
28
  results = ""
29
  i = 0