adriansd12 committed on
Commit
c6e8a33
•
1 Parent(s): 4c628ba

init. commit

Browse files
Files changed (3) hide show
  1. app.py +36 -0
  2. module/bible_index.py +53 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from module.bible_index import BibleIndex
3
+
4
+
5
def query_index(query, testament, top_n):
    """Search the Bible index and render the matches as an HTML fragment.

    Args:
        query: Free-text search string typed by the user. ``None`` is
            treated as an empty string.
        testament: Which index to search: ``"all"``, ``"old"`` or ``"new"``.
            A missing selection (``None`` or empty) falls back to ``"all"``.
        top_n: Maximum number of passages to show. Coerced to ``int`` so a
            float coming from the UI slider cannot break list slicing.

    Returns:
        An HTML string: an ``<h2>`` holding the query, followed by a ``<ul>``
        that contains one ``<h3>`` reference and one ``<li>`` passage per hit.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from html import escape

    query = query or ""

    # NOTE(review): a fresh BibleIndex is constructed on every call, which
    # re-runs whatever loading its constructor does -- consider reusing one
    # instance per testament if request latency matters.
    _index = BibleIndex(testament or "all")
    items = _index.query(query, top_n=int(top_n or 0))

    # The query is untrusted user input and the passages come from data
    # files; escape both so neither can inject markup into the page.
    parts = [f"<h2>{escape(query)}</h2>", "<ul>"]
    for item in items:
        parts.append(f"<h3>{escape(str(item.get('src')))}</h3>")
        parts.append(f"<li>{escape(str(item.get('text')))}</li>")
    parts.append("</ul>")
    return "".join(parts)
16
+
17
+
18
+ demo = gr.Interface(
19
+ query_index,
20
+ [
21
+ gr.Textbox(label="Query text"),
22
+ gr.Radio(["all", "old", "new"], label="Section of the Bible"),
23
+ gr.Slider(0, 10, step=1, label="Top N results"),
24
+ ],
25
+ outputs="html",
26
+ examples=[
27
+ ["What is love", "new", 5],
28
+ ["How old was Adam?", "old", 3],
29
+ ["Who is God?", "all", 7],
30
+ ],
31
+ title="Bible Search Index",
32
+ description="""
33
+ A search index for The Bible using *sentence_transformer*.
34
+ """,
35
+ )
36
+ demo.launch()
module/bible_index.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer, util
3
+
4
+
5
class BibleIndex:
    """Dot-product search over precomputed verse embeddings.

    On construction the instance loads a sentence-transformers model, a
    ``.npy`` embedding matrix and the matching text file for the chosen
    testament. ``query`` then encodes a search string and ranks the text
    lines by their dot-product score against the embeddings.

    Data files are resolved relative to the current working directory:
    ``data/embeddings/<testament>_esv_embeddings.npy`` and
    ``data/text/<testament>_testament_esv.txt``.
    """

    # Testament selections accepted by the constructor.
    VALID_TESTAMENTS = ("all", "old", "new")

    def __init__(self, testament: str = "all") -> None:
        """Load the model, embeddings and text for ``testament``.

        Args:
            testament: One of ``"all"``, ``"old"`` or ``"new"``.

        Raises:
            ValueError: If ``testament`` is not a supported selection.
        """
        # Validate first so a bad argument fails fast with a clear message,
        # before the (expensive) model is loaded.
        if testament not in self.VALID_TESTAMENTS:
            raise ValueError(
                f"unknown testament {testament!r}; "
                f"expected one of {self.VALID_TESTAMENTS}"
            )
        self.testament = testament

        self.model = SentenceTransformer(
            "sentence-transformers/msmarco-bert-base-dot-v5"
        )

        self.load_emb()
        self.load_text()

    def load_emb(self) -> None:
        """Load the embedding array for the selected testament into ``self.emb``."""
        self.emb = np.load(f"data/embeddings/{self.testament}_esv_embeddings.npy")

    def load_text(self) -> None:
        """Load the text lines for the selected testament into ``self.text``.

        The first line of the file is skipped (presumably a column header --
        TODO confirm against the data files).
        """
        text_path = f"data/text/{self.testament}_testament_esv.txt"

        # Explicit encoding: the text contains non-ASCII characters (query()
        # strips "\xa0"), so the platform default must not be relied upon.
        with open(text_path, "r", encoding="utf-8") as f:
            self.text = f.readlines()[1:]

    @staticmethod
    def _format_result(doc: str, score: float) -> dict:
        """Turn one raw text line and its score into a result dict.

        ``doc`` is a comma-separated line whose first three fields are
        rendered as ``"<f0> <f1>:<f2>"``; everything after the third comma is
        the passage text, which may itself contain commas.
        """
        # maxsplit=3 keeps commas inside the passage text intact.
        fields = doc.split(",", 3)
        passage = fields[3] if len(fields) > 3 else ""
        return {
            "src": f"{fields[0]} {fields[1]}:{fields[2]}",
            "text": passage.replace("\xa0", "").replace("\n", ""),
            "score": score,
        }

    def query(self, query: str = "", top_n: int = 10):
        """Return the ``top_n`` best-scoring passages for ``query``.

        Args:
            query: Search string to encode with the model.
            top_n: Maximum number of results. Coerced to ``int`` and clamped
                at zero, so a float or negative value cannot mis-slice the
                ranking.

        Returns:
            A list of dicts with keys ``"src"``, ``"text"`` and ``"score"``,
            ordered by decreasing score.
        """
        query_emb = self.model.encode(query)
        # Row 0 of the score matrix holds the scores for the single query.
        scores = util.dot_score(query_emb, self.emb)[0].cpu().tolist()

        # Pair each text line with its score and sort by decreasing score.
        ranked = sorted(
            zip(self.text, scores), key=lambda pair: pair[1], reverse=True
        )

        print("Query:", query)
        limit = max(0, int(top_n))
        return [self._format_result(doc, score) for doc, score in ranked[:limit]]
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ numpy==1.24.2
2
+ sentence-transformers==2.2.2