thiyagab
committed on
Commit
•
c6cc78a
1
Parent(s):
3b9fa76
semantic search added
Browse files- app.py +8 -2
- requirements.txt +4 -0
- semanticsearch.py +65 -0
app.py
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
import semanticsearch
|
4 |
+
|
5 |
+
# x = st.slider('Select a value')
|
6 |
+
x=st.text_input('Ask valluvar')
|
7 |
+
# st.write(x, 'squared is', x * x)
|
8 |
+
response=semanticsearch.find_similarities(x)
|
9 |
+
st.text(response)
|
10 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
transformers
|
3 |
+
sentence-transformers
|
4 |
+
sentence_embeddings
|
semanticsearch.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#Write some lines to encode (sentences 0 and 2 are both ideltical):
|
2 |
+
sen = [
|
3 |
+
"Three years later, the coffin was still full of Jello.",
|
4 |
+
"The fish dreamed of escaping the fishbowl and into the toilet where he saw his friend go.",
|
5 |
+
"The person box was packed with jelly many dozens of months later.",
|
6 |
+
"He found a leprechaun in his walnut shell."
|
7 |
+
]
|
8 |
+
|
9 |
+
import json
|
10 |
+
import numpy
|
11 |
+
import os
|
12 |
+
|
13 |
+
# Opening JSON file
|
14 |
+
f = open('thirukural_git.json')
|
15 |
+
|
16 |
+
# returns JSON object as
|
17 |
+
# a dictionary
|
18 |
+
data = json.load(f)
|
19 |
+
|
20 |
+
en_translations=[]
|
21 |
+
kurals=[]
|
22 |
+
# Iterating through the json
|
23 |
+
# list
|
24 |
+
for kural in data['kurals']:
|
25 |
+
en_translations.append((kural['meaning']['en'].lower()))
|
26 |
+
kurals.append(kural['kural'])
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
# Closing file
|
31 |
+
f.close()
|
32 |
+
from sentence_transformers import SentenceTransformer
|
33 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
34 |
+
# model.tokenizer.add_special_tokens({'pad_token':'[thiyaga]'})
|
35 |
+
#Encoding:
|
36 |
+
|
37 |
+
sen_embeddings = model.encode(en_translations)
|
38 |
+
|
39 |
+
# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
|
40 |
+
# sen_embeddings.tofile('trainedmodel')
|
41 |
+
|
42 |
+
def find_similarities(input:str):
|
43 |
+
input_embeddings = model.encode([input.lower()])
|
44 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
45 |
+
#let's calculate cosine similarity for sentence 0:
|
46 |
+
similarity_matrix=cosine_similarity(
|
47 |
+
[input_embeddings[0]],
|
48 |
+
sen_embeddings[1:]
|
49 |
+
)
|
50 |
+
|
51 |
+
indices=[numpy.argpartition(similarity_matrix[0],-3)[-3:]]
|
52 |
+
response=''
|
53 |
+
for index in indices[0]:
|
54 |
+
print(similarity_matrix[0][index])
|
55 |
+
response+=en_translations[index+1]
|
56 |
+
print(en_translations[index+1])
|
57 |
+
response += "\n".join(kurals[index+1])
|
58 |
+
print("\n".join(kurals[index+1]))
|
59 |
+
return response
|
60 |
+
|
61 |
+
# while True:
|
62 |
+
# text=input('Ask valluvar: ')
|
63 |
+
# if( text == 'exit'):
|
64 |
+
# break
|
65 |
+
# find_similarities(text)
|