UNIST-Eunchan
committed on
Commit
•
950ede6
1
Parent(s):
dda7218
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import nltk
|
|
4 |
from nltk import sent_tokenize
|
5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
6 |
import json
|
7 |
-
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
|
10 |
nltk.download('punkt')
|
@@ -38,6 +38,14 @@ def infer(input_ids, max_length, temperature, top_k, top_p):
|
|
38 |
return output_sequences
|
39 |
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
@st.cache_data
|
42 |
def chunking(book_text):
|
43 |
sentences = sent_tokenize(book_text)
|
|
|
4 |
from nltk import sent_tokenize
|
5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
6 |
import json
|
7 |
+
import numpy as np
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
|
10 |
nltk.download('punkt')
|
|
|
38 |
return output_sequences
|
39 |
|
40 |
|
41 |
+
def cos_similarity(v1, v2):
    """Return the cosine similarity between vectors *v1* and *v2*.

    Computes dot(v1, v2) / (||v1|| * ||v2||). The norms are taken with
    numpy's L2 norm, which is numerically the same as the manual
    sqrt-of-sum-of-squares form.

    NOTE(review): a zero vector yields a 0/0 division (NaN with a runtime
    warning), same as the original — callers presumably pass non-zero
    embeddings; confirm upstream.
    """
    numerator = np.dot(v1, v2)
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    return numerator / denominator
|
47 |
+
|
48 |
+
|
49 |
@st.cache_data
|
50 |
def chunking(book_text):
|
51 |
sentences = sent_tokenize(book_text)
|