Update app.py
Browse files
app.py
CHANGED
@@ -43,18 +43,18 @@ def find_similar_news(text: str, top_n: int=5):
|
|
43 |
|
44 |
vectorizer = TextVectorizer()
|
45 |
collection = get_milvus_collection()
|
46 |
-
sent_model = SentenceTransformer('
|
47 |
|
48 |
def main():
|
49 |
|
50 |
# st.title("Find Similar News")
|
51 |
-
st.markdown("<h3>Find Similar News With Sentence Transformers</h3>", unsafe_allow_html=True)
|
52 |
desc = '''<p style="font-size: 13px;">
|
53 |
Embeddings of 300,000 news headlines are stored in Milvus vector database, used as a feature store.
|
54 |
-
Embeddings of the input headline are computed using sentence transformers (
|
55 |
Similar news headlines are retrieved from the vector database using Euclidean distance as similarity metric.
|
56 |
-
<span style="color: red;">This method
|
57 |
-
from fine-tuned
|
58 |
</p>
|
59 |
'''
|
60 |
st.markdown(desc, unsafe_allow_html=True)
|
|
|
43 |
|
44 |
vectorizer = TextVectorizer()
|
45 |
collection = get_milvus_collection()
|
46 |
+
sent_model = SentenceTransformer('all-mpnet-base-v2')
|
47 |
|
48 |
def main():
|
49 |
|
50 |
# st.title("Find Similar News")
|
51 |
+
st.markdown("<h3>Find Similar News With Sentence Transformers (all-mpnet-base-v2)</h3>", unsafe_allow_html=True)
|
52 |
desc = '''<p style="font-size: 13px;">
|
53 |
Embeddings of 300,000 news headlines are stored in Milvus vector database, used as a feature store.
|
54 |
+
Embeddings of the input headline are computed using sentence transformers (all-mpnet-base-v2).
|
55 |
Similar news headlines are retrieved from the vector database using Euclidean distance as similarity metric.
|
56 |
+
<span style="color: red;">This method (all-mpnet-base-v2) has the best performance compared to multi-qa-distilbert-cos-v1 fine-tuned using TSDAE
|
57 |
+
and extracting embeddings from fine-tuned DistilBERT classifier.</span>
|
58 |
</p>
|
59 |
'''
|
60 |
st.markdown(desc, unsafe_allow_html=True)
|