Update app.py
Browse files
app.py
CHANGED
@@ -37,8 +37,16 @@ def main():
|
|
37 |
return response.json()
|
38 |
|
39 |
|
40 |
-
st.title("Vector-based
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# User search
|
43 |
user_input = st.text_area("Search box", "What is spacetime made out of?")
|
44 |
|
|
|
37 |
return response.json()
|
38 |
|
39 |
|
40 |
+
st.title("Vector-based search of the r/ELI5 dataset with Sentence Transformers and Faiss")
|
41 |
+
|
42 |
+
st.markdown("""This application lets you perform a semantic search through questions in the r/ELI5 <a href="https://huggingface.co/datasets/eli5">dataset</a>.
|
43 |
+
The questions and user input are encoded into a high-dimensional vector space using a Sentence-Transformer model, and in particular the checkpoint <a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2">sentence-transformers/all-MiniLM-L6-v2</a>.
|
44 |
+
To perform the search we use FAISS, which performs an efficient similarity search through the (vectorized) questions.
|
45 |
+
The ELI5 dataset contains posts from three subreddits, AskScience (asks), AskHistorians (askh), and ExplainLikeImFive (eli5).
|
46 |
+
The score corresponds to the rating each answer received when posted on Reddit.
|
47 |
+
We unfortunately cannot verify the veracity of any of the answers posted!
|
48 |
+
""")
|
49 |
+
|
50 |
# User search
|
51 |
user_input = st.text_area("Search box", "What is spacetime made out of?")
|
52 |
|