|
import streamlit as st |
|
from sentence_transformers import SentenceTransformer |
|
import datasets |
|
import faiss |
|
import time |
|
import faiss |
|
|
|
|
|
# One-time setup: everything below is stored in st.session_state and is only
# built when the "initialized" key is absent, so later runs of this script in
# the same session reuse the objects instead of reloading them.
if "initialized" not in st.session_state:
    state = st.session_state

    # Quotes corpus; only the 'train' split is kept.
    quote_splits = datasets.load_dataset(
        'A-Roucher/english_historical_quotes',
        download_mode="force_redownload",
    )
    state.dataset = quote_splits['train']

    # De-duplicated author names (set order is arbitrary).
    unique_authors = set(state.dataset['author'])
    state.all_authors = list(unique_authors)

    # Sentence encoder used to embed the user's query.
    model_name = "BAAI/bge-small-en-v1.5"
    state.encoder = SentenceTransformer(model_name)

    # Pre-built FAISS index read from the working directory.
    state.index = faiss.read_index('index_alone.faiss')

    # Mark setup as done so the guard above skips it next time.
    state.initialized = True
|
|
|
def search(query, k=10):
    """Return a Markdown report of the quotes closest to *query*.

    The query is embedded with ``st.session_state.encoder``, its nearest
    neighbours are looked up in ``st.session_state.index`` and the matching
    rows of ``st.session_state.dataset`` are rendered as Markdown.

    Args:
        query: Free-text search string typed by the user.
        k: Maximum number of quotes to retrieve. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        An empty string when the query is missing or blank. Otherwise a
        Markdown string that starts with the elapsed computation time and
        then lists each hit as an author heading followed by the quote.
    """
    # Guard first: a blank (or None) query needs no model or index work.
    if not query or not query.strip():
        return ""

    # perf_counter is monotonic, so the reported delay cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start = time.perf_counter()

    query_embedding = st.session_state.encoder.encode([query])
    _, samples = st.session_state.index.search(query_embedding, k=k)

    # FAISS fills the label row with -1 when fewer than k neighbours are
    # available; those are not valid dataset rows and must be dropped.
    hit_ids = [int(idx) for idx in samples[0] if idx >= 0]
    quotes = st.session_state.dataset.select(hit_ids)

    # Read each column once and walk them in lockstep instead of re-reading
    # the whole column for every row.
    entries = [
        f"###### {author}\n> {quote}\n----\n"
        for author, quote in zip(quotes["author"], quotes["quote"])
    ]
    result = "\n\n" + "".join(entries)

    delay = f"{time.perf_counter() - start:.3f}"
    return f"_Computation time: **{delay} seconds**_{result}"
|
|
|
|
|
# CSS injected into the page: sets align-self:flex-end on Streamlit column
# containers (presumably so the button lines up with the bottom of the text
# box -- confirm visually).
COLUMN_ALIGNMENT_CSS = """
<style>
div[data-testid="column"]
{
align-self:flex-end;
}
</style>
"""
st.markdown(COLUMN_ALIGNMENT_CSS, unsafe_allow_html=True)

# Two columns in an 8:2 width ratio: query box on the left, button on the right.
query_column, button_column = st.columns([8, 2])
text_input = query_column.text_input(
    "Type your idea here:",
    placeholder="Knowledge of history is power.",
)
submit_button = button_column.button("_Search quotes!_")

# Run the search only when the button was clicked on this run.
if submit_button:
    report = search(text_input)
    st.markdown(report)
|
|
|
|