"""Streamlit app: semantic search over a dataset of historical quotes.

On the first run of a session, the quotes dataset, a sentence-embedding
model and a pre-built FAISS index are loaded into ``st.session_state``.
Each search embeds the user's text, looks up the 10 nearest entries in
the index and renders the matching quotes as Markdown.
"""

import time

import datasets
import faiss
import streamlit as st
from sentence_transformers import SentenceTransformer

# Streamlit re-executes this script on every interaction; the heavy objects
# are therefore loaded once and kept in the per-session state.
if "initialized" not in st.session_state:
    # NOTE(review): "force_redownload" re-fetches the dataset for every new
    # session -- confirm this is deliberate and not a debugging leftover.
    st.session_state.dataset = datasets.load_dataset(
        'A-Roucher/english_historical_quotes',
        download_mode="force_redownload",
    )['train']
    st.session_state.all_authors = list(set(st.session_state.dataset['author']))

    # Alternatives previously tried: "sentence-transformers/all-MiniLM-L6-v2",
    # "Cohere/Cohere-embed-english-light-v3.0".
    model_name = "BAAI/bge-small-en-v1.5"
    st.session_state.encoder = SentenceTransformer(model_name)
    # NOTE(review): presumably this index was built with the same model and
    # over the dataset rows in the same order -- verify against the script
    # that produced 'index_alone.faiss'.
    st.session_state.index = faiss.read_index('index_alone.faiss')
    st.session_state.initialized = True


def search(query):
    """Return a Markdown report of the quotes nearest to *query*.

    Args:
        query: Free text typed by the user.

    Returns:
        An empty string when *query* is empty or whitespace-only. Otherwise
        a Markdown string that starts with the computation time and is
        followed by one ``author`` heading plus block-quoted ``quote`` per
        hit, each terminated by a horizontal rule.
    """
    if not query.strip():
        return ""

    start = time.time()
    query_embedding = st.session_state.encoder.encode([query])
    _, samples = st.session_state.index.search(query_embedding, k=10)
    # The ids returned by the index are used directly as dataset row numbers.
    quotes = st.session_state.dataset.select(samples[0])

    # Read each column once instead of re-reading it on every iteration.
    authors = quotes['author']
    texts = quotes['quote']
    result = "\n\n" + "".join(
        f"###### {author}\n> {text}\n----\n"
        for author, text in zip(authors, texts)
    )

    delay = f"{time.time() - start:.3f}"
    return f"_Computation time: **{delay} seconds**_{result}"


st.markdown(
    """ """,
    unsafe_allow_html=True,
)

col1, col2 = st.columns([8, 2])
text_input = col1.text_input(
    "Type your idea here:",
    placeholder="Knowledge of history is power.",
)
submit_button = col2.button("_Search quotes!_")

if submit_button:
    st.markdown(search(text_input))