"""Streamlit demo: semantic search over a dataset of historical quotes.

The sentence typed in the sidebar is embedded with a SentenceTransformer
model and compared against pre-computed quote embeddings; the closest
quotes are displayed.

Streamlit re-executes this script on every widget interaction, so the
expensive steps (dataset download, model load, corpus encoding) live in
``st.cache_resource`` helpers and run only once per server process.
"""

import datasets
import streamlit as st
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

DATASET_NAME = "A-Roucher/english_historical_quotes"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Query used while the sidebar text box is still empty.
DEFAULT_SENTENCE = "Knowledge of history is power."
# Only the first SEARCH_POOL_SIZE quotes are searched.
SEARCH_POOL_SIZE = 1000
TOP_K = 5


@st.cache_resource
def load_quotes() -> datasets.Dataset:
    """Download the quotes dataset and return its ``train`` split.

    The download is still forced, but because the result is cached it
    happens once per server process instead of once per script rerun.
    """
    quotes = datasets.load_dataset(DATASET_NAME, download_mode="force_redownload")
    return quotes["train"]


@st.cache_resource
def load_encoder() -> SentenceTransformer:
    """Load the sentence-embedding model (cached across reruns)."""
    return SentenceTransformer(MODEL_NAME)


@st.cache_resource
def embed_quotes():
    """Return L2-normalized embeddings of every quote as a NumPy array.

    Row ``i`` of the result is the embedding of row ``i`` of
    ``load_quotes()``.
    """
    return load_encoder().encode(
        load_quotes()["quote"],
        batch_size=4,
        show_progress_bar=True,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )


# Slider demo widget.
x = st.slider("Select a value")
st.write(x, "squared is", x * x)

# Use what the user typed; fall back to the default sentence when empty.
typed_quote = st.sidebar.text_input("Type your quote here")
sentence = typed_quote.strip() or DEFAULT_SENTENCE

dataset = load_quotes()
encoder = load_encoder()
embeddings = embed_quotes()

sentence_embedding = encoder.encode([sentence])

# Restrict the search to a subset of rows, clamped so a corpus smaller
# than the pool size cannot trigger an out-of-range index.
author_indexes = list(range(min(SEARCH_POOL_SIZE, len(embeddings))))

# Search the NumPy embedding matrix directly: a datasets.Dataset cannot be
# sliced with ``[rows, :]`` and would not yield an embedding matrix anyway.
hits = semantic_search(sentence_embedding, embeddings[author_indexes], top_k=TOP_K)
st.write(hits)

# ``corpus_id`` is a position inside the searched subset; map it back to
# the row index in the full dataset before selecting the matching quotes.
list_hits = [author_indexes[hit["corpus_id"]] for hit in hits[0]]
st.write(dataset.select(list_hits).to_pandas())