Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import torch | |
import numpy as np | |
from tqdm import tqdm | |
from transformers import AutoTokenizer, AutoModel | |
import faiss | |
model_name = "cointegrated/rubert-tiny2" | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
model = AutoModel.from_pretrained(model_name) | |
df = pd.read_csv('final_data.csv') | |
MAX_LEN = 300 | |
def embed_bert_cls(text, model=model, tokenizer=tokenizer): | |
t = tokenizer(text, | |
padding=True, | |
truncation=True, | |
return_tensors='pt', | |
max_length=MAX_LEN) | |
with torch.no_grad(): | |
model_output = model(**{k: v.to(model.device) for k, v in t.items()}) | |
embeddings = model_output.last_hidden_state[:, 0, :] | |
embeddings = torch.nn.functional.normalize(embeddings) | |
return embeddings[0].cpu().squeeze() | |
embeddings = np.loadtxt('embeddings.txt') | |
embeddings_tensor = [torch.tensor(embedding) for embedding in embeddings] | |
# Создание индекса Faiss | |
embeddings_matrix = np.stack(embeddings) | |
index = faiss.IndexFlatIP(embeddings_matrix.shape[1]) | |
index.add(embeddings_matrix) | |
st.title('Приложение для рекомендации книг') | |
text = st.text_input('Введите запрос:') | |
num_results = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=3) | |
# Add a button to trigger the recommendation process | |
recommend_button = st.button('Получить рекомендации') | |
if text and recommend_button: # Check if the user entered text and clicked the button | |
# Встраивание запроса и поиск ближайших векторов с использованием Faiss | |
query_embedding = embed_bert_cls(text) | |
query_embedding = query_embedding.numpy().astype('float32') | |
_, indices = index.search(np.expand_dims(query_embedding, axis=0), num_results) | |
st.subheader('Топ рекомендуемых книг:') | |
for i in indices[0]: | |
recommended_embedding = embeddings_tensor[i].numpy() # Вектор рекомендованной книги | |
similarity = np.dot(query_embedding, recommended_embedding) # Косинусное сходство | |
similarity_percent = similarity * 100 | |
col1, col2 = st.columns([1, 3]) | |
with col1: | |
st.image(df['image'][i], use_column_width=True) | |
with col2: | |
st.write(f"**Название книги:** {df['title'][i]}") | |
st.write(f"**Автор:** {df['author'][i]}") | |
st.write(f"**Описание:** {df['annotation'][i]}") | |
st.write(f"**Оценка сходства:** {similarity_percent:.2f}%") | |
st.write("---") | |