booktoread / app.py
Vladislawoo's picture
Upload app.py
4f93011
raw
history blame
2.78 kB
import streamlit as st
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
import faiss
# Upper bound on the tokenized length of any text passed to the encoder;
# longer inputs are truncated by the tokenizer call in embed_bert_cls.
MAX_LEN = 300

# Book table; the UI below reads its 'image', 'title', 'author' and
# 'annotation' columns.
df = pd.read_csv('final_data.csv')

# Pretrained encoder and its tokenizer, both loaded from the same checkpoint.
model_name = "cointegrated/rubert-tiny2"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def embed_bert_cls(text, model=model, tokenizer=tokenizer):
    """Return a unit-length embedding of *text* taken from the first token.

    The text is tokenized (truncated to ``MAX_LEN`` tokens), run through
    ``model`` without gradient tracking, and the hidden state at position 0
    (the [CLS] slot for BERT-style tokenizers) is L2-normalized.

    Args:
        text: Input passed straight to ``tokenizer``. If a batch is given,
            only the embedding of its first element is returned.
        model: Encoder whose output exposes ``last_hidden_state``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        A CPU ``torch.Tensor`` holding the normalized embedding.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=MAX_LEN,
        return_tensors='pt',
    )
    # Move every tokenizer output onto the device the model lives on.
    device = model.device
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        hidden_states = model(**inputs).last_hidden_state

    # Keep only the first token of each sequence, then scale to unit norm.
    first_token = hidden_states[:, 0, :]
    unit_vectors = torch.nn.functional.normalize(first_token)
    return unit_vectors[0].cpu().squeeze()
# Precomputed embeddings; row i is used together with row i of `df` by the UI.
# Faiss works on float32 matrices while np.loadtxt defaults to float64, so the
# dtype is requested explicitly. ndmin=2 keeps a file containing a single
# vector from collapsing into a 1-D array (which would break `.shape[1]`).
embeddings = np.loadtxt('embeddings.txt', dtype=np.float32, ndmin=2)
embeddings_tensor = [torch.tensor(embedding) for embedding in embeddings]

# Build the Faiss index: exact (brute-force) inner-product search.
# The array is already 2-D, so it only needs to be C-contiguous for Faiss.
embeddings_matrix = np.ascontiguousarray(embeddings)
index = faiss.IndexFlatIP(embeddings_matrix.shape[1])
index.add(embeddings_matrix)
st.title('Приложение для рекомендации книг')
text = st.text_input('Введите запрос:')
num_results = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=3)
# The search is triggered by an explicit button click.
recommend_button = st.button('Получить рекомендации')

if text and recommend_button:  # run only with a non-empty query and a click
    # Embed the query; Faiss expects a float32 matrix of shape (n_queries, dim).
    query_embedding = embed_bert_cls(text).numpy().astype('float32')
    scores, indices = index.search(
        np.expand_dims(query_embedding, axis=0), int(num_results)
    )

    st.subheader('Топ рекомендуемых книг:')
    for score, i in zip(scores[0], indices[0]):
        # Faiss pads the result with id -1 when the index holds fewer vectors
        # than requested; a negative id must not be used as "last row".
        if i < 0:
            continue

        # Positional lookup, fetched once instead of once per column.
        book = df.iloc[int(i)]
        # IndexFlatIP already returns the inner product of the query and the
        # stored vector. NOTE(review): this is a cosine similarity only if the
        # vectors in embeddings.txt are unit-normalized — confirm.
        similarity_percent = float(score) * 100

        col1, col2 = st.columns([1, 3])
        with col1:
            st.image(book['image'], use_column_width=True)
        with col2:
            st.write(f"**Название книги:** {book['title']}")
            st.write(f"**Автор:** {book['author']}")
            st.write(f"**Описание:** {book['annotation']}")
            st.write(f"**Оценка сходства:** {similarity_percent:.2f}%")
        st.write("---")