Spaces:

MARI-posa
/

FindMyBook

Runtime error

App Files Community

MARI-posa committed on Jun 16, 2023

Commit

7d10355

•

1 Parent(s): feb52ca

Update stri.py

Browse files

Files changed (1) hide show

stri.py +11 -38

stri.py CHANGED Viewed

@@ -19,23 +19,6 @@ model = AutoModel.from_pretrained(model_name, output_hidden_states=True)
 # Загрузка датасета и аннотаций к книгам
 books = pd.read_csv('all+++.csv')
 books['author'].fillna('other', inplace=True)
-#books.dropna(inplace=True)
-#books = books[books['annotation'].apply(lambda x: len(x.split()) >= 40)]
-#books.drop_duplicates(subset='title', keep='first', inplace=True)
-#books = books.reset_index(drop=True)
-#def data_preprocessing(text: str) -> str:
-    #text = re.sub(r'http\S+', " ", text)  # удаляем ссылки
-    #text = re.sub(r'@\w+', ' ', text)  # удаляем упоминания пользователей
-    #text = re.sub(r'#\w+', ' ', text)  # удаляем хэштеги
-    #text = re.sub(r'<.*?>', ' ', text)  # html tags
-   # return text
-#for i in ['author', 'title', 'annotation']:
-    #books[i] = books[i].apply(data_preprocessing)
 annot = books['annotation']
@@ -49,29 +32,19 @@ if st.button('Сгенерировать'):
     with open("book_embeddings256xxx.pkl", "rb") as f:
         book_embeddings = pickle.load(f)
-    #book_embeddings = torch.tensor(book_embeddings, device=torch.device('cpu'))
-#if st.button('Сгенерировать'):
-    #with open("book_embeddingsN.pkl", "rb") as f:
-    #book_embeddings = torch.load("book_embeddingsN.pkl", map_location=torch.device('cpu'))
-#
-        #book_embeddings = pickle.load(f)
     query_tokens = tokenizer.encode_plus(
-            query,
-            add_special_tokens=True,
-            max_length=length, # Ограничение на максимальную длину входной последовательности
-            pad_to_max_length=True, # Дополним последовательность нулями до максимальной длины
-            return_tensors='pt' # Вернём тензоры PyTorch
-        )
     with torch.no_grad():
-            query_outputs = model(**query_tokens)
-            query_hidden_states = query_outputs.hidden_states[-1][:,0,:]
-            query_hidden_states = torch.nn.functional.normalize(query_hidden_states)
     # Вычисление косинусного расстояния между эмбеддингом запроса и каждой аннотацией
     cosine_similarities = torch.nn.functional.cosine_similarity(
         query_hidden_states.squeeze(0),
@@ -82,7 +55,7 @@ if st.button('Сгенерировать'):
     indices = np.argsort(cosine_similarities)[::-1]  # Сортировка по убыванию
-    num_books_per_page = st.selectbox("Количество книг на странице:", [3, 5, 10], index=0)
     for i in indices[:num_books_per_page]:
         cols = st.columns(2)  # Создание двух столбцов для размещения информации и изображения
@@ -94,4 +67,4 @@ if st.button('Сгенерировать'):
         image = Image.open(BytesIO(response.content))
         cols[0].image(image)
         cols[0].write(cosine_similarities[i])
-        cols[1].write("---")

 # Загрузка датасета и аннотаций к книгам
 books = pd.read_csv('all+++.csv')
 books['author'].fillna('other', inplace=True)
 annot = books['annotation']
     with open("book_embeddings256xxx.pkl", "rb") as f:
         book_embeddings = pickle.load(f)
     query_tokens = tokenizer.encode_plus(
+        query,
+        add_special_tokens=True,
+        max_length=length,  # Ограничение на максимальную длину входной последовательности
+        pad_to_max_length=True,  # Дополним последовательность нулями до максимальной длины
+        return_tensors='pt'  # Вернём тензоры PyTorch
+    )
     with torch.no_grad():
+        query_outputs = model(**query_tokens)
+        query_hidden_states = query_outputs.hidden_states[-1][:, 0, :]
+        query_hidden_states = torch.nn.functional.normalize(query_hidden_states)
     # Вычисление косинусного расстояния между эмбеддингом запроса и каждой аннотацией
     cosine_similarities = torch.nn.functional.cosine_similarity(
         query_hidden_states.squeeze(0),
     indices = np.argsort(cosine_similarities)[::-1]  # Сортировка по убыванию
+    num_books_per_page = st.sidebar.selectbox("Количество книг на странице:", [3, 5, 10], index=0)
     for i in indices[:num_books_per_page]:
         cols = st.columns(2)  # Создание двух столбцов для размещения информации и изображения
         image = Image.open(BytesIO(response.content))
         cols[0].image(image)
         cols[0].write(cosine_similarities[i])
+        cols[1].write("---")