|
from __future__ import annotations |
|
from os import listdir |
|
|
|
import numpy as np |
|
import pandas as pd |
|
|
|
import faiss |
|
|
|
import streamlit as st |
|
|
|
from PIL import Image |
|
|
|
|
|
# Catalogue slug -> numeric category id and Russian display name.
_CATEGORIES = {
    'makijazh': {'id': 3, 'name': 'макияж'},
    'uhod': {'id': 4, 'name': 'уход'},
    'volosy': {'id': 6, 'name': 'волосы'},
    'parfjumerija': {'id': 7, 'name': 'парфюмерия'},
    'zdorov-e-i-apteka': {'id': 3747, 'name': 'здоровье и аптека'},
    'sexual-wellness': {'id': 5962, 'name': 'sexual wellness'},
    'azija': {'id': 10, 'name': 'азия'},
    'organika': {'id': 12, 'name': 'органика'},
    'dlja-muzhchin': {'id': 3887, 'name': 'для мужчин'},
    'dlja-detej': {'id': 4357, 'name': 'для детей'},
    'tehnika': {'id': 3870, 'name': 'техника'},
    'dlja-doma': {'id': 8202, 'name': 'для дома'},
    'odezhda-i-aksessuary': {'id': 8529, 'name': 'одежда и аксессуары'},
    'nizhnee-bel-jo': {'id': 8563, 'name': 'нижнее бельё'},
    'ukrashenija': {'id': 5746, 'name': 'украшения'},
    'lajfstajl': {'id': 8579, 'name': 'лайфстайл'},
    'ini-formaty': {'id': 5159, 'name': 'тревел-форматы'},
    'tovary-dlja-zhivotnyh': {'id': 7638, 'name': 'товары для животных'},
}

# Reverse lookup (Russian display name -> slug). It is derived from
# _CATEGORIES so the two tables cannot drift apart.
_RU_NAME_TO_SLUG = {info['name']: slug for slug, info in _CATEGORIES.items()}


def get_category_data(type: str, cat_name: str | None = None) -> int | str | list:
    """
    Returns the requested data by category

    Supported values of ``type``:

    * ``cat_names`` - list of all category slugs (``cat_name`` is ignored)
    * ``id`` - numeric id of the category whose slug is ``cat_name``
    * ``ru_name`` - Russian name of the category whose slug is ``cat_name``
    * ``ru_to_eng`` - slug of the category whose Russian name is ``cat_name``

    :param type: Type returned data. Should be one of: cat_names, id,
        ru_name or ru_to_eng
    :param cat_name: category slug (for id / ru_name) or Russian category
        name (for ru_to_eng); not needed for cat_names
    :return: returns the requested data
    :raises ValueError: if ``type`` is not supported, if ``cat_name`` is None
        for a mode that needs it, or if the slug is unknown (id / ru_name)
    :raises KeyError: if the Russian name is unknown (ru_to_eng)
    """
    if type not in ('cat_names', 'id', 'ru_name', 'ru_to_eng'):
        raise ValueError('Parameter type should by "cat_names","id", "ru_name" or "ru_to_eng"')

    if type == 'cat_names':
        return list(_CATEGORIES)

    if cat_name is None:
        raise ValueError('Value cat_name should be not None')

    if type == 'ru_to_eng':
        # An unknown Russian name surfaces as KeyError, which is what
        # existing callers of this mode already get.
        return _RU_NAME_TO_SLUG[cat_name]

    try:
        category = _CATEGORIES[cat_name]
    except KeyError:
        # The ValueError already names the bad slug, so the KeyError context
        # is suppressed.
        raise ValueError(f'Wrong category name: {cat_name}. \n'
                         f'Possible cat_names: {", ".join(_CATEGORIES)}') from None

    # `type` was validated above, so only 'id' and 'ru_name' can reach here.
    return category['id'] if type == 'id' else category['name']
|
|
|
|
|
@st.experimental_memo
def get_faiss_description_index() -> faiss.IndexFlatL2:
    """
    Reads the description FAISS index from disk

    The function is wrapped in ``st.experimental_memo``.

    :return: index read from data/faiss_index/faiss_description_index.index
    """
    index_path = 'data/faiss_index/faiss_description_index.index'
    return faiss.read_index(index_path)
|
|
|
|
|
@st.experimental_memo
def get_faiss_product_usage_index() -> faiss.IndexFlatL2:
    """
    Reads the product-usage FAISS index from disk

    The function is wrapped in ``st.experimental_memo``.

    :return: index read from data/faiss_index/faiss_product_usage_index.index
    """
    index_path = 'data/faiss_index/faiss_product_usage_index.index'
    return faiss.read_index(index_path)
|
|
|
|
|
@st.experimental_memo
def get_faiss_product_composition_index() -> faiss.IndexFlatL2:
    """
    Reads the product-composition FAISS index from disk

    The function is wrapped in ``st.experimental_memo``.

    :return: index read from
        data/faiss_index/faiss_product_composition_index.index
    """
    index_path = 'data/faiss_index/faiss_product_composition_index.index'
    return faiss.read_index(index_path)
|
|
|
|
|
@st.experimental_memo
def get_products_data() -> pd.DataFrame:
    """
    Reads the products table from data/products.csv

    The function is wrapped in ``st.experimental_memo``.

    :return: contents of data/products.csv as a DataFrame
    """
    csv_path = 'data/products.csv'
    return pd.read_csv(csv_path)
|
|
|
|
|
@st.experimental_memo
def get_description_embeddings() -> pd.DataFrame:
    """
    Reads the description embeddings file as CSV

    The function is wrapped in ``st.experimental_memo``.

    :return: contents of data/embeddings/embedded_description as a DataFrame
    """
    csv_path = 'data/embeddings/embedded_description'
    return pd.read_csv(csv_path)
|
|
|
|
|
@st.experimental_memo
def get_product_usage_embeddings() -> pd.DataFrame:
    """
    Reads the product-usage embeddings file as CSV

    The function is wrapped in ``st.experimental_memo``.

    :return: contents of data/embeddings/embedded_product_usage as a DataFrame
    """
    csv_path = 'data/embeddings/embedded_product_usage'
    return pd.read_csv(csv_path)
|
|
|
|
|
@st.experimental_memo
def get_product_composition_embeddings() -> pd.DataFrame:
    """
    Reads the product-composition embeddings file as CSV

    The function is wrapped in ``st.experimental_memo``.

    :return: contents of data/embeddings/embedded_product_composition
        as a DataFrame
    """
    csv_path = 'data/embeddings/embedded_product_composition'
    return pd.read_csv(csv_path)
|
|
|
|
|
@st.experimental_memo
def get_image_data() -> pd.DataFrame:
    """
    Reads the product images table from data/product_images.csv

    The function is wrapped in ``st.experimental_memo``.

    :return: contents of data/product_images.csv as a DataFrame
    """
    csv_path = 'data/product_images.csv'
    return pd.read_csv(csv_path)
|
|
|
|
|
@st.experimental_memo
def get_category_options() -> list:
    """
    Builds the list of category names in RU lang

    Category slugs are taken from the ``category`` column of the products
    table, in the order produced by ``value_counts()``, and each one is
    translated with ``get_category_data('ru_name', ...)``.

    :return: list of categories in RU lang
    """
    products = get_products_data()
    slugs = products['category'].value_counts().index

    options = []
    for slug in slugs:
        options.append(get_category_data('ru_name', slug))
    return options
|
|
|
|
|
def get_random_product() -> pd.Series:
    """
    Returns random product

    A row position is drawn uniformly with ``np.random.randint`` and the row
    is fetched by position.

    :return: random product data
    :raises ValueError: if the products table is empty (raised by
        ``np.random.randint(0)``)
    """
    products = get_products_data()
    row_position = np.random.randint(len(products))
    # The random number is a position, not an index label, so use .iloc.
    # .loc only works here while the frame keeps its default 0..n-1 index.
    return products.iloc[row_position]
|
|
|
|
|
def get_image_by_sku(sku: str | int) -> np.ndarray:
    """
    Get image by product sku.

    The image file name is looked up in the ``image`` column of the image
    table for the row whose ``sku`` equals ``str(sku)``. Every sub-folder of
    ``data/images`` is then tried in turn and the first file that loads is
    returned. If the sku is unknown, the folder cannot be listed, or no
    sub-folder holds a loadable file, the placeholder
    ``data/service_images/no_img.jpg`` is returned instead.

    :param sku: product sku
    :return: image product if existed, else image with 'No image' text
    """
    image_data = get_image_data()

    try:
        # The file name does not depend on the folder, so resolve it once.
        # NOTE(review): the comparison assumes the 'sku' column holds
        # strings - confirm against product_images.csv.
        image_name = image_data[image_data['sku'] == str(sku)]['image'].iloc[0]
        image_dirs = listdir('data/images')
    except (KeyError, IndexError, OSError):
        # Missing column, unknown sku or unreadable images folder:
        # nothing to search, go straight to the placeholder.
        image_name, image_dirs = None, []

    # Search every folder. The previous hard-coded cut-off at the third
    # folder returned None when there were fewer than three and ignored
    # any beyond the third.
    for dir_name in image_dirs:
        try:
            with Image.open(f'data/images/{dir_name}/{image_name}') as img:
                return np.array(img)
        except Exception:
            # Best effort: a missing or corrupt file in this folder must not
            # break the page. Pillow reports such files with several
            # unrelated exception types, so the catch is broad, but it no
            # longer swallows KeyboardInterrupt / SystemExit.
            continue

    with Image.open('data/service_images/' + 'no_img.jpg') as img:
        return np.array(img)