File size: 5,381 Bytes
e7fd7bd 976538a e7fd7bd 5704717 e7fd7bd 976538a 7c4c297 6371523 976538a e7fd7bd 7c4c297 6371523 55cbcf8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
from __future__ import annotations
from os import listdir
import numpy as np
import pandas as pd
import faiss
import streamlit as st
from PIL import Image
def get_category_data(type: str, cat_name: str | None = None) -> int | str | list[str]:
    """
    Returns the requested data by category.

    :param type: kind of data to return. Should be one of:
        ``'cat_names'`` - list of all category slugs (``cat_name`` is ignored);
        ``'id'`` - numeric id of the category whose slug is ``cat_name``;
        ``'ru_name'`` - Russian name of the category whose slug is ``cat_name``;
        ``'ru_to_eng'`` - slug of the category whose Russian name is ``cat_name``.
    :param cat_name: category slug (for ``'id'`` / ``'ru_name'``) or Russian
        category name (for ``'ru_to_eng'``)
    :return: the requested data
    :raises ValueError: if ``type`` is not one of the supported values, if
        ``cat_name`` is None while ``type`` requires it, or if ``cat_name`` is
        not a known slug for ``'id'`` / ``'ru_name'``
    :raises KeyError: if ``type`` is ``'ru_to_eng'`` and ``cat_name`` is not a
        known Russian category name
    """
    categories = {
        'makijazh': {'id': 3, 'name': 'макияж'},
        'uhod': {'id': 4, 'name': 'уход'},
        'volosy': {'id': 6, 'name': 'волосы'},
        'parfjumerija': {'id': 7, 'name': 'парфюмерия'},
        'zdorov-e-i-apteka': {'id': 3747, 'name': 'здоровье и аптека'},
        'sexual-wellness': {'id': 5962, 'name': 'sexual wellness'},
        'azija': {'id': 10, 'name': 'азия'},
        'organika': {'id': 12, 'name': 'органика'},
        'dlja-muzhchin': {'id': 3887, 'name': 'для мужчин'},
        'dlja-detej': {'id': 4357, 'name': 'для детей'},
        'tehnika': {'id': 3870, 'name': 'техника'},
        'dlja-doma': {'id': 8202, 'name': 'для дома'},
        'odezhda-i-aksessuary': {'id': 8529, 'name': 'одежда и аксессуары'},
        'nizhnee-bel-jo': {'id': 8563, 'name': 'нижнее бельё'},
        'ukrashenija': {'id': 5746, 'name': 'украшения'},
        'lajfstajl': {'id': 8579, 'name': 'лайфстайл'},
        'ini-formaty': {'id': 5159, 'name': 'тревел-форматы'},
        'tovary-dlja-zhivotnyh': {'id': 7638, 'name': 'товары для животных'},
    }

    if type not in ('cat_names', 'id', 'ru_name', 'ru_to_eng'):
        raise ValueError('Parameter type should be "cat_names", "id", "ru_name" or "ru_to_eng"')

    if type == 'cat_names':
        return list(categories)

    # Every remaining mode looks something up by cat_name.
    if cat_name is None:
        raise ValueError('Value cat_name should be not None')

    if type == 'ru_to_eng':
        # Derive the reverse table from `categories` so the two can never drift apart.
        slug_by_ru_name = {info['name']: slug for slug, info in categories.items()}
        return slug_by_ru_name[cat_name]

    if cat_name not in categories:
        raise ValueError(f'Wrong category name: {cat_name}. \n'
                         f'Possible cat_names: {", ".join(categories)}')

    # Only 'id' and 'ru_name' can reach this point.
    field = 'id' if type == 'id' else 'name'
    return categories[cat_name][field]
@st.experimental_memo
def get_faiss_description_index() -> faiss.IndexFlatL2:
    """
    Reads the FAISS index built for product descriptions.
    The call is wrapped in ``st.experimental_memo``.
    :return: index read from data/faiss_index
    """
    index_path = 'data/faiss_index/faiss_description_index.index'
    description_index = faiss.read_index(index_path)
    return description_index
@st.experimental_memo
def get_faiss_product_usage_index() -> faiss.IndexFlatL2:
    """
    Reads the FAISS index built for product usage texts.
    The call is wrapped in ``st.experimental_memo``.
    :return: index read from data/faiss_index
    """
    index_path = 'data/faiss_index/faiss_product_usage_index.index'
    usage_index = faiss.read_index(index_path)
    return usage_index
@st.experimental_memo
def get_faiss_product_composition_index() -> faiss.IndexFlatL2:
    """
    Reads the FAISS index built for product composition texts.
    The call is wrapped in ``st.experimental_memo``.
    :return: index read from data/faiss_index
    """
    index_path = 'data/faiss_index/faiss_product_composition_index.index'
    composition_index = faiss.read_index(index_path)
    return composition_index
@st.experimental_memo
def get_products_data() -> pd.DataFrame:
    """
    Reads the products table from data/products.csv.
    The call is wrapped in ``st.experimental_memo``.
    :return: products data frame
    """
    products_path = 'data/products.csv'
    products = pd.read_csv(products_path)
    return products
@st.experimental_memo
def get_description_embeddings() -> pd.DataFrame:
    """
    Reads the description embeddings file with ``pd.read_csv``.
    The call is wrapped in ``st.experimental_memo``.
    :return: data frame read from data/embeddings/embedded_description
    """
    embeddings_path = 'data/embeddings/embedded_description'
    embeddings = pd.read_csv(embeddings_path)
    return embeddings
@st.experimental_memo
def get_product_usage_embeddings() -> pd.DataFrame:
    """
    Reads the product usage embeddings file with ``pd.read_csv``.
    The call is wrapped in ``st.experimental_memo``.
    :return: data frame read from data/embeddings/embedded_product_usage
    """
    embeddings_path = 'data/embeddings/embedded_product_usage'
    embeddings = pd.read_csv(embeddings_path)
    return embeddings
@st.experimental_memo
def get_product_composition_embeddings() -> pd.DataFrame:
    """
    Reads the product composition embeddings file with ``pd.read_csv``.
    The call is wrapped in ``st.experimental_memo``.
    :return: data frame read from data/embeddings/embedded_product_composition
    """
    embeddings_path = 'data/embeddings/embedded_product_composition'
    embeddings = pd.read_csv(embeddings_path)
    return embeddings
@st.experimental_memo
def get_image_data() -> pd.DataFrame:
    """
    Reads the product images table from data/product_images.csv.
    The call is wrapped in ``st.experimental_memo``.
    :return: product images data frame
    """
    images_path = 'data/product_images.csv'
    images = pd.read_csv(images_path)
    return images
@st.experimental_memo
def get_category_options() -> list:
    """
    Builds the list of category names in RU lang for the categories
    present in the products data.
    :return: RU category names, in the order given by ``value_counts()``
             on the ``category`` column
    """
    products = get_products_data()
    category_slugs = products['category'].value_counts().index

    options = []
    for slug in category_slugs:
        # Translate each category slug to its RU name.
        options.append(get_category_data('ru_name', slug))
    return options
def get_random_product() -> pd.Series:
    """
    Returns random product.

    :return: one row of the products data, picked uniformly at random
    :raises ValueError: raised by ``np.random.randint`` when the products
        data has no rows
    """
    data = get_products_data()
    # randint yields a row *position* in [0, len(data)), so select with the
    # positional indexer; the label-based .loc only works when the index
    # labels happen to equal the positions.
    position = np.random.randint(len(data))
    return data.iloc[position]
def get_image_by_sku(sku: str | int) -> np.ndarray:
    """
    Get image by product sku.

    Looks up the image file name for the sku in the image table, then tries
    every sub-directory of ``data/images`` until the file can be opened.

    :param sku: product sku
    :return: product image as an array if it was found and could be read,
             else the image with 'No image' text from ``data/service_images``
    """
    image_data = get_image_data()

    # The file name does not depend on the directory, so resolve it once.
    try:
        # Compare as strings so the match does not depend on how the sku
        # column was parsed when the table was read.
        sku_rows = image_data[image_data['sku'].astype(str) == str(sku)]
        image_name = sku_rows['image'].iloc[0]
    except (KeyError, IndexError):
        # Expected column is missing, or there is no row for this sku.
        image_name = None

    if image_name is not None:
        try:
            image_dirs = listdir('data/images')
        except OSError:
            # Images directory is absent or unreadable: use the placeholder.
            image_dirs = []

        # Search every directory instead of assuming a fixed number of them.
        for image_dir in image_dirs:
            try:
                with Image.open(f'data/images/{image_dir}/{image_name}') as img:
                    return np.array(img)
            except (OSError, ValueError):
                # Not in this directory (or not a readable image): try the next.
                continue

    with Image.open('data/service_images/' + 'no_img.jpg') as img:
        return np.array(img)