File size: 5,381 Bytes
e7fd7bd
976538a
e7fd7bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5704717
e7fd7bd
 
 
 
976538a
 
 
7c4c297
6371523
 
976538a
 
 
e7fd7bd
7c4c297
6371523
55cbcf8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
from __future__ import annotations
from os import listdir

import numpy as np
import pandas as pd

import faiss

import streamlit as st

from PIL import Image


def get_category_data(type: str, cat_name: str | None = None) -> int | str | list:
	"""
	Returns the requested data by category

	:param type: Type of returned data. Should be one of: cat_names, id, ru_name or ru_to_eng
		(the name shadows the builtin, it is kept because callers may pass it by keyword)
	:param cat_name: category name. For id and ru_name it is the transliterated key
		(e.g. 'uhod'), for ru_to_eng it is the RU name. Not used for cat_names
	:return: list of category keys for cat_names, int id for id, RU name for ru_name,
		category key for ru_to_eng
	:raises ValueError: if type is not supported, if cat_name is None while it is required,
		or if cat_name is not a known category key (id and ru_name)
	:raises KeyError: if cat_name is not a known RU name (ru_to_eng)
	"""

	cat = {
		'makijazh':              {'id': 3, 'name': 'макияж'},
		'uhod':                  {'id': 4, 'name': 'уход'},
		'volosy':                {'id': 6, 'name': 'волосы'},
		'parfjumerija':          {'id': 7, 'name': 'парфюмерия'},
		'zdorov-e-i-apteka':     {'id': 3747, 'name': 'здоровье и аптека'},
		'sexual-wellness':       {'id': 5962, 'name': 'sexual wellness'},
		'azija':                 {'id': 10, 'name': 'азия'},
		'organika':              {'id': 12, 'name': 'органика'},
		'dlja-muzhchin':         {'id': 3887, 'name': 'для мужчин'},
		'dlja-detej':            {'id': 4357, 'name': 'для детей'},
		'tehnika':               {'id': 3870, 'name': 'техника'},
		'dlja-doma':             {'id': 8202, 'name': 'для дома'},
		'odezhda-i-aksessuary':  {'id': 8529, 'name': 'одежда и аксессуары'},
		'nizhnee-bel-jo':        {'id': 8563, 'name': 'нижнее бельё'},
		'ukrashenija':           {'id': 5746, 'name': 'украшения'},
		'lajfstajl':             {'id': 8579, 'name': 'лайфстайл'},
		'ini-formaty':           {'id': 5159, 'name': 'тревел-форматы'},
		'tovary-dlja-zhivotnyh': {'id': 7638, 'name': 'товары для животных'}
	}

	if type not in ('cat_names', 'id', 'ru_name', 'ru_to_eng'):
		raise ValueError('Parameter type should be "cat_names", "id", "ru_name" or "ru_to_eng"')

	if type == 'cat_names':
		return list(cat)

	if cat_name is None:
		raise ValueError('Value cat_name should be not None')

	if type == 'ru_to_eng':
		# The reverse mapping is derived from cat, so the two tables cannot drift apart.
		ru_cat = {info['name']: key for key, info in cat.items()}
		# An unknown RU name raises KeyError here, this is the historical behaviour.
		return ru_cat[cat_name]

	if cat_name not in cat:
		raise ValueError(f'Wrong category name: {cat_name}. \n'
						 f'Possible cat_names: {", ".join(cat)}')

	# type was validated above, so only 'id' and 'ru_name' are left at this point.
	if type == 'id':
		return cat[cat_name]['id']
	return cat[cat_name]['name']


@st.experimental_memo
def get_faiss_description_index() -> faiss.IndexFlatL2:
	"""
	Reads the FAISS index stored in data/faiss_index/faiss_description_index.index

	The call is wrapped in st.experimental_memo.

	:return: index object returned by faiss.read_index
	"""
	index_path = 'data/faiss_index/faiss_description_index.index'
	return faiss.read_index(index_path)


@st.experimental_memo
def get_faiss_product_usage_index() -> faiss.IndexFlatL2:
	"""
	Reads the FAISS index stored in data/faiss_index/faiss_product_usage_index.index

	The call is wrapped in st.experimental_memo.

	:return: index object returned by faiss.read_index
	"""
	index_path = 'data/faiss_index/faiss_product_usage_index.index'
	return faiss.read_index(index_path)


@st.experimental_memo
def get_faiss_product_composition_index() -> faiss.IndexFlatL2:
	"""
	Reads the FAISS index stored in data/faiss_index/faiss_product_composition_index.index

	The call is wrapped in st.experimental_memo.

	:return: index object returned by faiss.read_index
	"""
	index_path = 'data/faiss_index/faiss_product_composition_index.index'
	return faiss.read_index(index_path)


@st.experimental_memo
def get_products_data() -> pd.DataFrame:
	"""
	Reads data/products.csv into a DataFrame

	The call is wrapped in st.experimental_memo.

	:return: content of data/products.csv as parsed by pd.read_csv with default options
	"""
	csv_path = 'data/products.csv'
	return pd.read_csv(csv_path)


@st.experimental_memo
def get_description_embeddings() -> pd.DataFrame:
	"""
	Reads data/embeddings/embedded_description into a DataFrame

	The call is wrapped in st.experimental_memo.

	:return: content of the file as parsed by pd.read_csv with default options
	"""
	csv_path = 'data/embeddings/embedded_description'
	return pd.read_csv(csv_path)


@st.experimental_memo
def get_product_usage_embeddings() -> pd.DataFrame:
	"""
	Reads data/embeddings/embedded_product_usage into a DataFrame

	The call is wrapped in st.experimental_memo.

	:return: content of the file as parsed by pd.read_csv with default options
	"""
	csv_path = 'data/embeddings/embedded_product_usage'
	return pd.read_csv(csv_path)


@st.experimental_memo
def get_product_composition_embeddings() -> pd.DataFrame:
	"""
	Reads data/embeddings/embedded_product_composition into a DataFrame

	The call is wrapped in st.experimental_memo.

	:return: content of the file as parsed by pd.read_csv with default options
	"""
	csv_path = 'data/embeddings/embedded_product_composition'
	return pd.read_csv(csv_path)


@st.experimental_memo
def get_image_data() -> pd.DataFrame:
	"""
	Reads data/product_images.csv into a DataFrame

	The call is wrapped in st.experimental_memo.

	:return: content of data/product_images.csv as parsed by pd.read_csv with default options
	"""
	csv_path = 'data/product_images.csv'
	return pd.read_csv(csv_path)


@st.experimental_memo
def get_category_options() -> list:
	"""
	Builds the list of category names in RU lang

	The categories come from the 'category' column of the products data and keep
	the order of value_counts() on that column. Each value is translated with
	get_category_data('ru_name', ...).

	:return: list of categories in RU lang
	"""
	products = get_products_data()
	counted_categories = products['category'].value_counts().index

	options = []
	for category in counted_categories:
		options.append(get_category_data('ru_name', category))
	return options


def get_random_product() -> pd.Series:
	"""
	Picks one product at random

	An integer is drawn with np.random.randint from [0, number of products) and the
	row with that index label is taken with .loc.

	:return: random product data
	"""
	products = get_products_data()
	total = len(products)
	picked_label = np.random.randint(total)
	return products.loc[picked_label]


def get_image_by_sku(sku: str | int) -> np.ndarray:
	"""
	Get image by product sku.

	The image file name is taken from the first row of the image data whose 'sku'
	equals str(sku). Every entry of data/images is then tried as a folder that may
	contain this file, and the first file that opens is returned. In any other case
	(unknown sku, missing folder, unreadable file) the placeholder
	data/service_images/no_img.jpg is returned.

	:param sku: product sku
	:return: image product if existed, else image with 'No image' text
	"""
	image_data = get_image_data()

	# NOTE(review): the comparison is done against str(sku), so this presumably expects
	# the 'sku' column to hold strings - confirm against data/product_images.csv.
	try:
		# The lookup does not depend on the folder, so it is done once before the loop.
		image_name = image_data[image_data['sku'] == str(sku)]['image'].iloc[0]
		folders = listdir('data/images')
	except (KeyError, IndexError, OSError):
		# Unknown sku, missing column or missing images folder: nothing to search.
		image_name = None
		folders = []

	for folder in folders:
		try:
			with Image.open(f'data/images/{folder}/{image_name}') as img:
				return np.array(img)
		except Exception:
			# Best effort: a missing or broken file in this folder must not break
			# the page, the next folder is tried instead.
			continue

	# Always fall back to the placeholder, whatever the number of folders is.
	with Image.open('data/service_images/' + 'no_img.jpg') as img:
		return np.array(img)