HardWorkingStation committed on
Commit
3fd6b1d
0 Parent(s):

Initial commit

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. src/test.ipynb +0 -0
  3. src/tools.py +43 -0
  4. src/web_app.py +15 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /venv
src/test.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
src/tools.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ import pandas as pd
4
+ from sklearn.model_selection import train_test_split
5
+ from sklift.datasets import fetch_lenta
6
+ from catboost import CatBoostClassifier
7
+ import sklearn
8
+ import streamlit as st
9
+
10
+
11
@st.experimental_memo
def get_data() -> sklearn.utils.Bunch:
    """Fetch the Lenta dataset and clean the columns used by the app.

    The object returned by ``fetch_lenta()`` is modified in place:

    * ``treatment`` labels are mapped to numbers (``'test'`` -> 1,
      ``'control'`` -> 0);
    * missing ``gender`` values are replaced with the placeholder
      ``'Не определен'``;
    * missing ``children``, ``age`` and ``months_from_register`` values are
      replaced with 0 and the columns are cast to ``int``.

    Returns:
        The dataset object produced by ``fetch_lenta()`` after the
        replacements above.
    """
    # The return annotation uses the public ``sklearn.utils.Bunch`` name
    # rather than the private ``sklearn.utils._bunch`` module path, which is
    # not present in every scikit-learn release.
    treat_dict = {
        'test': 1,
        'control': 0,
    }
    # Load the dataset.
    dataset = fetch_lenta()
    # Convert the string treatment labels to numeric values.
    dataset.treatment = dataset.treatment.map(treat_dict)
    # Fill the gaps.
    dataset.data['gender'] = dataset.data['gender'].fillna(value='Не определен')
    for column in ('children', 'age', 'months_from_register'):
        dataset.data[column] = dataset.data[column].fillna(0).astype('int')
    return dataset
28
+
29
+
30
@st.experimental_memo
def data_split(data, treatment, target) -> tuple[Any, Any, Any, Any, Any, Any]:
    """Split features, treatment and target into train and validation parts.

    The split holds out 30% of the rows for validation, uses a fixed
    ``random_state`` of 42, and is stratified on the combination of
    ``treatment`` and ``target``.

    Args:
        data: Feature table to split.
        treatment: Treatment flags, passed to ``pd.concat`` alongside
            ``target``.
        target: Target values, passed to ``pd.concat`` alongside
            ``treatment``.

    Returns:
        A 6-tuple ``(X_train, X_val, trmnt_train, trmnt_val, y_train, y_val)``.
    """
    # Put treatment and target side by side so the split is stratified on
    # both of them at once.
    strata = pd.concat([treatment, target], axis=1)
    # train_test_split returns a list ordered as train/validation pairs for
    # each input, in the order the inputs were passed.
    parts = train_test_split(
        data,
        treatment,
        target,
        test_size=0.3,
        random_state=42,
        stratify=strata,
    )
    return tuple(parts)
src/web_app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ import tools
4
+ from time import sleep
5
+
6
# Customer attributes shown in the preview and in the filtered table.
norm_columns = ['age', 'children', 'gender', 'main_format', 'months_from_register', 'response_sms', 'response_viber']
dataset = tools.get_data()

st.title('Uplift lab')

st.write('Какие данные выбрать для рассылки?')
# Preview of the first rows of the selected attributes.
st.write(dataset.data[norm_columns].head())
# NOTE(review): the selected features are not used anywhere below in the
# visible code - confirm whether they should drive the displayed columns.
columns = st.multiselect(options=norm_columns, label='Выберите признак')
# A two-element default value turns the widget into a range slider that
# returns the (lower, upper) bounds of the selection.
age = st.select_slider(label='', options=range(1, 101), value=[18, 100])
age_from, age_to = age
# Keep every customer whose age lies inside the selected range (bounds
# included). ``isin(age)`` would keep only the rows whose age is exactly equal
# to one of the two bounds.
st.write(dataset.data[dataset.data['age'].between(age_from, age_to)][norm_columns])