HardWorkingStation committed on
Commit
402eb1c
1 Parent(s): bbc09e3

Initial commit

Browse files
Files changed (3) hide show
  1. src/test.ipynb +34 -222
  2. src/tools.py +1 -2
  3. src/web_app.py +5 -9
src/test.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {
7
  "collapsed": true,
8
  "pycharm": {
@@ -15,7 +15,7 @@
15
  "import pandas as pd\n",
16
  "from sklearn.model_selection import train_test_split\n",
17
  "from sklift.datasets import fetch_hillstrom\n",
18
- "from sklift.metrics import uplift_at_k\n",
19
  "from sklift.viz import plot_uplift_preds\n",
20
  "from sklift.models import SoloModel\n",
21
  "from catboost import CatBoostClassifier\n",
@@ -24,32 +24,7 @@
24
  },
25
  {
26
  "cell_type": "code",
27
- "execution_count": 2,
28
- "outputs": [
29
- {
30
- "data": {
31
- "text/plain": "{'new_filter': 'filter'}"
32
- },
33
- "execution_count": 2,
34
- "metadata": {},
35
- "output_type": "execute_result"
36
- }
37
- ],
38
- "source": [
39
- "filters = {}\n",
40
- "filters['new_filter'] = 'filter'\n",
41
- "filters"
42
- ],
43
- "metadata": {
44
- "collapsed": false,
45
- "pycharm": {
46
- "name": "#%%\n"
47
- }
48
- }
49
- },
50
- {
51
- "cell_type": "code",
52
- "execution_count": 115,
53
  "outputs": [],
54
  "source": [
55
  "def get_data() -> tuple[Any, Any, Any]:\n",
@@ -75,54 +50,11 @@
75
  },
76
  {
77
  "cell_type": "code",
78
- "execution_count": 116,
79
  "outputs": [],
80
  "source": [
81
- "data, target, treatment = get_data()"
82
- ],
83
- "metadata": {
84
- "collapsed": false,
85
- "pycharm": {
86
- "name": "#%%\n"
87
- }
88
- }
89
- },
90
- {
91
- "cell_type": "code",
92
- "execution_count": 117,
93
- "outputs": [],
94
- "source": [
95
- "X_train, X_val, y_train, y_val, trmnt_train, trmnt_val = train_test_split(\n",
96
- " data, target, treatment, test_size=0.5, random_state=42\n",
97
- ")\n",
98
- "\n",
99
- "models_results = {\n",
100
- " 'approach': [],\n",
101
- " 'uplift@30%': []\n",
102
- "}"
103
- ],
104
- "metadata": {
105
- "collapsed": false,
106
- "pycharm": {
107
- "name": "#%%\n"
108
- }
109
- }
110
- },
111
- {
112
- "cell_type": "code",
113
- "execution_count": 118,
114
- "outputs": [
115
- {
116
- "data": {
117
- "text/plain": "0 0.871545\n1 0.128455\nName: visit, dtype: float64"
118
- },
119
- "execution_count": 118,
120
- "metadata": {},
121
- "output_type": "execute_result"
122
- }
123
- ],
124
- "source": [
125
- "y_train.value_counts(normalize=True)"
126
  ],
127
  "metadata": {
128
  "collapsed": false,
@@ -133,19 +65,19 @@
133
  },
134
  {
135
  "cell_type": "code",
136
- "execution_count": 119,
137
  "outputs": [
138
  {
139
  "data": {
140
- "text/plain": "0 0.870802\n1 0.129198\nName: visit, dtype: float64"
141
  },
142
- "execution_count": 119,
143
  "metadata": {},
144
  "output_type": "execute_result"
145
  }
146
  ],
147
  "source": [
148
- "y_val.value_counts(normalize=True)"
149
  ],
150
  "metadata": {
151
  "collapsed": false,
@@ -156,44 +88,17 @@
156
  },
157
  {
158
  "cell_type": "code",
159
- "execution_count": 113,
160
- "outputs": [
161
- {
162
- "data": {
163
- "text/plain": " recency history_segment history mens womens zip_code newbie \\\n609 9 3) $200 - $350 212.32 1 0 Surburban 0 \n51952 3 6) $750 - $1,000 849.16 1 1 Surburban 1 \n33629 9 1) $0 - $100 43.22 1 0 Surburban 1 \n22103 8 1) $0 - $100 29.99 0 1 Surburban 0 \n21350 7 2) $100 - $200 164.07 1 0 Surburban 1 \n... ... ... ... ... ... ... ... \n9307 1 2) $100 - $200 110.45 0 1 Urban 0 \n16819 1 1) $0 - $100 88.04 0 1 Surburban 0 \n57173 1 1) $0 - $100 72.63 0 1 Rural 0 \n1282 3 4) $350 - $500 366.16 0 1 Rural 0 \n23624 9 1) $0 - $100 36.87 1 0 Urban 1 \n\n channel \n609 Multichannel \n51952 Multichannel \n33629 Phone \n22103 Web \n21350 Phone \n... ... \n9307 Phone \n16819 Phone \n57173 Phone \n1282 Phone \n23624 Web \n\n[29885 rows x 8 columns]",
164
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>recency</th>\n <th>history_segment</th>\n <th>history</th>\n <th>mens</th>\n <th>womens</th>\n <th>zip_code</th>\n <th>newbie</th>\n <th>channel</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>609</th>\n <td>9</td>\n <td>3) $200 - $350</td>\n <td>212.32</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>0</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>51952</th>\n <td>3</td>\n <td>6) $750 - $1,000</td>\n <td>849.16</td>\n <td>1</td>\n <td>1</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>33629</th>\n <td>9</td>\n <td>1) $0 - $100</td>\n <td>43.22</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>22103</th>\n <td>8</td>\n <td>1) $0 - $100</td>\n <td>29.99</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>0</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>21350</th>\n <td>7</td>\n <td>2) $100 - $200</td>\n <td>164.07</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>9307</th>\n <td>1</td>\n <td>2) $100 - $200</td>\n <td>110.45</td>\n <td>0</td>\n <td>1</td>\n <td>Urban</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>16819</th>\n <td>1</td>\n <td>1) $0 - $100</td>\n <td>88.04</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>57173</th>\n <td>1</td>\n <td>1) $0 - $100</td>\n <td>72.63</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n 
<th>1282</th>\n <td>3</td>\n <td>4) $350 - $500</td>\n <td>366.16</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>23624</th>\n <td>9</td>\n <td>1) $0 - $100</td>\n <td>36.87</td>\n <td>1</td>\n <td>0</td>\n <td>Urban</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n </tbody>\n</table>\n<p>29885 rows × 8 columns</p>\n</div>"
165
- },
166
- "execution_count": 113,
167
- "metadata": {},
168
- "output_type": "execute_result"
169
- }
170
- ],
171
- "source": [
172
- "X_train"
173
- ],
174
- "metadata": {
175
- "collapsed": false,
176
- "pycharm": {
177
- "name": "#%%\n"
178
- }
179
- }
180
- },
181
- {
182
- "cell_type": "code",
183
- "execution_count": 114,
184
- "outputs": [
185
- {
186
- "data": {
187
- "text/plain": " recency history_segment history mens womens zip_code newbie \\\n7730 3 5) $500 - $750 503.73 0 1 Rural 1 \n17594 5 2) $100 - $200 163.39 1 0 Urban 0 \n14481 11 3) $200 - $350 287.77 1 0 Rural 1 \n20003 3 1) $0 - $100 29.99 0 1 Surburban 1 \n19981 5 2) $100 - $200 131.51 0 1 Urban 1 \n... ... ... ... ... ... ... ... \n41828 11 4) $350 - $500 457.09 0 1 Surburban 1 \n49354 2 1) $0 - $100 29.99 0 1 Rural 1 \n12063 9 2) $100 - $200 147.48 1 0 Rural 1 \n3757 10 2) $100 - $200 128.78 1 0 Surburban 1 \n10708 6 2) $100 - $200 138.72 1 0 Urban 0 \n\n channel \n7730 Multichannel \n17594 Web \n14481 Web \n20003 Web \n19981 Web \n... ... \n41828 Multichannel \n49354 Phone \n12063 Phone \n3757 Phone \n10708 Web \n\n[12808 rows x 8 columns]",
188
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>recency</th>\n <th>history_segment</th>\n <th>history</th>\n <th>mens</th>\n <th>womens</th>\n <th>zip_code</th>\n <th>newbie</th>\n <th>channel</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>7730</th>\n <td>3</td>\n <td>5) $500 - $750</td>\n <td>503.73</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>1</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>17594</th>\n <td>5</td>\n <td>2) $100 - $200</td>\n <td>163.39</td>\n <td>1</td>\n <td>0</td>\n <td>Urban</td>\n <td>0</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>14481</th>\n <td>11</td>\n <td>3) $200 - $350</td>\n <td>287.77</td>\n <td>1</td>\n <td>0</td>\n <td>Rural</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>20003</th>\n <td>3</td>\n <td>1) $0 - $100</td>\n <td>29.99</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>19981</th>\n <td>5</td>\n <td>2) $100 - $200</td>\n <td>131.51</td>\n <td>0</td>\n <td>1</td>\n <td>Urban</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>41828</th>\n <td>11</td>\n <td>4) $350 - $500</td>\n <td>457.09</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>49354</th>\n <td>2</td>\n <td>1) $0 - $100</td>\n <td>29.99</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>12063</th>\n <td>9</td>\n <td>2) $100 - $200</td>\n <td>147.48</td>\n <td>1</td>\n <td>0</td>\n <td>Rural</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>3757</th>\n 
<td>10</td>\n <td>2) $100 - $200</td>\n <td>128.78</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>10708</th>\n <td>6</td>\n <td>2) $100 - $200</td>\n <td>138.72</td>\n <td>1</td>\n <td>0</td>\n <td>Urban</td>\n <td>0</td>\n <td>Web</td>\n </tr>\n </tbody>\n</table>\n<p>12808 rows × 8 columns</p>\n</div>"
189
- },
190
- "execution_count": 114,
191
- "metadata": {},
192
- "output_type": "execute_result"
193
- }
194
- ],
195
  "source": [
196
- "X_val"
 
 
 
 
 
 
 
197
  ],
198
  "metadata": {
199
  "collapsed": false,
@@ -204,13 +109,13 @@
204
  },
205
  {
206
  "cell_type": "code",
207
- "execution_count": 139,
208
  "outputs": [],
209
  "source": [
210
- "t = pd.DataFrame(data = [1 for _ in X_train.index],\n",
211
- " index=X_train.index\n",
212
- ")\n",
213
- "# t.loc[t.index] = 10"
214
  ],
215
  "metadata": {
216
  "collapsed": false,
@@ -221,20 +126,19 @@
221
  },
222
  {
223
  "cell_type": "code",
224
- "execution_count": 140,
225
  "outputs": [
226
  {
227
  "data": {
228
- "text/plain": " 0\n15194 1\n23642 1\n57737 1\n33880 1\n21211 1\n... ..\n9307 1\n16819 1\n57173 1\n1282 1\n23624 1\n\n[21346 rows x 1 columns]",
229
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>15194</th>\n <td>1</td>\n </tr>\n <tr>\n <th>23642</th>\n <td>1</td>\n </tr>\n <tr>\n <th>57737</th>\n <td>1</td>\n </tr>\n <tr>\n <th>33880</th>\n <td>1</td>\n </tr>\n <tr>\n <th>21211</th>\n <td>1</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n </tr>\n <tr>\n <th>9307</th>\n <td>1</td>\n </tr>\n <tr>\n <th>16819</th>\n <td>1</td>\n </tr>\n <tr>\n <th>57173</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1282</th>\n <td>1</td>\n </tr>\n <tr>\n <th>23624</th>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n<p>21346 rows × 1 columns</p>\n</div>"
230
  },
231
- "execution_count": 140,
232
  "metadata": {},
233
  "output_type": "execute_result"
234
  }
235
  ],
236
  "source": [
237
- "t"
238
  ],
239
  "metadata": {
240
  "collapsed": false,
@@ -245,20 +149,20 @@
245
  },
246
  {
247
  "cell_type": "code",
248
- "execution_count": 152,
249
  "outputs": [
250
  {
251
  "data": {
252
- "text/plain": "0.1987362209568153"
 
253
  },
254
- "execution_count": 152,
255
  "metadata": {},
256
  "output_type": "execute_result"
257
  }
258
  ],
259
  "source": [
260
- "import numpy as np\n",
261
- "np.random.random()"
262
  ],
263
  "metadata": {
264
  "collapsed": false,
@@ -320,98 +224,6 @@
320
  }
321
  }
322
  },
323
- {
324
- "cell_type": "code",
325
- "execution_count": 154,
326
- "outputs": [
327
- {
328
- "data": {
329
- "text/plain": "7730 0\n17594 0\n14481 0\n20003 0\n19981 0\n ..\n20159 0\n54905 0\n22035 0\n32253 1\n18362 0\nName: visit, Length: 21347, dtype: int64"
330
- },
331
- "execution_count": 154,
332
- "metadata": {},
333
- "output_type": "execute_result"
334
- }
335
- ],
336
- "source": [
337
- "y_val"
338
- ],
339
- "metadata": {
340
- "collapsed": false,
341
- "pycharm": {
342
- "name": "#%%\n"
343
- }
344
- }
345
- },
346
- {
347
- "cell_type": "code",
348
- "execution_count": 155,
349
- "outputs": [
350
- {
351
- "data": {
352
- "text/plain": "7730 0\n17594 1\n14481 1\n20003 1\n19981 0\n ..\n20159 1\n54905 0\n22035 0\n32253 1\n18362 0\nName: segment, Length: 21347, dtype: int64"
353
- },
354
- "execution_count": 155,
355
- "metadata": {},
356
- "output_type": "execute_result"
357
- }
358
- ],
359
- "source": [
360
- "trmnt_val"
361
- ],
362
- "metadata": {
363
- "collapsed": false,
364
- "pycharm": {
365
- "name": "#%%\n"
366
- }
367
- }
368
- },
369
- {
370
- "cell_type": "code",
371
- "execution_count": 153,
372
- "outputs": [
373
- {
374
- "data": {
375
- "text/plain": "0.07205126328781693"
376
- },
377
- "execution_count": 153,
378
- "metadata": {},
379
- "output_type": "execute_result"
380
- }
381
- ],
382
- "source": [
383
- "sm_score"
384
- ],
385
- "metadata": {
386
- "collapsed": false,
387
- "pycharm": {
388
- "name": "#%%\n"
389
- }
390
- }
391
- },
392
- {
393
- "cell_type": "code",
394
- "execution_count": 121,
395
- "outputs": [
396
- {
397
- "data": {
398
- "text/plain": "array([ 0.03210443, 0.02052168, -0.00873204, ..., 0.04017716,\n 0.03415103, 0.04917549])"
399
- },
400
- "execution_count": 121,
401
- "metadata": {},
402
- "output_type": "execute_result"
403
- }
404
- ],
405
- "source": [
406
- "uplift_sm"
407
- ],
408
- "metadata": {
409
- "collapsed": false,
410
- "pycharm": {
411
- "name": "#%%\n"
412
- }
413
- }
414
- },
415
  {
416
  "cell_type": "code",
417
  "execution_count": 14,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 170,
6
  "metadata": {
7
  "collapsed": true,
8
  "pycharm": {
 
15
  "import pandas as pd\n",
16
  "from sklearn.model_selection import train_test_split\n",
17
  "from sklift.datasets import fetch_hillstrom\n",
18
+ "from sklift.metrics import uplift_at_k, uplift_by_percentile, weighted_average_uplift\n",
19
  "from sklift.viz import plot_uplift_preds\n",
20
  "from sklift.models import SoloModel\n",
21
  "from catboost import CatBoostClassifier\n",
 
24
  },
25
  {
26
  "cell_type": "code",
27
+ "execution_count": 157,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "outputs": [],
29
  "source": [
30
  "def get_data() -> tuple[Any, Any, Any]:\n",
 
50
  },
51
  {
52
  "cell_type": "code",
53
+ "execution_count": 171,
54
  "outputs": [],
55
  "source": [
56
+ "data, target, treatment = get_data()\n",
57
+ "uplift = [1 for _ in data.index]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  ],
59
  "metadata": {
60
  "collapsed": false,
 
65
  },
66
  {
67
  "cell_type": "code",
68
+ "execution_count": 172,
69
  "outputs": [
70
  {
71
  "data": {
72
+ "text/plain": "0.04515395574087702"
73
  },
74
+ "execution_count": 172,
75
  "metadata": {},
76
  "output_type": "execute_result"
77
  }
78
  ],
79
  "source": [
80
+ "weighted_average_uplift(target, uplift, treatment)"
81
  ],
82
  "metadata": {
83
  "collapsed": false,
 
88
  },
89
  {
90
  "cell_type": "code",
91
+ "execution_count": 159,
92
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  "source": [
94
+ "X_train, X_val, y_train, y_val, trmnt_train, trmnt_val = train_test_split(\n",
95
+ " data, target, treatment, test_size=0.5, random_state=42\n",
96
+ ")\n",
97
+ "\n",
98
+ "models_results = {\n",
99
+ " 'approach': [],\n",
100
+ " 'uplift@30%': []\n",
101
+ "}"
102
  ],
103
  "metadata": {
104
  "collapsed": false,
 
109
  },
110
  {
111
  "cell_type": "code",
112
+ "execution_count": 162,
113
  "outputs": [],
114
  "source": [
115
+ "new_val = X_val.loc[X_val[X_val['newbie'] == 1].index]\n",
116
+ "new_y = y_val.loc[new_val.index]\n",
117
+ "new_trmt = trmnt_val.loc[new_val.index]\n",
118
+ "uplift = [np.random.random() for _ in new_val.index]"
119
  ],
120
  "metadata": {
121
  "collapsed": false,
 
126
  },
127
  {
128
  "cell_type": "code",
129
+ "execution_count": 163,
130
  "outputs": [
131
  {
132
  "data": {
133
+ "text/plain": "0.07056871131907891"
 
134
  },
135
+ "execution_count": 163,
136
  "metadata": {},
137
  "output_type": "execute_result"
138
  }
139
  ],
140
  "source": [
141
+ "uplift_at_k(y_true=new_y, uplift=uplift, treatment=new_trmt, strategy='by_group', k=0.3)"
142
  ],
143
  "metadata": {
144
  "collapsed": false,
 
149
  },
150
  {
151
  "cell_type": "code",
152
+ "execution_count": 165,
153
  "outputs": [
154
  {
155
  "data": {
156
+ "text/plain": " n_treatment n_control response_rate_treatment \\\npercentile \n0-10 549 515 0.149362 \n10-20 553 511 0.113924 \n20-30 511 553 0.138943 \n30-40 546 518 0.130037 \n40-50 544 520 0.156250 \n50-60 542 521 0.153137 \n60-70 518 545 0.106178 \n70-80 529 534 0.130435 \n80-90 528 535 0.128788 \n90-100 539 524 0.116883 \n\n response_rate_control uplift \npercentile \n0-10 0.054369 0.094994 \n10-20 0.066536 0.047388 \n20-30 0.070524 0.068419 \n30-40 0.090734 0.039303 \n40-50 0.080769 0.075481 \n50-60 0.078695 0.074442 \n60-70 0.056881 0.049297 \n70-80 0.071161 0.059274 \n80-90 0.069159 0.059629 \n90-100 0.085878 0.031005 ",
157
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>n_treatment</th>\n <th>n_control</th>\n <th>response_rate_treatment</th>\n <th>response_rate_control</th>\n <th>uplift</th>\n </tr>\n <tr>\n <th>percentile</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0-10</th>\n <td>549</td>\n <td>515</td>\n <td>0.149362</td>\n <td>0.054369</td>\n <td>0.094994</td>\n </tr>\n <tr>\n <th>10-20</th>\n <td>553</td>\n <td>511</td>\n <td>0.113924</td>\n <td>0.066536</td>\n <td>0.047388</td>\n </tr>\n <tr>\n <th>20-30</th>\n <td>511</td>\n <td>553</td>\n <td>0.138943</td>\n <td>0.070524</td>\n <td>0.068419</td>\n </tr>\n <tr>\n <th>30-40</th>\n <td>546</td>\n <td>518</td>\n <td>0.130037</td>\n <td>0.090734</td>\n <td>0.039303</td>\n </tr>\n <tr>\n <th>40-50</th>\n <td>544</td>\n <td>520</td>\n <td>0.156250</td>\n <td>0.080769</td>\n <td>0.075481</td>\n </tr>\n <tr>\n <th>50-60</th>\n <td>542</td>\n <td>521</td>\n <td>0.153137</td>\n <td>0.078695</td>\n <td>0.074442</td>\n </tr>\n <tr>\n <th>60-70</th>\n <td>518</td>\n <td>545</td>\n <td>0.106178</td>\n <td>0.056881</td>\n <td>0.049297</td>\n </tr>\n <tr>\n <th>70-80</th>\n <td>529</td>\n <td>534</td>\n <td>0.130435</td>\n <td>0.071161</td>\n <td>0.059274</td>\n </tr>\n <tr>\n <th>80-90</th>\n <td>528</td>\n <td>535</td>\n <td>0.128788</td>\n <td>0.069159</td>\n <td>0.059629</td>\n </tr>\n <tr>\n <th>90-100</th>\n <td>539</td>\n <td>524</td>\n <td>0.116883</td>\n <td>0.085878</td>\n <td>0.031005</td>\n </tr>\n </tbody>\n</table>\n</div>"
158
  },
159
+ "execution_count": 165,
160
  "metadata": {},
161
  "output_type": "execute_result"
162
  }
163
  ],
164
  "source": [
165
+ "uplift_by_percentile(new_y, uplift, new_trmt)"
 
166
  ],
167
  "metadata": {
168
  "collapsed": false,
 
224
  }
225
  }
226
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  {
228
  "cell_type": "code",
229
  "execution_count": 14,
src/tools.py CHANGED
@@ -141,8 +141,7 @@ def uplift_by_percentile():
141
 
142
 
143
  def get_weighted_average_uplift(target_test: pd.DataFrame, uplift, treatment_test: pd.DataFrame):
144
- res = weighted_average_uplift(target_test, uplift, treatment_test)
145
- return res
146
 
147
 
148
  def get_newbie_plot(data):
 
141
 
142
 
143
  def get_weighted_average_uplift(target_test: pd.DataFrame, uplift, treatment_test: pd.DataFrame):
144
+ return weighted_average_uplift(target_test, uplift, treatment_test)
 
145
 
146
 
147
  def get_newbie_plot(data):
src/web_app.py CHANGED
@@ -157,13 +157,12 @@ elif not surburban and not urban and not rural:
157
 
158
  if not disabled:
159
  filtered_dataset = tools.filter_data(data_test, filters)
 
 
160
  # значение uplift для записей тех клиентов, который выбрал пользователь равен 1
161
  import numpy as np
162
- uplift = pd.DataFrame(
163
- data=[np.random.random() for _ in filtered_dataset.index],
164
- index=filtered_dataset.index
165
- )
166
- target_filtered =target_test.loc[filtered_dataset.index]
167
  treatment_filtered = treatment_test.loc[filtered_dataset.index]
168
  sample_size = 7 if filtered_dataset.shape[0] >= 7 else filtered_dataset.shape[0]
169
  example = filtered_dataset.sample(sample_size)
@@ -174,11 +173,8 @@ if not disabled:
174
 
175
  send_promo = st.button('Отправить рекламу и посмотреть результат', disabled=disabled)
176
  if send_promo:
177
- from sklift.metrics import uplift_by_percentile, uplift_at_k
178
- st.write(uplift_by_percentile(y_true=target_filtered, uplift=uplift, treatment=treatment_filtered))
179
- st.write(uplift_at_k(y_true=target_filtered, uplift=uplift, treatment=treatment_filtered, strategy='by_group', k=0.3))
180
  # st.write(tools.get_weighted_average_uplift(target_filtered, uplift, treatment_filtered))
181
 
182
  # st.write('Если известно, на какой процент пользователей необходимо воздействовать, укажите это ниже')
183
  # st.slider(label='Процент пользователей', min_value=0, max_value=100, value=100)
184
-
 
157
 
158
  if not disabled:
159
  filtered_dataset = tools.filter_data(data_test, filters)
160
+ if filtered_dataset is None:
161
+ st.error('Не найдено пользователей для данных фильтров. Попробуйте изменить фильтры.')
162
  # значение uplift для записей тех клиентов, который выбрал пользователь равен 1
163
  import numpy as np
164
+ uplift = [1 for _ in filtered_dataset.index]
165
+ target_filtered = target_test.loc[filtered_dataset.index]
 
 
 
166
  treatment_filtered = treatment_test.loc[filtered_dataset.index]
167
  sample_size = 7 if filtered_dataset.shape[0] >= 7 else filtered_dataset.shape[0]
168
  example = filtered_dataset.sample(sample_size)
 
173
 
174
  send_promo = st.button('Отправить рекламу и посмотреть результат', disabled=disabled)
175
  if send_promo:
176
+ st.write(tools.get_weighted_average_uplift(target_filtered, uplift, treatment_filtered))
 
 
177
  # st.write(tools.get_weighted_average_uplift(target_filtered, uplift, treatment_filtered))
178
 
179
  # st.write('Если известно, на какой процент пользователей необходимо воздействовать, укажите это ниже')
180
  # st.slider(label='Процент пользователей', min_value=0, max_value=100, value=100)