Spaces:
Runtime error
Runtime error
HardWorkingStation
committed on
Commit
•
402eb1c
1
Parent(s):
bbc09e3
Initial commit
Browse files
- src/test.ipynb +34 -222
- src/tools.py +1 -2
- src/web_app.py +5 -9
src/test.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {
|
7 |
"collapsed": true,
|
8 |
"pycharm": {
|
@@ -15,7 +15,7 @@
|
|
15 |
"import pandas as pd\n",
|
16 |
"from sklearn.model_selection import train_test_split\n",
|
17 |
"from sklift.datasets import fetch_hillstrom\n",
|
18 |
-
"from sklift.metrics import uplift_at_k\n",
|
19 |
"from sklift.viz import plot_uplift_preds\n",
|
20 |
"from sklift.models import SoloModel\n",
|
21 |
"from catboost import CatBoostClassifier\n",
|
@@ -24,32 +24,7 @@
|
|
24 |
},
|
25 |
{
|
26 |
"cell_type": "code",
|
27 |
-
"execution_count":
|
28 |
-
"outputs": [
|
29 |
-
{
|
30 |
-
"data": {
|
31 |
-
"text/plain": "{'new_filter': 'filter'}"
|
32 |
-
},
|
33 |
-
"execution_count": 2,
|
34 |
-
"metadata": {},
|
35 |
-
"output_type": "execute_result"
|
36 |
-
}
|
37 |
-
],
|
38 |
-
"source": [
|
39 |
-
"filters = {}\n",
|
40 |
-
"filters['new_filter'] = 'filter'\n",
|
41 |
-
"filters"
|
42 |
-
],
|
43 |
-
"metadata": {
|
44 |
-
"collapsed": false,
|
45 |
-
"pycharm": {
|
46 |
-
"name": "#%%\n"
|
47 |
-
}
|
48 |
-
}
|
49 |
-
},
|
50 |
-
{
|
51 |
-
"cell_type": "code",
|
52 |
-
"execution_count": 115,
|
53 |
"outputs": [],
|
54 |
"source": [
|
55 |
"def get_data() -> tuple[Any, Any, Any]:\n",
|
@@ -75,54 +50,11 @@
|
|
75 |
},
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
-
"execution_count":
|
79 |
"outputs": [],
|
80 |
"source": [
|
81 |
-
"data, target, treatment = get_data()"
|
82 |
-
|
83 |
-
"metadata": {
|
84 |
-
"collapsed": false,
|
85 |
-
"pycharm": {
|
86 |
-
"name": "#%%\n"
|
87 |
-
}
|
88 |
-
}
|
89 |
-
},
|
90 |
-
{
|
91 |
-
"cell_type": "code",
|
92 |
-
"execution_count": 117,
|
93 |
-
"outputs": [],
|
94 |
-
"source": [
|
95 |
-
"X_train, X_val, y_train, y_val, trmnt_train, trmnt_val = train_test_split(\n",
|
96 |
-
" data, target, treatment, test_size=0.5, random_state=42\n",
|
97 |
-
")\n",
|
98 |
-
"\n",
|
99 |
-
"models_results = {\n",
|
100 |
-
" 'approach': [],\n",
|
101 |
-
" 'uplift@30%': []\n",
|
102 |
-
"}"
|
103 |
-
],
|
104 |
-
"metadata": {
|
105 |
-
"collapsed": false,
|
106 |
-
"pycharm": {
|
107 |
-
"name": "#%%\n"
|
108 |
-
}
|
109 |
-
}
|
110 |
-
},
|
111 |
-
{
|
112 |
-
"cell_type": "code",
|
113 |
-
"execution_count": 118,
|
114 |
-
"outputs": [
|
115 |
-
{
|
116 |
-
"data": {
|
117 |
-
"text/plain": "0 0.871545\n1 0.128455\nName: visit, dtype: float64"
|
118 |
-
},
|
119 |
-
"execution_count": 118,
|
120 |
-
"metadata": {},
|
121 |
-
"output_type": "execute_result"
|
122 |
-
}
|
123 |
-
],
|
124 |
-
"source": [
|
125 |
-
"y_train.value_counts(normalize=True)"
|
126 |
],
|
127 |
"metadata": {
|
128 |
"collapsed": false,
|
@@ -133,19 +65,19 @@
|
|
133 |
},
|
134 |
{
|
135 |
"cell_type": "code",
|
136 |
-
"execution_count":
|
137 |
"outputs": [
|
138 |
{
|
139 |
"data": {
|
140 |
-
"text/plain": "0
|
141 |
},
|
142 |
-
"execution_count":
|
143 |
"metadata": {},
|
144 |
"output_type": "execute_result"
|
145 |
}
|
146 |
],
|
147 |
"source": [
|
148 |
-
"
|
149 |
],
|
150 |
"metadata": {
|
151 |
"collapsed": false,
|
@@ -156,44 +88,17 @@
|
|
156 |
},
|
157 |
{
|
158 |
"cell_type": "code",
|
159 |
-
"execution_count":
|
160 |
-
"outputs": [
|
161 |
-
{
|
162 |
-
"data": {
|
163 |
-
"text/plain": " recency history_segment history mens womens zip_code newbie \\\n609 9 3) $200 - $350 212.32 1 0 Surburban 0 \n51952 3 6) $750 - $1,000 849.16 1 1 Surburban 1 \n33629 9 1) $0 - $100 43.22 1 0 Surburban 1 \n22103 8 1) $0 - $100 29.99 0 1 Surburban 0 \n21350 7 2) $100 - $200 164.07 1 0 Surburban 1 \n... ... ... ... ... ... ... ... \n9307 1 2) $100 - $200 110.45 0 1 Urban 0 \n16819 1 1) $0 - $100 88.04 0 1 Surburban 0 \n57173 1 1) $0 - $100 72.63 0 1 Rural 0 \n1282 3 4) $350 - $500 366.16 0 1 Rural 0 \n23624 9 1) $0 - $100 36.87 1 0 Urban 1 \n\n channel \n609 Multichannel \n51952 Multichannel \n33629 Phone \n22103 Web \n21350 Phone \n... ... \n9307 Phone \n16819 Phone \n57173 Phone \n1282 Phone \n23624 Web \n\n[29885 rows x 8 columns]",
|
164 |
-
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>recency</th>\n <th>history_segment</th>\n <th>history</th>\n <th>mens</th>\n <th>womens</th>\n <th>zip_code</th>\n <th>newbie</th>\n <th>channel</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>609</th>\n <td>9</td>\n <td>3) $200 - $350</td>\n <td>212.32</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>0</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>51952</th>\n <td>3</td>\n <td>6) $750 - $1,000</td>\n <td>849.16</td>\n <td>1</td>\n <td>1</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>33629</th>\n <td>9</td>\n <td>1) $0 - $100</td>\n <td>43.22</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>22103</th>\n <td>8</td>\n <td>1) $0 - $100</td>\n <td>29.99</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>0</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>21350</th>\n <td>7</td>\n <td>2) $100 - $200</td>\n <td>164.07</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>9307</th>\n <td>1</td>\n <td>2) $100 - $200</td>\n <td>110.45</td>\n <td>0</td>\n <td>1</td>\n <td>Urban</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>16819</th>\n <td>1</td>\n <td>1) $0 - $100</td>\n <td>88.04</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>57173</th>\n <td>1</td>\n <td>1) $0 - $100</td>\n <td>72.63</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n 
<th>1282</th>\n <td>3</td>\n <td>4) $350 - $500</td>\n <td>366.16</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>0</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>23624</th>\n <td>9</td>\n <td>1) $0 - $100</td>\n <td>36.87</td>\n <td>1</td>\n <td>0</td>\n <td>Urban</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n </tbody>\n</table>\n<p>29885 rows × 8 columns</p>\n</div>"
|
165 |
-
},
|
166 |
-
"execution_count": 113,
|
167 |
-
"metadata": {},
|
168 |
-
"output_type": "execute_result"
|
169 |
-
}
|
170 |
-
],
|
171 |
-
"source": [
|
172 |
-
"X_train"
|
173 |
-
],
|
174 |
-
"metadata": {
|
175 |
-
"collapsed": false,
|
176 |
-
"pycharm": {
|
177 |
-
"name": "#%%\n"
|
178 |
-
}
|
179 |
-
}
|
180 |
-
},
|
181 |
-
{
|
182 |
-
"cell_type": "code",
|
183 |
-
"execution_count": 114,
|
184 |
-
"outputs": [
|
185 |
-
{
|
186 |
-
"data": {
|
187 |
-
"text/plain": " recency history_segment history mens womens zip_code newbie \\\n7730 3 5) $500 - $750 503.73 0 1 Rural 1 \n17594 5 2) $100 - $200 163.39 1 0 Urban 0 \n14481 11 3) $200 - $350 287.77 1 0 Rural 1 \n20003 3 1) $0 - $100 29.99 0 1 Surburban 1 \n19981 5 2) $100 - $200 131.51 0 1 Urban 1 \n... ... ... ... ... ... ... ... \n41828 11 4) $350 - $500 457.09 0 1 Surburban 1 \n49354 2 1) $0 - $100 29.99 0 1 Rural 1 \n12063 9 2) $100 - $200 147.48 1 0 Rural 1 \n3757 10 2) $100 - $200 128.78 1 0 Surburban 1 \n10708 6 2) $100 - $200 138.72 1 0 Urban 0 \n\n channel \n7730 Multichannel \n17594 Web \n14481 Web \n20003 Web \n19981 Web \n... ... \n41828 Multichannel \n49354 Phone \n12063 Phone \n3757 Phone \n10708 Web \n\n[12808 rows x 8 columns]",
|
188 |
-
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>recency</th>\n <th>history_segment</th>\n <th>history</th>\n <th>mens</th>\n <th>womens</th>\n <th>zip_code</th>\n <th>newbie</th>\n <th>channel</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>7730</th>\n <td>3</td>\n <td>5) $500 - $750</td>\n <td>503.73</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>1</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>17594</th>\n <td>5</td>\n <td>2) $100 - $200</td>\n <td>163.39</td>\n <td>1</td>\n <td>0</td>\n <td>Urban</td>\n <td>0</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>14481</th>\n <td>11</td>\n <td>3) $200 - $350</td>\n <td>287.77</td>\n <td>1</td>\n <td>0</td>\n <td>Rural</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>20003</th>\n <td>3</td>\n <td>1) $0 - $100</td>\n <td>29.99</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>19981</th>\n <td>5</td>\n <td>2) $100 - $200</td>\n <td>131.51</td>\n <td>0</td>\n <td>1</td>\n <td>Urban</td>\n <td>1</td>\n <td>Web</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>41828</th>\n <td>11</td>\n <td>4) $350 - $500</td>\n <td>457.09</td>\n <td>0</td>\n <td>1</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Multichannel</td>\n </tr>\n <tr>\n <th>49354</th>\n <td>2</td>\n <td>1) $0 - $100</td>\n <td>29.99</td>\n <td>0</td>\n <td>1</td>\n <td>Rural</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>12063</th>\n <td>9</td>\n <td>2) $100 - $200</td>\n <td>147.48</td>\n <td>1</td>\n <td>0</td>\n <td>Rural</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>3757</th>\n 
<td>10</td>\n <td>2) $100 - $200</td>\n <td>128.78</td>\n <td>1</td>\n <td>0</td>\n <td>Surburban</td>\n <td>1</td>\n <td>Phone</td>\n </tr>\n <tr>\n <th>10708</th>\n <td>6</td>\n <td>2) $100 - $200</td>\n <td>138.72</td>\n <td>1</td>\n <td>0</td>\n <td>Urban</td>\n <td>0</td>\n <td>Web</td>\n </tr>\n </tbody>\n</table>\n<p>12808 rows × 8 columns</p>\n</div>"
|
189 |
-
},
|
190 |
-
"execution_count": 114,
|
191 |
-
"metadata": {},
|
192 |
-
"output_type": "execute_result"
|
193 |
-
}
|
194 |
-
],
|
195 |
"source": [
|
196 |
-
"X_val"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
],
|
198 |
"metadata": {
|
199 |
"collapsed": false,
|
@@ -204,13 +109,13 @@
|
|
204 |
},
|
205 |
{
|
206 |
"cell_type": "code",
|
207 |
-
"execution_count":
|
208 |
"outputs": [],
|
209 |
"source": [
|
210 |
-
"
|
211 |
-
"
|
212 |
-
"
|
213 |
-
"
|
214 |
],
|
215 |
"metadata": {
|
216 |
"collapsed": false,
|
@@ -221,20 +126,19 @@
|
|
221 |
},
|
222 |
{
|
223 |
"cell_type": "code",
|
224 |
-
"execution_count":
|
225 |
"outputs": [
|
226 |
{
|
227 |
"data": {
|
228 |
-
"text/plain": "
|
229 |
-
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>15194</th>\n <td>1</td>\n </tr>\n <tr>\n <th>23642</th>\n <td>1</td>\n </tr>\n <tr>\n <th>57737</th>\n <td>1</td>\n </tr>\n <tr>\n <th>33880</th>\n <td>1</td>\n </tr>\n <tr>\n <th>21211</th>\n <td>1</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n </tr>\n <tr>\n <th>9307</th>\n <td>1</td>\n </tr>\n <tr>\n <th>16819</th>\n <td>1</td>\n </tr>\n <tr>\n <th>57173</th>\n <td>1</td>\n </tr>\n <tr>\n <th>1282</th>\n <td>1</td>\n </tr>\n <tr>\n <th>23624</th>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n<p>21346 rows × 1 columns</p>\n</div>"
|
230 |
},
|
231 |
-
"execution_count":
|
232 |
"metadata": {},
|
233 |
"output_type": "execute_result"
|
234 |
}
|
235 |
],
|
236 |
"source": [
|
237 |
-
"
|
238 |
],
|
239 |
"metadata": {
|
240 |
"collapsed": false,
|
@@ -245,20 +149,20 @@
|
|
245 |
},
|
246 |
{
|
247 |
"cell_type": "code",
|
248 |
-
"execution_count":
|
249 |
"outputs": [
|
250 |
{
|
251 |
"data": {
|
252 |
-
"text/plain": "0.
|
|
|
253 |
},
|
254 |
-
"execution_count":
|
255 |
"metadata": {},
|
256 |
"output_type": "execute_result"
|
257 |
}
|
258 |
],
|
259 |
"source": [
|
260 |
-
"
|
261 |
-
"np.random.random()"
|
262 |
],
|
263 |
"metadata": {
|
264 |
"collapsed": false,
|
@@ -320,98 +224,6 @@
|
|
320 |
}
|
321 |
}
|
322 |
},
|
323 |
-
{
|
324 |
-
"cell_type": "code",
|
325 |
-
"execution_count": 154,
|
326 |
-
"outputs": [
|
327 |
-
{
|
328 |
-
"data": {
|
329 |
-
"text/plain": "7730 0\n17594 0\n14481 0\n20003 0\n19981 0\n ..\n20159 0\n54905 0\n22035 0\n32253 1\n18362 0\nName: visit, Length: 21347, dtype: int64"
|
330 |
-
},
|
331 |
-
"execution_count": 154,
|
332 |
-
"metadata": {},
|
333 |
-
"output_type": "execute_result"
|
334 |
-
}
|
335 |
-
],
|
336 |
-
"source": [
|
337 |
-
"y_val"
|
338 |
-
],
|
339 |
-
"metadata": {
|
340 |
-
"collapsed": false,
|
341 |
-
"pycharm": {
|
342 |
-
"name": "#%%\n"
|
343 |
-
}
|
344 |
-
}
|
345 |
-
},
|
346 |
-
{
|
347 |
-
"cell_type": "code",
|
348 |
-
"execution_count": 155,
|
349 |
-
"outputs": [
|
350 |
-
{
|
351 |
-
"data": {
|
352 |
-
"text/plain": "7730 0\n17594 1\n14481 1\n20003 1\n19981 0\n ..\n20159 1\n54905 0\n22035 0\n32253 1\n18362 0\nName: segment, Length: 21347, dtype: int64"
|
353 |
-
},
|
354 |
-
"execution_count": 155,
|
355 |
-
"metadata": {},
|
356 |
-
"output_type": "execute_result"
|
357 |
-
}
|
358 |
-
],
|
359 |
-
"source": [
|
360 |
-
"trmnt_val"
|
361 |
-
],
|
362 |
-
"metadata": {
|
363 |
-
"collapsed": false,
|
364 |
-
"pycharm": {
|
365 |
-
"name": "#%%\n"
|
366 |
-
}
|
367 |
-
}
|
368 |
-
},
|
369 |
-
{
|
370 |
-
"cell_type": "code",
|
371 |
-
"execution_count": 153,
|
372 |
-
"outputs": [
|
373 |
-
{
|
374 |
-
"data": {
|
375 |
-
"text/plain": "0.07205126328781693"
|
376 |
-
},
|
377 |
-
"execution_count": 153,
|
378 |
-
"metadata": {},
|
379 |
-
"output_type": "execute_result"
|
380 |
-
}
|
381 |
-
],
|
382 |
-
"source": [
|
383 |
-
"sm_score"
|
384 |
-
],
|
385 |
-
"metadata": {
|
386 |
-
"collapsed": false,
|
387 |
-
"pycharm": {
|
388 |
-
"name": "#%%\n"
|
389 |
-
}
|
390 |
-
}
|
391 |
-
},
|
392 |
-
{
|
393 |
-
"cell_type": "code",
|
394 |
-
"execution_count": 121,
|
395 |
-
"outputs": [
|
396 |
-
{
|
397 |
-
"data": {
|
398 |
-
"text/plain": "array([ 0.03210443, 0.02052168, -0.00873204, ..., 0.04017716,\n 0.03415103, 0.04917549])"
|
399 |
-
},
|
400 |
-
"execution_count": 121,
|
401 |
-
"metadata": {},
|
402 |
-
"output_type": "execute_result"
|
403 |
-
}
|
404 |
-
],
|
405 |
-
"source": [
|
406 |
-
"uplift_sm"
|
407 |
-
],
|
408 |
-
"metadata": {
|
409 |
-
"collapsed": false,
|
410 |
-
"pycharm": {
|
411 |
-
"name": "#%%\n"
|
412 |
-
}
|
413 |
-
}
|
414 |
-
},
|
415 |
{
|
416 |
"cell_type": "code",
|
417 |
"execution_count": 14,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 170,
|
6 |
"metadata": {
|
7 |
"collapsed": true,
|
8 |
"pycharm": {
|
|
|
15 |
"import pandas as pd\n",
|
16 |
"from sklearn.model_selection import train_test_split\n",
|
17 |
"from sklift.datasets import fetch_hillstrom\n",
|
18 |
+
"from sklift.metrics import uplift_at_k, uplift_by_percentile, weighted_average_uplift\n",
|
19 |
"from sklift.viz import plot_uplift_preds\n",
|
20 |
"from sklift.models import SoloModel\n",
|
21 |
"from catboost import CatBoostClassifier\n",
|
|
|
24 |
},
|
25 |
{
|
26 |
"cell_type": "code",
|
27 |
+
"execution_count": 157,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
"outputs": [],
|
29 |
"source": [
|
30 |
"def get_data() -> tuple[Any, Any, Any]:\n",
|
|
|
50 |
},
|
51 |
{
|
52 |
"cell_type": "code",
|
53 |
+
"execution_count": 171,
|
54 |
"outputs": [],
|
55 |
"source": [
|
56 |
+
"data, target, treatment = get_data()\n",
|
57 |
+
"uplift = [1 for _ in data.index]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
],
|
59 |
"metadata": {
|
60 |
"collapsed": false,
|
|
|
65 |
},
|
66 |
{
|
67 |
"cell_type": "code",
|
68 |
+
"execution_count": 172,
|
69 |
"outputs": [
|
70 |
{
|
71 |
"data": {
|
72 |
+
"text/plain": "0.04515395574087702"
|
73 |
},
|
74 |
+
"execution_count": 172,
|
75 |
"metadata": {},
|
76 |
"output_type": "execute_result"
|
77 |
}
|
78 |
],
|
79 |
"source": [
|
80 |
+
"weighted_average_uplift(target, uplift, treatment)"
|
81 |
],
|
82 |
"metadata": {
|
83 |
"collapsed": false,
|
|
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
+
"execution_count": 159,
|
92 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
"source": [
|
94 |
+
"X_train, X_val, y_train, y_val, trmnt_train, trmnt_val = train_test_split(\n",
|
95 |
+
" data, target, treatment, test_size=0.5, random_state=42\n",
|
96 |
+
")\n",
|
97 |
+
"\n",
|
98 |
+
"models_results = {\n",
|
99 |
+
" 'approach': [],\n",
|
100 |
+
" 'uplift@30%': []\n",
|
101 |
+
"}"
|
102 |
],
|
103 |
"metadata": {
|
104 |
"collapsed": false,
|
|
|
109 |
},
|
110 |
{
|
111 |
"cell_type": "code",
|
112 |
+
"execution_count": 162,
|
113 |
"outputs": [],
|
114 |
"source": [
|
115 |
+
"new_val = X_val.loc[X_val[X_val['newbie'] == 1].index]\n",
|
116 |
+
"new_y = y_val.loc[new_val.index]\n",
|
117 |
+
"new_trmt = trmnt_val.loc[new_val.index]\n",
|
118 |
+
"uplift = [np.random.random() for _ in new_val.index]"
|
119 |
],
|
120 |
"metadata": {
|
121 |
"collapsed": false,
|
|
|
126 |
},
|
127 |
{
|
128 |
"cell_type": "code",
|
129 |
+
"execution_count": 163,
|
130 |
"outputs": [
|
131 |
{
|
132 |
"data": {
|
133 |
+
"text/plain": "0.07056871131907891"
|
|
|
134 |
},
|
135 |
+
"execution_count": 163,
|
136 |
"metadata": {},
|
137 |
"output_type": "execute_result"
|
138 |
}
|
139 |
],
|
140 |
"source": [
|
141 |
+
"uplift_at_k(y_true=new_y, uplift=uplift, treatment=new_trmt, strategy='by_group', k=0.3)"
|
142 |
],
|
143 |
"metadata": {
|
144 |
"collapsed": false,
|
|
|
149 |
},
|
150 |
{
|
151 |
"cell_type": "code",
|
152 |
+
"execution_count": 165,
|
153 |
"outputs": [
|
154 |
{
|
155 |
"data": {
|
156 |
+
"text/plain": " n_treatment n_control response_rate_treatment \\\npercentile \n0-10 549 515 0.149362 \n10-20 553 511 0.113924 \n20-30 511 553 0.138943 \n30-40 546 518 0.130037 \n40-50 544 520 0.156250 \n50-60 542 521 0.153137 \n60-70 518 545 0.106178 \n70-80 529 534 0.130435 \n80-90 528 535 0.128788 \n90-100 539 524 0.116883 \n\n response_rate_control uplift \npercentile \n0-10 0.054369 0.094994 \n10-20 0.066536 0.047388 \n20-30 0.070524 0.068419 \n30-40 0.090734 0.039303 \n40-50 0.080769 0.075481 \n50-60 0.078695 0.074442 \n60-70 0.056881 0.049297 \n70-80 0.071161 0.059274 \n80-90 0.069159 0.059629 \n90-100 0.085878 0.031005 ",
|
157 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>n_treatment</th>\n <th>n_control</th>\n <th>response_rate_treatment</th>\n <th>response_rate_control</th>\n <th>uplift</th>\n </tr>\n <tr>\n <th>percentile</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0-10</th>\n <td>549</td>\n <td>515</td>\n <td>0.149362</td>\n <td>0.054369</td>\n <td>0.094994</td>\n </tr>\n <tr>\n <th>10-20</th>\n <td>553</td>\n <td>511</td>\n <td>0.113924</td>\n <td>0.066536</td>\n <td>0.047388</td>\n </tr>\n <tr>\n <th>20-30</th>\n <td>511</td>\n <td>553</td>\n <td>0.138943</td>\n <td>0.070524</td>\n <td>0.068419</td>\n </tr>\n <tr>\n <th>30-40</th>\n <td>546</td>\n <td>518</td>\n <td>0.130037</td>\n <td>0.090734</td>\n <td>0.039303</td>\n </tr>\n <tr>\n <th>40-50</th>\n <td>544</td>\n <td>520</td>\n <td>0.156250</td>\n <td>0.080769</td>\n <td>0.075481</td>\n </tr>\n <tr>\n <th>50-60</th>\n <td>542</td>\n <td>521</td>\n <td>0.153137</td>\n <td>0.078695</td>\n <td>0.074442</td>\n </tr>\n <tr>\n <th>60-70</th>\n <td>518</td>\n <td>545</td>\n <td>0.106178</td>\n <td>0.056881</td>\n <td>0.049297</td>\n </tr>\n <tr>\n <th>70-80</th>\n <td>529</td>\n <td>534</td>\n <td>0.130435</td>\n <td>0.071161</td>\n <td>0.059274</td>\n </tr>\n <tr>\n <th>80-90</th>\n <td>528</td>\n <td>535</td>\n <td>0.128788</td>\n <td>0.069159</td>\n <td>0.059629</td>\n </tr>\n <tr>\n <th>90-100</th>\n <td>539</td>\n <td>524</td>\n <td>0.116883</td>\n <td>0.085878</td>\n <td>0.031005</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
158 |
},
|
159 |
+
"execution_count": 165,
|
160 |
"metadata": {},
|
161 |
"output_type": "execute_result"
|
162 |
}
|
163 |
],
|
164 |
"source": [
|
165 |
+
"uplift_by_percentile(new_y, uplift, new_trmt)"
|
|
|
166 |
],
|
167 |
"metadata": {
|
168 |
"collapsed": false,
|
|
|
224 |
}
|
225 |
}
|
226 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
{
|
228 |
"cell_type": "code",
|
229 |
"execution_count": 14,
|
src/tools.py
CHANGED
@@ -141,8 +141,7 @@ def uplift_by_percentile():
|
|
141 |
|
142 |
|
143 |
def get_weighted_average_uplift(target_test: pd.DataFrame, uplift, treatment_test: pd.DataFrame):
|
144 |
-
|
145 |
-
return res
|
146 |
|
147 |
|
148 |
def get_newbie_plot(data):
|
|
|
141 |
|
142 |
|
143 |
def get_weighted_average_uplift(target_test: pd.DataFrame, uplift, treatment_test: pd.DataFrame):
|
144 |
+
return weighted_average_uplift(target_test, uplift, treatment_test)
|
|
|
145 |
|
146 |
|
147 |
def get_newbie_plot(data):
|
src/web_app.py
CHANGED
@@ -157,13 +157,12 @@ elif not surburban and not urban and not rural:
|
|
157 |
|
158 |
if not disabled:
|
159 |
filtered_dataset = tools.filter_data(data_test, filters)
|
|
|
|
|
160 |
# значение uplift для записей тех клиентов, который выбрал пользователь равен 1
|
161 |
import numpy as np
|
162 |
-
uplift =
|
163 |
-
|
164 |
-
index=filtered_dataset.index
|
165 |
-
)
|
166 |
-
target_filtered =target_test.loc[filtered_dataset.index]
|
167 |
treatment_filtered = treatment_test.loc[filtered_dataset.index]
|
168 |
sample_size = 7 if filtered_dataset.shape[0] >= 7 else filtered_dataset.shape[0]
|
169 |
example = filtered_dataset.sample(sample_size)
|
@@ -174,11 +173,8 @@ if not disabled:
|
|
174 |
|
175 |
send_promo = st.button('Отправить рекламу и посмотреть результат', disabled=disabled)
|
176 |
if send_promo:
|
177 |
-
|
178 |
-
st.write(uplift_by_percentile(y_true=target_filtered, uplift=uplift, treatment=treatment_filtered))
|
179 |
-
st.write(uplift_at_k(y_true=target_filtered, uplift=uplift, treatment=treatment_filtered, strategy='by_group', k=0.3))
|
180 |
# st.write(tools.get_weighted_average_uplift(target_filtered, uplift, treatment_filtered))
|
181 |
|
182 |
# st.write('Если известно, на какой процент пользователей необходимо воздействовать, укажите это ниже')
|
183 |
# st.slider(label='Процент пользователей', min_value=0, max_value=100, value=100)
|
184 |
-
|
|
|
157 |
|
158 |
if not disabled:
|
159 |
filtered_dataset = tools.filter_data(data_test, filters)
|
160 |
+
if filtered_dataset is None:
|
161 |
+
st.error('Не найдено пользователей для данных фильтров. Попробуйте изменить фильтры.')
|
162 |
# значение uplift для записей тех клиентов, который выбрал пользователь равен 1
|
163 |
import numpy as np
|
164 |
+
uplift = [1 for _ in filtered_dataset.index]
|
165 |
+
target_filtered = target_test.loc[filtered_dataset.index]
|
|
|
|
|
|
|
166 |
treatment_filtered = treatment_test.loc[filtered_dataset.index]
|
167 |
sample_size = 7 if filtered_dataset.shape[0] >= 7 else filtered_dataset.shape[0]
|
168 |
example = filtered_dataset.sample(sample_size)
|
|
|
173 |
|
174 |
send_promo = st.button('Отправить рекламу и посмотреть результат', disabled=disabled)
|
175 |
if send_promo:
|
176 |
+
st.write(tools.get_weighted_average_uplift(target_filtered, uplift, treatment_filtered))
|
|
|
|
|
177 |
# st.write(tools.get_weighted_average_uplift(target_filtered, uplift, treatment_filtered))
|
178 |
|
179 |
# st.write('Если известно, на какой процент пользователей необходимо воздействовать, укажите это ниже')
|
180 |
# st.slider(label='Процент пользователей', min_value=0, max_value=100, value=100)
|
|