jucamohedano committed on
Commit
6299d25
1 Parent(s): 65f96e2

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +216 -0
  3. config.json +67 -0
  4. geographic.png +0 -0
  5. model.skops +3 -0
  6. permutation-importances.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.skops filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sklearn
3
+ tags:
4
+ - sklearn
5
+ - skops
6
+ - tabular-regression
7
+ model_format: skops
8
+ model_file: model.skops
9
+ widget:
10
+ structuredData:
11
+ AveBedrms:
12
+ - 0.9290780141843972
13
+ - 0.9458483754512635
14
+ - 1.087360594795539
15
+ AveOccup:
16
+ - 3.1134751773049647
17
+ - 3.0613718411552346
18
+ - 3.2657992565055762
19
+ AveRooms:
20
+ - 6.304964539007092
21
+ - 6.945848375451264
22
+ - 3.8884758364312266
23
+ HouseAge:
24
+ - 17.0
25
+ - 15.0
26
+ - 24.0
27
+ Latitude:
28
+ - 34.23
29
+ - 36.84
30
+ - 34.04
31
+ Longitude:
32
+ - -117.41
33
+ - -119.77
34
+ - -118.3
35
+ MedInc:
36
+ - 6.1426
37
+ - 5.3886
38
+ - 1.7109
39
+ Population:
40
+ - 439.0
41
+ - 848.0
42
+ - 1757.0
43
+ ---
44
+
45
+ # Model description
46
+
47
+ Gradient boosting regressor trained on the California Housing dataset.
48
+
49
+ The model is a gradient boosting regressor from sklearn. On top of the standard
50
+ features, it contains predictions from a KNN model. These predictions are calculated
51
+ out of fold, then added on top of the existing features. These features are really
52
+ helpful for decision tree-based models, since those cannot easily learn from geospatial
53
+ data.
54
+
55
+ ## Intended uses & limitations
56
+
57
+ This model is meant for demonstration purposes.
58
+
59
+ ## Training Procedure
60
+
61
+ [More Information Needed]
62
+
63
+ ### Hyperparameters
64
+
65
+ <details>
66
+ <summary> Click to expand </summary>
67
+
68
+ | Hyperparameter | Value |
69
+ |-----------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
70
+ | cv | |
71
+ | estimators | [('knn@5', Pipeline(steps=[('select_cols',<br /> ColumnTransformer(transformers=[('long_and_lat', 'passthrough',<br /> ['Longitude', 'Latitude'])])),<br /> ('knn', KNeighborsRegressor())]))] |
72
+ | final_estimator__alpha | 0.9 |
73
+ | final_estimator__ccp_alpha | 0.0 |
74
+ | final_estimator__criterion | friedman_mse |
75
+ | final_estimator__init | |
76
+ | final_estimator__learning_rate | 0.1 |
77
+ | final_estimator__loss | squared_error |
78
+ | final_estimator__max_depth | 3 |
79
+ | final_estimator__max_features | |
80
+ | final_estimator__max_leaf_nodes | |
81
+ | final_estimator__min_impurity_decrease | 0.0 |
82
+ | final_estimator__min_samples_leaf | 1 |
83
+ | final_estimator__min_samples_split | 2 |
84
+ | final_estimator__min_weight_fraction_leaf | 0.0 |
85
+ | final_estimator__n_estimators | 500 |
86
+ | final_estimator__n_iter_no_change | |
87
+ | final_estimator__random_state | 0 |
88
+ | final_estimator__subsample | 1.0 |
89
+ | final_estimator__tol | 0.0001 |
90
+ | final_estimator__validation_fraction | 0.1 |
91
+ | final_estimator__verbose | 0 |
92
+ | final_estimator__warm_start | False |
93
+ | final_estimator | GradientBoostingRegressor(n_estimators=500, random_state=0) |
94
+ | n_jobs | |
95
+ | passthrough | True |
96
+ | verbose | 0 |
97
+ | knn@5 | Pipeline(steps=[('select_cols',<br /> ColumnTransformer(transformers=[('long_and_lat', 'passthrough',<br /> ['Longitude', 'Latitude'])])),<br /> ('knn', KNeighborsRegressor())]) |
98
+ | knn@5__memory | |
99
+ | knn@5__steps | [('select_cols', ColumnTransformer(transformers=[('long_and_lat', 'passthrough',<br /> ['Longitude', 'Latitude'])])), ('knn', KNeighborsRegressor())] |
100
+ | knn@5__verbose | False |
101
+ | knn@5__select_cols | ColumnTransformer(transformers=[('long_and_lat', 'passthrough',<br /> ['Longitude', 'Latitude'])]) |
102
+ | knn@5__knn | KNeighborsRegressor() |
103
+ | knn@5__select_cols__n_jobs | |
104
+ | knn@5__select_cols__remainder | drop |
105
+ | knn@5__select_cols__sparse_threshold | 0.3 |
106
+ | knn@5__select_cols__transformer_weights | |
107
+ | knn@5__select_cols__transformers | [('long_and_lat', 'passthrough', ['Longitude', 'Latitude'])] |
108
+ | knn@5__select_cols__verbose | False |
109
+ | knn@5__select_cols__verbose_feature_names_out | True |
110
+ | knn@5__select_cols__long_and_lat | passthrough |
111
+ | knn@5__knn__algorithm | auto |
112
+ | knn@5__knn__leaf_size | 30 |
113
+ | knn@5__knn__metric | minkowski |
114
+ | knn@5__knn__metric_params | |
115
+ | knn@5__knn__n_jobs | |
116
+ | knn@5__knn__n_neighbors | 5 |
117
+ | knn@5__knn__p | 2 |
118
+ | knn@5__knn__weights | uniform |
119
+
120
+ </details>
121
+
122
+ ### Model Plot
123
+
124
+ <style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id="sk-container-id-1" class="sk-top-container" style="overflow: auto;"><div class="sk-text-repr-fallback"><pre>StackingRegressor(estimators=[(&#x27;knn@5&#x27;,Pipeline(steps=[(&#x27;select_cols&#x27;,ColumnTransformer(transformers=[(&#x27;long_and_lat&#x27;,&#x27;passthrough&#x27;,[&#x27;Longitude&#x27;,&#x27;Latitude&#x27;])])),(&#x27;knn&#x27;,KNeighborsRegressor())]))],final_estimator=GradientBoostingRegressor(n_estimators=500,random_state=0),passthrough=True)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-1" type="checkbox" ><label for="sk-estimator-id-1" class="sk-toggleable__label sk-toggleable__label-arrow">StackingRegressor</label><div class="sk-toggleable__content"><pre>StackingRegressor(estimators=[(&#x27;knn@5&#x27;,Pipeline(steps=[(&#x27;select_cols&#x27;,ColumnTransformer(transformers=[(&#x27;long_and_lat&#x27;,&#x27;passthrough&#x27;,[&#x27;Longitude&#x27;,&#x27;Latitude&#x27;])])),(&#x27;knn&#x27;,KNeighborsRegressor())]))],final_estimator=GradientBoostingRegressor(n_estimators=500,random_state=0),passthrough=True)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>knn@5</label></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div 
class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" ><label for="sk-estimator-id-2" class="sk-toggleable__label sk-toggleable__label-arrow">select_cols: ColumnTransformer</label><div class="sk-toggleable__content"><pre>ColumnTransformer(transformers=[(&#x27;long_and_lat&#x27;, &#x27;passthrough&#x27;,[&#x27;Longitude&#x27;, &#x27;Latitude&#x27;])])</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-3" type="checkbox" ><label for="sk-estimator-id-3" class="sk-toggleable__label sk-toggleable__label-arrow">long_and_lat</label><div class="sk-toggleable__content"><pre>[&#x27;Longitude&#x27;, &#x27;Latitude&#x27;]</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-4" type="checkbox" ><label for="sk-estimator-id-4" class="sk-toggleable__label sk-toggleable__label-arrow">passthrough</label><div class="sk-toggleable__content"><pre>passthrough</pre></div></div></div></div></div></div></div></div><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-5" type="checkbox" ><label for="sk-estimator-id-5" class="sk-toggleable__label sk-toggleable__label-arrow">KNeighborsRegressor</label><div class="sk-toggleable__content"><pre>KNeighborsRegressor()</pre></div></div></div></div></div></div></div></div></div></div><div class="sk-item"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>final_estimator</label></div></div><div class="sk-serial"><div class="sk-item"><div 
class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-6" type="checkbox" ><label for="sk-estimator-id-6" class="sk-toggleable__label sk-toggleable__label-arrow">GradientBoostingRegressor</label><div class="sk-toggleable__content"><pre>GradientBoostingRegressor(n_estimators=500, random_state=0)</pre></div></div></div></div></div></div></div></div></div></div></div></div>
125
+
126
+ ## Evaluation Results
127
+
128
+ Metrics are calculated on the test set.
129
+
130
+ | Metric | Value |
131
+ |-------------------------|--------------|
132
+ | Root mean squared error | 44273.5 |
133
+ | Mean absolute error | 30079.9 |
134
+ | R² | 0.805954 |
135
+
136
+ ## Dataset description
137
+
138
+ California Housing dataset
139
+ --------------------------
140
+
141
+ **Data Set Characteristics:**
142
+
143
+ :Number of Instances: 20640
144
+
145
+ :Number of Attributes: 8 numeric, predictive attributes and the target
146
+
147
+ :Attribute Information:
148
+ - MedInc median income in block group
149
+ - HouseAge median house age in block group
150
+ - AveRooms average number of rooms per household
151
+ - AveBedrms average number of bedrooms per household
152
+ - Population block group population
153
+ - AveOccup average number of household members
154
+ - Latitude block group latitude
155
+ - Longitude block group longitude
156
+
157
+ :Missing Attribute Values: None
158
+
159
+ This dataset was obtained from the StatLib repository.
160
+ https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html
161
+
162
+ The target variable is the median house value for California districts,
163
+ expressed in hundreds of thousands of dollars ($100,000).
164
+
165
+ This dataset was derived from the 1990 U.S. census, using one row per census
166
+ block group. A block group is the smallest geographical unit for which the U.S.
167
+ Census Bureau publishes sample data (a block group typically has a population
168
+ of 600 to 3,000 people).
169
+
170
+ A household is a group of people residing within a home. Since the average
171
+ number of rooms and bedrooms in this dataset are provided per household, these
172
+ columns may take surprisingly large values for block groups with few households
173
+ and many empty houses, such as vacation resorts.
174
+
175
+ It can be downloaded/loaded using the
176
+ :func:`sklearn.datasets.fetch_california_housing` function.
177
+
178
+ .. topic:: References
179
+
180
+ - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
181
+ Statistics and Probability Letters, 33 (1997) 291-297
182
+
183
+ ### Data distribution
184
+
185
+ <details>
186
+ <summary> Click to expand </summary>
187
+
188
+ ![Data distribution](geographic.png)
189
+
190
+ </details>
191
+
192
+ # How to Get Started with the Model
193
+
194
+ Run the code below to load the model and run a prediction on the example input stored in `config.json`:
195
+
196
+ ```python
197
+ import json
198
+ import pandas as pd
199
+ import skops.io as sio
200
+ model = sio.load("model.skops")
201
+ with open("config.json") as f:
202
+     config = json.load(f)
203
+ model.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))
204
+ ```
205
+
206
+ # Model Card Authors
207
+
208
+ Benjamin Bossan
209
+
210
+ # Model Card Contact
211
+
212
+ benjamin@huggingface.co
213
+
214
+ # Permutation Importances
215
+
216
+ ![Permutation Importances](permutation-importances.png)
config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sklearn": {
3
+ "columns": [
4
+ "MedInc",
5
+ "HouseAge",
6
+ "AveRooms",
7
+ "AveBedrms",
8
+ "Population",
9
+ "AveOccup",
10
+ "Latitude",
11
+ "Longitude"
12
+ ],
13
+ "environment": [
14
+ "scikit-learn==1.3.0",
15
+ "pandas==2.0.3",
16
+ "skops==0.9.dev0"
17
+ ],
18
+ "example_input": {
19
+ "AveBedrms": [
20
+ 0.9290780141843972,
21
+ 0.9458483754512635,
22
+ 1.087360594795539
23
+ ],
24
+ "AveOccup": [
25
+ 3.1134751773049647,
26
+ 3.0613718411552346,
27
+ 3.2657992565055762
28
+ ],
29
+ "AveRooms": [
30
+ 6.304964539007092,
31
+ 6.945848375451264,
32
+ 3.8884758364312266
33
+ ],
34
+ "HouseAge": [
35
+ 17.0,
36
+ 15.0,
37
+ 24.0
38
+ ],
39
+ "Latitude": [
40
+ 34.23,
41
+ 36.84,
42
+ 34.04
43
+ ],
44
+ "Longitude": [
45
+ -117.41,
46
+ -119.77,
47
+ -118.3
48
+ ],
49
+ "MedInc": [
50
+ 6.1426,
51
+ 5.3886,
52
+ 1.7109
53
+ ],
54
+ "Population": [
55
+ 439.0,
56
+ 848.0,
57
+ 1757.0
58
+ ]
59
+ },
60
+ "model": {
61
+ "file": "model.skops"
62
+ },
63
+ "model_format": "skops",
64
+ "task": "tabular-regression",
65
+ "use_intelex": false
66
+ }
67
+ }
geographic.png ADDED
model.skops ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9555783df2f35c9b731eec707bd40eda079b358b296b8e3429a16015d2480d4
3
+ size 14969690
permutation-importances.png ADDED