helboukkouri committed
Commit 0a25afe
Parent: 39bce97

initial commit

Files changed (3)
  1. README.md +5 -3
  2. app.py +252 -0
  3. requirements.txt +220 -0
README.md CHANGED
@@ -1,13 +1,15 @@
  ---
  title: Regression Models
- emoji: 🏢
+ emoji: 🧪
  colorFrom: indigo
  colorTo: red
  sdk: gradio
  sdk_version: 4.20.1
  app_file: app.py
- pinned: false
+ pinned: true
  license: apache-2.0
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Play around with various regression models to see how the degree of the polynomial impacts training.
+
+ You can also change the number of data points along with how noisy the data is.
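Below is an illustrative sketch (not part of this commit) of the effect the new README describes: fitting polynomials of increasing degree to noisy samples of a known quartic. It reuses the `make_pipeline(PolynomialFeatures, LinearRegression)` approach from `app.py` together with the app's default coefficients, point count, and noise level; the variable names and the printed training MSE are illustrative only.

```python
# Sketch: under-, well-, and over-parameterized polynomial fits on noisy data.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.default_rng(0)
x = np.linspace(-5, 2, 20)                               # DEFAULT_POINTS = 20 in app.py
y_true = 0.5 * x**4 + 2 * x**3 - 0.5 * x**2 - 2 * x + 1  # default coefficients [0.5, 2, -0.5, -2, 1]
y_noisy = y_true + rng.normal(0, 6, x.shape)             # DEFAULT_NOISE = 6 in app.py

for degree in (1, 4, 15):
    model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    model.fit(x.reshape(-1, 1), y_noisy)
    mse = np.mean((model.predict(x.reshape(-1, 1)) - y_noisy) ** 2)
    print(f"degree={degree:2d}  training MSE={mse:.2f}")
```

A very high degree drives the training error toward zero by chasing the noise, which is exactly the over-fitting behaviour the Space lets you explore interactively.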
app.py ADDED
@@ -0,0 +1,252 @@
+ import gradio as gr
+ import numpy as np
+ import sympy as sp
+ import seaborn as sns
+ from matplotlib import pyplot as plt
+
+ from sklearn.linear_model import LinearRegression
+ from sklearn.preprocessing import PolynomialFeatures
+ from sklearn.pipeline import make_pipeline
+
+
+ sns.set_style(style="darkgrid")
+ sns.set_context(context="notebook", font_scale=0.7)
+
+ MAX_NOISE = 20
+ DEFAULT_NOISE = 6
+ SLIDE_NOISE_STEP = 2
+
+ MAX_POINTS = 100
+ DEFAULT_POINTS = 20
+ SLIDE_POINTS_STEP = 5
+
+ def generate_equation(process_params):
+     process_params = process_params.astype(float).values.tolist()
+
+     # Define symbols
+     x = sp.symbols('x')
+     coefficients = sp.symbols('a b c d e')
+
+     # Create the polynomial expression
+     polynomial_expression = None
+     for i, coef in enumerate(reversed(coefficients)):
+         polynomial_expression = polynomial_expression + coef * x**i if polynomial_expression else coef * x**i
+
+     # Parameter mapping
+     parameters = {coef: value for coef, value in zip(coefficients, process_params[0])}
+
+     # Substitute parameter values into the expression
+     polynomial_with_values = polynomial_expression.subs(parameters)
+     latex_representation = sp.latex(polynomial_with_values)
+     return fr"$${latex_representation}$$"
+
+
+ def true_process(x, process_params):
+     """The true process we want to model."""
+     process_params = process_params.astype(float).values.tolist()
+     return (
+         process_params[0][0] * (x ** 4)
+         + process_params[0][1] * (x ** 3)
+         + process_params[0][2] * (x ** 2)
+         + process_params[0][3] * x
+         + process_params[0][4]
+     )
+
+
+ def generate_data(num_points, noise_level, process_params):
+
+     # x is the list of input values
+     input_values = np.linspace(-5, 2, num_points)
+     input_values_dense = np.linspace(-5, 2, MAX_POINTS)
+
+     # y = f(x) is the underlying process we want to model
+     y = [true_process(x, process_params) for x in input_values]
+     y_dense = [true_process(x, process_params) for x in input_values_dense]
+
+     # however, we can only observe a noisy version of f(x)
+     noise = np.random.normal(0, noise_level, len(input_values))
+     y_noisy = y + noise
+
+     return input_values, input_values_dense, y, y_dense, y_noisy
+
+
+ def make_plot(
+     num_points, noise_level, process_params,
+     show_true_process, show_original_points,
+     show_noisy_points, show_added_noise,
+     show_learned_process, show_predicted_points,
+     show_prediction_error,
+     polynomial_degree=None
+ ):
+
+     x, x_dense, y, y_dense, y_noisy = generate_data(num_points, noise_level, process_params)
+
+     fig = plt.figure(dpi=400)
+     if show_true_process:
+         plt.plot(
+             x_dense, y_dense, "-", color="#363A4F",
+             label="True Process",
+             lw=1.5,
+         )
+     if show_added_noise:
+         plt.vlines(
+             x, y, y_noisy, color="#556D9A",
+             linestyles="dashed",
+             alpha=0.75,
+             lw=1,
+             label="Added Noise",
+         )
+     if show_original_points:
+         plt.plot(
+             x, y, "-o", color="none",
+             ms=6,
+             markerfacecolor="white",
+             markeredgecolor="#556D9A",
+             markeredgewidth=1.2,
+             label="Original Points",
+         )
+     if show_noisy_points and not polynomial_degree:
+         plt.plot(
+             x, y_noisy, "-o", color="none",
+             ms=6.5,
+             markerfacecolor="#556D9A",
+             markeredgecolor="none",
+             markeredgewidth=1.5,
+             alpha=1,
+             label="Noisy Points",
+         )
+
+     # Fit the selected regression model
+     if polynomial_degree:
+
+         degree = polynomial_degree
+         model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
+         model.fit(x.reshape(-1, 1), y_noisy)
+
+         # Plot the fitted regression model
+         y_pred_dense = model.predict(x_dense.reshape(-1, 1))
+         y_pred = model.predict(x.reshape(-1, 1))
+         if show_learned_process:
+             plt.plot(
+                 x_dense, y_pred_dense, "-", color="#327747",
+                 label="Learned Process",
+                 lw=1.5,
+                 alpha=0.75,
+             )
+         if show_prediction_error:
+             plt.vlines(
+                 x, y_pred, y_noisy, color="#43A461",
+                 linestyles="dashed",
+                 alpha=0.75,
+                 lw=1,
+                 label="Prediction Error",
+             )
+         if show_noisy_points:
+             plt.plot(
+                 x, y_noisy, "-o", color="none",
+                 ms=6.5,
+                 markerfacecolor="#556D9A",
+                 markeredgecolor="none",
+                 markeredgewidth=1.5,
+                 alpha=1,
+                 label="Training Points",
+             )
+         if show_predicted_points:
+             plt.plot(
+                 x, y_pred, "-o", color="none",
+                 ms=6.5,
+                 markerfacecolor="#43A461",
+                 markeredgecolor="none",
+                 markeredgewidth=1.5,
+                 label="Predicted Points",
+                 alpha=1,
+             )
+
+     plt.xlabel("x")
+     plt.ylabel("y")
+     plt.legend(fontsize=7.5)
+     plt.tight_layout()
+     return fig
+
+ # Custom CSS
+ css = """
+ .train-button {
+     font-size: 1.2em;
+     width: 20%!important;
+     margin: 0;
+ }
+ .model-section {
+     font-size: 1em;
+     width: 100%!important;
+     margin: 0 0 1em 0;
+ }
+ .gradio-container {
+     width: 40%!important;
+     min-width: 800px;
+ }
+ """
+ with gr.Blocks(css=css) as demo:
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("## Underlying Process")
+             with gr.Row():
+                 process_params = gr.DataFrame(
+                     value=[[0.5, 2, -0.5, -2, 1]],
+                     label="Polynomial Coefficients",
+                     type="pandas",
+                     column_widths=("2", "1", "1", "1", "1w"),
+                     headers=["x ** 4", "x ** 3", "x ** 2", "x", "1"],
+                     interactive=True
+                 )
+                 equation = gr.Markdown()
+
+             gr.Markdown("## Data Generation")
+             with gr.Row():
+                 num_points = gr.Slider(
+                     minimum=5,
+                     maximum=MAX_POINTS,
+                     value=DEFAULT_POINTS,
+                     step=SLIDE_POINTS_STEP,
+                     label="Number of Points"
+                 )
+
+                 noise_level = gr.Slider(
+                     minimum=0,
+                     maximum=MAX_NOISE,
+                     value=DEFAULT_NOISE,
+                     step=SLIDE_NOISE_STEP,
+                     label="Noise Level"
+                 )
+
+             show_params = []
+             with gr.Row():
+                 with gr.Column():
+                     show_params.append(gr.Checkbox(label="Underlying Process", value=True))
+                     show_params.append(gr.Checkbox(label="Original Points", value=True))
+                     show_params.append(gr.Checkbox(label="Noisy Points", value=True))
+                     show_params.append(gr.Checkbox(label="Added Noise", value=True))
+                 with gr.Column():
+                     show_params.append(gr.Checkbox(label="Learned Process", value=True))
+                     show_params.append(gr.Checkbox(label="Predicted Points", value=True))
+                     show_params.append(gr.Checkbox(label="Prediction Error", value=True))
+
+             # Add model choice dropdown and training trigger button
+             gr.Markdown("## Modelisation")
+             with gr.Row(elem_classes=["model-section"]):
+                 polynomial_degree = gr.Number(label="Choose the degree of your regression model", value=1, minimum=1, maximum=15, step=1, scale=2)
+                 train_button = gr.Button(value="Train Model", elem_classes=["train-button"], scale=1)
+
+             scatter_plot = gr.Plot(elem_classes=["main-plot"])
+
+     num_points.change(fn=make_plot, inputs=[num_points, noise_level, process_params, *show_params], outputs=scatter_plot)
+     noise_level.change(fn=make_plot, inputs=[num_points, noise_level, process_params, *show_params], outputs=scatter_plot)
+     process_params.change(fn=make_plot, inputs=[num_points, noise_level, process_params, *show_params], outputs=scatter_plot)
+     process_params.change(fn=generate_equation, inputs=[process_params], outputs=equation)
+     train_button.click(make_plot, inputs=[num_points, noise_level, process_params, *show_params, polynomial_degree], outputs=scatter_plot)
+     for component in show_params:
+         component.change(fn=make_plot, inputs=[num_points, noise_level, process_params, *show_params], outputs=scatter_plot)
+     demo.load(fn=make_plot, inputs=[num_points, noise_level, process_params, *show_params], outputs=scatter_plot)
+     demo.load(fn=generate_equation, inputs=[process_params], outputs=equation)
+
+ if __name__ == "__main__":
+     demo.launch()
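Note on the event wiring at the end of app.py: only train_button.click passes polynomial_degree into make_plot; the slider, dataframe, and checkbox .change handlers (and the demo.load calls) omit it, so make_plot falls back to polynomial_degree=None and simply re-renders the data without fitting anything. A regression model is therefore trained only when the "Train Model" button is clicked.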
requirements.txt ADDED
@@ -0,0 +1,220 @@
+ #
+ # This file is autogenerated by pip-compile with Python 3.10
+ # by the following command:
+ #
+ #    pip-compile requirements.in
+ #
+ aiofiles==23.2.1
+     # via gradio
+ altair==5.2.0
+     # via gradio
+ annotated-types==0.6.0
+     # via pydantic
+ anyio==4.3.0
+     # via
+     #   httpx
+     #   starlette
+ attrs==23.2.0
+     # via
+     #   jsonschema
+     #   referencing
+ certifi==2024.2.2
+     # via
+     #   httpcore
+     #   httpx
+     #   requests
+ charset-normalizer==3.3.2
+     # via requests
+ click==8.1.7
+     # via
+     #   typer
+     #   uvicorn
+ colorama==0.4.6
+     # via typer
+ contourpy==1.2.0
+     # via matplotlib
+ cycler==0.12.1
+     # via matplotlib
+ exceptiongroup==1.2.0
+     # via anyio
+ fastapi==0.110.0
+     # via gradio
+ ffmpy==0.3.2
+     # via gradio
+ filelock==3.13.1
+     # via huggingface-hub
+ fonttools==4.49.0
+     # via matplotlib
+ fsspec==2024.2.0
+     # via
+     #   gradio-client
+     #   huggingface-hub
+ gradio==4.19.2
+     # via -r requirements.in
+ gradio-client==0.10.1
+     # via gradio
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ httpcore==1.0.4
+     # via httpx
+ httpx==0.27.0
+     # via
+     #   gradio
+     #   gradio-client
+ huggingface-hub==0.21.4
+     # via
+     #   gradio
+     #   gradio-client
+ idna==3.6
+     # via
+     #   anyio
+     #   httpx
+     #   requests
+ importlib-resources==6.1.3
+     # via gradio
+ jinja2==3.1.3
+     # via
+     #   altair
+     #   gradio
+ joblib==1.3.2
+     # via scikit-learn
+ jsonschema==4.21.1
+     # via altair
+ jsonschema-specifications==2023.12.1
+     # via jsonschema
+ kiwisolver==1.4.5
+     # via matplotlib
+ markdown-it-py==3.0.0
+     # via rich
+ markupsafe==2.1.5
+     # via
+     #   gradio
+     #   jinja2
+ matplotlib==3.8.3
+     # via
+     #   gradio
+     #   seaborn
+ mdurl==0.1.2
+     # via markdown-it-py
+ mpmath==1.3.0
+     # via sympy
+ numpy==1.26.4
+     # via
+     #   -r requirements.in
+     #   altair
+     #   contourpy
+     #   gradio
+     #   matplotlib
+     #   pandas
+     #   scikit-learn
+     #   scipy
+     #   seaborn
+ orjson==3.9.15
+     # via gradio
+ packaging==23.2
+     # via
+     #   altair
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   matplotlib
+ pandas==2.2.1
+     # via
+     #   -r requirements.in
+     #   altair
+     #   gradio
+     #   seaborn
+ pillow==10.2.0
+     # via
+     #   gradio
+     #   matplotlib
+ pydantic==2.6.3
+     # via
+     #   fastapi
+     #   gradio
+ pydantic-core==2.16.3
+     # via pydantic
+ pydub==0.25.1
+     # via gradio
+ pygments==2.17.2
+     # via rich
+ pyparsing==3.1.2
+     # via matplotlib
+ python-dateutil==2.9.0.post0
+     # via
+     #   matplotlib
+     #   pandas
+ python-multipart==0.0.9
+     # via gradio
+ pytz==2024.1
+     # via pandas
+ pyyaml==6.0.1
+     # via
+     #   gradio
+     #   huggingface-hub
+ referencing==0.33.0
+     # via
+     #   jsonschema
+     #   jsonschema-specifications
+ requests==2.31.0
+     # via huggingface-hub
+ rich==13.7.1
+     # via typer
+ rpds-py==0.18.0
+     # via
+     #   jsonschema
+     #   referencing
+ ruff==0.3.1
+     # via gradio
+ scikit-learn==1.4.1.post1
+     # via -r requirements.in
+ scipy==1.12.0
+     # via scikit-learn
+ seaborn==0.13.2
+     # via -r requirements.in
+ semantic-version==2.10.0
+     # via gradio
+ shellingham==1.5.4
+     # via typer
+ six==1.16.0
+     # via python-dateutil
+ sniffio==1.3.1
+     # via
+     #   anyio
+     #   httpx
+ starlette==0.36.3
+     # via fastapi
+ sympy==1.12
+     # via -r requirements.in
+ threadpoolctl==3.3.0
+     # via scikit-learn
+ tomlkit==0.12.0
+     # via gradio
+ toolz==0.12.1
+     # via altair
+ tqdm==4.66.2
+     # via huggingface-hub
+ typer[all]==0.9.0
+     # via gradio
+ typing-extensions==4.10.0
+     # via
+     #   altair
+     #   anyio
+     #   fastapi
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   pydantic
+     #   pydantic-core
+     #   typer
+     #   uvicorn
+ tzdata==2024.1
+     # via pandas
+ urllib3==2.2.1
+     # via requests
+ uvicorn==0.27.1
+     # via gradio
+ websockets==11.0.3
+     # via gradio-client