|
import gradio as gr |
|
import pandas as pd |
|
import plotly.express as px |
|
from sklearn.svm import LinearSVC |
|
from sklearn.pipeline import make_pipeline |
|
from sklearn.datasets import make_classification |
|
from sklearn.metrics import classification_report |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.feature_selection import SelectKBest, f_classif |
|
|
|
|
|
def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
    """Train and evaluate a SelectKBest(ANOVA) -> LinearSVC pipeline.

    A synthetic binary-classification dataset is generated, split into
    train/test, and fitted with a pipeline that first keeps the ``k``
    best features by ANOVA F-test and then trains a linear SVM.

    Args:
        k: number of features kept by the ANOVA filter (clamped to
            ``n_features``, since SelectKBest raises when k is larger).
        n_features: total number of generated features.
        n_informative: number of informative features.
        n_redundant: number of redundant features.
            NOTE(review): make_classification still raises if
            n_informative + n_redundant exceeds n_features; Gradio will
            surface that error to the user.

    Returns:
        tuple: (report_df, fig) — a per-class classification-report
        DataFrame for the test split, and a Plotly bar chart marking
        which feature indices were selected.
    """
    # Gradio sliders may deliver floats even with step=1; sklearn wants ints.
    k, n_features = int(k), int(n_features)
    n_informative, n_redundant = int(n_informative), int(n_redundant)
    # Guard: SelectKBest.fit raises ValueError when k > n_features.
    k = min(k, n_features)

    X, y = make_classification(
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=2,
        n_clusters_per_class=2,
        random_state=42,  # fixed seed so the demo is reproducible
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    anova_svm = make_pipeline(SelectKBest(f_classif, k=k), LinearSVC())
    anova_svm.fit(X_train, y_train)

    y_pred = anova_svm.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    report_df = report_df.reset_index().rename(columns={"index": "class"}).round(2)
    # Expose overall accuracy as a column, then drop the synthetic
    # "accuracy" row that classification_report injects.
    report_df["accuracy"] = round(report["accuracy"], 2)
    report_df = report_df.loc[report_df["class"] != "accuracy"]

    # BUG FIX: the original derived the mask via `coef_ > 0` after
    # inverse_transform, which hides selected features whose SVM
    # coefficient is negative. get_support() is the authoritative
    # selection mask of the SelectKBest step.
    features = anova_svm[0].get_support().astype(int)
    fig = px.bar(y=features)

    # Render the 0/1 mask as False/True tick labels.
    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
    fig.update_layout(
        title="Selected Features",
        xaxis_title="Feature Index",
        yaxis_title="Selected",
        legend_title="Selected",
    )
    return report_df, fig
|
|
|
title = "Pipeline ANOVA SVM"

# Build the Gradio UI: parameter sliders on top, the classification
# report and the selected-features chart below.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
    ### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features \
    to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. \
    A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom.

    See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)
    """
    )
    with gr.Row():
        # FIX: gr.inputs.* is the deprecated pre-3.0 API (removed in
        # Gradio 3/4); use gr.Slider with value= instead of default=.
        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
        n_informative = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Informative Features")
        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")
    # FIX: a Button's text is its `value` (first positional arg), not `label`.
    btn = gr.Button("Run")
    with gr.Row():
        report = gr.DataFrame(label="Classification Report")
        features = gr.Plot(label="Selected Features")

    # Re-run the pipeline on click...
    btn.click(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )
    # ...and once on page load so the demo starts populated.
    demo.load(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

demo.launch()
|
|