EduardoPacheco's picture
Update app.py
761c88a
import gradio as gr
import pandas as pd
import plotly.express as px
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
    """Fit an ANOVA feature filter followed by a linear SVM on synthetic data.

    A binary classification dataset is generated with ``make_classification``,
    split into train/test sets, and used to fit a
    ``SelectKBest(f_classif) -> LinearSVC`` pipeline.

    Args:
        k: Number of features kept by the ANOVA filter.
        n_features: Total number of features in the generated dataset.
        n_informative: Number of informative features in the generated dataset.
        n_redundant: Number of redundant features in the generated dataset.

    Returns:
        A ``(report_df, fig)`` tuple: ``report_df`` is the classification
        report on the test split as a DataFrame (one row per class/average,
        rounded to two decimals, with the overall accuracy repeated in an
        ``accuracy`` column), and ``fig`` is a Plotly bar chart with one bar
        per input feature whose height is 1 if the filter kept that feature
        and 0 otherwise.
    """
    X, y = make_classification(
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=2,
        n_clusters_per_class=2,
        random_state=42,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    anova_filter = SelectKBest(f_classif, k=k)
    clf = LinearSVC()
    anova_svm = make_pipeline(anova_filter, clf)
    anova_svm.fit(X_train, y_train)

    y_pred = anova_svm.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    report_df = report_df.reset_index().rename(columns={"index": "class"}).round(2)

    # The report carries accuracy as its own row (the scalar is repeated in
    # every metric column); move it into a dedicated column and drop the row.
    is_accuracy_row = report_df["class"] == "accuracy"
    report_df["accuracy"] = report_df.loc[is_accuracy_row].iloc[0, -1]
    report_df = report_df.loc[~is_accuracy_row]

    # Ask the fitted selector which input features it kept. Thresholding the
    # back-projected SVM coefficients with `> 0` would wrongly report selected
    # features that received a negative (or zero) weight as "not selected".
    features = anova_svm[0].get_support().astype(int)

    fig = px.bar(y=features)
    # Label the 0/1 bar heights as "False"/"True" on the y-axis.
    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
    fig.update_layout(
        title="Selected Features",
        xaxis_title="Feature Index",
        yaxis_title="Selected",
        legend_title="Selected",
    )
    return report_df, fig
title = "Pipeline ANOVA SVM"

# Markdown description rendered below the page title.
DESCRIPTION = """
### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features \
to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. \
A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom.
See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)
"""

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(DESCRIPTION)

    # Input controls. `gr.Slider(..., value=...)` replaces the deprecated
    # `gr.inputs.Slider(..., default=...)` API.
    with gr.Row():
        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
        n_informative = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Informative Features")
        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")

    # The button text is the component's value, not a `label` keyword.
    btn = gr.Button("Run")

    # Output components filled by `app_fn`.
    with gr.Row():
        report = gr.DataFrame(label="Classification Report")
        features = gr.Plot(label="Selected Features")

    # Re-run the pipeline whenever the button is clicked.
    btn.click(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )
    # Also run once with the current slider values when the page loads, so
    # the outputs are not empty on first view.
    demo.load(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

demo.launch()