# Spaces status: Build error
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score | |
from sklearn.preprocessing import StandardScaler | |
import gradio as gr | |
def credit_card_fraud_detection(
    data_path: str = "creditcard.csv",
    random_state: int = 2,
    show_plot: bool = False,
) -> dict:
    """Train and evaluate a logistic-regression fraud classifier.

    Loads the transactions CSV, collects summary statistics, balances the
    classes by under-sampling the legitimate transactions down to the number
    of fraudulent ones, fits a logistic regression on standardized features
    and reports accuracy on the train and test splits.

    In the ``Class`` column, 0 marks a normal transaction and 1 a fraudulent
    one.

    Args:
        data_path: Path of the CSV file to load. It must contain ``Class``
            and ``Amount`` columns.
        random_state: Seed used for both the under-sampling and the
            train/test split, so repeated calls report the same accuracies.
        show_plot: When True, draw the class-distribution count plot and
            call ``plt.show()``. Off by default because ``plt.show()`` can
            block the caller when an interactive backend is active.

    Returns:
        A dict with the keys ``missing_values``, ``class_distribution``,
        ``legit_amount_stats``, ``fraud_amount_stats``, ``class_means``,
        ``class_value_counts``, ``class_means_new_dataset``,
        ``training_data_accuracy`` and ``test_data_accuracy``.

    Raises:
        ValueError: If the data contains no rows for one of the two classes.
    """
    # Load the dataset
    credit_card_data = pd.read_csv(data_path)

    # Missing values per column and the raw (highly unbalanced) class counts
    missing_values = credit_card_data.isnull().sum()
    class_distribution = credit_card_data['Class'].value_counts()

    if show_plot:
        # Draw on an explicit figure and always close it so repeated calls
        # do not accumulate open figures.
        fig, ax = plt.subplots()
        try:
            sns.countplot(x='Class', data=credit_card_data, ax=ax)
            ax.set_title('Class Distribution')
            plt.show()
        finally:
            plt.close(fig)

    # Separate the data for analysis
    legit = credit_card_data[credit_card_data['Class'] == 0]
    fraud = credit_card_data[credit_card_data['Class'] == 1]
    if legit.empty or fraud.empty:
        raise ValueError(
            "The dataset must contain both legitimate (Class == 0) and "
            "fraudulent (Class == 1) transactions."
        )

    # Statistical measures of the transaction amounts
    legit_amount_stats = legit['Amount'].describe()
    fraud_amount_stats = fraud['Amount'].describe()

    # Compare the mean feature values of both kinds of transactions
    class_means = credit_card_data.groupby('Class').mean()

    # Under-sampling: take as many legitimate rows as there are fraudulent
    # ones (never more than are available) instead of a hard-coded count.
    sample_size = min(len(fraud), len(legit))
    legit_sample = legit.sample(n=sample_size, random_state=random_state)

    # Concatenate the two DataFrames into the balanced dataset
    new_dataset = pd.concat([legit_sample, fraud], axis=0)
    class_value_counts = new_dataset['Class'].value_counts()
    class_means_new_dataset = new_dataset.groupby('Class').mean()

    # Split the data into features and target
    X = new_dataset.drop(columns='Class')
    Y = new_dataset['Class']

    # Split into training and testing data, keeping the class ratio
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=random_state
    )

    # Standardize the features; the scaler is fitted on the training split
    # only so no information from the test split leaks into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model training - logistic regression, with extra iterations so the
    # solver has room to converge.
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_scaled, Y_train)

    # Model evaluation - accuracy_score takes (y_true, y_pred)
    X_train_prediction = model.predict(X_train_scaled)
    training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

    # Accuracy on test data
    X_test_prediction = model.predict(X_test_scaled)
    test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

    return {
        'missing_values': missing_values,
        'class_distribution': class_distribution,
        'legit_amount_stats': legit_amount_stats,
        'fraud_amount_stats': fraud_amount_stats,
        'class_means': class_means,
        'class_value_counts': class_value_counts,
        'class_means_new_dataset': class_means_new_dataset,
        'training_data_accuracy': training_data_accuracy,
        'test_data_accuracy': test_data_accuracy,
    }
# Launching the Gradio Interface.
# gr.Interface needs `inputs` and `outputs` in addition to `fn`; leaving them
# out raises a TypeError when the interface is constructed. The function is
# called without arguments, so there are no input components, and the
# returned report is rendered as plain text.
iface = gr.Interface(
    fn=credit_card_fraud_detection,
    inputs=None,
    outputs="text",
    title="Credit Card Fraud Detection",
)
iface.launch()