# 7jimmy's picture
# Create app.py
# 47696d4 verified
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
import gradio as gr
def credit_card_fraud_detection(data_path: str = "creditcard.csv", random_state=2) -> dict:
    """Train and evaluate a logistic-regression fraud classifier on a CSV file.

    The data is loaded and summarised, the normal transactions are
    under-sampled to the size of the fraudulent class, and the balanced data
    is split 80/20 (stratified on the label), standardised and fitted with
    ``LogisticRegression``.

    Args:
        data_path: Path of the CSV file to read. The file must provide a
            ``Class`` column (0 = normal transaction, 1 = fraudulent
            transaction) and an ``Amount`` column; every column other than
            ``Class`` is used as a model feature.
        random_state: Seed shared by the under-sampling step and the
            train/test split so repeated calls report the same numbers.
            Pass ``None`` to draw a fresh random sample on every call.

    Returns:
        A dict with these keys:

        * ``missing_values`` - per-column count of null entries.
        * ``class_distribution`` - row count per ``Class`` in the full data.
        * ``legit_amount_stats`` / ``fraud_amount_stats`` - ``describe()``
          of ``Amount`` for each class.
        * ``class_means`` - per-class column means of the full data.
        * ``class_value_counts`` - row count per ``Class`` after
          under-sampling.
        * ``class_means_new_dataset`` - per-class column means after
          under-sampling.
        * ``training_data_accuracy`` / ``test_data_accuracy`` - accuracy of
          the fitted model on the training and held-out portions.
    """
    # Load the dataset
    credit_card_data = pd.read_csv(data_path)

    # Check for missing values
    missing_values = credit_card_data.isnull().sum()

    # Check class distribution
    class_distribution = credit_card_data['Class'].value_counts()

    # Visualize class distribution. The figure is created explicitly and
    # closed afterwards so repeated calls do not pile up open figures.
    fig, ax = plt.subplots()
    sns.countplot(x='Class', data=credit_card_data, ax=ax)
    ax.set_title('Class Distribution')
    plt.show()
    plt.close(fig)

    # The dataset is highly unbalanced:
    #   0 --> normal transaction
    #   1 --> fraudulent transaction
    # Separate the two classes for analysis.
    legit = credit_card_data[credit_card_data.Class == 0]
    fraud = credit_card_data[credit_card_data.Class == 1]

    # Statistical measures of the transaction amounts
    legit_amount_stats = legit.Amount.describe()
    fraud_amount_stats = fraud.Amount.describe()

    # Compare the column means of both kinds of transaction
    class_means = credit_card_data.groupby('Class').mean()

    # Under-sampling: keep as many normal transactions as there are
    # fraudulent ones. The size is derived from the data instead of being
    # hard-coded, and capped at len(legit) because sampling without
    # replacement cannot return more rows than exist.
    sample_size = min(len(fraud), len(legit))
    legit_sample = legit.sample(n=sample_size, random_state=random_state)

    # Concatenate the sampled normal rows with all fraudulent rows
    new_dataset = pd.concat([legit_sample, fraud], axis=0)
    class_value_counts = new_dataset['Class'].value_counts()
    class_means_new_dataset = new_dataset.groupby('Class').mean()

    # Split the data into features and target
    X = new_dataset.drop(columns='Class')
    Y = new_dataset['Class']

    # Split into training and testing data, preserving the class ratio
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=random_state
    )

    # Standardise the features. The scaler is fitted on the training portion
    # only so no information from the test rows leaks into the model.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model training - logistic regression. max_iter is raised above the
    # library default to give the solver headroom to converge.
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_scaled, Y_train)

    # Model evaluation - accuracy score, called as (y_true, y_pred)
    train_predictions = model.predict(X_train_scaled)
    training_data_accuracy = accuracy_score(Y_train, train_predictions)

    # Accuracy on the held-out test data
    test_predictions = model.predict(X_test_scaled)
    test_data_accuracy = accuracy_score(Y_test, test_predictions)

    return {
        'missing_values': missing_values,
        'class_distribution': class_distribution,
        'legit_amount_stats': legit_amount_stats,
        'fraud_amount_stats': fraud_amount_stats,
        'class_means': class_means,
        'class_value_counts': class_value_counts,
        'class_means_new_dataset': class_means_new_dataset,
        'training_data_accuracy': training_data_accuracy,
        'test_data_accuracy': test_data_accuracy,
    }
def _fraud_detection_report():
    """Run the fraud-detection analysis and render its results as plain text.

    ``credit_card_fraud_detection`` returns a dict whose values are pandas
    objects and floats, which a single Gradio output component cannot show
    directly. Each entry is therefore rendered as a labelled text section;
    pandas objects go through ``to_string()`` so wide tables are not
    abbreviated.

    Returns:
        str: One ``name:`` heading followed by the value for every entry of
        the result dict, separated by blank lines.
    """
    results = credit_card_fraud_detection()
    sections = []
    for name, value in results.items():
        rendered = value.to_string() if hasattr(value, "to_string") else str(value)
        sections.append(f"{name}:\n{rendered}")
    return "\n\n".join(sections)


# Launching the Gradio Interface
# `inputs` and `outputs` are required by gr.Interface; the analysis takes no
# user input, so the input list is empty and the report is shown as text.
iface = gr.Interface(
    fn=_fraud_detection_report,
    inputs=[],
    outputs="text",
    title="Credit Card Fraud Detection",
)
iface.launch()