Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Path of the transactions CSV. It is relative, so it resolves against the
# working directory the Streamlit process was started from.
_DATA_PATH = '../Transactions Data.csv'

# Placeholder commentary rendered under every chart.
# TODO: replace with the actual finding for each plot.
_PLACEHOLDER_NOTE = 'bla bla bla'

# (column name, description) pairs shown in the data-dictionary table.
# Keeping each name next to its description prevents the two lists that
# Streamlit receives from drifting out of step.
_COLUMN_DESCRIPTIONS = (
    ("step", "Represents a unit of time in the transaction process, though the specific time unit is not specified in the dataset. It could denote hours, days, or another unit, depending on the context."),
    ("type", "Describes the type of transaction, such as transfer, payment, etc. This categorical variable allows for the classification of different transaction behaviors."),
    ("amount", "Indicates the monetary value of the transaction, providing insight into the financial magnitude of each transaction."),
    ("nameOrig", "Serves as the identifier for the origin account or entity initiating the transaction. This helps trace the source of funds in each transaction."),
    ("oldbalanceOrg", "Represents the balance in the origin account before the transaction occurred, offering a reference point for understanding changes in account balances."),
    ("newbalanceOrig", "Reflects the balance in the origin account after the transaction has been processed, providing insight into how the transaction affects the account balance."),
    ("nameDest", "Functions as the identifier for the destination account or entity receiving the funds in each transaction. It helps track where the money is being transferred to."),
    ("oldbalanceDest", "Indicates the balance in the destination account before the transaction, offering a baseline for assessing changes in account balances due to incoming funds."),
    ("newbalanceDest", "Represents the balance in the destination account after the transaction has been completed, providing insight into the impact of incoming funds on the account balance."),
    ("isFraud", "A binary indicator (0 or 1) denoting whether the transaction is fraudulent (1) or legitimate (0). This is the target variable for fraud detection modeling."),
    ("isFlaggedFraud", "Another binary indicator (0 or 1) which may signal whether a transaction has been flagged as potentially fraudulent. This could serve as an additional feature for fraud detection algorithms."),
)

# Univariate plots: (column, heading used for both subheader and title,
# x-axis label).
_HISTOGRAMS = (
    ('type', 'Distribution of Transaction Types', 'Transaction Types'),
    ('amount', 'Distribution of Balance Amount', 'Amount'),
    ('oldbalanceOrg', 'Distribution of Old Balance Origin',
     'Old Balance Origin'),
    ('newbalanceOrig', 'Distribution of New Balance Origin',
     'New Balance Origin'),
    ('oldbalanceDest', 'Distribution of Old Balance Destination',
     'Old Balance Destination'),
    ('newbalanceDest', 'Distribution of New Balance Destination',
     'New Balance Destination'),
    ('isFlaggedFraud', 'Distribution of Flagged Fraud', 'Is Flagged Fraud'),
    ('isFraud', 'Distribution of Fraud', 'Is Fraud'),
)

# Bivariate plots against the transaction type: (column, x-axis label,
# subject phrase used in the subheader and the title).
_BOXPLOTS = (
    ('amount', 'Amount', 'Amount Balance'),
    ('oldbalanceOrg', 'Old Balance Origin', 'Old Balance Origin'),
    ('newbalanceOrig', 'New Balance Origin', 'New Balance Origin'),
    ('oldbalanceDest', 'Old Balance Destination', 'Old Balance Destination'),
    ('newbalanceDest', 'New Balance Destination', 'New Balance Destination'),
)


def _show_figure(fig) -> None:
    """Render *fig* in the page, then close it and add the placeholder note."""
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; without
    # this every rerun of the page would leave more figures behind.
    plt.close(fig)
    st.write(_PLACEHOLDER_NOTE)
    st.write('')


def _plot_histogram(df: pd.DataFrame, column: str, heading: str,
                    xlabel: str) -> None:
    """Draw a 20-bin histogram of ``df[column]`` under a subheader."""
    st.subheader(heading)
    fig, ax = plt.subplots()
    sns.histplot(df[column], bins=20, ax=ax)
    # Label through the Axes we just drew on rather than pyplot's implicit
    # "current axes", so the labels cannot land on a different figure.
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    ax.set_title(heading)
    _show_figure(fig)


def _plot_box_by_type(df: pd.DataFrame, column: str, xlabel: str,
                      subject: str) -> None:
    """Draw a horizontal boxplot of ``df[column]`` grouped by ``df['type']``."""
    st.subheader(f'Distribution of {subject} per Transaction Types')
    fig, ax = plt.subplots()
    sns.boxplot(x=df[column], y=df['type'], ax=ax)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Transaction Types')
    ax.set_title(f'Transaction Types vs {subject}')
    _show_figure(fig)


def app() -> None:
    """Render the "Exploratory Data Analysis" Streamlit page.

    The page reads the transactions CSV and then shows, in order: a table
    describing each column, the transposed ``DataFrame.describe()`` summary,
    one histogram per entry in ``_HISTOGRAMS``, and one boxplot per entry in
    ``_BOXPLOTS`` (each numeric column split by transaction ``type``).

    Raises:
        Whatever ``pandas.read_csv`` raises when the CSV cannot be read, and
        ``KeyError`` if an expected column is missing from the data.
    """
    st.title('Exploratory Data Analysis')

    # Load Data
    df = pd.read_csv(_DATA_PATH)

    # Data dictionary: st.table receives a dict of two equally long columns.
    st.subheader('Transaction Dataset Column Descriptions')
    st.table({
        "Column Names": [name for name, _ in _COLUMN_DESCRIPTIONS],
        "Description": [text for _, text in _COLUMN_DESCRIPTIONS],
    })
    st.divider()

    # Data Summary
    st.header('Data Summary')
    st.write(df.describe().T)
    st.divider()

    # Univariate Exploration
    st.header('Univariate Analysis')
    for column, heading, xlabel in _HISTOGRAMS:
        _plot_histogram(df, column, heading, xlabel)
    st.divider()

    # Bivariate analysis
    st.header('Bivariate Analysis')
    for column, xlabel, subject in _BOXPLOTS:
        _plot_box_by_type(df, column, xlabel, subject)