# Spaces: Sleeping
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# Streamlit app: exploratory charts and Isolation Forest anomaly detection
# for a user-uploaded transactions CSV.

# Columns the Isolation Forest is trained on; the same columns are requested
# from the user when scoring a single new transaction.
ANOMALY_FEATURES = (
    'Transaction_Amount',
    'Average_Transaction_Amount',
    'Frequency_of_Transactions',
)


def load_transactions(uploaded_file):
    """Parse the uploaded CSV into a DataFrame.

    Args:
        uploaded_file: The file-like object returned by ``st.file_uploader``
            (any path or buffer accepted by ``pd.read_csv`` works).

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        pandas.errors.EmptyDataError: If the file has no content to parse.
        pandas.errors.ParserError: If the file is not valid CSV.
    """
    # Read what the user uploaded rather than a fixed file name on disk.
    return pd.read_csv(uploaded_file)


def to_anomaly_flags(predictions):
    """Convert model labels to flags: ``-1`` becomes ``1``, anything else ``0``."""
    return [1 if label == -1 else 0 for label in predictions]


def missing_columns(data, required):
    """Return the names in ``required`` that are not columns of ``data``, in order."""
    return [name for name in required if name not in data.columns]


def show_overview(data):
    """Render the preview, missing-value counts and descriptive statistics."""
    st.subheader("Dataset Preview")
    st.write(data.head())

    st.subheader("Dataset Overview")
    st.write("Missing Values:")
    st.write(data.isnull().sum())
    st.write("Descriptive Statistics:")
    st.write(data.describe())


def show_charts(data):
    """Render each exploratory chart whose required columns are present."""
    columns = data.columns

    # Visualization 1: Histogram of Transaction Amount
    if 'Transaction_Amount' in columns:
        st.subheader("Transaction Amount Distribution")
        fig_amount = px.histogram(data, x='Transaction_Amount', nbins=30,
                                  title="Transaction Amount Distribution")
        st.plotly_chart(fig_amount)

    # Visualization 2: Box Plot of Transaction Amount by Account Type
    if 'Account_Type' in columns and 'Transaction_Amount' in columns:
        st.subheader("Box Plot: Transaction Amount by Account Type")
        fig_box = px.box(data, x='Account_Type', y='Transaction_Amount',
                         title="Transaction Amount by Account Type")
        st.plotly_chart(fig_box)

    # Bar chart of transactions by day of the week
    if 'Day_of_Week' in columns:
        fig_day_of_week = px.bar(data, x='Day_of_Week',
                                 title='Count of Transactions by Day of the Week')
        st.plotly_chart(fig_day_of_week)

    # Visualization 3: Correlation Heatmap (Plotly)
    st.subheader("Correlation Heatmap")
    # 'number' selects every numeric dtype, not only float64/int64.
    numeric_cols = data.select_dtypes(include='number')
    if not numeric_cols.empty:
        corr_matrix = numeric_cols.corr()
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=corr_matrix.values,
            x=corr_matrix.columns,
            y=corr_matrix.columns,
            colorscale='Viridis',
            hoverongaps=False,
        ))
        fig_heatmap.update_layout(title="Correlation Heatmap",
                                  xaxis_title="Features", yaxis_title="Features")
        st.plotly_chart(fig_heatmap)

    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
    if 'Age' in columns and 'Average_Transaction_Amount' in columns:
        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
        scatter_kwargs = {
            'x': 'Age',
            'y': 'Average_Transaction_Amount',
            'title': 'Average Transaction Amount vs. Age',
            'trendline': 'ols',
        }
        # Colour by account type only when that column exists; naming a
        # missing column would make plotly raise.
        if 'Account_Type' in columns:
            scatter_kwargs['color'] = 'Account_Type'
        fig_scatter = px.scatter(data, **scatter_kwargs)
        st.plotly_chart(fig_scatter)


def run_anomaly_detection(data, features=ANOMALY_FEATURES):
    """Fit an Isolation Forest on ``features`` and render the results.

    Shows the anomaly counts and a histogram, then asks for one value per
    feature and reports whether that single transaction is flagged.

    Args:
        data: DataFrame holding the uploaded transactions.
        features: Column names used for training and for the input form.
    """
    st.subheader("Anomaly Detection")
    feature_names = list(features)

    # Ensure all required features are in the dataset
    if missing_columns(data, feature_names):
        st.error("Required features for anomaly detection are missing in the dataset.")
        return

    # Score only rows where every feature has a value, and work on a copy so
    # the caller's frame is not modified.
    complete = data[feature_names].notna().all(axis=1)
    skipped = int((~complete).sum())
    if skipped:
        st.warning(f"Ignoring {skipped} row(s) with missing feature values.")
    scored = data.loc[complete].copy()
    if scored.empty:
        st.error("No rows with complete feature values are available for anomaly detection.")
        return

    X = scored[feature_names]

    # Train Isolation Forest
    st.write("Training Isolation Forest model...")
    model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
    model.fit(X)

    # Add anomaly prediction column (1 = anomaly, 0 = normal)
    scored['anomaly'] = to_anomaly_flags(model.predict(X))

    # Display Results
    st.write("Anomaly Detection Results:")
    st.write(scored[['anomaly']].value_counts())

    # Visualization: Anomalies vs Normal Transactions
    st.subheader("Anomalies vs Normal Transactions")
    fig_anomalies = px.histogram(scored, x='anomaly', title="Anomalies vs Normal Transactions",
                                 labels={'anomaly': 'Anomaly (1) vs Normal (0)'})
    st.plotly_chart(fig_anomalies)

    # User Input for Prediction
    st.subheader("Predict Anomaly for a New Transaction")
    user_inputs = {
        feature: st.number_input(f"Enter the value for '{feature}':", value=0.0)
        for feature in feature_names
    }

    # One-row frame with the same column names the model was fitted on.
    user_df = pd.DataFrame([user_inputs])
    if to_anomaly_flags(model.predict(user_df))[0] == 1:
        st.error("Anomaly detected: This transaction is flagged as an anomaly.")
    else:
        st.success("No anomaly detected: This transaction is normal.")


def main():
    """Render the page: title, uploader, then the analysis once a file is uploaded."""
    st.title("Advanced Transaction Anomaly Detection")

    # File uploader
    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
    if not uploaded_file:
        return

    # Load the data
    try:
        data = load_transactions(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return

    show_overview(data)
    show_charts(data)
    run_anomaly_detection(data)


# `streamlit run` executes the script as `__main__`, so the app still renders;
# the guard only keeps a plain import of this module free of side effects.
if __name__ == "__main__":
    main()