# --- Page-extraction residue captured with the source (not part of the program) ---
# Spaces:
# Sleeping
# Sleeping
# File size: 5,046 Bytes
# 548a2f1 a5f21e4 548a2f1 628ee13 65afb7b 628ee13 548a2f1 |
# (line-number gutter 1-135 from the original file viewer)
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Streamlit app: exploratory charts plus Isolation Forest anomaly detection
# for a user-uploaded transaction CSV.
#
# Flow: upload a CSV -> preview and summary statistics -> charts (each drawn
# only when the columns it needs are present) -> fit an Isolation Forest on
# three transaction features -> score one user-entered transaction.
st.title("Advanced Transaction Anomaly Detection")

# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file:
    # Load the data from the uploaded file object itself rather than from a
    # fixed path on disk, so the app analyses what the user actually supplied.
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()  # nothing below can run without a DataFrame

    st.subheader("Dataset Preview")
    st.write(data.head())

    # Data Overview
    st.subheader("Dataset Overview")
    st.write("Missing Values:")
    st.write(data.isnull().sum())
    st.write("Descriptive Statistics:")
    st.write(data.describe())

    # Visualization 1: Histogram of Transaction Amount
    if 'Transaction_Amount' in data.columns:
        st.subheader("Transaction Amount Distribution")
        fig_amount = px.histogram(
            data,
            x='Transaction_Amount',
            nbins=30,
            title="Transaction Amount Distribution",
        )
        st.plotly_chart(fig_amount)

    # Visualization 2: Box Plot of Transaction Amount by Account Type
    if 'Account_Type' in data.columns and 'Transaction_Amount' in data.columns:
        st.subheader("Box Plot: Transaction Amount by Account Type")
        fig_box = px.box(
            data,
            x='Account_Type',
            y='Transaction_Amount',
            title="Transaction Amount by Account Type",
        )
        st.plotly_chart(fig_box)

    # Bar chart of transactions by day of the week (only if the column exists)
    if 'Day_of_Week' in data.columns:
        fig_day_of_week = px.bar(
            data,
            x='Day_of_Week',
            title='Count of Transactions by Day of the Week',
        )
        st.plotly_chart(fig_day_of_week)

    # Visualization 3: Correlation Heatmap (Plotly)
    st.subheader("Correlation Heatmap")
    # 'number' selects every numeric dtype, not only float64/int64, so
    # narrower integer/float columns are not silently left out.
    numeric_cols = data.select_dtypes(include='number')
    if not numeric_cols.empty:
        corr_matrix = numeric_cols.corr()
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=corr_matrix.values,
            x=corr_matrix.columns,
            y=corr_matrix.columns,
            colorscale='Viridis',
            hoverongaps=False,
        ))
        fig_heatmap.update_layout(
            title="Correlation Heatmap",
            xaxis_title="Features",
            yaxis_title="Features",
        )
        st.plotly_chart(fig_heatmap)

    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
        # Colour by account type only when that column is present; the guard
        # above does not guarantee it.
        color_column = 'Account_Type' if 'Account_Type' in data.columns else None
        # NOTE(review): the OLS trendline is expected to need the statsmodels
        # package at runtime - confirm it is in the deployment requirements.
        fig_scatter = px.scatter(
            data,
            x='Age',
            y='Average_Transaction_Amount',
            color=color_column,
            title='Average Transaction Amount vs. Age',
            trendline='ols',
        )
        st.plotly_chart(fig_scatter)

    # Anomaly Detection with Isolation Forest
    st.subheader("Anomaly Detection")
    features = ['Transaction_Amount', 'Average_Transaction_Amount', 'Frequency_of_Transactions']

    # Ensure all required features are in the dataset
    if all(feature in data.columns for feature in features):
        # Fit and score only rows that have a value for every feature, so
        # missing values never reach the estimator.
        X = data[features].dropna()
        dropped_rows = len(data) - len(X)
        if dropped_rows:
            st.warning(
                f"{dropped_rows} row(s) with missing feature values were "
                "excluded from anomaly detection."
            )

        if X.empty:
            st.error("No complete rows are available for anomaly detection.")
        else:
            # Train Isolation Forest (fixed random_state keeps results
            # reproducible across script reruns).
            st.write("Training Isolation Forest model...")
            model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
            model.fit(X)

            # predict() yields -1 for anomalies and 1 for inliers; store the
            # result as a 1/0 flag alongside the scored rows.
            scored = data.loc[X.index].copy()
            scored['anomaly'] = (model.predict(X) == -1).astype(int)

            # Display Results
            st.write("Anomaly Detection Results:")
            st.write(scored[['anomaly']].value_counts())

            # Visualization: Anomalies vs Normal Transactions
            st.subheader("Anomalies vs Normal Transactions")
            fig_anomalies = px.histogram(
                scored,
                x='anomaly',
                title="Anomalies vs Normal Transactions",
                labels={'anomaly': 'Anomaly (1) vs Normal (0)'},
            )
            st.plotly_chart(fig_anomalies)

            # User Input for Prediction
            st.subheader("Predict Anomaly for a New Transaction")
            user_inputs = {}
            for feature in features:
                user_inputs[feature] = st.number_input(
                    f"Enter the value for '{feature}':", value=0.0
                )

            # One-row frame with columns in the same order used for training
            user_df = pd.DataFrame([user_inputs], columns=features)

            # Predict anomalies using the model
            user_is_anomaly = model.predict(user_df)[0] == -1
            if user_is_anomaly:
                st.error("Anomaly detected: This transaction is flagged as an anomaly.")
            else:
                st.success("No anomaly detected: This transaction is normal.")
    else:
        st.error("Required features for anomaly detection are missing in the dataset.")
# | (trailing delimiter left over from page extraction; not code)