File size: 5,046 Bytes
548a2f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5f21e4
548a2f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628ee13
65afb7b
628ee13
 
 
 
 
 
 
 
548a2f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Streamlit app: explore an uploaded transactions CSV with a few Plotly charts,
# fit an Isolation Forest on three transaction features, and let the user
# score one hand-entered transaction against the fitted model.
st.title("Advanced Transaction Anomaly Detection")

# File uploader; nothing below runs until a file has been provided.
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

data = None
if uploaded_file:
    # Read the file the user actually uploaded (not a fixed path on disk), and
    # surface unreadable files as a message instead of a raw traceback.
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")

if data is not None:
    st.subheader("Dataset Preview")
    st.write(data.head())

    # Data Overview
    st.subheader("Dataset Overview")
    st.write("Missing Values:")
    st.write(data.isnull().sum())
    st.write("Descriptive Statistics:")
    st.write(data.describe())

    # Each visualization below is shown only when the columns it needs exist,
    # so CSVs with a partial schema still render whatever they can.

    # Visualization 1: Histogram of Transaction Amount
    if 'Transaction_Amount' in data.columns:
        st.subheader("Transaction Amount Distribution")
        fig_amount = px.histogram(data, x='Transaction_Amount', nbins=30, title="Transaction Amount Distribution")
        st.plotly_chart(fig_amount)

    # Visualization 2: Box Plot of Transaction Amount by Account Type
    if 'Account_Type' in data.columns and 'Transaction_Amount' in data.columns:
        st.subheader("Box Plot: Transaction Amount by Account Type")
        fig_box = px.box(data, x='Account_Type', y='Transaction_Amount', title="Transaction Amount by Account Type")
        st.plotly_chart(fig_box)

    # Bar chart of transactions by day of the week. It depends only on
    # 'Day_of_Week', so it is checked independently of the box-plot columns.
    if 'Day_of_Week' in data.columns:
        fig_day_of_week = px.bar(data, x='Day_of_Week', title='Count of Transactions by Day of the Week')
        st.plotly_chart(fig_day_of_week)

    # Visualization 3: Correlation Heatmap (Plotly)
    numeric_cols = data.select_dtypes(include=['float64', 'int64'])
    if not numeric_cols.empty:
        st.subheader("Correlation Heatmap")
        corr_matrix = numeric_cols.corr()
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=corr_matrix.values,
            x=corr_matrix.columns,
            y=corr_matrix.columns,
            colorscale='Viridis',
            hoverongaps=False,
        ))
        fig_heatmap.update_layout(title="Correlation Heatmap", xaxis_title="Features", yaxis_title="Features")
        st.plotly_chart(fig_heatmap)

    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
        # Colour by account type only when that column is present; passing a
        # missing column name to Plotly Express would raise.
        color_column = 'Account_Type' if 'Account_Type' in data.columns else None
        # NOTE(review): trendline='ols' relies on statsmodels being installed —
        # confirm it is listed in the deployment requirements.
        fig_scatter = px.scatter(
            data,
            x='Age',
            y='Average_Transaction_Amount',
            color=color_column,
            title='Average Transaction Amount vs. Age',
            trendline='ols',
        )
        st.plotly_chart(fig_scatter)

    # Anomaly Detection with Isolation Forest
    st.subheader("Anomaly Detection")
    features = ['Transaction_Amount', 'Average_Transaction_Amount', 'Frequency_of_Transactions']

    # Ensure all required features are in the dataset
    if all(feature in data.columns for feature in features):
        X = data[features]

        # Train Isolation Forest. random_state is fixed so reruns of the script
        # on the same data give the same labels.
        st.write("Training Isolation Forest model...")
        model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
        model.fit(X)

        # The model labels outliers as -1; store them as 1 (anomaly) / 0 (normal).
        data['anomaly'] = (model.predict(X) == -1).astype(int)

        # Display Results
        st.write("Anomaly Detection Results:")
        st.write(data[['anomaly']].value_counts())

        # Visualization: Anomalies vs Normal Transactions
        st.subheader("Anomalies vs Normal Transactions")
        fig_anomalies = px.histogram(data, x='anomaly', title="Anomalies vs Normal Transactions",
                                     labels={'anomaly': 'Anomaly (1) vs Normal (0)'})
        st.plotly_chart(fig_anomalies)

        # User Input for Prediction: one numeric field per model feature.
        st.subheader("Predict Anomaly for a New Transaction")
        user_inputs = {}
        for feature in features:
            user_inputs[feature] = st.number_input(f"Enter the value for '{feature}':", value=0.0)

        # Single-row frame with the same column names and order used for fitting.
        user_df = pd.DataFrame([user_inputs], columns=features)

        # Predict with the fitted model; -1 means the entered transaction is an outlier.
        user_is_anomaly = model.predict(user_df)[0] == -1

        if user_is_anomaly:
            st.error("Anomaly detected: This transaction is flagged as an anomaly.")
        else:
            st.success("No anomaly detected: This transaction is normal.")
    else:
        st.error("Required features for anomaly detection are missing in the dataset.")