ojas121 committed on
Commit
548a2f1
·
verified ·
1 Parent(s): dd46a87

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -0
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: explore a transactions CSV and flag anomalies.

The user uploads a CSV file. The app previews it, renders a handful of
exploratory Plotly charts (each one only when the columns it needs are
present), fits an Isolation Forest on three transaction features and lets
the user score a single hand-entered transaction against that model.
"""

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Streamlit app
st.title("Advanced Transaction Anomaly Detection")

# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Load the data. An empty or malformed upload is reported in the UI
    # instead of surfacing a raw traceback, and the script run ends there.
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()

    st.subheader("Dataset Preview")
    st.write(data.head())

    # Data Overview
    st.subheader("Dataset Overview")
    st.write("Missing Values:")
    st.write(data.isnull().sum())
    st.write("Descriptive Statistics:")
    st.write(data.describe())

    has_amount = 'Transaction_Amount' in data.columns
    has_account_type = 'Account_Type' in data.columns

    # Visualization 1: Histogram of Transaction Amount
    if has_amount:
        st.subheader("Transaction Amount Distribution")
        fig_amount = px.histogram(
            data,
            x='Transaction_Amount',
            nbins=30,
            title="Transaction Amount Distribution",
        )
        st.plotly_chart(fig_amount)

    # Visualization 2: Box Plot of Transaction Amount by Account Type
    if has_account_type and has_amount:
        st.subheader("Box Plot: Transaction Amount by Account Type")
        fig_box = px.box(
            data,
            x='Account_Type',
            y='Transaction_Amount',
            title="Transaction Amount by Account Type",
        )
        st.plotly_chart(fig_box)

    # Bar chart of transactions by day of the week (only if the column exists)
    if 'Day_of_Week' in data.columns:
        fig_day_of_week = px.bar(
            data,
            x='Day_of_Week',
            title='Count of Transactions by Day of the Week',
        )
        st.plotly_chart(fig_day_of_week)

    # Visualization 3: Correlation Heatmap (Plotly)
    st.subheader("Correlation Heatmap")
    # 'number' selects every numeric dtype, not just float64/int64, so
    # int32/float32/unsigned columns are no longer silently left out.
    numeric_cols = data.select_dtypes(include='number')
    if not numeric_cols.empty:
        corr_matrix = numeric_cols.corr()
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=corr_matrix.values,
            x=corr_matrix.columns,
            y=corr_matrix.columns,
            colorscale='Viridis',
            hoverongaps=False,
        ))
        fig_heatmap.update_layout(
            title="Correlation Heatmap",
            xaxis_title="Features",
            yaxis_title="Features",
        )
        st.plotly_chart(fig_heatmap)

    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
        scatter_kwargs = {
            'x': 'Age',
            'y': 'Average_Transaction_Amount',
            # Colour by account type only when that column exists; passing a
            # missing column name makes Plotly Express raise.
            'color': 'Account_Type' if has_account_type else None,
            'title': 'Average Transaction Amount vs. Age',
        }
        try:
            fig_scatter = px.scatter(data, trendline='ols', **scatter_kwargs)
        except ImportError:
            # The OLS trendline needs the optional 'statsmodels' package;
            # without it, still show the scatter itself.
            st.warning("Install 'statsmodels' to display the OLS trendline.")
            fig_scatter = px.scatter(data, **scatter_kwargs)
        st.plotly_chart(fig_scatter)

    # Anomaly Detection with Isolation Forest
    st.subheader("Anomaly Detection")
    features = ['Transaction_Amount', 'Average_Transaction_Amount', 'Frequency_of_Transactions']

    # Ensure all required features are in the dataset
    if all(feature in data.columns for feature in features):
        # IsolationForest rejects NaN and non-numeric input, so coerce the
        # feature columns to numbers and train only on complete rows.
        feature_frame = data[features].apply(pd.to_numeric, errors='coerce')
        X = feature_frame.dropna()
        skipped_rows = len(feature_frame) - len(X)

        if X.empty:
            st.error("No rows with complete numeric values for the required features.")
        else:
            if skipped_rows:
                st.warning(
                    f"Skipped {skipped_rows} row(s) with missing or "
                    "non-numeric feature values."
                )

            # Train Isolation Forest
            st.write("Training Isolation Forest model...")
            model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
            model.fit(X)

            # predict() yields -1 for outliers and 1 for inliers; map that
            # to 1 = anomaly, 0 = normal.
            anomaly_flags = pd.Series(
                (model.predict(X) == -1).astype(int),
                index=X.index,
                name='anomaly',
            )
            # Index-aligned assignment: rows that were skipped get NaN.
            data['anomaly'] = anomaly_flags
            scored = anomaly_flags.to_frame()

            # Display Results
            st.write("Anomaly Detection Results:")
            st.write(scored.value_counts())

            # Visualization: Anomalies vs Normal Transactions
            st.subheader("Anomalies vs Normal Transactions")
            fig_anomalies = px.histogram(
                scored,
                x='anomaly',
                title="Anomalies vs Normal Transactions",
                labels={'anomaly': 'Anomaly (1) vs Normal (0)'},
            )
            st.plotly_chart(fig_anomalies)

            # User Input for Prediction
            st.subheader("Predict Anomaly for a New Transaction")
            user_inputs = {}
            for feature in features:
                user_inputs[feature] = st.number_input(
                    f"Enter the value for '{feature}':", value=0.0
                )

            # Single-row frame with the same column names and order the
            # model was fitted on.
            user_df = pd.DataFrame([user_inputs])

            # Predict anomalies using the model
            user_is_anomaly = model.predict(user_df)[0] == -1

            if user_is_anomaly:
                st.error("Anomaly detected: This transaction is flagged as an anomaly.")
            else:
                st.success("No anomaly detected: This transaction is normal.")
    else:
        st.error("Required features for anomaly detection are missing in the dataset.")