import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Streamlit app: upload a transactions CSV, explore it with a few charts,
# fit an Isolation Forest on three numeric features, and let the user score
# a single hand-entered transaction against the fitted model.
st.title("Advanced Transaction Anomaly Detection")

# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file:
    # Load the data from the file the user actually uploaded (previously a
    # hard-coded local path was read and the upload was ignored).
    try:
        data = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()

    st.subheader("Dataset Preview")
    st.write(data.head())

    # Data Overview
    st.subheader("Dataset Overview")
    st.write("Missing Values:")
    st.write(data.isnull().sum())
    st.write("Descriptive Statistics:")
    st.write(data.describe())

    # Visualization 1: Histogram of Transaction Amount
    if 'Transaction_Amount' in data.columns:
        st.subheader("Transaction Amount Distribution")
        fig_amount = px.histogram(
            data,
            x='Transaction_Amount',
            nbins=30,
            title="Transaction Amount Distribution",
        )
        st.plotly_chart(fig_amount)

    # Visualization 2: Box Plot of Transaction Amount by Account Type
    if 'Account_Type' in data.columns and 'Transaction_Amount' in data.columns:
        st.subheader("Box Plot: Transaction Amount by Account Type")
        fig_box = px.box(
            data,
            x='Account_Type',
            y='Transaction_Amount',
            title="Transaction Amount by Account Type",
        )
        st.plotly_chart(fig_box)

    # Bar chart of transactions by day of the week (only if the column exists)
    if 'Day_of_Week' in data.columns:
        fig_day_of_week = px.bar(
            data,
            x='Day_of_Week',
            title='Count of Transactions by Day of the Week',
        )
        st.plotly_chart(fig_day_of_week)

    # Visualization 3: Correlation Heatmap (Plotly)
    st.subheader("Correlation Heatmap")
    numeric_cols = data.select_dtypes(include=['float64', 'int64'])
    if not numeric_cols.empty:
        corr_matrix = numeric_cols.corr()
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=corr_matrix.values,
            x=corr_matrix.columns,
            y=corr_matrix.columns,
            colorscale='Viridis',
            hoverongaps=False,
        ))
        fig_heatmap.update_layout(
            title="Correlation Heatmap",
            xaxis_title="Features",
            yaxis_title="Features",
        )
        st.plotly_chart(fig_heatmap)

    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
        # Colour by account type only when that column is present; the guard
        # above does not check for it, so passing it unconditionally would
        # fail on datasets without 'Account_Type'.
        color_column = 'Account_Type' if 'Account_Type' in data.columns else None
        fig_scatter = px.scatter(
            data,
            x='Age',
            y='Average_Transaction_Amount',
            color=color_column,
            title='Average Transaction Amount vs. Age',
            trendline='ols',
        )
        st.plotly_chart(fig_scatter)

    # Anomaly Detection with Isolation Forest
    st.subheader("Anomaly Detection")
    features = [
        'Transaction_Amount',
        'Average_Transaction_Amount',
        'Frequency_of_Transactions',
    ]

    # Ensure all required features are in the dataset
    if all(feature in data.columns for feature in features):
        X = data[features]

        # Train Isolation Forest
        st.write("Training Isolation Forest model...")
        model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
        model.fit(X)

        # predict() returns -1 for outliers and 1 for inliers; store it as
        # 1 = anomaly, 0 = normal.
        data['anomaly'] = (model.predict(X) == -1).astype(int)

        # Display Results
        st.write("Anomaly Detection Results:")
        st.write(data[['anomaly']].value_counts())

        # Visualization: Anomalies vs Normal Transactions
        st.subheader("Anomalies vs Normal Transactions")
        fig_anomalies = px.histogram(
            data,
            x='anomaly',
            title="Anomalies vs Normal Transactions",
            labels={'anomaly': 'Anomaly (1) vs Normal (0)'},
        )
        st.plotly_chart(fig_anomalies)

        # User Input for Prediction: one numeric input per model feature
        st.subheader("Predict Anomaly for a New Transaction")
        user_inputs = {
            feature: st.number_input(
                f"Enter the value for '{feature}':", value=0.0
            )
            for feature in features
        }

        # Single-row frame whose columns follow the order of `features`,
        # matching the frame the model was fitted on.
        user_df = pd.DataFrame([user_inputs])

        # Predict anomalies using the model
        user_anomaly_pred = model.predict(user_df)

        if user_anomaly_pred[0] == -1:
            st.error("Anomaly detected: This transaction is flagged as an anomaly.")
        else:
            st.success("No anomaly detected: This transaction is normal.")
    else:
        st.error("Required features for anomaly detection are missing in the dataset.")