Spaces:

ojas121
/

anomaly_detection

Running

App Files Files Community

ojas121 committed on Nov 21, 2024

Commit

548a2f1

verified ·

1 Parent(s): dd46a87

Create app.py

Browse files

Files changed (1) hide show

app.py +124 -0

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.ensemble import IsolationForest
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report
+# Streamlit app
+st.title("Advanced Transaction Anomaly Detection")
+# File uploader
+uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
+if uploaded_file:
+    # Load the data
+    data = pd.read_csv(uploaded_file)
+    st.subheader("Dataset Preview")
+    st.write(data.head())
+    # Data Overview
+    st.subheader("Dataset Overview")
+    st.write("Missing Values:")
+    st.write(data.isnull().sum())
+    st.write("Descriptive Statistics:")
+    st.write(data.describe())
+    # Visualization 1: Histogram of Transaction Amount
+    if 'Transaction_Amount' in data.columns:
+        st.subheader("Transaction Amount Distribution")
+        fig_amount = px.histogram(data, x='Transaction_Amount', nbins=30, title="Transaction Amount Distribution")
+        st.plotly_chart(fig_amount)
+    # Visualization 2: Box Plot of Transaction Amount by Account Type
+    if 'Account_Type' in data.columns and 'Transaction_Amount' in data.columns:
+        st.subheader("Box Plot: Transaction Amount by Account Type")
+        fig_box = px.box(data, x='Account_Type', y='Transaction_Amount', title="Transaction Amount by Account Type")
+        st.plotly_chart(fig_box)
+        # Check if 'Day_of_Week' column exists
+        if 'Day_of_Week' in data.columns:
+            # Create bar chart for transactions by day of the week
+            fig_day_of_week = px.bar(data, x='Day_of_Week', title='Count of Transactions by Day of the Week')
+            # Display the chart in the Streamlit app
+            st.plotly_chart(fig_day_of_week)
+    # Visualization 3: Correlation Heatmap (Plotly)
+    st.subheader("Correlation Heatmap")
+    numeric_cols = data.select_dtypes(include=['float64', 'int64'])
+    if not numeric_cols.empty:
+        corr_matrix = numeric_cols.corr()
+        fig_heatmap = go.Figure(data=go.Heatmap(
+            z=corr_matrix.values,
+            x=corr_matrix.columns,
+            y=corr_matrix.columns,
+            colorscale='Viridis',
+            hoverongaps=False,
+        ))
+        fig_heatmap.update_layout(title="Correlation Heatmap", xaxis_title="Features", yaxis_title="Features")
+        st.plotly_chart(fig_heatmap)
+    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
+    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
+        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
+        fig_scatter = px.scatter(data, x='Age',
+                                        y='Average_Transaction_Amount',
+                                        color='Account_Type',
+                                        title='Average Transaction Amount vs. Age',
+                                        trendline='ols')
+        st.plotly_chart(fig_scatter)
+    # Anomaly Detection with Isolation Forest
+    st.subheader("Anomaly Detection")
+    features = ['Transaction_Amount', 'Average_Transaction_Amount', 'Frequency_of_Transactions']
+    # Ensure all required features are in the dataset
+    if all(feature in data.columns for feature in features):
+        X = data[features]
+        # Train Isolation Forest
+        st.write("Training Isolation Forest model...")
+        model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
+        model.fit(X)
+        # Add anomaly prediction column
+        data['anomaly'] = model.predict(X)
+        data['anomaly'] = data['anomaly'].apply(lambda x: 1 if x == -1 else 0)
+        # Display Results
+        st.write("Anomaly Detection Results:")
+        st.write(data[['anomaly']].value_counts())
+        # Visualization: Anomalies vs Normal Transactions
+        st.subheader("Anomalies vs Normal Transactions")
+        fig_anomalies = px.histogram(data, x='anomaly', title="Anomalies vs Normal Transactions",
+                                     labels={'anomaly': 'Anomaly (1) vs Normal (0)'})
+        st.plotly_chart(fig_anomalies)
+        # User Input for Prediction
+        st.subheader("Predict Anomaly for a New Transaction")
+        user_inputs = {}
+        for feature in features:
+            user_input = st.number_input(f"Enter the value for '{feature}':", value=0.0)
+            user_inputs[feature] = user_input
+        # Create a DataFrame from user inputs
+        user_df = pd.DataFrame([user_inputs])
+        # Predict anomalies using the model
+        user_anomaly_pred = model.predict(user_df)
+        user_anomaly_pred_binary = 1 if user_anomaly_pred[0] == -1 else 0
+        if user_anomaly_pred_binary == 1:
+            st.error("Anomaly detected: This transaction is flagged as an anomaly.")
+        else:
+            st.success("No anomaly detected: This transaction is normal.")
+    else:
+        st.error("Required features for anomaly detection are missing in the dataset.")