Spaces:

ojas121
/

anomaly_detection

Sleeping

App Files Files Community

anomaly_detection / app.py

ojas121

Update app.py

65afb7b verified 4 months ago

raw

history blame contribute delete

5.05 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from sklearn.ensemble import IsolationForest
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import classification_report

	# Streamlit app: upload a transactions CSV, explore it with a few charts,
	# flag anomalies with an Isolation Forest, and score one user-entered
	# transaction against the fitted model.
	st.title("Advanced Transaction Anomaly Detection")

	# File uploader
	uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

	if uploaded_file is not None:
	    # Load the data from the file the user actually uploaded (not from a
	    # fixed path on disk). Rewind first so a repeated read of the same
	    # buffer starts from the beginning.
	    uploaded_file.seek(0)
	    try:
	        data = pd.read_csv(uploaded_file)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	        st.error(f"Could not read the uploaded CSV file: {exc}")
	        st.stop()

	    st.subheader("Dataset Preview")
	    st.write(data.head())

	    # Data Overview
	    st.subheader("Dataset Overview")
	    st.write("Missing Values:")
	    st.write(data.isnull().sum())
	    st.write("Descriptive Statistics:")
	    st.write(data.describe())

	    # Visualization 1: Histogram of Transaction Amount
	    if 'Transaction_Amount' in data.columns:
	        st.subheader("Transaction Amount Distribution")
	        fig_amount = px.histogram(
	            data,
	            x='Transaction_Amount',
	            nbins=30,
	            title="Transaction Amount Distribution",
	        )
	        st.plotly_chart(fig_amount)

	    # Visualization 2: Box Plot of Transaction Amount by Account Type
	    if 'Account_Type' in data.columns and 'Transaction_Amount' in data.columns:
	        st.subheader("Box Plot: Transaction Amount by Account Type")
	        fig_box = px.box(
	            data,
	            x='Account_Type',
	            y='Transaction_Amount',
	            title="Transaction Amount by Account Type",
	        )
	        st.plotly_chart(fig_box)

	    # Bar chart of transactions by day of the week (only if the column exists)
	    if 'Day_of_Week' in data.columns:
	        fig_day_of_week = px.bar(
	            data,
	            x='Day_of_Week',
	            title='Count of Transactions by Day of the Week',
	        )
	        st.plotly_chart(fig_day_of_week)

	    # Visualization 3: Correlation Heatmap (Plotly)
	    # Select every numeric dtype, not just float64/int64, so narrower
	    # integer/float columns are not silently left out.
	    numeric_cols = data.select_dtypes(include='number')
	    if not numeric_cols.empty:
	        st.subheader("Correlation Heatmap")
	        corr_matrix = numeric_cols.corr()
	        fig_heatmap = go.Figure(data=go.Heatmap(
	            z=corr_matrix.values,
	            x=corr_matrix.columns,
	            y=corr_matrix.columns,
	            colorscale='Viridis',
	            hoverongaps=False,
	        ))
	        fig_heatmap.update_layout(
	            title="Correlation Heatmap",
	            xaxis_title="Features",
	            yaxis_title="Features",
	        )
	        st.plotly_chart(fig_heatmap)

	    # Visualization 4: Scatter Plot (Age vs Average Transaction Amount)
	    if 'Age' in data.columns and 'Average_Transaction_Amount' in data.columns:
	        st.subheader("Scatter Plot: Age vs Average Transaction Amount")
	        # Colour by account type only when that column is present; the
	        # guard above does not guarantee it.
	        color_column = 'Account_Type' if 'Account_Type' in data.columns else None
	        # NOTE: per the Plotly docs, trendline='ols' needs the statsmodels
	        # package to be installed.
	        fig_scatter = px.scatter(
	            data,
	            x='Age',
	            y='Average_Transaction_Amount',
	            color=color_column,
	            title='Average Transaction Amount vs. Age',
	            trendline='ols',
	        )
	        st.plotly_chart(fig_scatter)

	    # Anomaly Detection with Isolation Forest
	    st.subheader("Anomaly Detection")
	    features = ['Transaction_Amount', 'Average_Transaction_Amount', 'Frequency_of_Transactions']

	    # Ensure all required features are in the dataset
	    if all(feature in data.columns for feature in features):
	        # Train and score only on rows where every feature is present, so
	        # missing values in the upload cannot break model fitting.
	        X = data[features].dropna()
	        skipped_rows = len(data) - len(X)
	        if skipped_rows:
	            st.warning(
	                f"{skipped_rows} row(s) with missing feature values were "
	                "excluded from anomaly detection."
	            )
	        if X.empty:
	            st.error("No complete rows are available to train the anomaly detection model.")
	            st.stop()

	        # Train Isolation Forest
	        st.write("Training Isolation Forest model...")
	        model = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
	        model.fit(X)

	        # The code treats a prediction of -1 as an anomaly: store 1 for
	        # those rows and 0 for everything else.
	        scored = data.loc[X.index].copy()
	        scored['anomaly'] = (model.predict(X) == -1).astype(int)

	        # Display Results
	        st.write("Anomaly Detection Results:")
	        st.write(scored[['anomaly']].value_counts())

	        # Visualization: Anomalies vs Normal Transactions
	        st.subheader("Anomalies vs Normal Transactions")
	        fig_anomalies = px.histogram(
	            scored,
	            x='anomaly',
	            title="Anomalies vs Normal Transactions",
	            labels={'anomaly': 'Anomaly (1) vs Normal (0)'},
	        )
	        st.plotly_chart(fig_anomalies)

	        # User Input for Prediction
	        st.subheader("Predict Anomaly for a New Transaction")
	        user_inputs = {
	            feature: st.number_input(f"Enter the value for '{feature}':", value=0.0)
	            for feature in features
	        }

	        # Single-row frame with the same column names and order used for training
	        user_df = pd.DataFrame([user_inputs], columns=features)

	        # Predict anomalies using the model
	        user_anomaly_pred = model.predict(user_df)

	        if user_anomaly_pred[0] == -1:
	            st.error("Anomaly detected: This transaction is flagged as an anomaly.")
	        else:
	            st.success("No anomaly detected: This transaction is normal.")
	    else:
	        st.error("Required features for anomaly detection are missing in the dataset.")