"""Streamlit front end for the ITACA Insurance Core AI Module.

The app offers two pages, selected from the sidebar menu:

* "Clustering Analysis" - fits a PyCaret clustering model on an uploaded CSV
  and renders the assigned clusters, metrics and diagnostic plots.
* "Anomaly Detection" - fits a PyCaret anomaly model on an uploaded CSV and
  renders the labelled rows and projection plots.
"""
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pycaret
import streamlit as st
from streamlit_option_menu import option_menu
import PIL
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont

# Seed handed to every PyCaret setup() call.
SESSION_ID = 123

# Model identifiers offered in the clustering page's select box.
CLUSTERING_MODELS = ['kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics', 'birch']

# Model identifiers offered in the anomaly page's select box.
ANOMALY_MODELS = ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn',
                  'lof', 'svm', 'pca', 'mcd', 'sod', 'sos']

# Clustering plots in display order, each mapped to the models for which the
# plot is skipped.
CLUSTER_PLOT_EXCLUSIONS = {
    'cluster': (),
    'tsne': ('ap',),
    'elbow': ('ap', 'meanshift', 'dbscan', 'optics'),
    'silhouette': ('ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics'),
    'distance': ('ap', 'sc', 'hclust', 'dbscan', 'optics', 'birch'),
    'distribution': ('ap',),
}

# Anomaly plots in display order; they are rendered for every model.
ANOMALY_PLOTS = ('tsne', 'umap')


def load_claims_csv(uploaded_file):
    """Parse an uploaded CSV into a DataFrame.

    The file is first read as comma-separated with pandas' default encoding.
    If that raises ``ValueError`` the file is read again as pipe-separated
    latin-1 text.

    Args:
        uploaded_file: Seekable file-like object, as returned by
            ``st.file_uploader``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # Always start from the beginning of the upload, in case the stream was
    # already read earlier.
    uploaded_file.seek(0)
    try:
        return pd.read_csv(uploaded_file, sep=',')
    except ValueError:
        # The failed attempt has already consumed part (or all) of the stream;
        # without rewinding, the retry would start mid-file or hit EOF.
        uploaded_file.seek(0)
        return pd.read_csv(uploaded_file, sep='|', encoding='latin-1')


def render_clustering_page():
    """Render the clustering page and run the analysis when requested."""
    # Explicit names instead of `import *`, so it is clear which PyCaret
    # module each call below comes from.
    from pycaret.clustering import (
        assign_model,
        create_model,
        get_metrics,
        plot_model,
        pull,
        setup,
    )

    st.header('Clustering Analysis')
    st.write(
        """ """
    )

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    selected_model = st.selectbox("Choose a clustering model", CLUSTERING_MODELS)
    selected_clusters = st.slider("Choose a number of clusters", min_value=2, max_value=10, value=4)

    if uploaded_file is None:
        return

    insurance_claims = load_claims_csv(uploaded_file)

    if not st.button("Prediction"):
        return

    with st.spinner("Analyzing..."):
        # setup() runs only once a prediction is requested, not on every
        # script rerun. The original additionally configured a separate
        # ClusteringExperiment object that was never used afterwards; that
        # duplicate setup has been dropped.
        setup(insurance_claims, session_id=SESSION_ID,
              log_experiment='mlflow', experiment_name='fraud_detection')

        cluster_model = create_model(selected_model, num_clusters=selected_clusters)

        # Input rows with the cluster assignment attached.
        st.write(assign_model(cluster_model))
        st.write(get_metrics())
        # Most recent PyCaret score grid.
        st.write(pull())

        for plot_name, unsupported_models in CLUSTER_PLOT_EXCLUSIONS.items():
            if selected_model in unsupported_models:
                continue
            plot_model(cluster_model, plot=plot_name, display_format='streamlit')


def render_anomaly_page():
    """Render the anomaly-detection page and run the analysis when requested."""
    # Explicit names instead of `import *`, so it is clear which PyCaret
    # module each call below comes from.
    from pycaret.anomaly import (
        assign_model,
        create_model,
        plot_model,
        pull,
        setup,
    )

    st.header('Anomaly Detection')
    st.write(
        """ """
    )

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    selected_model = st.selectbox("Choose an anomaly model", ANOMALY_MODELS)

    if uploaded_file is None:
        return

    insurance_claims = load_claims_csv(uploaded_file)

    if not st.button("Prediction"):
        return

    with st.spinner("Analyzing..."):
        # setup() runs only once a prediction is requested, not on every
        # script rerun. The original additionally configured a separate
        # AnomalyExperiment object that was never used afterwards; that
        # duplicate setup has been dropped.
        setup(insurance_claims, session_id=SESSION_ID)

        anomaly_model = create_model(selected_model)

        # Input rows with the anomaly labelling attached.
        st.write(assign_model(anomaly_model))
        # Most recent PyCaret score grid.
        st.write(pull())

        for plot_name in ANOMALY_PLOTS:
            plot_model(anomaly_model, plot=plot_name, display_format='streamlit')


hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

with st.sidebar:
    image = Image.open('./itaca_logo.png')
    st.image(image, use_column_width=True)
    page = option_menu(menu_title='Menu',
                       menu_icon="robot",
                       options=["Clustering Analysis", "Anomaly Detection"],
                       icons=["chat-dots", "key"],
                       default_index=0
                       )

st.title('ITACA Insurance Core AI Module')

if page == "Clustering Analysis":
    render_clustering_page()
elif page == "Anomaly Detection":
    render_anomaly_page()