"""UI helper callbacks: downstream-model selection, evaluation, plots, logging."""

import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

# Keep this assignment ahead of the fm4m import, as in the original ordering
# (presumably so the OpenMP runtime used by fm4m picks it up -- TODO confirm).
os.environ["OMP_MAX_ACTIVE_LEVELS"] = "1"

import models.fm4m as fm4m  # noqa: E402

# Fallback message returned whenever the inputs cannot produce a result.
_INCORRECT_SETTINGS_MSG = "Data & Model Setting is incorrect"

# Label produced by `_create_model` for the "Default - Auto" choice.
_DEFAULT_SETTINGS_LABEL = "Default Settings"

# Downstream model name handed to fm4m when the default settings are selected.
_DEFAULT_DOWNSTREAM_BY_TASK = {
    "Classification": "DefaultClassifier",
    "Regression": "DefaultRegressor",
}

# State keys that receive the values fm4m returns after the result, in order.
_STATE_KEYS_BY_TASK = {
    "Classification": ("roc_auc", "fpr", "tpr", "x_batch", "y_batch"),
    "Regression": ("RMSE", "y_batch_test", "y_prob", "x_batch", "y_batch"),
}


# Function to create model based on user input
def _create_model(
    model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None
):
    """Build the requested estimator and describe it as a string.

    Args:
        model_name: One of "XGBClassifier", "SVR", "Kernel Ridge",
            "Linear Regression" or "Default - Auto".
        max_depth, n_estimators, alpha, degree, kernel: Hyper-parameters
            forwarded to the estimators that accept them.

    Returns:
        "<model_name> * <get_params() dict>" for a concrete estimator,
        "Default Settings" for "Default - Auto", and "Model not supported."
        for any other name.
    """
    if model_name == "Default - Auto":
        return _DEFAULT_SETTINGS_LABEL

    if model_name == "XGBClassifier":
        model = xgb.XGBClassifier(
            objective='binary:logistic',
            eval_metric='auc',
            max_depth=max_depth,
            n_estimators=n_estimators,
            alpha=alpha,
        )
    elif model_name == "SVR":
        model = SVR(degree=degree, kernel=kernel)
    elif model_name == "Kernel Ridge":
        model = KernelRidge(alpha=alpha, degree=degree, kernel=kernel)
    elif model_name == "Linear Regression":
        model = LinearRegression()
    else:
        return "Model not supported."

    # The "name * params" format is parsed back by `display_eval`.
    return f"{model_name} * {model.get_params()}"


# Function to handle model creation based on input parameters
def create_downstream_model(model_name, max_depth, n_estimators, alpha, degree, kernel):
    """Create the downstream model description for the selected model name.

    Only the hyper-parameters relevant to `model_name` are forwarded to
    `_create_model`; the others are ignored.

    Returns:
        The string produced by `_create_model`. Unknown model names yield
        "Model not supported." (previously this function returned None for
        them, which broke the later parsing of the description).
    """
    if model_name == "XGBClassifier":
        return _create_model(
            model_name,
            max_depth=max_depth,
            n_estimators=n_estimators,
            alpha=alpha,
        )
    if model_name == "SVR":
        return _create_model(model_name, degree=degree, kernel=kernel)
    if model_name == "Kernel Ridge":
        return _create_model(model_name, alpha=alpha, degree=degree, kernel=kernel)
    # "Linear Regression", "Default - Auto" and unknown names take no
    # hyper-parameters; `_create_model` reports unsupported names itself.
    return _create_model(model_name)


def _parse_downstream(downstream):
    """Split a "<name> * <params dict>" description into (name, params).

    Returns `params` as None when the description has no "*" separator or
    when the parameter text cannot be evaluated.
    """
    name, separator, raw_params = downstream.partition("*")
    name = name.strip()
    if not separator:
        return name, None

    # repr() of a params dict prints NaN as the bare token `nan`; rewrite only
    # that whole token so other text merely containing "nan" is left untouched.
    expression = re.sub(r"\bnan\b", "float('nan')", raw_params.strip())
    try:
        # SECURITY NOTE(review): `eval` executes arbitrary code. The text is
        # expected to be the dict repr emitted by `_create_model`, but it
        # round-trips through the UI, so it must not be treated as trusted.
        # Consider passing structured parameters instead of a string.
        params = eval(expression)
    except Exception:
        params = None
    return name, params


# Function to display evaluation score
def display_eval(selected_models, dataset, task_type, downstream, fusion_type, state):
    """Run the evaluation for the selected models and return its result text.

    Args:
        selected_models: Models to evaluate; more than one selects
            `fm4m.multi_modal`, exactly one selects `fm4m.single_modal`.
        dataset: Dataset passed through to fm4m.
        task_type: "Classification" or "Regression".
        downstream: Description string from `create_downstream_model`.
        fusion_type: Accepted for interface compatibility; not used here.
        state: Mutable mapping that receives the evaluation outputs
            ("roc_auc", "fpr", "tpr" or "RMSE", "y_batch_test", "y_prob",
            plus "x_batch" and "y_batch").

    Returns:
        The result returned by fm4m, or a human-readable message when the
        inputs are unusable or the evaluation raised.
    """
    if not isinstance(downstream, str):
        # Nothing to parse (e.g. no downstream model was created yet).
        return _INCORRECT_SETTINGS_MSG
    downstream_model, params = _parse_downstream(downstream)

    try:
        if not selected_models:
            return "Please select at least one enabled model."

        state_keys = _STATE_KEYS_BY_TASK.get(task_type)
        if state_keys is None:
            return _INCORRECT_SETTINGS_MSG

        if downstream_model == _DEFAULT_SETTINGS_LABEL:
            downstream_model = _DEFAULT_DOWNSTREAM_BY_TASK[task_type]
            params = None

        if len(selected_models) > 1:
            outputs = fm4m.multi_modal(
                model_list=selected_models,
                downstream_model=downstream_model,
                params=params,
                dataset=dataset,
            )
        else:
            outputs = fm4m.single_modal(
                model=selected_models[0],
                downstream_model=downstream_model,
                params=params,
                dataset=dataset,
            )

        result, *values = outputs
        if len(values) != len(state_keys):
            # Mirror the failure a fixed-size tuple unpacking would produce.
            raise ValueError(
                f"expected {len(state_keys)} values after the result, "
                f"got {len(values)}"
            )
        for key, value in zip(state_keys, values):
            state[key] = value
    except Exception as e:
        return f"An error occurred: {e}"
    return result or _INCORRECT_SETTINGS_MSG


def _plot_latent_space(ax, state):
    """Scatter the first two columns of the stored batches onto `ax`."""
    x_batch, y_batch = state.get("x_batch"), state.get("y_batch")
    # NOTE(review): x_batch is drawn as "Class 0" and y_batch as "Class 1",
    # exactly as before -- confirm this matches what fm4m stores in them.
    if x_batch is not None and y_batch is not None:
        ax.scatter(y_batch[:, 0], y_batch[:, 1], c='red', label='Class 1')
        ax.scatter(x_batch[:, 0], x_batch[:, 1], c='blue', label='Class 0')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title('Dataset Distribution')


def _plot_roc_auc(ax, state):
    """Draw the stored ROC curve and the chance diagonal onto `ax`."""
    roc_auc, fpr, tpr = state.get("roc_auc"), state.get("fpr"), state.get("tpr")
    try:
        ax.plot(
            fpr,
            tpr,
            color='darkorange',
            lw=2,
            label=f'ROC curve (area = {roc_auc:.4f})',
        )
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
    except Exception:
        # Best effort: with missing or malformed data, show labelled empty axes.
        pass
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic')
    ax.legend(loc='lower right')


def _plot_parity(ax, state):
    """Scatter predicted against actual values with the y = x reference line."""
    rmse = state.get("RMSE")
    ax.set_title("Parity plot")
    try:
        actual = np.array(state.get("y_batch_test"), dtype=float)
        predicted = np.array(state.get("y_prob"), dtype=float)
        ax.scatter(
            actual,
            predicted,
            color="blue",
            label=f"Predicted vs Actual (RMSE: {rmse:.4f})",
        )
        lower = min(actual.min(), predicted.min())
        upper = max(actual.max(), predicted.max())
        ax.plot([lower, upper], [lower, upper], 'r-')
    except Exception:
        # Best effort: with missing or malformed data, show labelled empty axes.
        pass
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    ax.legend(loc='lower right')


# Function to handle plot display
def display_plot(plot_type, state):
    """Create a figure for the requested plot type from the stored state.

    Args:
        plot_type: "Latent Space", "ROC-AUC" or "Parity Plot"; any other
            value yields an empty figure.
        state: Mapping filled by `display_eval`.

    Returns:
        The matplotlib figure.
    """
    fig, ax = plt.subplots()
    if plot_type == "Latent Space":
        _plot_latent_space(ax, state)
    elif plot_type == "ROC-AUC":
        _plot_roc_auc(ax, state)
    elif plot_type == "Parity Plot":
        _plot_parity(ax, state)
    return fig


# Function to handle evaluation and logging
def evaluate_and_log(models, dataset, task_type, eval_output, state):
    """Prepend the latest evaluation to the log table kept in `state`.

    Args:
        models: Selected models; stored as their string representation.
        dataset: Dataset identifier.
        task_type: "Classification" or "Regression" (logged as "CLS"/"RGR");
            any other value is logged unchanged.
        eval_output: Result text; the substring " Score" is removed.
        state: Mapping holding the log DataFrame under "log_df".

    Returns:
        The updated log DataFrame, newest entry first.
    """
    task_dic = {'Classification': 'CLS', 'Regression': 'RGR'}
    result = eval_output.replace(" Score", "")
    new_entry = {
        "Selected Models": str(models),
        "Dataset": dataset,
        "Task": task_dic.get(task_type, task_type),
        "Result": result,
    }
    new_entry_df = pd.DataFrame([new_entry])

    previous_log = state.get("log_df")
    if previous_log is None:
        # First entry: no log table has been created yet.
        state["log_df"] = new_entry_df
    else:
        state["log_df"] = pd.concat([new_entry_df, previous_log])
    return state["log_df"]