# omics-plip-1 / evaluate_on_test_cohort2.py
import os
import numpy as np
import pandas as pd
import torch
import json
import tensorflow as tf
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score
import argparse

# Function to load and preprocess the dataset
def load_and_preprocess_data(metadata_file, data_dir):
    dff = pd.read_csv(metadata_file)
    if 'Unnamed: 0' in dff.columns:
        del dff['Unnamed: 0']
    # Filter to classes 1 and 3, then map them to binary labels (1 -> 1, 3 -> 0).
    # .copy() avoids pandas' SettingWithCopyWarning on the assignment below.
    classified_df = dff[dff['Class'].isin([1, 3])].copy()
    classified_df['Class'] = classified_df['Class'].map({1: 1, 3: 0})
    df = classified_df.set_index('PatientID')
    # Keep only patients that have a corresponding feature directory on disk
    available_patients = set(os.listdir(data_dir))
    df = df.loc[df.index.intersection(available_patients)]
    # Shuffle the rows
    df = df.sample(frac=1)
    return df
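
# A sketch of the metadata layout this loader assumes (column names come from
# the code above; the patient IDs and values below are hypothetical):
#
#     PatientID,Class
#     patient_001,1
#     patient_002,3
#
# Class 1 maps to the positive label (1), Class 3 to the negative label (0),
# and rows with any other class are dropped.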

# Function to create bags of tiles
def create_bags(df, data_dir):
    data = {'test2': {'X': [], 'Y': []}}
    for pID, row in df.iterrows():
        fol_p = os.path.join(data_dir, pID)
        tiles = os.listdir(fol_p)
        tile_data = []
        for tile in tiles:
            tile_p = os.path.join(fol_p, tile)
            # Each tile file holds a torch tensor of shape (1, feature_dim)
            np1 = torch.load(tile_p).numpy()
            tile_data.append(np1)
        # Stack tiles and drop the singleton dimension -> (n_tiles, feature_dim)
        bag = np.squeeze(tile_data, axis=1)
        bag_label = row['Class']
        data['test2']['X'].append(bag)
        data['test2']['Y'].append(np.array([bag_label]))
    # Bags have different tile counts, so hold them in an object array;
    # a plain np.array() over ragged bags raises an error in recent numpy
    data['test2']['X'] = np.array(data['test2']['X'], dtype=object)
    data['test2']['Y'] = np.array(data['test2']['Y'])
    print(f"Data[test2]['X'] shape: {data['test2']['X'].shape}, dtype: {data['test2']['X'].dtype}")
    return data
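
# Assumed on-disk layout for the feature directory (inferred from the loop
# above, not verified; the file names are hypothetical):
#
#     data_dir/
#         <PatientID>/
#             tile_0.pt    # torch tensor of shape (1, feature_dim)
#             tile_1.pt
#             ...
#
# np.squeeze(..., axis=1) removes the singleton dimension, turning the stack
# of tiles into one (n_tiles, feature_dim) bag per patient.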

# Function to pad (or truncate) bags so every bag has a uniform length
def prepare_data_with_padding(data, max_length=2000):
    padded_data = []
    for bag in data:
        if len(bag) < max_length:
            # Append all-zero rows until the bag reaches max_length tiles
            padding = np.zeros((max_length - len(bag), bag.shape[1]))
            padded_bag = np.vstack((bag, padding))
        else:
            # Truncate oversized bags; otherwise the final np.array() below
            # would fail on ragged shapes
            padded_bag = bag[:max_length]
        padded_data.append(padded_bag)
    return np.array(padded_data)
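
# Minimal sketch of the padding behaviour (the 512-d feature size is an
# assumption for illustration):
#
#     bag = np.random.rand(3, 512)            # a bag of 3 tiles
#     out = prepare_data_with_padding([bag])  # out.shape == (1, 2000, 512)
#     # out[0, :3] holds the real tiles; out[0, 3:] is all-zero padding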

# Function to compute additional metrics using sklearn
def compute_additional_metrics(X, Y, model):
    predictions = model.predict(X).flatten()
    predictions_binary = (predictions > 0.5).astype(int)  # Threshold probabilities at 0.5
    auc = roc_auc_score(Y, predictions)  # AUC uses the raw probabilities
    precision = precision_score(Y, predictions_binary)
    recall = recall_score(Y, predictions_binary)
    f1 = f1_score(Y, predictions_binary)
    return auc, precision, recall, f1, predictions
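
# Toy illustration of how the metrics are derived (values are made up):
#
#     Y           = np.array([0, 1, 1, 0])
#     predictions = np.array([0.2, 0.9, 0.6, 0.4])  # model output probabilities
#     predictions > 0.5  -> [0, 1, 1, 0]            # binarised labels for precision/recall/F1
#     roc_auc_score(Y, predictions)                 # AUC scores the raw probabilities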

# Function to evaluate the model on a given dataset using sklearn metrics
def evaluate_dataset(model, X, Y, dataset_name, save_dir):
    # Loss and accuracy come from TensorFlow's model.evaluate()
    eval_metrics = model.evaluate(X, Y, verbose=0)
    # AUC, precision, recall, and F1 come from sklearn
    auc, precision, recall, f1, predictions = compute_additional_metrics(X, Y, model)
    # Cast to plain floats so the dict is JSON-serialisable
    metrics = {
        'loss': float(eval_metrics[0]),
        'accuracy': float(eval_metrics[1]),
        'auc': float(auc),
        'precision': float(precision),
        'recall': float(recall),
        'f1_score': float(f1)
    }
    # Save the per-sample predictions alongside the true labels
    np.savez_compressed(os.path.join(save_dir, f'{dataset_name}_predictions.npz'),
                        predictions=predictions, labels=Y)
    return metrics
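
# The saved predictions can be inspected later, e.g. (hypothetical path):
#
#     arrs = np.load('./evaluation_results_test2/test2_predictions.npz')
#     probs, labels = arrs['predictions'], arrs['labels']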

if __name__ == "__main__":
    # Command line arguments
    parser = argparse.ArgumentParser(description='Evaluate a trained model on a secondary test dataset (test2).')
    parser.add_argument('--metadata_file', type=str, required=True, help='Path to the metadata CSV file for test2.')
    parser.add_argument('--data_dir', type=str, required=True, help='Directory containing the extracted tissue features.')
    parser.add_argument('--model_path', type=str, required=True, help='Path to the saved model file.')
    parser.add_argument('--save_dir', type=str, default='./evaluation_results_test2/', help='Directory to save evaluation results.')
    args = parser.parse_args()

    os.makedirs(args.save_dir, exist_ok=True)

    # Load and preprocess the test2 data
    df_test2 = load_and_preprocess_data(args.metadata_file, args.data_dir)
    data_test2 = create_bags(df_test2, args.data_dir)

    # Pad every bag to a fixed number of tiles
    test2_X = prepare_data_with_padding(data_test2['test2']['X'], max_length=2000)
    test2_Y = data_test2['test2']['Y'].flatten()

    # Load the saved model
    model = tf.keras.models.load_model(args.model_path)

    # Evaluate the model on the test2 dataset
    test2_metrics = evaluate_dataset(model, test2_X, test2_Y, "test2", args.save_dir)

    # Save the metrics to a JSON file
    with open(os.path.join(args.save_dir, 'evaluation_metrics_test2.json'), 'w') as f:
        json.dump(test2_metrics, f, indent=4)
    print("Evaluation metrics saved to evaluation_metrics_test2.json")