"""Command-line entry point: process WSI images and generate tiles.

Reads a patient CSV, collects the per-patient sub-directories found under a
root directory, builds a ``PlipDataProcess`` dataset over them and indexes
every item once so that each one gets processed.
"""
import argparse
import os

import pandas as pd
from transformers import CLIPImageProcessor

from scripts.PlipDataProcess import PlipDataProcess  # Updated folder name

# Defaults preserved from the original hard-coded values.
DEFAULT_MODEL_DIR = "./plip/"
DEFAULT_NUM_TILES_PER_PATIENT = 2000
DEFAULT_MAX_WORKERS = 64


def _list_patient_dirs(root_dir):
    """Return the names of the immediate sub-directories of *root_dir*, sorted.

    ``os.listdir`` yields entries in arbitrary order; sorting keeps the
    dataset index -> directory mapping stable from one run to the next.
    """
    return sorted(
        name
        for name in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, name))
    )


def main(
    csv_file,
    root_dir,
    save_dir,
    model_dir=DEFAULT_MODEL_DIR,
    num_tiles_per_patient=DEFAULT_NUM_TILES_PER_PATIENT,
    max_workers=DEFAULT_MAX_WORKERS,
):
    """Run the PLIP pre-processing over every directory under *root_dir*.

    Args:
        csv_file: Path to a CSV file that contains a ``PatientID`` column;
            that column becomes the index of the DataFrame handed to
            ``PlipDataProcess``.
        root_dir: Directory whose immediate sub-directories are processed
            (each one is assumed to correspond to a patient).
        save_dir: Forwarded to ``PlipDataProcess`` as ``save_dir``.
        model_dir: Location passed to ``CLIPImageProcessor.from_pretrained``.
        num_tiles_per_patient: Forwarded to ``PlipDataProcess``.
        max_workers: Forwarded to ``PlipDataProcess``.

    Raises:
        KeyError: If the CSV has no ``PatientID`` column.
        OSError: If *csv_file* or *root_dir* cannot be read.
    """
    patient_df = pd.read_csv(csv_file).set_index('PatientID')
    patient_dirs = _list_patient_dirs(root_dir)

    img_processor = CLIPImageProcessor.from_pretrained(model_dir)

    dataset = PlipDataProcess(
        root_dir=root_dir,
        files=patient_dirs,
        df=patient_df,
        img_processor=img_processor,
        num_tiles_per_patient=num_tiles_per_patient,
        max_workers=max_workers,
        save_dir=save_dir,
    )

    # Indexing is what triggers the processing of an item; the returned value
    # is not needed here. (Presumably PlipDataProcess.__getitem__ writes its
    # output under save_dir -- confirm against that class.)
    # Explicit indices are kept on purpose: only __len__/__getitem__ are known
    # to exist on the dataset.
    for index in range(len(dataset)):
        _ = dataset[index]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Process WSI images and generate tiles")

    # Required arguments
    parser.add_argument('--csv_file', type=str, required=True, help='Path to the CSV file with patient scores')
    parser.add_argument('--root_dir', type=str, required=True, help='Root directory for WSI tiles')
    parser.add_argument('--save_dir', type=str, required=True, help='Directory to save the processed tile data')

    # Optional overrides; the defaults reproduce the previous fixed behaviour.
    parser.add_argument('--model_dir', type=str, default=DEFAULT_MODEL_DIR,
                        help='Path of the pretrained image processor (default: %(default)s)')
    parser.add_argument('--num_tiles_per_patient', type=int, default=DEFAULT_NUM_TILES_PER_PATIENT,
                        help='Value forwarded as num_tiles_per_patient (default: %(default)s)')
    parser.add_argument('--max_workers', type=int, default=DEFAULT_MAX_WORKERS,
                        help='Value forwarded as max_workers (default: %(default)s)')

    args = parser.parse_args()

    main(
        csv_file=args.csv_file,
        root_dir=args.root_dir,
        save_dir=args.save_dir,
        model_dir=args.model_dir,
        num_tiles_per_patient=args.num_tiles_per_patient,
        max_workers=args.max_workers,
    )