bhimrazy committed on
Commit
31d1d47
1 Parent(s): 65847f8

Adds shell scripts to download and merge dr dataset

Browse files
scripts/download-dr-dataset.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Downloads the files of the Kaggle "diabetic-retinopathy-detection"
# competition into DATASET_DIR using the `kaggle` command-line tool.

# Path where the dataset will be downloaded (relative to the current directory).
DATASET_DIR="data/diabetic-retinopathy-dataset"

# Stop early if the target directory cannot be created; otherwise every
# download below would fail one by one with a less obvious error.
if ! mkdir -p "$DATASET_DIR"; then
  echo "Error: could not create directory $DATASET_DIR." >&2
  exit 1
fi

# Start time of the script (seconds since the epoch), used to report the
# total elapsed time at the end.
start_time=$(date +%s)

# Names of the files to download.
# Note: the train and test archives are split into multiple parts, so all
# parts must be downloaded to get the complete archive.
# Reference: check the Data Explorer on Kaggle for the list of files:
# https://www.kaggle.com/c/diabetic-retinopathy-detection/data
files=(
  "test.zip.001"
  "test.zip.002"
  "test.zip.003"
  "test.zip.004"
  "test.zip.005"
  "test.zip.006"
  "test.zip.007"
  "sampleSubmission.csv.zip"
  "sample.zip"
  "train.zip.001"
  "train.zip.002"
  "train.zip.003"
  "train.zip.004"
  "train.zip.005"
  "trainLabels.csv.zip"
)
32
+
33
#######################################
# Download a single competition file with the kaggle CLI and extract it.
# Globals:   DATASET_DIR (read) - directory that receives the download
# Arguments: $1 - name of the file to download (e.g. "train.zip.001")
# Outputs:   kaggle/unzip output on stdout; error messages on stderr
# Returns:   0 on success; 1 if the download fails, the expected archive
#            is missing, or extraction fails
#######################################
download_file() {
  local name="$1"
  local zip_file="$DATASET_DIR/$name"

  # Do not continue after a failed download: an archive left over from an
  # earlier run could otherwise be extracted as if it were fresh.
  if ! kaggle competitions download -c diabetic-retinopathy-detection \
    -f "$name" -p "$DATASET_DIR"; then
    echo "Error: failed to download $name." >&2
    return 1
  fi

  # The downloaded archive is expected to end in .zip; when the requested
  # name does not (e.g. "train.zip.001"), look for "<name>.zip" instead.
  if [[ "$name" != *.zip ]]; then
    zip_file="$zip_file.zip"
  fi

  # Check that the archive actually exists before trying to extract it.
  if [[ ! -f "$zip_file" ]]; then
    echo "Error: $zip_file does not exist." >&2
    return 1
  fi

  # -o overwrites existing files without prompting. Info-ZIP unzip exits
  # with 1 for warnings where processing still completed, so only statuses
  # above 1 are treated as a failed extraction.
  local rc=0
  unzip -o "$zip_file" -d "$DATASET_DIR" || rc=$?
  if (( rc > 1 )); then
    echo "Error: failed to extract $zip_file (unzip exit status $rc); archive kept." >&2
    return 1
  fi

  # Only delete the archive once its contents have been extracted.
  rm -f -- "$zip_file"
}
53
+
54
# Start one background download per file, remembering each job's PID so
# that its exit status can be checked afterwards.
pids=()
for i in "${!files[@]}"; do
  download_file "${files[i]}" &
  pids[i]=$!
done

# Wait for every background job individually: a bare `wait` returns 0
# regardless of how the jobs exited, which would hide failed downloads.
failed=0
for i in "${!pids[@]}"; do
  if ! wait "${pids[i]}"; then
    echo "Error: download of ${files[i]} failed." >&2
    failed=$((failed + 1))
  fi
done

# End time of the script
end_time=$(date +%s)

# Total time taken, in whole minutes (integer division rounds down)
total_time=$(( (end_time - start_time) / 60 ))

# Print total time taken
echo "Total time taken: ${total_time} minutes"

# Report failure to the caller if any download did not succeed.
if (( failed > 0 )); then
  echo "Error: ${failed} of ${#files[@]} downloads failed." >&2
  exit 1
fi
scripts/merge-zip-parts.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Merges the split train.zip.* and test.zip.* parts found in DATASET_DIR
# into single train.zip and test.zip archives.

# Dataset directory, relative to the current working directory.
DATASET_DIR="diabetic-retinopathy-dataset"

# scripts/download-dr-dataset.sh stores the files under
# data/diabetic-retinopathy-dataset. When this script is started from the
# same directory as that one, the plain name above does not exist, so fall
# back to the data/ location. An existing plain directory still wins.
if [[ ! -d "$DATASET_DIR" && -d "data/$DATASET_DIR" ]]; then
  DATASET_DIR="data/$DATASET_DIR"
fi
5
+
6
# Write a message to stdout, prefixed with the current date and time
# formatted as "YYYY-MM-DD HH:MM:SS".
# Arguments: $1 - message text
log() {
  local stamp
  stamp=$(date +"%Y-%m-%d %H:%M:%S")
  printf '%s %s\n' "$stamp" "$1"
}
10
+
11
#######################################
# Merge the split parts "<name>.zip.*" in DATASET_DIR into "<name>.zip"
# and remove the parts once the merge has succeeded.
# Globals:   DATASET_DIR (read) - directory containing the parts
# Arguments: $1 - archive base name without extension (e.g. "train")
# Outputs:   progress messages via log; error messages on stderr
# Returns:   0 on success; 1 if no parts are found or the merge fails
#######################################
merge_zip() {
  local zip_name="$1"
  local target="$DATASET_DIR/$zip_name.zip"
  # Glob expansion is sorted, so the parts are concatenated in name order.
  local parts=("$DATASET_DIR/$zip_name".zip.*)

  # An unmatched glob stays as the literal pattern; detect that case
  # instead of creating an empty archive from a failing `cat`.
  if [[ ! -e "${parts[0]}" ]]; then
    log "Error: no $zip_name.zip.* parts found in $DATASET_DIR" >&2
    return 1
  fi

  log "Merging $zip_name parts into a single zip file..."
  if ! cat -- "${parts[@]}" > "$target"; then
    # Keep the parts so the merge can be retried; drop the partial output.
    log "Error: failed to merge $zip_name parts; parts were kept" >&2
    rm -f -- "$target"
    return 1
  fi
  log "Merged $zip_name.zip created at $DATASET_DIR"

  # Remove the part files only after the merged archive has been written.
  log "Removing $zip_name parts"
  rm -- "${parts[@]}"
}
22
+
23
# Merge the train.zip and test.zip parts in parallel, remembering each
# background job's PID so its exit status can be checked.
merge_zip "train" &
train_pid=$!

merge_zip "test" &
test_pid=$!

# Wait for both merges before reporting completion; waiting on each PID
# (rather than a bare `wait`) exposes the job's exit status.
status=0
wait "$train_pid" || status=1
wait "$test_pid" || status=1

# End of script
if (( status != 0 )); then
  log "Script execution failed." >&2
  exit 1
fi
log "Script execution completed."