Commit 47851eb (verified) by kjysmu · 1 parent: c092cd5

Upload 11 files
dataset/.DS_Store ADDED
Binary file (6.15 kB).
 
dataset/deam/README.md ADDED
@@ -0,0 +1 @@
+ DEAM dataset
dataset/emomusic/README.md ADDED
@@ -0,0 +1 @@
+ EmoMusic dataset
dataset/jamendo/README.md ADDED
@@ -0,0 +1 @@
+ jamendo dataset
dataset/pmemo/README.md ADDED
@@ -0,0 +1 @@
+ PMEmo dataset
dataset_loaders/__init__.py ADDED
@@ -0,0 +1,3 @@
+ "Import all submodules"
+
+ # from model import
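The module docstring says "Import all submodules", but nothing is imported yet and the `# from model import` stub is left dangling. If the intent is to re-export the loader classes defined in the files added below, a minimal sketch of that `__init__.py` could look like this (the re-export list is an assumption, not part of this commit):

    # Hypothetical re-exports; not part of the committed file.
    from .deam import DEAMDataset
    from .emomusic import EmoMusicDataset
    from .jamendo import JamendoDataset
    from .pmemo import PMEmoDataset

    __all__ = ["DEAMDataset", "EmoMusicDataset", "JamendoDataset", "PMEmoDataset"]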
dataset_loaders/deam.py ADDED
@@ -0,0 +1,237 @@
+ import os
+ import numpy as np
+ import pickle
+ from torch.utils import data
+ import torchaudio.transforms as T
+ import torchaudio
+ import torch
+ import csv
+ import pytorch_lightning as pl
+ from music2latent import EncoderDecoder
+ import json
+ import math
+ from sklearn.preprocessing import StandardScaler
+ import pandas as pd
+
+ class DEAMDataset(data.Dataset):
+     def __init__(self, **task_args):
+         self.task_args = task_args
+         self.tr_val = task_args.get('tr_val', "train")
+         self.root = task_args.get('root', "./dataset/deam")
+         self.segment_type = task_args.get('segment_type', "all")
+         self.cfg = task_args.get('cfg')
+
+         # Path to the split file (train/val/test)
+         self.split_file = os.path.join(self.root, 'meta', 'split', f"{self.tr_val}.txt")
+
+         # Read file IDs from the split file
+         with open(self.split_file, 'r') as f:
+             self.file_ids = [line.strip() for line in f.readlines()]
+
+         # MERT and MP3 directories
+         self.mert_dir = os.path.join(self.root, 'mert_30s')
+         self.mp3_dir = os.path.join(self.root, 'mp3')
+
+         # Separate tonic and mode
+         tonic_signatures = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
+         mode_signatures = ["major", "minor"]  # Major and minor modes
+
+         self.tonic_to_idx = {tonic: idx for idx, tonic in enumerate(tonic_signatures)}
+         self.mode_to_idx = {mode: idx for idx, mode in enumerate(mode_signatures)}
+
+         self.idx_to_tonic = {idx: tonic for tonic, idx in self.tonic_to_idx.items()}
+         self.idx_to_mode = {idx: mode for mode, idx in self.mode_to_idx.items()}
+
+         # Load static annotations (valence and arousal)
+         self.annotation_file = os.path.join(self.root, 'meta', 'static_annotations.csv')
+         self.annotations = pd.read_csv(self.annotation_file, index_col='song_id')
+
+         # Load mood tag probabilities
+         self.annotation_tag_file = os.path.join(self.root, 'meta', 'mood_probabilities.csv')
+         self.annotations_tag = pd.read_csv(self.annotation_tag_file, index_col='song_id')
+
+         with open('dataset/deam/meta/chord.json', 'r') as f:
+             self.chord_to_idx = json.load(f)
+         with open('dataset/deam/meta/chord_inv.json', 'r') as f:
+             self.idx_to_chord = json.load(f)
+         self.idx_to_chord = {int(k): v for k, v in self.idx_to_chord.items()}  # Ensure keys are ints
+
+         with open('dataset/emomusic/meta/chord_root.json') as json_file:
+             self.chordRootDic = json.load(json_file)
+         with open('dataset/emomusic/meta/chord_attr.json') as json_file:
+             self.chordAttrDic = json.load(json_file)
+
+     def __len__(self):
+         return len(self.file_ids)
+
+     def __getitem__(self, index):
+         file_id = int(self.file_ids[index])  # File ID from split
+
+         # Get valence and arousal from annotations
+         if file_id not in self.annotations.index:
+             raise ValueError(f"File ID {file_id} not found in annotations.")
+
+         valence = self.annotations.loc[file_id, 'valence_mean']
+         arousal = self.annotations.loc[file_id, 'arousal_mean']
+
+         y_valence = torch.tensor(valence, dtype=torch.float32)
+         y_arousal = torch.tensor(arousal, dtype=torch.float32)
+
+         y_mood = np.array(self.annotations_tag.loc[file_id])
+         y_mood = y_mood.astype('float32')
+         y_mood = torch.from_numpy(y_mood)
+
+         # --- Chord feature ---
+         fn_chord = os.path.join(self.root, 'chord', 'lab3', str(file_id) + ".lab")
+
+         chords = []
+
+         if not os.path.exists(fn_chord):
+             chords.append((float(0), float(0), "N"))
+         else:
+             with open(fn_chord, 'r') as file:
+                 for line in file:
+                     start, end, chord = line.strip().split()
+                     chords.append((float(start), float(end), chord))
+
+         encoded = []
+         encoded_root = []
+         encoded_attr = []
+         durations = []
+         for start, end, chord in chords:
+             chord_arr = chord.split(":")
+             if len(chord_arr) == 1:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 if chord_arr[0] == "N" or chord_arr[0] == "X":
+                     chordAttrID = 0
+                 else:
+                     chordAttrID = 1
+             elif len(chord_arr) == 2:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 chordAttrID = self.chordAttrDic[chord_arr[1]]
+             encoded_root.append(chordRootID)
+             encoded_attr.append(chordAttrID)
+
+             if chord in self.chord_to_idx:
+                 encoded.append(self.chord_to_idx[chord])
+             else:
+                 print(f"Warning: Chord {chord} not found in chord.json. Skipping.")
+
+             durations.append(end - start)  # Compute duration
+
+         encoded_chords = np.array(encoded)
+         encoded_chords_root = np.array(encoded_root)
+         encoded_chords_attr = np.array(encoded_attr)
+
+         # Maximum sequence length for chords
+         max_sequence_length = 100  # Define this globally or as a parameter
+
+         # Truncate or pad chord sequences
+         if len(encoded_chords) > max_sequence_length:
+             # Truncate to max length
+             encoded_chords = encoded_chords[:max_sequence_length]
+             encoded_chords_root = encoded_chords_root[:max_sequence_length]
+             encoded_chords_attr = encoded_chords_attr[:max_sequence_length]
+         else:
+             # Pad with zeros (padding value for chords)
+             padding = [0] * (max_sequence_length - len(encoded_chords))
+             encoded_chords = np.concatenate([encoded_chords, padding])
+             encoded_chords_root = np.concatenate([encoded_chords_root, padding])
+             encoded_chords_attr = np.concatenate([encoded_chords_attr, padding])
+
+         # Convert to fixed-length tensors
+         chords_tensor = torch.tensor(encoded_chords, dtype=torch.long)
+         chords_root_tensor = torch.tensor(encoded_chords_root, dtype=torch.long)
+         chords_attr_tensor = torch.tensor(encoded_chords_attr, dtype=torch.long)
+
+         # --- Key feature (Tonic and Mode separation) ---
+         fn_key = os.path.join(self.root, 'key', str(file_id) + ".lab")
+
+         if not os.path.exists(fn_key):
+             mode = "major"
+         else:
+             mode = "major"  # Default value
+             with open(fn_key, 'r') as file:
+                 for line in file:
+                     key = line.strip()
+                     if key == "None":
+                         mode = "major"
+                     else:
+                         mode = key.split()[-1]
+
+         encoded_mode = self.mode_to_idx.get(mode, 0)
+         mode_tensor = torch.tensor([encoded_mode], dtype=torch.long)
+
+         # --- MERT feature ---
+         fn_mert = os.path.join(self.mert_dir, str(file_id))
+
+         embeddings = []
+
+         # Layers to extract, taken from the config (a subset of the MERT layers)
+         layers_to_extract = self.cfg.model.layers
+
+         # Collect all segment embeddings
+         segment_embeddings = []
+         for filename in sorted(os.listdir(fn_mert)):  # Sort files to ensure sequential order
+             file_path = os.path.join(fn_mert, filename)
+             if os.path.isfile(file_path) and filename.endswith('.npy'):
+                 segment = np.load(file_path)
+
+                 # Extract and concatenate features for the specified layers
+                 concatenated_features = np.concatenate(
+                     [segment[:, layer_idx, :] for layer_idx in layers_to_extract], axis=1
+                 )
+                 concatenated_features = np.squeeze(concatenated_features)  # e.g. 768 * 2 = 1536 for two layers
+                 segment_embeddings.append(concatenated_features)
+
+         # Convert to numpy array
+         segment_embeddings = np.array(segment_embeddings)
+
+         # Check mode: 'train' or 'val'
+         if self.tr_val == "train" and len(segment_embeddings) > 0:  # Augmentation for training
+             num_segments = len(segment_embeddings)
+
+             # Randomly choose a starting index and the length of the sequence
+             start_idx = np.random.randint(0, num_segments)  # Random starting index
+             end_idx = np.random.randint(start_idx + 1, num_segments + 1)  # Ensure end index is after start index
+
+             # Extract the sequential subset
+             chosen_segments = segment_embeddings[start_idx:end_idx]
+
+             # Compute the mean of the chosen sequential segments
+             final_embedding_mert = np.mean(chosen_segments, axis=0)
+         else:  # Validation or other modes: use mean of all segments
+             if len(segment_embeddings) > 0:
+                 final_embedding_mert = np.mean(segment_embeddings, axis=0)
+             else:
+                 # Handle case with no valid embeddings
+                 final_embedding_mert = np.zeros((1536,))  # Zero vector of the expected size
+
+         # Convert to PyTorch tensor
+         final_embedding_mert = torch.from_numpy(final_embedding_mert)
+
+         # Get the MP3 path
+         mp3_path = os.path.join(self.mp3_dir, f"{file_id}.mp3")
+         if not os.path.exists(mp3_path):
+             raise FileNotFoundError(f"MP3 file not found for {mp3_path}")
+
+         return {
+             "x_mert": final_embedding_mert,
+             "x_chord": chords_tensor,
+             "x_chord_root": chords_root_tensor,
+             "x_chord_attr": chords_attr_tensor,
+             "x_key": mode_tensor,
+             "y_va": torch.stack([y_valence, y_arousal], dim=0),
+             "y_mood": y_mood,
+             "path": mp3_path
+         }
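A rough usage sketch for the loader above. Only the `model.layers` field of `cfg` is read by the class, and the on-disk layout under `./dataset/deam` (meta/split/*.txt, meta/*.csv, chord/lab3/, key/, mert_30s/, mp3/) is assumed to already exist; `SimpleNamespace` stands in for whatever config object the project actually passes in:

    from types import SimpleNamespace
    from torch.utils.data import DataLoader
    from dataset_loaders.deam import DEAMDataset

    # Hypothetical config: only cfg.model.layers is used by the dataset.
    cfg = SimpleNamespace(model=SimpleNamespace(layers=[5, 6]))

    train_set = DEAMDataset(tr_val="train", root="./dataset/deam", cfg=cfg)
    train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)

    batch = next(iter(train_loader))
    print(batch["x_mert"].shape)   # (16, 1536) if two 768-dim MERT layers are concatenated
    print(batch["x_chord"].shape)  # (16, 100) fixed-length chord index sequence
    print(batch["y_va"].shape)     # (16, 2) valence/arousal targets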
dataset_loaders/emomusic.py ADDED
@@ -0,0 +1,235 @@
+ import os
+ import numpy as np
+ import pickle
+ from torch.utils import data
+ import torchaudio.transforms as T
+ import torchaudio
+ import torch
+ import csv
+ import pytorch_lightning as pl
+ from music2latent import EncoderDecoder
+ import json
+ import math
+ from sklearn.preprocessing import StandardScaler
+ import pandas as pd
+
+ class EmoMusicDataset(data.Dataset):
+     def __init__(self, **task_args):
+         self.task_args = task_args
+         self.tr_val = task_args.get('tr_val', "train")
+         self.root = task_args.get('root', "./dataset/emomusic")
+         self.segment_type = task_args.get('segment_type', "all")
+         self.cfg = task_args.get('cfg')
+
+         # Path to the split file (train/val/test)
+         self.split_file = os.path.join(self.root, 'meta', 'split', f"{self.tr_val}.txt")
+
+         # Read file IDs from the split file
+         with open(self.split_file, 'r') as f:
+             self.file_ids = [line.strip() for line in f.readlines()]
+
+         # Separate tonic and mode
+         tonic_signatures = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
+         mode_signatures = ["major", "minor"]  # Major and minor modes
+
+         self.tonic_to_idx = {tonic: idx for idx, tonic in enumerate(tonic_signatures)}
+         self.mode_to_idx = {mode: idx for idx, mode in enumerate(mode_signatures)}
+
+         self.idx_to_tonic = {idx: tonic for tonic, idx in self.tonic_to_idx.items()}
+         self.idx_to_mode = {idx: mode for mode, idx in self.mode_to_idx.items()}
+
+         with open('dataset/emomusic/meta/chord.json', 'r') as f:
+             self.chord_to_idx = json.load(f)
+         with open('dataset/emomusic/meta/chord_inv.json', 'r') as f:
+             self.idx_to_chord = json.load(f)
+         self.idx_to_chord = {int(k): v for k, v in self.idx_to_chord.items()}  # Ensure keys are ints
+
+         with open('dataset/emomusic/meta/chord_root.json') as json_file:
+             self.chordRootDic = json.load(json_file)
+         with open('dataset/emomusic/meta/chord_attr.json') as json_file:
+             self.chordAttrDic = json.load(json_file)
+
+         # MERT and MP3 directories
+         self.mert_dir = os.path.join(self.root, 'mert_30s')
+         self.mp3_dir = os.path.join(self.root, 'mp3')
+
+         # Load static annotations (valence and arousal)
+         self.annotation_file = os.path.join(self.root, 'meta', 'static_annotations.csv')
+         self.annotations = pd.read_csv(self.annotation_file, index_col='song_id')
+
+         # Load mood tag probabilities
+         self.annotation_tag_file = os.path.join(self.root, 'meta', 'mood_probabilities.csv')
+         self.annotations_tag = pd.read_csv(self.annotation_tag_file, index_col='song_id')
+
+     def __len__(self):
+         return len(self.file_ids)
+
+     def __getitem__(self, index):
+         file_id = int(self.file_ids[index])  # File ID from split
+
+         # Get valence and arousal from annotations
+         if file_id not in self.annotations.index:
+             raise ValueError(f"File ID {file_id} not found in annotations.")
+
+         valence = self.annotations.loc[file_id, 'valence_mean']
+         arousal = self.annotations.loc[file_id, 'arousal_mean']
+
+         y_valence = torch.tensor(valence, dtype=torch.float32)
+         y_arousal = torch.tensor(arousal, dtype=torch.float32)
+
+         y_mood = np.array(self.annotations_tag.loc[file_id])
+         y_mood = y_mood.astype('float32')
+         y_mood = torch.from_numpy(y_mood)
+
+         # --- Chord feature ---
+         fn_chord = os.path.join(self.root, 'chord', 'lab3', str(file_id) + ".lab")
+
+         chords = []
+
+         if not os.path.exists(fn_chord):
+             chords.append((float(0), float(0), "N"))
+         else:
+             with open(fn_chord, 'r') as file:
+                 for line in file:
+                     start, end, chord = line.strip().split()
+                     chords.append((float(start), float(end), chord))
+
+         encoded = []
+         encoded_root = []
+         encoded_attr = []
+         durations = []
+         for start, end, chord in chords:
+             chord_arr = chord.split(":")
+             if len(chord_arr) == 1:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 if chord_arr[0] == "N" or chord_arr[0] == "X":
+                     chordAttrID = 0
+                 else:
+                     chordAttrID = 1
+             elif len(chord_arr) == 2:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 chordAttrID = self.chordAttrDic[chord_arr[1]]
+             encoded_root.append(chordRootID)
+             encoded_attr.append(chordAttrID)
+
+             if chord in self.chord_to_idx:
+                 encoded.append(self.chord_to_idx[chord])
+             else:
+                 print(f"Warning: Chord {chord} not found in chord.json. Skipping.")
+
+             durations.append(end - start)  # Compute duration
+
+         encoded_chords = np.array(encoded)
+         encoded_chords_root = np.array(encoded_root)
+         encoded_chords_attr = np.array(encoded_attr)
+
+         # Maximum sequence length for chords
+         max_sequence_length = 100  # Define this globally or as a parameter
+
+         # Truncate or pad chord sequences
+         if len(encoded_chords) > max_sequence_length:
+             # Truncate to max length
+             encoded_chords = encoded_chords[:max_sequence_length]
+             encoded_chords_root = encoded_chords_root[:max_sequence_length]
+             encoded_chords_attr = encoded_chords_attr[:max_sequence_length]
+         else:
+             # Pad with zeros (padding value for chords)
+             padding = [0] * (max_sequence_length - len(encoded_chords))
+             encoded_chords = np.concatenate([encoded_chords, padding])
+             encoded_chords_root = np.concatenate([encoded_chords_root, padding])
+             encoded_chords_attr = np.concatenate([encoded_chords_attr, padding])
+
+         # Convert to fixed-length tensors
+         chords_tensor = torch.tensor(encoded_chords, dtype=torch.long)
+         chords_root_tensor = torch.tensor(encoded_chords_root, dtype=torch.long)
+         chords_attr_tensor = torch.tensor(encoded_chords_attr, dtype=torch.long)
+
+         # --- Key feature (Tonic and Mode separation) ---
+         fn_key = os.path.join(self.root, 'key', str(file_id) + ".lab")
+
+         if not os.path.exists(fn_key):
+             mode = "major"
+         else:
+             mode = "major"  # Default value
+             with open(fn_key, 'r') as file:
+                 for line in file:
+                     key = line.strip()
+                     if key == "None":
+                         mode = "major"
+                     else:
+                         mode = key.split()[-1]
+
+         encoded_mode = self.mode_to_idx.get(mode, 0)
+         mode_tensor = torch.tensor([encoded_mode], dtype=torch.long)
+
+         # --- MERT feature ---
+         fn_mert = os.path.join(self.mert_dir, str(file_id))
+
+         embeddings = []
+
+         # Layers to extract, taken from the config (a subset of the MERT layers)
+         layers_to_extract = self.cfg.model.layers
+
+         # Collect all segment embeddings
+         segment_embeddings = []
+         for filename in sorted(os.listdir(fn_mert)):  # Sort files to ensure sequential order
+             file_path = os.path.join(fn_mert, filename)
+             if os.path.isfile(file_path) and filename.endswith('.npy'):
+                 segment = np.load(file_path)
+
+                 # Extract and concatenate features for the specified layers
+                 concatenated_features = np.concatenate(
+                     [segment[:, layer_idx, :] for layer_idx in layers_to_extract], axis=1
+                 )
+                 concatenated_features = np.squeeze(concatenated_features)  # e.g. 768 * 2 = 1536 for two layers
+                 segment_embeddings.append(concatenated_features)
+
+         # Convert to numpy array
+         segment_embeddings = np.array(segment_embeddings)
+
+         # Check mode: 'train' or 'val'
+         if self.tr_val == "train" and len(segment_embeddings) > 0:  # Augmentation for training
+             num_segments = len(segment_embeddings)
+
+             # Randomly choose a starting index and the length of the sequence
+             start_idx = np.random.randint(0, num_segments)  # Random starting index
+             end_idx = np.random.randint(start_idx + 1, num_segments + 1)  # Ensure end index is after start index
+
+             # Extract the sequential subset
+             chosen_segments = segment_embeddings[start_idx:end_idx]
+
+             # Compute the mean of the chosen sequential segments
+             final_embedding_mert = np.mean(chosen_segments, axis=0)
+         else:  # Validation or other modes: use mean of all segments
+             if len(segment_embeddings) > 0:
+                 final_embedding_mert = np.mean(segment_embeddings, axis=0)
+             else:
+                 # Handle case with no valid embeddings
+                 final_embedding_mert = np.zeros((1536,))  # Zero vector of the expected size
+
+         # Convert to PyTorch tensor
+         final_embedding_mert = torch.from_numpy(final_embedding_mert)
+
+         # Get the MP3 path
+         mp3_path = os.path.join(self.mp3_dir, f"{file_id}.mp3")
+         if not os.path.exists(mp3_path):
+             raise FileNotFoundError(f"MP3 file not found for {mp3_path}")
+
+         return {
+             "x_mert": final_embedding_mert,
+             "x_chord": chords_tensor,
+             "x_chord_root": chords_root_tensor,
+             "x_chord_attr": chords_attr_tensor,
+             "x_key": mode_tensor,
+             "y_va": torch.stack([y_valence, y_arousal], dim=0),
+             "y_mood": y_mood,
+             "path": mp3_path
+         }
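All of the loaders parse the same three-column chord annotation format (`start end label` per line, with labels such as `N` or `C:maj`) into root and quality index sequences. A toy, self-contained illustration of that step; the id mappings below are invented, since the real `chord_root.json` / `chord_attr.json` contents are not shown in this commit:

    # Toy chord .lab parsing; the real mappings live in dataset/*/meta/*.json (not shown here).
    lab_lines = [
        "0.000 1.500 N",        # "no chord" -> attribute id 0 in the loaders above
        "1.500 3.000 C:maj",
        "3.000 4.500 A:min",
    ]

    chord_root_dic = {"N": 0, "C": 1, "A": 10}   # hypothetical subset
    chord_attr_dic = {"maj": 1, "min": 2}        # hypothetical subset

    roots, attrs, durations = [], [], []
    for line in lab_lines:
        start, end, chord = line.split()
        parts = chord.split(":")
        if len(parts) == 1:                      # bare label: N, X, or a plain root
            root_id = chord_root_dic[parts[0]]
            attr_id = 0 if parts[0] in ("N", "X") else 1
        else:                                    # root:quality, e.g. C:maj
            root_id = chord_root_dic[parts[0]]
            attr_id = chord_attr_dic[parts[1]]
        roots.append(root_id)
        attrs.append(attr_id)
        durations.append(float(end) - float(start))

    print(roots)      # [0, 1, 10]
    print(attrs)      # [0, 1, 2]
    print(durations)  # [1.5, 1.5, 1.5]

These sequences are then truncated or zero-padded to 100 entries and turned into `torch.long` tensors, exactly as in the loaders above.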
dataset_loaders/jamendo.py ADDED
@@ -0,0 +1,228 @@
+ import os
+ import numpy as np
+ import pickle
+ from torch.utils import data
+ import torchaudio.transforms as T
+ import torchaudio
+ import torch
+ import csv
+ import pytorch_lightning as pl
+ from music2latent import EncoderDecoder
+ import json
+ import math
+ from sklearn.preprocessing import StandardScaler
+ import pandas as pd
+
+ class JamendoDataset(data.Dataset):
+     def __init__(self, **task_args):
+         self.task_args = task_args
+         self.tr_val = task_args.get('tr_val', "train")
+         self.root = task_args.get('root', "./dataset/jamendo")
+         self.subset = task_args.get('subset', "moodtheme")
+         self.split = task_args.get('split', 0)
+         self.segment_type = task_args.get('segment_type', "all")
+         self.cfg = task_args.get('cfg')
+
+         fn = f'dataset/jamendo/splits/split-{self.split}/{self.subset}_{self.tr_val}_dict.pickle'
+
+         self.tag_list = np.load('dataset/jamendo/meta/tag_list.npy')
+         self.tag_list_genre = list(self.tag_list[:87])
+         self.tag_list_instrument = list(self.tag_list[87:127])
+         self.tag_list_moodtheme = list(self.tag_list[127:])
+
+         # Separate tonic and mode
+         tonic_signatures = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
+         mode_signatures = ["major", "minor"]  # Major and minor modes
+
+         self.tonic_to_idx = {tonic: idx for idx, tonic in enumerate(tonic_signatures)}
+         self.mode_to_idx = {mode: idx for idx, mode in enumerate(mode_signatures)}
+
+         self.idx_to_tonic = {idx: tonic for tonic, idx in self.tonic_to_idx.items()}
+         self.idx_to_mode = {idx: mode for mode, idx in self.mode_to_idx.items()}
+
+         # Load the per-tag valence/arousal scores
+         file_path_m2va = 'dataset/jamendo/meta/moodtag_va_scores.csv'
+         data_m2va = pd.read_csv(file_path_m2va)
+
+         # Extract Valence and Arousal columns and convert them to NumPy arrays
+         self.valence = data_m2va['Valence'].to_numpy()
+         self.arousal = data_m2va['Arousal'].to_numpy()
+
+         with open('dataset/jamendo/meta/chord.json', 'r') as f:
+             self.chord_to_idx = json.load(f)
+         with open('dataset/jamendo/meta/chord_inv.json', 'r') as f:
+             self.idx_to_chord = json.load(f)
+         self.idx_to_chord = {int(k): v for k, v in self.idx_to_chord.items()}  # Ensure keys are ints
+
+         with open('dataset/emomusic/meta/chord_root.json') as json_file:
+             self.chordRootDic = json.load(json_file)
+         with open('dataset/emomusic/meta/chord_attr.json') as json_file:
+             self.chordAttrDic = json.load(json_file)
+
+         with open(fn, 'rb') as pf:
+             self.dictionary = pickle.load(pf)
+         # dictionary :
+         # {0: {'path': '48/948.mp3', 'duration': 9968.0, 'tags': array([0., 0., 0., 1., ... , 0.])}, 1: {'path': ... } }
+
+     def __getitem__(self, index):
+         path = self.dictionary[index]['path']  # e.g. path: "47/3347.mp3"
+
+         # --- Mood (emotion) tag label ---
+         y_mood = self.dictionary[index]['tags']  # mood tag label (multi-hot)
+         y_mood = y_mood.astype('float32')
+
+         v_score = y_mood * self.valence
+         a_score = y_mood * self.arousal
+
+         v_score = np.mean(v_score[v_score != 0])
+         a_score = np.mean(a_score[a_score != 0])
+
+         y_valence = torch.tensor(v_score, dtype=torch.float32)
+         y_arousal = torch.tensor(a_score, dtype=torch.float32)
+
+         y_mood = torch.from_numpy(y_mood)
+
+         # --- Chord feature ---
+         fn_chord = os.path.join(self.root, 'chord', 'lab3', path[:-4] + ".lab")
+         chords = []
+
+         if not os.path.exists(fn_chord):
+             chords.append((float(0), float(0), "N"))
+         else:
+             with open(fn_chord, 'r') as file:
+                 for line in file:
+                     start, end, chord = line.strip().split()
+                     chords.append((float(start), float(end), chord))
+
+         encoded = []
+         encoded_root = []
+         encoded_attr = []
+         durations = []
+         for start, end, chord in chords:
+             chord_arr = chord.split(":")
+             if len(chord_arr) == 1:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 if chord_arr[0] == "N" or chord_arr[0] == "X":
+                     chordAttrID = 0
+                 else:
+                     chordAttrID = 1
+             elif len(chord_arr) == 2:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 chordAttrID = self.chordAttrDic[chord_arr[1]]
+             encoded_root.append(chordRootID)
+             encoded_attr.append(chordAttrID)
+
+             if chord in self.chord_to_idx:
+                 encoded.append(self.chord_to_idx[chord])
+             else:
+                 print(f"Warning: Chord {chord} not found in chord.json. Skipping.")
+
+             durations.append(end - start)  # Compute duration
+
+         encoded_chords = np.array(encoded)
+         encoded_chords_root = np.array(encoded_root)
+         encoded_chords_attr = np.array(encoded_attr)
+
+         # Maximum sequence length for chords
+         max_sequence_length = 100  # Define this globally or as a parameter
+
+         # Truncate or pad chord sequences
+         if len(encoded_chords) > max_sequence_length:
+             # Truncate to max length
+             encoded_chords = encoded_chords[:max_sequence_length]
+             encoded_chords_root = encoded_chords_root[:max_sequence_length]
+             encoded_chords_attr = encoded_chords_attr[:max_sequence_length]
+         else:
+             # Pad with zeros (padding value for chords)
+             padding = [0] * (max_sequence_length - len(encoded_chords))
+             encoded_chords = np.concatenate([encoded_chords, padding])
+             encoded_chords_root = np.concatenate([encoded_chords_root, padding])
+             encoded_chords_attr = np.concatenate([encoded_chords_attr, padding])
+
+         # Convert to fixed-length tensors
+         chords_tensor = torch.tensor(encoded_chords, dtype=torch.long)
+         chords_root_tensor = torch.tensor(encoded_chords_root, dtype=torch.long)
+         chords_attr_tensor = torch.tensor(encoded_chords_attr, dtype=torch.long)
+
+         # --- Key feature (Tonic and Mode separation) ---
+         fn_key = os.path.join(self.root, 'key', path[:-4] + ".lab")
+
+         if not os.path.exists(fn_key):
+             mode = "major"
+         else:
+             mode = "major"  # Default value
+             with open(fn_key, 'r') as file:
+                 for line in file:
+                     key = line.strip()
+                     if key == "None":
+                         mode = "major"
+                     else:
+                         mode = key.split()[-1]
+
+         encoded_mode = self.mode_to_idx.get(mode, 0)
+         mode_tensor = torch.tensor([encoded_mode], dtype=torch.long)
+
+         # --- MERT feature ---
+         fn_mert = os.path.join(self.root, 'mert_30s', path[:-4])
+         embeddings = []
+
+         # Layers to extract, taken from the config (a subset of the MERT layers)
+         layers_to_extract = self.cfg.model.layers
+
+         # Collect all segment embeddings
+         segment_embeddings = []
+         for filename in sorted(os.listdir(fn_mert)):  # Sort files to ensure sequential order
+             file_path = os.path.join(fn_mert, filename)
+             if os.path.isfile(file_path) and filename.endswith('.npy'):
+                 segment = np.load(file_path)
+
+                 # Extract and concatenate features for the specified layers
+                 concatenated_features = np.concatenate(
+                     [segment[:, layer_idx, :] for layer_idx in layers_to_extract], axis=1
+                 )
+                 concatenated_features = np.squeeze(concatenated_features)  # e.g. 768 * 2 = 1536 for two layers
+                 segment_embeddings.append(concatenated_features)
+
+         # Convert to numpy array
+         segment_embeddings = np.array(segment_embeddings)
+
+         # Check mode: 'train' or 'val'
+         if self.tr_val == "train" and len(segment_embeddings) > 0:  # Augmentation for training
+             num_segments = len(segment_embeddings)
+
+             # Randomly choose a starting index and the length of the sequence
+             start_idx = np.random.randint(0, num_segments)  # Random starting index
+             end_idx = np.random.randint(start_idx + 1, num_segments + 1)  # Ensure end index is after start index
+
+             # Extract the sequential subset
+             chosen_segments = segment_embeddings[start_idx:end_idx]
+
+             # Compute the mean of the chosen sequential segments
+             final_embedding_mert = np.mean(chosen_segments, axis=0)
+         else:  # Validation or other modes: use mean of all segments
+             if len(segment_embeddings) > 0:
+                 final_embedding_mert = np.mean(segment_embeddings, axis=0)
+             else:
+                 # Handle case with no valid embeddings
+                 final_embedding_mert = np.zeros((1536,))  # Zero vector of the expected size
+
+         # Convert to PyTorch tensor
+         final_embedding_mert = torch.from_numpy(final_embedding_mert)
+
+         return {
+             "x_mert": final_embedding_mert,
+             "x_chord": chords_tensor,
+             "x_chord_root": chords_root_tensor,
+             "x_chord_attr": chords_attr_tensor,
+             "x_key": mode_tensor,
+             "y_mood": y_mood,
+             "y_va": torch.stack([y_valence, y_arousal], dim=0),
+             "path": self.dictionary[index]['path']
+         }
+
+     def __len__(self):
+         return len(self.dictionary)
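Unlike DEAM, EmoMusic, and PMEmo, the Jamendo loader has no per-song valence/arousal annotations; it derives them from the multi-hot mood-theme tag vector and the per-tag scores in `moodtag_va_scores.csv` by masking and averaging the non-zero entries. A small numeric sketch with invented scores (the real CSV values are not part of this commit):

    import numpy as np

    # Hypothetical per-tag valence/arousal scores for a 5-tag vocabulary
    valence = np.array([0.8, -0.5, 0.2, 0.6, -0.9])
    arousal = np.array([0.3,  0.7, -0.4, 0.1,  0.9])

    # Multi-hot mood label: tags 0 and 3 are active
    y_mood = np.array([1.0, 0.0, 0.0, 1.0, 0.0], dtype=np.float32)

    v_score = y_mood * valence
    a_score = y_mood * arousal

    # Average over the active (non-zero) entries only, as in __getitem__ above.
    # Note that a tag whose score happened to be exactly 0 would be dropped by this mask.
    v_score = np.mean(v_score[v_score != 0])   # (0.8 + 0.6) / 2 = 0.7
    a_score = np.mean(a_score[a_score != 0])   # (0.3 + 0.1) / 2 = 0.2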
dataset_loaders/pmemo.py ADDED
@@ -0,0 +1,226 @@
+ import os
+ import numpy as np
+ import pickle
+ from torch.utils import data
+ import torchaudio.transforms as T
+ import torchaudio
+ import torch
+ import csv
+ import pytorch_lightning as pl
+ from music2latent import EncoderDecoder
+ import json
+ import math
+ from sklearn.preprocessing import StandardScaler
+ import pandas as pd
+
+ class PMEmoDataset(data.Dataset):
+     def __init__(self, **task_args):
+         self.task_args = task_args
+         self.tr_val = task_args.get('tr_val', "train")
+         self.root = task_args.get('root', "./dataset/pmemo")
+         self.segment_type = task_args.get('segment_type', "all")
+         self.cfg = task_args.get('cfg')
+
+         # Path to the split file (train/val/test)
+         self.split_file = os.path.join(self.root, 'meta', 'split', f"{self.tr_val}.txt")
+
+         # Read file IDs from the split file
+         with open(self.split_file, 'r') as f:
+             self.file_ids = [line.strip() for line in f.readlines()]
+
+         # Separate tonic and mode
+         tonic_signatures = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
+         mode_signatures = ["major", "minor"]  # Major and minor modes
+
+         self.tonic_to_idx = {tonic: idx for idx, tonic in enumerate(tonic_signatures)}
+         self.mode_to_idx = {mode: idx for idx, mode in enumerate(mode_signatures)}
+
+         self.idx_to_tonic = {idx: tonic for tonic, idx in self.tonic_to_idx.items()}
+         self.idx_to_mode = {idx: mode for mode, idx in self.mode_to_idx.items()}
+
+         with open('dataset/pmemo/meta/chord.json', 'r') as f:
+             self.chord_to_idx = json.load(f)
+         with open('dataset/pmemo/meta/chord_inv.json', 'r') as f:
+             self.idx_to_chord = json.load(f)
+         self.idx_to_chord = {int(k): v for k, v in self.idx_to_chord.items()}  # Ensure keys are ints
+         with open('dataset/emomusic/meta/chord_root.json') as json_file:
+             self.chordRootDic = json.load(json_file)
+         with open('dataset/emomusic/meta/chord_attr.json') as json_file:
+             self.chordAttrDic = json.load(json_file)
+
+         # MERT and MP3 directories
+         self.mert_dir = os.path.join(self.root, 'mert_30s')
+         self.mp3_dir = os.path.join(self.root, 'mp3')
+
+         # Load static annotations (valence and arousal)
+         self.annotation_file = os.path.join(self.root, 'meta', 'static_annotations.csv')
+         self.annotations = pd.read_csv(self.annotation_file, index_col='song_id')
+
+         # Load mood tag probabilities
+         self.annotation_tag_file = os.path.join(self.root, 'meta', 'mood_probabilities.csv')
+         self.annotations_tag = pd.read_csv(self.annotation_tag_file, index_col='song_id')
+
+     def __len__(self):
+         return len(self.file_ids)
+
+     def __getitem__(self, index):
+         file_id = int(self.file_ids[index])  # File ID from split
+
+         # Get valence and arousal from annotations
+         if file_id not in self.annotations.index:
+             raise ValueError(f"File ID {file_id} not found in annotations.")
+
+         valence = self.annotations.loc[file_id, 'valence_mean']
+         arousal = self.annotations.loc[file_id, 'arousal_mean']
+
+         y_valence = torch.tensor(valence, dtype=torch.float32)
+         y_arousal = torch.tensor(arousal, dtype=torch.float32)
+
+         y_mood = np.array(self.annotations_tag.loc[file_id])
+         y_mood = y_mood.astype('float32')
+         y_mood = torch.from_numpy(y_mood)
+
+         # --- Chord feature ---
+         fn_chord = os.path.join(self.root, 'chord', 'lab3', str(file_id) + ".lab")
+
+         chords = []
+
+         if not os.path.exists(fn_chord):
+             chords.append((float(0), float(0), "N"))
+         else:
+             with open(fn_chord, 'r') as file:
+                 for line in file:
+                     start, end, chord = line.strip().split()
+                     chords.append((float(start), float(end), chord))
+
+         encoded = []
+         encoded_root = []
+         encoded_attr = []
+         durations = []
+         for start, end, chord in chords:
+             chord_arr = chord.split(":")
+             if len(chord_arr) == 1:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 if chord_arr[0] == "N" or chord_arr[0] == "X":
+                     chordAttrID = 0
+                 else:
+                     chordAttrID = 1
+             elif len(chord_arr) == 2:
+                 chordRootID = self.chordRootDic[chord_arr[0]]
+                 chordAttrID = self.chordAttrDic[chord_arr[1]]
+             encoded_root.append(chordRootID)
+             encoded_attr.append(chordAttrID)
+
+             if chord in self.chord_to_idx:
+                 encoded.append(self.chord_to_idx[chord])
+             else:
+                 print(f"Warning: Chord {chord} not found in chord.json. Skipping.")
+
+             durations.append(end - start)  # Compute duration
+
+         encoded_chords = np.array(encoded)
+         encoded_chords_root = np.array(encoded_root)
+         encoded_chords_attr = np.array(encoded_attr)
+
+         # Maximum sequence length for chords
+         max_sequence_length = 100  # Define this globally or as a parameter
+
+         # Truncate or pad chord sequences
+         if len(encoded_chords) > max_sequence_length:
+             # Truncate to max length
+             encoded_chords = encoded_chords[:max_sequence_length]
+             encoded_chords_root = encoded_chords_root[:max_sequence_length]
+             encoded_chords_attr = encoded_chords_attr[:max_sequence_length]
+         else:
+             # Pad with zeros (padding value for chords)
+             padding = [0] * (max_sequence_length - len(encoded_chords))
+             encoded_chords = np.concatenate([encoded_chords, padding])
+             encoded_chords_root = np.concatenate([encoded_chords_root, padding])
+             encoded_chords_attr = np.concatenate([encoded_chords_attr, padding])
+
+         # Convert to fixed-length tensors
+         chords_tensor = torch.tensor(encoded_chords, dtype=torch.long)
+         chords_root_tensor = torch.tensor(encoded_chords_root, dtype=torch.long)
+         chords_attr_tensor = torch.tensor(encoded_chords_attr, dtype=torch.long)
+
+         # --- Key feature ---
+         fn_key = os.path.join(self.root, 'key', str(file_id) + ".lab")
+
+         if not os.path.exists(fn_key):
+             mode = "major"
+         else:
+             mode = "major"  # Default value
+             with open(fn_key, 'r') as file:
+                 for line in file:
+                     key = line.strip()
+                     if key == "None":
+                         mode = "major"
+                     else:
+                         mode = key.split()[-1]
+
+         encoded_mode = self.mode_to_idx.get(mode, 0)
+         mode_tensor = torch.tensor([encoded_mode], dtype=torch.long)
+
+         # --- MERT feature ---
+         fn_mert = os.path.join(self.mert_dir, str(file_id))
+
+         embeddings = []
+
+         # Layers to extract, taken from the config (a subset of the MERT layers)
+         layers_to_extract = self.cfg.model.layers
+
+         # Collect all segment embeddings
+         segment_embeddings = []
+         for filename in sorted(os.listdir(fn_mert)):  # Sort files to ensure sequential order
+             file_path = os.path.join(fn_mert, filename)
+             if os.path.isfile(file_path) and filename.endswith('.npy'):
+                 segment = np.load(file_path)
+
+                 # Extract and concatenate features for the specified layers
+                 concatenated_features = np.concatenate(
+                     [segment[:, layer_idx, :] for layer_idx in layers_to_extract], axis=1
+                 )
+                 concatenated_features = np.squeeze(concatenated_features)  # e.g. 768 * 2 = 1536 for two layers
+                 segment_embeddings.append(concatenated_features)
+
+         # Convert to numpy array
+         segment_embeddings = np.array(segment_embeddings)
+
+         # Check mode: 'train' or 'val'
+         if self.tr_val == "train" and len(segment_embeddings) > 0:  # Augmentation for training
+             num_segments = len(segment_embeddings)
+
+             # Randomly choose a starting index and the length of the sequence
+             start_idx = np.random.randint(0, num_segments)  # Random starting index
+             end_idx = np.random.randint(start_idx + 1, num_segments + 1)  # Ensure end index is after start index
+
+             # Extract the sequential subset
+             chosen_segments = segment_embeddings[start_idx:end_idx]
+
+             # Compute the mean of the chosen sequential segments
+             final_embedding_mert = np.mean(chosen_segments, axis=0)
+         else:  # Validation or other modes: use mean of all segments
+             if len(segment_embeddings) > 0:
+                 final_embedding_mert = np.mean(segment_embeddings, axis=0)
+             else:
+                 # Handle case with no valid embeddings
+                 final_embedding_mert = np.zeros((1536,))  # Zero vector of the expected size
+
+         # Convert to PyTorch tensor
+         final_embedding_mert = torch.from_numpy(final_embedding_mert)
+
+         # Get the MP3 path
+         mp3_path = os.path.join(self.mp3_dir, f"{file_id}.mp3")
+         if not os.path.exists(mp3_path):
+             raise FileNotFoundError(f"MP3 file not found for {mp3_path}")
+
+         return {
+             "x_mert": final_embedding_mert,
+             "x_chord": chords_tensor,
+             "x_chord_root": chords_root_tensor,
+             "x_chord_attr": chords_attr_tensor,
+             "x_key": mode_tensor,
+             "y_va": torch.stack([y_valence, y_arousal], dim=0),
+             "y_mood": y_mood,
+             "path": mp3_path
+         }
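The training-time augmentation shared by all four `__getitem__` methods averages a randomly chosen contiguous run of 30-second MERT segment embeddings, while evaluation averages all of them. A standalone sketch of just that pooling step, using random data in place of the precomputed `.npy` features:

    import numpy as np

    def pool_segments(segment_embeddings: np.ndarray, training: bool) -> np.ndarray:
        """Average MERT segment embeddings; crop a random contiguous span when training."""
        if len(segment_embeddings) == 0:
            return np.zeros((1536,))              # fallback used by the loaders above
        if training:
            num_segments = len(segment_embeddings)
            start = np.random.randint(0, num_segments)
            end = np.random.randint(start + 1, num_segments + 1)
            return segment_embeddings[start:end].mean(axis=0)
        return segment_embeddings.mean(axis=0)

    # e.g. a three-minute track split into six 30-second segments of 1536-dim features
    segments = np.random.randn(6, 1536)
    print(pool_segments(segments, training=True).shape)   # (1536,)
    print(pool_segments(segments, training=False).shape)  # (1536,)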
dataset_loaders/readme.md ADDED
@@ -0,0 +1 @@
+ hi