File size: 2,321 Bytes
1e00de9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from curses import meta
import os
import pandas as pd
import json
from tqdm import tqdm

# Lookup table filled by build_audioset_metadata():
# wav basename (extension stripped) -> [wav path, labels].
AUDIOSET_PATH_DICT = dict()

# Directory joined with "Y<youtube_id>.npy" to form each record's "seg_label" path.
segment_label_path = "/mnt/fast/nobackup/scratch4weeks/hl01486/datasets/audiocaps_segment_labels/averaged"

# AudioCaps CSV files to convert, processed in this order.
audiocaps_csv = ["test.csv", "train.csv", "val.csv"]

# AudioSet metadata JSON files; they are read in insertion order when the
# lookup table is built.
audioset_path = dict(
    bal_unbal_train="/mnt/fast/nobackup/users/hl01486/metadata/audioset/datafiles/audioset_bal_unbal_train_data.json",
    eval="/mnt/fast/nobackup/users/hl01486/metadata/audioset/datafiles/audioset_eval_data.json",
    bal_train="/mnt/fast/nobackup/users/hl01486/metadata/audioset/datafiles/audioset_bal_train_data.json",
)

def read_json(dataset_json_file):
    """Parse a JSON file and return the value under its top-level "data" key.

    Args:
        dataset_json_file: Path of the JSON file to read.

    Returns:
        The object stored under "data" in the parsed document.

    Raises:
        KeyError: If the parsed document has no "data" key.
    """
    with open(dataset_json_file, 'r') as handle:
        document = json.loads(handle.read())
    return document['data']

def build_audioset_metadata():
    """Populate AUDIOSET_PATH_DICT from every metadata file in audioset_path.

    Every entry read from the metadata files must provide "wav" and "labels".
    The lookup key is the wav file's basename with its extension removed, and
    the stored value is ``[wav_path, labels]``. Files are read in the order of
    ``audioset_path``; when the same basename occurs more than once, the last
    occurrence wins.

    Returns:
        None. The module-level AUDIOSET_PATH_DICT is updated in place.

    Raises:
        KeyError: If an entry lacks "wav" or "labels".
    """
    # Load every file before touching the table, so a failing read leaves
    # AUDIOSET_PATH_DICT unchanged.
    entries = []
    for json_file in audioset_path.values():
        entries.extend(read_json(json_file))

    for entry in entries:
        wav_path = entry["wav"]
        # splitext removes whatever extension is present instead of assuming
        # it is exactly four characters long (".wav").
        clip_name = os.path.splitext(os.path.basename(wav_path))[0]
        AUDIOSET_PATH_DICT[clip_name] = [wav_path, entry["labels"]]
    
def read_audiocaps_csv(csvfile):
    """Convert one AudioCaps CSV file into a list of metadata records.

    The CSV must contain "youtube_id" and "caption" columns. Each row is
    resolved through lookup_path(); rows that cannot be resolved are reported
    on stdout and skipped.

    Args:
        csvfile: Path of the AudioCaps CSV file, handed to pandas.read_csv.

    Returns:
        A list of dicts with the keys "wav", "seg_label", "labels" and
        "caption", one per resolved row, in CSV order.
    """
    df = pd.read_csv(csvfile)
    data_list = []
    # iterrows() is a generator without a length, so hand tqdm the row count
    # to get a real progress bar.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        youtube_id = row["youtube_id"]
        caption = row["caption"]

        try:
            wav, labels = lookup_path(youtube_id)
        except (KeyError, TypeError) as e:
            # KeyError: the clip is absent from the AudioSet lookup table.
            # TypeError: the id is not a string (e.g. a missing value), so no
            # lookup key could be built.
            print("Skipping youtube_id %r: %r" % (youtube_id, e))
            continue

        data_list.append({
            "wav": wav,
            "seg_label": os.path.join(segment_label_path, "Y%s.npy" % youtube_id),
            "labels": labels,
            "caption": caption,
        })
    return data_list
    
def lookup_path(youtube_id):
    """Return the first two items stored in AUDIOSET_PATH_DICT for a clip.

    Args:
        youtube_id: Clip id as a string; "Y" is prepended to form the key.

    Returns:
        A 2-tuple of items 0 and 1 of the stored entry.

    Raises:
        KeyError: If "Y" + youtube_id is not a key of AUDIOSET_PATH_DICT.
    """
    entry = AUDIOSET_PATH_DICT["Y" + youtube_id]
    first = entry[0]
    second = entry[1]
    return first, second

def save_json(wav_list, fname):
    """Write wav_list to ``datafiles/<fname>.json`` as ``{"data": wav_list}``.

    Args:
        wav_list: JSON-serialisable records to store under the "data" key.
        fname: Output name without the ".json" suffix. The file is written
            into the "datafiles" directory, relative to the current working
            directory.
    """
    # open() does not create parent directories, so make sure the output
    # directory exists before writing.
    os.makedirs('datafiles', exist_ok=True)
    with open('datafiles/%s.json' % fname, 'w') as f:
        json.dump({'data': wav_list}, f, indent=1)

# Build the AudioSet lookup table first: read_audiocaps_csv() resolves every
# clip through it.
build_audioset_metadata()

# Convert each AudioCaps CSV (read relative to the current working directory)
# and write the result to datafiles/<csv name>.json, e.g. datafiles/test.csv.json.
for csv_name in audiocaps_csv:
    records = read_audiocaps_csv(csv_name)
    save_json(records, fname=csv_name)