Update pipelines/data/data_module.py
pipelines/data/data_module.py
CHANGED
@@ -29,8 +29,6 @@ class AVSRDataLoader:
 
     def load_data(self, data_filename, landmarks=None, transform=True):
         if self.modality == "audio":
-            # audio, sample_rate = self.load_audio(data_filename)
-            # audio = self.audio_process(audio, sample_rate)
             audio = self.load_audio(data_filename)
             return self.audio_transform(audio) if self.transform else audio
         if self.modality == "video":
@@ -40,8 +38,6 @@ class AVSRDataLoader:
             return self.video_transform(video) if self.transform else video
         if self.modality == "audiovisual":
             rate_ratio = 640
-            # audio, sample_rate = self.load_audio(data_filename)
-            # audio = self.audio_process(audio, sample_rate)
             audio = self.load_audio(data_filename)
             video = self.load_video(data_filename)
             video = self.video_process(video, landmarks)
@@ -58,16 +54,8 @@ class AVSRDataLoader:
     def load_audio(self, data_filename):
         # rtype: [1, T]
         waveform = torch.tensor(whisper.load_audio(data_filename)).unsqueeze(0)
-
-        # return waveform, sample_rate
+        return waveform
 
 
     def load_video(self, data_filename):
         return torchvision.io.read_video(data_filename, pts_unit='sec')[0].numpy()
-
-
-    # def audio_process(self, waveform, sample_rate, target_sample_rate=16000):
-    #     if sample_rate != target_sample_rate:
-    #         waveform = torchaudio.functional.resample(waveform, sample_rate, target_sample_rate)
-    #     waveform = torch.mean(waveform, dim=0, keepdim=True)
-    #     return waveform
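
For context, a minimal standalone sketch of how the loaders read after this commit. It assumes the openai-whisper, torch, and torchvision packages; the free-standing functions and the example filenames below are illustrative stand-ins for the AVSRDataLoader methods, not part of this repository's API.

import torch
import torchvision
import whisper

def load_audio(data_filename):
    # whisper.load_audio decodes the file and returns a 16 kHz mono float32
    # numpy array, so a separate torchaudio resampling step is not needed.
    # rtype: [1, T]
    waveform = torch.tensor(whisper.load_audio(data_filename)).unsqueeze(0)
    return waveform

def load_video(data_filename):
    # read_video returns (video_frames, audio_frames, info); keep only the frames.
    return torchvision.io.read_video(data_filename, pts_unit="sec")[0].numpy()

if __name__ == "__main__":
    audio = load_audio("sample.wav")   # hypothetical input file
    video = load_video("sample.mp4")   # hypothetical input file
    print(audio.shape, video.shape)    # e.g. torch.Size([1, T]) and (T, H, W, C)

The substantive fix is the added return waveform: previously the only return in load_audio was commented out, so the audio branch of load_data received None, which is consistent with the Space's runtime error.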