mpc001 committed on
Commit
b3d8e4f
1 Parent(s): e19e5b7

Update pipelines/data/data_module.py

Browse files
Files changed (1) hide show
  1. pipelines/data/data_module.py +1 -13
pipelines/data/data_module.py CHANGED
@@ -29,8 +29,6 @@ class AVSRDataLoader:
29
 
30
  def load_data(self, data_filename, landmarks=None, transform=True):
31
  if self.modality == "audio":
32
- # audio, sample_rate = self.load_audio(data_filename)
33
- # audio = self.audio_process(audio, sample_rate)
34
  audio = self.load_audio(data_filename)
35
  return self.audio_transform(audio) if self.transform else audio
36
  if self.modality == "video":
@@ -40,8 +38,6 @@ class AVSRDataLoader:
40
  return self.video_transform(video) if self.transform else video
41
  if self.modality == "audiovisual":
42
  rate_ratio = 640
43
- # audio, sample_rate = self.load_audio(data_filename)
44
- # audio = self.audio_process(audio, sample_rate)
45
  audio = self.load_audio(data_filename)
46
  video = self.load_video(data_filename)
47
  video = self.video_process(video, landmarks)
@@ -58,16 +54,8 @@ class AVSRDataLoader:
58
  def load_audio(self, data_filename):
59
  # rtype: [1, T]
60
  waveform = torch.tensor(whisper.load_audio(data_filename)).unsqueeze(0)
61
- # waveform, sample_rate = torchaudio.load(data_filename, normalize=True)
62
- # return waveform, sample_rate
63
 
64
 
65
  def load_video(self, data_filename):
66
  return torchvision.io.read_video(data_filename, pts_unit='sec')[0].numpy()
67
-
68
-
69
- # def audio_process(self, waveform, sample_rate, target_sample_rate=16000):
70
- # if sample_rate != target_sample_rate:
71
- # waveform = torchaudio.functional.resample(waveform, sample_rate, target_sample_rate)
72
- # waveform = torch.mean(waveform, dim=0, keepdim=True)
73
- # return waveform
 
29
 
30
  def load_data(self, data_filename, landmarks=None, transform=True):
31
  if self.modality == "audio":
 
 
32
  audio = self.load_audio(data_filename)
33
  return self.audio_transform(audio) if self.transform else audio
34
  if self.modality == "video":
 
38
  return self.video_transform(video) if self.transform else video
39
  if self.modality == "audiovisual":
40
  rate_ratio = 640
 
 
41
  audio = self.load_audio(data_filename)
42
  video = self.load_video(data_filename)
43
  video = self.video_process(video, landmarks)
 
54
  def load_audio(self, data_filename):
55
  # rtype: [1, T]
56
  waveform = torch.tensor(whisper.load_audio(data_filename)).unsqueeze(0)
57
+ return waveform
 
58
 
59
 
60
  def load_video(self, data_filename):
61
  return torchvision.io.read_video(data_filename, pts_unit='sec')[0].numpy()