Kr08 committed on
Commit
37ddb1d
·
verified ·
1 Parent(s): cb9c78a

Update audio_processing.py

Browse files
Files changed (1) hide show
  1. audio_processing.py +56 -2
audio_processing.py CHANGED
@@ -98,6 +98,51 @@ class AudioProcessor:
98
 
99
  return translation
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  @spaces.GPU(duration=60)
102
  def process_audio(self, audio_path, translate=False):
103
  """Main processing function"""
@@ -106,10 +151,19 @@ class AudioProcessor:
106
  waveform, sample_rate = torchaudio.load(audio_path)
107
  if waveform.shape[0] > 1:
108
  waveform = torch.mean(waveform, dim=0)
109
-
 
 
110
  # Resample if necessary
111
  if sample_rate != self.sample_rate:
112
- waveform = torchaudio.transforms.Resample(sample_rate, self.sample_rate)(waveform)
 
 
 
 
 
 
 
113
 
114
  # Load models
115
  models = self.load_models()
 
98
 
99
  return translation
100
 
101
+ def preprocess_audio(self, audio):
102
+ """
103
+ Create overlapping chunks with improved timing logic
104
+ """
105
+ chunk_samples = int(self.chunk_size * self.sample_rate)
106
+ overlap_samples = int(self.overlap * self.sample_rate)
107
+
108
+ chunks_with_times = []
109
+ start_idx = 0
110
+
111
+ while start_idx < len(audio):
112
+ end_idx = min(start_idx + chunk_samples, len(audio))
113
+
114
+ # Add padding for first chunk
115
+ if start_idx == 0:
116
+ chunk = audio[start_idx:end_idx]
117
+ padding = torch.zeros(int(1 * self.sample_rate))
118
+ chunk = torch.cat([padding, chunk])
119
+ else:
120
+ # Include overlap from previous chunk
121
+ actual_start = max(0, start_idx - overlap_samples)
122
+ chunk = audio[actual_start:end_idx]
123
+
124
+ # Pad if necessary
125
+ if len(chunk) < chunk_samples:
126
+ chunk = torch.nn.functional.pad(chunk, (0, chunk_samples - len(chunk)))
127
+
128
+ # Adjust time ranges to account for overlaps
129
+ chunk_start_time = max(0, (start_idx / self.sample_rate) - self.overlap)
130
+ chunk_end_time = min((end_idx / self.sample_rate) + self.overlap, len(audio) / self.sample_rate)
131
+
132
+ chunks_with_times.append({
133
+ 'chunk': chunk,
134
+ 'start_time': start_idx / self.sample_rate,
135
+ 'end_time': end_idx / self.sample_rate,
136
+ 'transcribe_start': chunk_start_time,
137
+ 'transcribe_end': chunk_end_time
138
+ })
139
+
140
+ # Move to next chunk with smaller step size for better continuity
141
+ start_idx += (chunk_samples - overlap_samples)
142
+
143
+ return chunks_with_times
144
+
145
+
146
  @spaces.GPU(duration=60)
147
  def process_audio(self, audio_path, translate=False):
148
  """Main processing function"""
 
151
  waveform, sample_rate = torchaudio.load(audio_path)
152
  if waveform.shape[0] > 1:
153
  waveform = torch.mean(waveform, dim=0)
154
+ else:
155
+ waveform = waveform.squeeze(0)
156
+
157
  # Resample if necessary
158
  if sample_rate != self.sample_rate:
159
+ resampler = torchaudio.transforms.Resample(
160
+ orig_freq=sample_rate,
161
+ new_freq=self.sample_rate
162
+ )
163
+ waveform = resampler(waveform)
164
+
165
+ # if sample_rate != self.sample_rate:
166
+ # waveform = torchaudio.transforms.Resample(sample_rate, self.sample_rate)(waveform)
167
 
168
  # Load models
169
  models = self.load_models()