Kr08 committed on
Commit
bed663f
·
verified ·
1 Parent(s): 8cc69ea

Update audio_processing.py

Browse files
Files changed (1) hide show
  1. audio_processing.py +8 -0
audio_processing.py CHANGED
@@ -10,12 +10,15 @@ from transformers import (
10
  AutoTokenizer,
11
  AutoModelForSeq2SeqLM
12
  )
 
13
  import logging
14
  from difflib import SequenceMatcher
15
 
16
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
17
  logger = logging.getLogger(__name__)
18
 
 
 
19
  class AudioProcessor:
20
  def __init__(self, chunk_size=5, overlap=1, sample_rate=16000):
21
  self.chunk_size = chunk_size
@@ -47,6 +50,7 @@ class AudioProcessor:
47
  'translation': (translation_model, translation_tokenizer)
48
  }
49
 
 
50
  def identify_language(self, audio_chunk, models):
51
  """Identify language of audio chunk"""
52
  lid_model, lid_processor = models['lid']
@@ -59,6 +63,7 @@ class AudioProcessor:
59
 
60
  return detected_lang
61
 
 
62
  def transcribe_chunk(self, audio_chunk, language, models):
63
  """Transcribe audio chunk"""
64
  mms_model, mms_processor = models['mms']
@@ -75,6 +80,7 @@ class AudioProcessor:
75
 
76
  return transcription
77
 
 
78
  def translate_text(self, text, models):
79
  """Translate text to English"""
80
  translation_model, translation_tokenizer = models['translation']
@@ -92,6 +98,7 @@ class AudioProcessor:
92
 
93
  return translation
94
 
 
95
  def process_audio(self, audio_path, translate=False):
96
  """Main processing function"""
97
  try:
@@ -163,6 +170,7 @@ class AudioProcessor:
163
  logger.error(f"Error processing audio: {str(e)}")
164
  raise
165
 
 
166
  def merge_segments(self, segments, time_threshold=0.5, similarity_threshold=0.7):
167
  """Merge similar nearby segments"""
168
  if not segments:
 
10
  AutoTokenizer,
11
  AutoModelForSeq2SeqLM
12
  )
13
+ import spaces
14
  import logging
15
  from difflib import SequenceMatcher
16
 
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18
  logger = logging.getLogger(__name__)
19
 
20
+
21
+
22
  class AudioProcessor:
23
  def __init__(self, chunk_size=5, overlap=1, sample_rate=16000):
24
  self.chunk_size = chunk_size
 
50
  'translation': (translation_model, translation_tokenizer)
51
  }
52
 
53
+ @spaces.GPU(duration=60)
54
  def identify_language(self, audio_chunk, models):
55
  """Identify language of audio chunk"""
56
  lid_model, lid_processor = models['lid']
 
63
 
64
  return detected_lang
65
 
66
+ @spaces.GPU(duration=60)
67
  def transcribe_chunk(self, audio_chunk, language, models):
68
  """Transcribe audio chunk"""
69
  mms_model, mms_processor = models['mms']
 
80
 
81
  return transcription
82
 
83
+ @spaces.GPU(duration=60)
84
  def translate_text(self, text, models):
85
  """Translate text to English"""
86
  translation_model, translation_tokenizer = models['translation']
 
98
 
99
  return translation
100
 
101
+ @spaces.GPU(duration=60)
102
  def process_audio(self, audio_path, translate=False):
103
  """Main processing function"""
104
  try:
 
170
  logger.error(f"Error processing audio: {str(e)}")
171
  raise
172
 
173
+
174
  def merge_segments(self, segments, time_threshold=0.5, similarity_threshold=0.7):
175
  """Merge similar nearby segments"""
176
  if not segments: