NeuralFalcon committed on
Commit
cdfd068
·
verified ·
1 Parent(s): bc02504

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -58
app.py CHANGED
@@ -42,54 +42,7 @@ def clean_file_name(file_path):
42
 
43
  return clean_file_path
44
 
45
- def get_audio_file(uploaded_file):
46
- global base_path
47
- # ,device
48
- device = "cuda" if torch.cuda.is_available() else "cpu"
49
- # Detect the file type (audio/video)
50
- mime_type, _ = mimetypes.guess_type(uploaded_file)
51
- # Create the folder path to store audio files
52
- audio_folder = f"{base_path}/subtitle_audio"
53
- os.makedirs(audio_folder, exist_ok=True)
54
- # Initialize variable for the audio file path
55
- audio_file_path = ""
56
- if mime_type and mime_type.startswith('audio'):
57
- # If it's an audio file, save it as is
58
- audio_file_path = os.path.join(audio_folder, os.path.basename(uploaded_file))
59
- audio_file_path=clean_file_name(audio_file_path)
60
- shutil.copy(uploaded_file, audio_file_path) # Move file to audio folder
61
-
62
- elif mime_type and mime_type.startswith('video'):
63
- # If it's a video file, extract the audio
64
- audio_file_name = os.path.splitext(os.path.basename(uploaded_file))[0] + ".mp3"
65
- audio_file_path = os.path.join(audio_folder, audio_file_name)
66
- audio_file_path=clean_file_name(audio_file_path)
67
-
68
- # Extract the file extension from the uploaded file
69
- file_extension = os.path.splitext(uploaded_file)[1] # Includes the dot, e.g., '.mp4'
70
-
71
- # Generate a random UUID and create a new file name with the same extension
72
- random_uuid = uuid.uuid4().hex[:6]
73
- new_file_name = random_uuid + file_extension
74
-
75
- # Set the new file path in the subtitle_audio folder
76
- new_file_path = os.path.join(audio_folder, new_file_name)
77
-
78
- # Copy the original video file to the new location with the new name
79
- shutil.copy(uploaded_file, new_file_path)
80
- if device=="cuda":
81
- command = f"ffmpeg -hwaccel cuda -i {new_file_path} {audio_file_path} -y"
82
- else:
83
- command = f"ffmpeg -i {new_file_path} {audio_file_path} -y"
84
-
85
- subprocess.run(command, shell=True)
86
- if os.path.exists(new_file_path):
87
- os.remove(new_file_path)
88
- # Return the saved audio file path
89
- audio = AudioSegment.from_file(audio_file_path)
90
- # Get the duration in seconds
91
- duration_seconds = len(audio) / 1000.0 # pydub measures duration in milliseconds
92
- return audio_file_path,duration_seconds
93
 
94
  def format_segments(segments):
95
  saved_segments = list(segments)
@@ -242,8 +195,7 @@ def whisper_subtitle(uploaded_file,Source_Language,max_words_per_subtitle=8):
242
  device = "cpu"
243
  compute_type = "int8"
244
  faster_whisper_model = WhisperModel("deepdml/faster-whisper-large-v3-turbo-ct2",device=device, compute_type=compute_type)
245
- audio_path,audio_duration=get_audio_file(uploaded_file)
246
-
247
  if Source_Language=="Automatic":
248
  segments,d = faster_whisper_model.transcribe(audio_path, word_timestamps=True)
249
  lang_code=d.language
@@ -267,11 +219,11 @@ def whisper_subtitle(uploaded_file,Source_Language,max_words_per_subtitle=8):
267
  original_srt_name=clean_file_name(save_name)
268
  original_txt_name=original_srt_name.replace(".srt",".txt")
269
  word_level_srt_name=original_srt_name.replace(".srt","_word_level.srt")
270
- default_srt_name=original_srt_name.replace(".srt","_default.srt")
271
 
272
- generate_srt_from_sentences(sentence_timestamp, srt_path=default_srt_name)
273
  word_level_srt(words_timestamp, srt_path=word_level_srt_name)
274
- write_subtitles_to_file(word_segments, filename=original_srt_name)
275
  with open(original_txt_name, 'w', encoding='utf-8') as f1:
276
  f1.write(text)
277
  return default_srt_name,original_srt_name,word_level_srt_name,original_txt_name
@@ -304,9 +256,9 @@ source_lang_list.extend(available_language)
304
  @click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
305
  @click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
306
  def main(debug, share):
307
- description = """
308
- **Note**: For large video files, upload audio instead. FFmpeg video-to-audio conversion may take a long time.
309
- """
310
  # Define Gradio inputs and outputs
311
  gradio_inputs = [
312
  gr.File(label="Upload Audio or Video File"),
@@ -322,9 +274,9 @@ def main(debug, share):
322
  ]
323
 
324
  # Create Gradio interface
325
- demo = gr.Interface(fn=subtitle_maker, inputs=gradio_inputs, outputs=gradio_outputs, title="Auto Subtitle Generator Using Whisper-Large-V3-Turbo-Ct2",description=description)
326
 
327
  # Launch Gradio with command-line options
328
  demo.queue().launch(debug=debug, share=share)
329
  if __name__ == "__main__":
330
- main()
 
42
 
43
  return clean_file_path
44
 
45
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def format_segments(segments):
48
  saved_segments = list(segments)
 
195
  device = "cpu"
196
  compute_type = "int8"
197
  faster_whisper_model = WhisperModel("deepdml/faster-whisper-large-v3-turbo-ct2",device=device, compute_type=compute_type)
198
+ audio_path=uploaded_file
 
199
  if Source_Language=="Automatic":
200
  segments,d = faster_whisper_model.transcribe(audio_path, word_timestamps=True)
201
  lang_code=d.language
 
219
  original_srt_name=clean_file_name(save_name)
220
  original_txt_name=original_srt_name.replace(".srt",".txt")
221
  word_level_srt_name=original_srt_name.replace(".srt","_word_level.srt")
222
+ customize_srt_name=original_srt_name.replace(".srt","_customize.srt")
223
 
224
+ generate_srt_from_sentences(sentence_timestamp, srt_path=original_srt_name)
225
  word_level_srt(words_timestamp, srt_path=word_level_srt_name)
226
+ write_subtitles_to_file(word_segments, filename=customize_srt_name)
227
  with open(original_txt_name, 'w', encoding='utf-8') as f1:
228
  f1.write(text)
229
  return default_srt_name,original_srt_name,word_level_srt_name,original_txt_name
 
256
  @click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
257
  @click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
258
  def main(debug, share):
259
+ # description = """
260
+ # **Note**: For large video files, upload audio instead. FFmpeg video-to-audio conversion may take a long time.
261
+ # """
262
  # Define Gradio inputs and outputs
263
  gradio_inputs = [
264
  gr.File(label="Upload Audio or Video File"),
 
274
  ]
275
 
276
  # Create Gradio interface
277
+ demo = gr.Interface(fn=subtitle_maker, inputs=gradio_inputs, outputs=gradio_outputs, title="Auto Subtitle Generator Using Whisper-Large-V3-Turbo-Ct2")#,description=description)
278
 
279
  # Launch Gradio with command-line options
280
  demo.queue().launch(debug=debug, share=share)
281
  if __name__ == "__main__":
282
+ main()