Spaces:

pratikshahp
/

mp4-to-audio-to-srtfile

Sleeping

App Files Files Community

mp4-to-audio-to-srtfile / app.py

pratikshahp

Create app.py

489d8c3 verified 3 months ago

raw

history blame

2.02 kB

	import gradio as gr
	from moviepy.editor import VideoFileClip
	from transformers import pipeline
	import os

	# Build the speech-recognition pipeline once at import time so that every
	# request reuses the same loaded Whisper checkpoint.
	whisper_model = pipeline(
	    task="automatic-speech-recognition",
	    model="openai/whisper-large",
	)

	def convert_video_to_wav(video_path):
	    """Extract the audio track of a video into a 16-bit PCM WAV file.

	    Args:
	        video_path: Path to the input video file.

	    Returns:
	        The path of the WAV file that was written ("temp_audio.wav",
	        relative to the current working directory).

	    Raises:
	        ValueError: If the video contains no audio track.
	    """
	    wav_file = "temp_audio.wav"
	    video_clip = VideoFileClip(video_path)
	    try:
	        audio = video_clip.audio
	        if audio is None:
	            # Without this check the failure is an opaque AttributeError
	            # on None further down.
	            raise ValueError(f"No audio track found in {video_path!r}")
	        audio.write_audiofile(wav_file, codec='pcm_s16le')  # Write as WAV format
	    finally:
	        # Release the underlying reader even when extraction fails, so
	        # repeated requests do not leak file handles / subprocesses.
	        video_clip.close()
	    return wav_file

	def _srt_timestamp(total_seconds):
	    """Format a whole number of seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
	    minutes, seconds = divmod(total_seconds, 60)
	    hours, minutes = divmod(minutes, 60)
	    return f"{hours:02d}:{minutes:02d}:{seconds:02d},000"


	def convert_audio_to_srt(wav_file):
	    """Transcribe a WAV file and write the text as a simple SRT file.

	    The transcription text is split on '.' and each non-empty piece becomes
	    one subtitle cue. Cues are given consecutive fixed two-second slots
	    (0-2 s, 2-4 s, ...); they are NOT aligned to the actual speech timing.
	    NOTE(review): the ASR pipeline can presumably supply real timestamps
	    (``return_timestamps=True``) - confirm before relying on the timing.

	    Args:
	        wav_file: Path to the audio file to transcribe. The file is deleted
	            before this function returns, whether or not it succeeds.

	    Returns:
	        The path of the SRT file that was written ("transcription.srt",
	        relative to the current working directory).
	    """
	    srt_file = "transcription.srt"
	    try:
	        # Transcribe the audio using the Whisper model
	        transcription = whisper_model(wav_file)

	        # Drop empty pieces (e.g. the one after the final '.') so that no
	        # blank cues are emitted and cue numbers stay contiguous.
	        pieces = (piece.strip() for piece in transcription['text'].split('.'))
	        segments = [piece for piece in pieces if piece]

	        with open(srt_file, "w", encoding="utf-8") as f:
	            for i, segment in enumerate(segments):
	                start = i * 2
	                f.write(f"{i + 1}\n")  # Subtitle index
	                # divmod-based formatting keeps seconds/minutes below 60
	                f.write(f"{_srt_timestamp(start)} --> {_srt_timestamp(start + 2)}\n")
	                f.write(f"{segment}\n\n")  # Transcription text
	    finally:
	        # Clean up the temp audio file even if transcription or writing
	        # failed; the existence check avoids masking the original error.
	        if os.path.exists(wav_file):
	            os.remove(wav_file)

	    return srt_file

	def process_video(video):
	    """Generate an SRT subtitle file for an uploaded video.

	    Args:
	        video: The uploaded file, either as a path string or as an object
	            whose ``name`` attribute holds the path on disk.

	    Returns:
	        The path of the generated SRT file.

	    Raises:
	        ValueError: If no file was provided (``video`` is None).
	    """
	    if video is None:
	        # Submitting without an upload would otherwise fail with an
	        # AttributeError on None.
	        raise ValueError("No video file was provided.")

	    # Accept both a plain path and a file-like wrapper exposing ``.name``
	    video_path = video if isinstance(video, str) else video.name

	    # Process the video to extract audio and convert to srt
	    wav_file = convert_video_to_wav(video_path)  # Convert video to WAV
	    srt_file = convert_audio_to_srt(wav_file)  # Convert WAV to SRT

	    return srt_file  # Return the path of the generated SRT file

	# Gradio Interface: one file upload in, one downloadable SRT file out.
	interface = gr.Interface(
	    fn=process_video,
	    # Extensions need the leading dot; a bare name such as 'mp4' is treated
	    # as a file category rather than an extension.
	    inputs=gr.File(label="Upload video file", file_types=['.mp4', '.avi', '.mkv']),  # Video file input
	    outputs=gr.File(label="Download SRT File"),  # Output the SRT file for download
	    title="Video to SRT Subtitle Generator",
	    description="Upload a video file (e.g., .mp4), and the app will generate a subtitle file (SRT format) using Whisper model."
	)

	interface.launch()