Spaces:
Build error
Build error
# NOTE: "!pip install ..." is IPython/notebook shell syntax and is a
# SyntaxError in a plain Python file, so it must not appear in this module.
# Install these packages before running (e.g. list them in requirements.txt):
#   transformers torch soundfile pytube indic-transliteration
import gradio as gr | |
from transformers import pipeline | |
from pytube import YouTube | |
import os | |
from indic_transliteration import sanscript | |
from indic_transliteration.sanscript import transliterate | |
# Multilingual speech-recognition pipeline. It is built once at module import
# so that every transcription request reuses the same loaded model.
# NOTE(review): confirm this model id is still published on the Hub.
ASR_MODEL_ID = "ai4bharat/wav2vec2-large-xlsr-53-indian-langs"
transcriber = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)
def transliterate_to_roman(text, source_script=None):
    """Transliterate *text* into Roman script using the ITRANS scheme.

    Args:
        text: Text to transliterate.
        source_script: ``sanscript`` scheme constant naming the script that
            *text* is written in. Defaults to ``sanscript.DEVANAGARI``, which
            is what this function always assumed before the parameter existed.

    Returns:
        The ITRANS romanization of *text*, or ``""`` when *text* is empty
        or ``None``.
    """
    # Nothing to transliterate; skip the library call for empty/None input.
    if not text:
        return ""
    # Resolved here rather than in the signature so the default is looked up
    # at call time, not when the function is defined.
    if source_script is None:
        source_script = sanscript.DEVANAGARI
    return transliterate(text, source_script, sanscript.ITRANS)
def transcribe_youtube_video(youtube_url):
    """Download a YouTube video's audio track and transcribe it.

    Args:
        youtube_url: URL of the video, as entered in the UI textbox.

    Returns:
        A ``(text, roman_text)`` tuple: the transcription returned by
        ``transcriber`` and the result of passing that transcription through
        ``transliterate_to_roman``. On any failure the tuple is
        ``("Error: <message>", "")`` so both outputs always receive a string.
    """
    # Function-scope import (stdlib) keeps this block self-contained.
    import tempfile

    url = (youtube_url or "").strip()
    if not url:
        return "Error: Please enter a YouTube video URL.", ""

    try:
        # A per-call temporary directory keeps concurrent requests from
        # overwriting each other's "audio.mp4" and guarantees the download is
        # removed even when the download or transcription raises.
        with tempfile.TemporaryDirectory() as work_dir:
            yt = YouTube(url)
            audio_stream = yt.streams.filter(only_audio=True).first()
            if audio_stream is None:
                return "Error: No audio stream is available for this video.", ""
            audio_file_path = audio_stream.download(
                output_path=work_dir, filename="audio.mp4"
            )
            # Transcribe while the file still exists inside the temp directory.
            text = transcriber(audio_file_path)["text"]

        roman_text = transliterate_to_roman(text)
        return text, roman_text
    except Exception as e:
        # UI boundary: report the failure in the first output instead of
        # letting the request crash.
        return f"Error: {str(e)}", ""
# Web UI: a single textbox takes the YouTube URL, and the two text outputs
# receive the values returned by transcribe_youtube_video, in order.
url_input = gr.Textbox(label="YouTube Video URL")
iface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=url_input,
    outputs=["text", "text"],
    title="YouTube Video Transcription",
    description=(
        "Enter a YouTube video URL to download and transcribe its audio. "
        "The transcription will be provided in both original and Roman script."
    ),
)

# Launch the interface.
iface.launch()