amritsar's picture
Create app.py
4af3d4a verified
raw
history blame
1.72 kB
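# Dependencies (install from a shell before running; the "!pip" notebook magic is not valid Python):
#   pip install transformers torch soundfile pytube indic-transliteration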
import os
import tempfile

import gradio as gr
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from pytube import YouTube
from transformers import pipeline
# Speech-recognition pipeline, built once at import time and reused for every
# request so the model is not reloaded per call.
ASR_MODEL_ID = "ai4bharat/wav2vec2-large-xlsr-53-indian-langs"
transcriber = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)
# Function to transliterate text to Roman script
def transliterate_to_roman(text):
    """Return ``text`` transliterated from Devanagari into the ITRANS scheme.

    The source script is fixed to ``sanscript.DEVANAGARI`` and the target to
    ``sanscript.ITRANS``; the conversion itself is delegated to
    ``indic_transliteration``'s ``transliterate``.

    NOTE(review): the input is assumed to be Devanagari. How text in other
    scripts is handled depends on the library -- confirm before relying on it.
    """
    source_script = sanscript.DEVANAGARI
    target_script = sanscript.ITRANS
    return transliterate(text, source_script, target_script)
# Function to download audio from YouTube and transcribe it
def transcribe_youtube_video(youtube_url):
    """Download a YouTube video's audio track and transcribe it.

    Args:
        youtube_url: URL of the video to transcribe. ``None``, an empty
            string, or whitespace-only input is rejected up front.

    Returns:
        A ``(text, roman_text)`` tuple: the transcription produced by the
        module-level ``transcriber`` and its romanized form from
        ``transliterate_to_roman``. On any failure the tuple is
        ``("Error: <reason>", "")`` so both UI outputs always receive a
        string; exceptions are not propagated to the caller.
    """
    url = (youtube_url or "").strip()
    if not url:
        # Fail fast with a readable message instead of a pytube parsing error.
        return "Error: no YouTube URL provided", ""

    try:
        audio_stream = YouTube(url).streams.filter(only_audio=True).first()
        if audio_stream is None:
            # ``first()`` yields None when the filter matches nothing.
            return "Error: no audio stream available for this video", ""

        # A private temporary directory per call keeps concurrent requests
        # from overwriting each other's download, and the context manager
        # removes the file even when transcription or transliteration raises.
        with tempfile.TemporaryDirectory() as work_dir:
            audio_file_path = audio_stream.download(
                output_path=work_dir, filename="audio.mp4"
            )
            text = transcriber(audio_file_path)["text"]

        roman_text = transliterate_to_roman(text)
        return text, roman_text
    except Exception as e:
        # Top-level UI boundary: report the failure in the first output box.
        return f"Error: {str(e)}", ""
# Gradio interface for inputting a YouTube URL
_APP_DESCRIPTION = (
    "Enter a YouTube video URL to download and transcribe its audio. "
    "The transcription will be provided in both original and Roman script."
)
_url_input = gr.Textbox(label="YouTube Video URL")

# One text input (the URL) feeding two text outputs: the raw transcription
# and its romanized form, in the order returned by transcribe_youtube_video.
iface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=_url_input,
    outputs=["text", "text"],
    title="YouTube Video Transcription",
    description=_APP_DESCRIPTION,
)

# Start the web server as soon as the module is executed.
iface.launch()