Spaces:

sikhuni
/

yt_to_text_model

Build error

File size: 1,723 Bytes

4af3d4a

# Dependencies: transformers torch soundfile pytube indic-transliteration
# Install these before running the script (for example by listing them in a
# requirements.txt). A notebook-style "!pip install" shell escape is not valid
# Python syntax in a plain .py file and prevents the module from loading.

import os
import tempfile

import gradio as gr
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from pytube import YouTube
from transformers import pipeline

# Speech-recognition pipeline, built once at import time and reused for every
# request handled by the app.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="ai4bharat/wav2vec2-large-xlsr-53-indian-langs",
)

# Function to transliterate text to Roman script
def transliterate_to_roman(text):
    """Return ``text`` transliterated from Devanagari to the ITRANS scheme.

    The source scheme is fixed to ``sanscript.DEVANAGARI`` and the target to
    ``sanscript.ITRANS``; the conversion itself is delegated to
    ``indic_transliteration.sanscript.transliterate``.
    """
    source_scheme = sanscript.DEVANAGARI
    target_scheme = sanscript.ITRANS
    romanized = transliterate(text, source_scheme, target_scheme)
    return romanized

# Function to download audio from YouTube and transcribe it
def transcribe_youtube_video(youtube_url):
    """Download a YouTube video's audio track and transcribe it.

    Args:
        youtube_url: URL of the YouTube video, as entered in the UI.

    Returns:
        A ``(text, roman_text)`` tuple: the transcription returned by the
        module-level ``transcriber`` and the result of passing it through
        ``transliterate_to_roman``. On any failure the tuple is
        ``("Error: <message>", "")`` so that both output fields always
        receive a string and the function never raises.
    """
    try:
        # Reject missing/blank input up front instead of handing it to pytube.
        if not youtube_url or not youtube_url.strip():
            return "Error: Please provide a YouTube video URL.", ""

        # Download audio from YouTube
        yt = YouTube(youtube_url.strip())
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # .first() yields None when the filter matches no stream.
            return "Error: No audio stream found for this video.", ""

        # A per-request temporary directory keeps concurrent requests from
        # overwriting each other's "audio.mp4", and guarantees the download
        # is deleted even when transcription raises.
        with tempfile.TemporaryDirectory() as download_dir:
            audio_file_path = audio_stream.download(
                output_path=download_dir, filename="audio.mp4"
            )

            # Transcribe the downloaded audio
            text = transcriber(audio_file_path)["text"]

        # Transliterate the transcription to Roman script
        roman_text = transliterate_to_roman(text)

        return text, roman_text

    except Exception as e:
        # UI boundary: report the failure in the first output field rather
        # than letting the exception propagate to the caller.
        return f"Error: {str(e)}", ""

# Gradio UI: one textbox for the YouTube URL in, two text fields out
# (original-script transcription and its Roman transliteration).
url_input = gr.Textbox(label="YouTube Video URL")

iface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=url_input,
    outputs=["text", "text"],
    title="YouTube Video Transcription",
    description=(
        "Enter a YouTube video URL to download and transcribe its audio. "
        "The transcription will be provided in both original and Roman script."
    ),
)

# Start the web app.
iface.launch()