File size: 1,723 Bytes
4af3d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Dependencies -- install them before running this script (for example via
# requirements.txt). The notebook-style "!pip install ..." shell magic is not
# valid Python syntax and makes a plain .py file fail with a SyntaxError.
#
#   pip install gradio transformers torch soundfile pytube indic-transliteration

import gradio as gr
from transformers import pipeline
from pytube import YouTube
import os
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# Build the speech-recognition pipeline once at import time so that every
# request handled by the app reuses the same loaded model.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="ai4bharat/wav2vec2-large-xlsr-53-indian-langs",
)

# Romanise Devanagari text using the ITRANS scheme
def transliterate_to_roman(text):
    """Return *text* transliterated from Devanagari to ITRANS.

    Args:
        text: The string to transliterate; it is treated as Devanagari.

    Returns:
        Whatever ``transliterate`` produces for the Devanagari -> ITRANS
        conversion of *text*.
    """
    source_scheme = sanscript.DEVANAGARI
    target_scheme = sanscript.ITRANS
    romanised = transliterate(text, source_scheme, target_scheme)
    return romanised

# Function to download audio from YouTube and transcribe it
def transcribe_youtube_video(youtube_url):
    """Download a YouTube video's audio track and transcribe it.

    The audio is saved into a temporary directory that is private to this
    call and is removed when the call finishes, whether or not transcription
    succeeds. This prevents leftover files after a failure and stops
    concurrent requests from overwriting each other's download.

    Args:
        youtube_url: URL of the YouTube video to transcribe.

    Returns:
        A ``(text, roman_text)`` tuple holding the transcription returned by
        the module-level ``transcriber`` and the result of passing it through
        ``transliterate_to_roman``. On any failure the tuple is
        ``("Error: <message>", "")`` so the caller always receives two strings.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    import tempfile

    url = (youtube_url or "").strip()
    if not url:
        return "Error: please provide a YouTube video URL", ""

    # Broad catch is deliberate: this is the UI boundary, and every failure
    # (network, pytube, model, transliteration) is reported as text.
    try:
        with tempfile.TemporaryDirectory() as work_dir:
            # Download audio from YouTube
            yt = YouTube(url)
            audio_stream = yt.streams.filter(only_audio=True).first()
            if audio_stream is None:
                # .first() yields None when the filter matches nothing.
                return "Error: no audio stream available for this video", ""

            audio_file_path = audio_stream.download(
                output_path=work_dir,
                filename="audio.mp4",
            )

            # Transcribe while the file still exists; the directory and its
            # contents are deleted when the ``with`` block exits.
            text = transcriber(audio_file_path)["text"]

        # Transliterate to Roman script if needed
        roman_text = transliterate_to_roman(text)
        return text, roman_text

    except Exception as e:
        return f"Error: {str(e)}", ""

# Web UI: a single textbox takes the YouTube URL and two text outputs show
# the values returned by transcribe_youtube_video.
iface = gr.Interface(
    title="YouTube Video Transcription",
    description=(
        "Enter a YouTube video URL to download and transcribe its audio. "
        "The transcription will be provided in both original and Roman script."
    ),
    fn=transcribe_youtube_video,
    inputs=gr.Textbox(label="YouTube Video URL"),
    outputs=["text", "text"],
)

# Start serving the interface.
iface.launch()