File size: 7,059 Bytes
9d177c3
 
 
 
 
80c2167
5a33213
f595e63
9d177c3
 
57c0815
f595e63
9d177c3
e7a545f
 
 
 
 
 
 
 
 
 
f595e63
9d177c3
 
 
f595e63
 
 
 
 
 
 
 
 
 
6b81e0d
f595e63
e7a545f
f595e63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d177c3
0566ea1
9d177c3
 
 
 
 
 
 
 
 
 
0566ea1
 
 
f6ffd85
0566ea1
 
 
 
9d177c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7a545f
9d177c3
 
3244f42
 
9d177c3
 
 
 
9dafffe
 
9d177c3
 
 
 
 
 
 
 
 
 
 
75f8860
0566ea1
9d177c3
 
 
 
 
 
 
9dafffe
0566ea1
9d177c3
9dafffe
9d177c3
9dafffe
9d177c3
9dafffe
9d177c3
 
9dafffe
9d177c3
 
 
9dafffe
9d177c3
 
 
 
 
 
 
 
 
 
75f8860
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import gradio as gr
import requests
from typing import Optional
import json
import subprocess
import os
import tempfile  # Import tempfile
from pydub import AudioSegment  # Import AudioSegment

# Define the FastAPI URL
# Base URL of the transcription backend; run_asr() posts audio to f"{API_URL}/asr".
API_URL = "http://astarwiz.com:9998"
# RapidAPI key, sent as the 'x-rapidapi-key' header by download_youtube_audio().
# os.environ.get() yields None when RAPID_API_KEY is not set in the environment.
rapid_key = os.environ.get("RAPID_API_KEY")

def fetch_youtube_id(youtube_url: str) -> str:
    """Extract the video id from a YouTube URL.

    Supported forms:
      * ``.../watch?v=<id>`` (any extra query parameters are ignored)
      * ``.../shorts/<id>`` (query string / trailing slash are ignored)
      * ``youtu.be/<id>``

    Args:
        youtube_url: URL as typed by the user; a missing scheme is tolerated.

    Returns:
        The bare video id.

    Raises:
        ValueError: If no id can be found, or the candidate contains
            characters outside ``[A-Za-z0-9_-]``. ``ValueError`` is a subclass
            of ``Exception``, so existing ``except Exception`` callers still
            work.
    """
    import re
    from urllib.parse import parse_qs, urlparse

    url = youtube_url.strip()
    if "://" not in url:
        # urlparse only recognises the host when a scheme is present.
        url = "https://" + url
    parsed = urlparse(url)
    segments = [part for part in parsed.path.split("/") if part]

    candidate = None
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        # Parsing the query (instead of splitting on "v=") drops "&t=..." etc.
        candidate = query_ids[0]
    elif "shorts" in segments[:-1]:
        # The id is the path segment right after "shorts".
        candidate = segments[segments.index("shorts") + 1]
    elif (parsed.hostname or "").lower() in ("youtu.be", "www.youtu.be") and segments:
        candidate = segments[0]

    # The id is later used in a file name and inside HTML, so reject anything
    # that is not a plain YouTube-style token.
    if not candidate or not re.fullmatch(r"[A-Za-z0-9_-]+", candidate):
        raise ValueError("Unsupported URL format")
    return candidate
    
def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -> Optional[str]:
    """Download a YouTube video's audio and store it as a 16 kHz MP3.

    The download links are obtained from the youtube86 RapidAPI endpoint; the
    first entry flagged ``isBundle`` that downloads successfully is converted
    with pydub and written to ``<output_dir>/<video_id>.mp3``. If that file
    already exists it is returned immediately without any network access.

    Args:
        youtube_url: URL of the video.
        output_dir: Directory for the MP3. Defaults to the system temp dir.

    Returns:
        Path of the MP3 file, or ``None`` if the link API answered with a
        non-200 status, returned an unexpected payload, or no bundled stream
        could be downloaded.

    Raises:
        Exception: Propagated from ``fetch_youtube_id`` for unsupported URLs,
            from ``requests`` on network errors/timeouts, and from pydub if
            the conversion fails.
    """
    # (connect, read) timeouts in seconds so a stalled server cannot hang the UI forever.
    timeout = (10, 120)

    video_id = fetch_youtube_id(youtube_url)

    if output_dir is None:
        output_dir = tempfile.gettempdir()

    output_filename = os.path.join(output_dir, f"{video_id}.mp3")

    if os.path.exists(output_filename):
        return output_filename  # Return if the file already exists

    api_url = "https://youtube86.p.rapidapi.com/api/youtube/links"
    headers = {
        'Content-Type': 'application/json',
        'x-rapidapi-host': 'youtube86.p.rapidapi.com',
        'x-rapidapi-key': rapid_key,
    }
    payload = {"url": youtube_url}

    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None  # Return None on failure
    print('Fetched audio links')

    try:
        candidates = response.json()[0]['urls']
    except (IndexError, KeyError, TypeError, ValueError):
        print("Error: unexpected response format from the link API")
        return None

    for entry in candidates:
        if not entry.get('isBundle'):
            continue

        extension = entry['extension']
        audio_response = requests.get(entry['url'], timeout=timeout)
        if audio_response.status_code != 200:
            continue

        # Use a uniquely named scratch file. Naming it "<video_id>.<extension>"
        # collides with the output path when the extension is "mp3", and the
        # cleanup below would then delete the finished result.
        fd, temp_filename = tempfile.mkstemp(suffix=f".{extension}", dir=output_dir)
        try:
            with os.fdopen(fd, 'wb') as audio_file:
                audio_file.write(audio_response.content)

            # Convert to MP3 and downsample to 16000 Hz
            audio = AudioSegment.from_file(temp_filename, format=extension)
            audio = audio.set_frame_rate(16000)
            # export() hands back the open output file object; close it explicitly.
            audio.export(output_filename, format="mp3", parameters=["-ar", "16000"]).close()
        except Exception:
            # A half-written MP3 would be mistaken for a cached result next time.
            if os.path.exists(output_filename):
                os.remove(output_filename)
            raise
        finally:
            os.remove(temp_filename)  # Remove the temporary file

        return output_filename  # Return the final MP3 filename

    return None  # Return None if no successful download occurs

def run_asr(audio_file, youtube_url, with_timestamp, model_choice):
    """Transcribe an audio file or a YouTube video through the ASR backend.

    Args:
        audio_file: Path of an uploaded/recorded audio file, or a falsy value.
        youtube_url: YouTube URL; when non-empty it takes precedence over
            ``audio_file`` and the audio is downloaded first.
        with_timestamp: Forwarded to the backend as ``with_timestamp``.
        model_choice: ``"whisper_v3"`` selects the ``official-v3`` model; any
            other value selects ``whisper-large-v2-imda``.

    Returns:
        The transcription text on success, otherwise a human-readable message
        (this function never raises; all errors are reported as text).
    """
    temp_file = None
    try:
        if youtube_url:
            # It's a YouTube URL
            audio_file = download_youtube_audio(youtube_url)
            if not audio_file:
                # The downloader signals failure with None; report it instead
                # of letting open(None) raise a confusing TypeError.
                return "Error: could not download audio from the YouTube URL."
            temp_file = audio_file
        elif not audio_file:
            return "Please provide either an audio file or a YouTube URL."

        # Update model_name based on the user's choice
        if model_choice == "whisper_v3":
            model_name = "official-v3"
        else:
            model_name = "whisper-large-v2-imda"

        data = {'language': 'en', 'model_name': model_name, 'with_timestamp': with_timestamp}

        # Close the upload handle as soon as the request finishes; an open
        # handle leaks a descriptor and can block the os.remove() below.
        with open(audio_file, 'rb') as audio_handle:
            response = requests.post(f"{API_URL}/asr", data=data, files={'file': audio_handle})

        if response.status_code == 200:
            return response.json().get("text", "")
        else:
            return f"Error: {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Clean up the temporary file if it was a YouTube download
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)

def embed_youtube(youtube_url):
    """Build the Gradio updates that show an embedded player for a YouTube URL.

    Args:
        youtube_url: URL typed by the user; may be empty.

    Returns:
        A 3-tuple ``(player_update, result_text, audio_value)``. In this file
        it is wired to the ``video_player``, ``result`` and ``audio_input``
        components. The player is made visible for a recognised URL and hidden
        otherwise; ``result_text`` carries an error message for an invalid URL
        and is empty otherwise; ``audio_value`` is always ``None``.
    """
    from html import escape

    if youtube_url:
        try:
            video_id = fetch_youtube_id(youtube_url)
            # The id comes from user input and lands inside an HTML attribute:
            # escape it so quotes/angle brackets cannot break out of the tag.
            safe_id = escape(video_id, quote=True)
            embed_html = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{safe_id}" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>'
            return gr.update(value=embed_html, visible=True), "", None
        except Exception as e:
            return gr.update(value="", visible=False), f"Invalid YouTube URL: {str(e)}", None
    return gr.update(value="", visible=False), "", None

def clear_on_audio_input(audio):
    """React to a change of the audio component.

    Returns a 4-tuple of updates; in this file it is wired to ``result``,
    ``video_player``, ``youtube_input`` and ``run_button`` (in that order).
    """
    if audio is None:
        # No audio present: leave the first three components untouched and
        # disable the button.
        return gr.update(), gr.update(), gr.update(), gr.update(interactive=False)

    # Audio present: blank the text fields, hide the player, enable the button.
    hidden_player = gr.update(value="", visible=False)
    enabled_button = gr.update(interactive=True)
    return "", hidden_player, "", enabled_button

# Create the Gradio interface with improved aesthetics
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Audio Transcription Service")
    gr.Markdown("Upload an audio file, record your voice, or paste a YouTube URL to get an English transcription.")

    with gr.Row():
        # Left column: all inputs and options.
        with gr.Column(scale=2):
            audio_input = gr.Audio(sources=['microphone', 'upload'], type="filepath", label="Audio Input")
            youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
            video_player = gr.HTML(visible=False)
            timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
            model_choice = gr.Radio(["local_whisper", "whisper_v3"], label="Model Selection", value="local_whisper")
        # Right column: transcription output.
        with gr.Column(scale=3):
            result = gr.Textbox(
                label="Transcription Result",
                placeholder="Your transcription will appear here...",
                lines=10
            )

    # The button starts disabled; the change handlers below enable it once an
    # input (audio or URL) is present.
    run_button = gr.Button("🚀 Transcribe Audio", variant="primary", interactive=False)
    run_button.click(run_asr, inputs=[audio_input, youtube_input, timestamp_toggle, model_choice], outputs=[result])

    # Update video player, clear transcription and audio input, and enable run button when YouTube URL is entered
    # embed_youtube() yields three values; the fourth output toggles the button.
    youtube_input.change(
        fn=lambda url: (*embed_youtube(url), gr.update(interactive=bool(url))),
        inputs=[youtube_input],
        outputs=[video_player, result, audio_input, run_button]
    )

    # Clear transcription, YouTube input, video player, and update run button when audio is input
    audio_input.change(
        fn=clear_on_audio_input,
        inputs=[audio_input],
        outputs=[result, video_player, youtube_input, run_button]
    )

    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an audio file or record your voice using the microphone, OR paste a YouTube URL.")
    gr.Markdown("2. If you paste a YouTube URL, the video will be displayed for your reference, and any previous transcription or audio input will be cleared.")
    gr.Markdown("3. If you upload or record audio, any previous transcription, YouTube URL, and video will be cleared.")
    gr.Markdown("4. Click the 'Transcribe Audio' button to start the process.")
    gr.Markdown("5. Wait for a few seconds, and your transcription will appear in the result box.")

# Launch the Gradio interface
# server_name='0.0.0.0' binds to all network interfaces rather than localhost only.
demo.launch(server_name='0.0.0.0')