"""Gradio front-end for an English audio transcription (ASR) service.

The user supplies audio by upload, microphone recording, or YouTube URL.
YouTube audio is fetched through a RapidAPI endpoint, converted to a 16 kHz
MP3 and posted, like any other audio file, to the ASR backend at ``API_URL``.
"""

import json
import os
import subprocess
import tempfile  # Import tempfile
from typing import Optional
from urllib.parse import quote

import gradio as gr
import requests
from pydub import AudioSegment  # Import AudioSegment

# Define the FastAPI URL
API_URL = "http://astarwiz.com:9998"
rapid_key = os.environ.get("RAPID_API_KEY")

# (connect, read) timeouts in seconds for the YouTube link lookup and the
# audio download, so a stalled remote host cannot hang a request forever.
DOWNLOAD_TIMEOUT = (10, 60)


def fetch_youtube_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube watch or shorts URL.

    Args:
        youtube_url: A URL containing ``v=<id>`` or a ``/shorts/<id>`` path.

    Returns:
        The video ID with any trailing query parameters, fragment or path
        segments removed.

    Raises:
        Exception: If the URL is in neither supported format or the ID is
            empty.
    """
    if 'v=' in youtube_url:
        candidate = youtube_url.split("v=")[1]
    elif 'shorts' in youtube_url:
        candidate = youtube_url.rstrip("/").split("/")[-1]
    else:
        raise Exception("Unsupported URL format")

    # Cut at the first separator so "ID&t=30s" or "ID?feature=share" yields
    # just "ID"; the ID is later used in a filename and in an embed URL.
    for separator in "&?#/":
        candidate = candidate.partition(separator)[0]

    if not candidate:
        raise Exception("Unsupported URL format")
    return candidate


def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -> Optional[str]:
    """Download a YouTube video's audio and convert it to a 16 kHz MP3.

    Args:
        youtube_url: The YouTube URL to download from.
        output_dir: Directory for the resulting file; defaults to the system
            temporary directory.

    Returns:
        Path to ``<video_id>.mp3`` in ``output_dir``, or ``None`` if the link
        lookup failed or no bundled stream could be downloaded.

    Raises:
        Exception: Propagated from ``fetch_youtube_id`` for unsupported URLs.
    """
    video_id = fetch_youtube_id(youtube_url)

    if output_dir is None:
        output_dir = tempfile.gettempdir()

    output_filename = os.path.join(output_dir, f"{video_id}.mp3")

    if os.path.exists(output_filename):
        return output_filename  # Return if the file already exists

    url = "https://youtube86.p.rapidapi.com/api/youtube/links"
    headers = {
        'Content-Type': 'application/json',
        'x-rapidapi-host': 'youtube86.p.rapidapi.com',
        'x-rapidapi-key': rapid_key  # Read from the RAPID_API_KEY environment variable
    }
    data = {
        "url": youtube_url
    }

    response = requests.post(url, headers=headers, json=data, timeout=DOWNLOAD_TIMEOUT)
    print('Fetched audio links')

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None  # Return None on failure

    result = response.json()
    for link in result[0]['urls']:
        if not link.get('isBundle'):
            continue

        audio_url = link['url']
        extension = link['extension']
        audio_response = requests.get(audio_url, timeout=DOWNLOAD_TIMEOUT)
        if audio_response.status_code != 200:
            continue

        # The ".source" infix keeps this path distinct from output_filename
        # even when the stream is already an MP3; otherwise removing the
        # temporary file would delete the finished output.
        temp_filename = os.path.join(output_dir, f"{video_id}.source.{extension}")
        try:
            with open(temp_filename, 'wb') as audio_file:
                audio_file.write(audio_response.content)

            # Convert to MP3 and downsample to 16000 Hz
            audio = AudioSegment.from_file(temp_filename, format=extension)
            audio = audio.set_frame_rate(16000)
            audio.export(output_filename, format="mp3", parameters=["-ar", "16000"])
        finally:
            if os.path.exists(temp_filename):
                os.remove(temp_filename)  # Remove the temporary file

        return output_filename  # Return the final MP3 filename

    return None  # Return None if no successful download occurs


def run_asr(audio_file, youtube_url, with_timestamp, model_choice):
    """Transcribe an audio file or a YouTube video via the ASR backend.

    A non-empty ``youtube_url`` takes precedence over ``audio_file``.

    Args:
        audio_file: Path to a local audio file, or ``None``.
        youtube_url: YouTube URL to download and transcribe, or empty.
        with_timestamp: Whether the backend should include timestamps.
        model_choice: ``"whisper_v3"`` selects ``official-v3``; any other
            value selects ``whisper-large-v2-imda``.

    Returns:
        The transcription text, or a human-readable error/help message.
    """
    temp_file = None
    try:
        if youtube_url:
            # It's a YouTube URL
            audio_file = download_youtube_audio(youtube_url)
            if audio_file is None:
                return "Error: could not download audio from the YouTube URL."
            temp_file = audio_file
        elif not audio_file:
            return "Please provide either an audio file or a YouTube URL."

        # Update model_name based on the user's choice
        if model_choice == "whisper_v3":
            model_name = "official-v3"
        else:
            model_name = "whisper-large-v2-imda"

        data = {'language': 'en', 'model_name': model_name, 'with_timestamp': with_timestamp}

        # The context manager closes the upload handle once the request is done.
        # No timeout is set here: transcription time depends on audio length.
        with open(audio_file, 'rb') as upload:
            response = requests.post(f"{API_URL}/asr", data=data, files={'file': upload})

        if response.status_code == 200:
            return response.json().get("text", "")
        return f"Error: {response.status_code}"
    except Exception as e:
        # Top-level UI boundary: report the failure in the result box.
        return f"Error: {str(e)}"
    finally:
        # Clean up the temporary file if it was a YouTube download
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)


def embed_youtube(youtube_url):
    """Build the UI updates that show (or hide) the embedded YouTube player.

    Args:
        youtube_url: The URL typed into the YouTube textbox; may be empty.

    Returns:
        A 3-tuple ``(video_player_update, result_text, audio_value)``: the
        player update, the new text for the result box (an error message for
        an invalid URL, otherwise empty) and ``None`` to clear the audio input.
    """
    if youtube_url:
        try:
            video_id = fetch_youtube_id(youtube_url)
            # URL-quote the ID so user-supplied text cannot break out of the
            # src attribute.
            safe_id = quote(video_id, safe="")
            embed_html = (
                '<iframe width="560" height="315" '
                f'src="https://www.youtube.com/embed/{safe_id}" '
                'frameborder="0" allowfullscreen></iframe>'
            )
            return gr.update(value=embed_html, visible=True), "", None
        except Exception as e:
            return gr.update(value="", visible=False), f"Invalid YouTube URL: {str(e)}", None
    return gr.update(value="", visible=False), "", None


def _on_youtube_change(youtube_url, audio):
    """Handle a change of the YouTube textbox without clobbering live audio.

    Change events also fire on programmatic updates, so this handler runs when
    the audio handler clears the textbox. In that case the audio input must be
    left alone and the run button must stay enabled if audio is present.

    Returns:
        Updates for ``(video_player, result, audio_input, run_button)``.
    """
    player_update, message, cleared_audio = embed_youtube(youtube_url)
    if youtube_url:
        return player_update, message, cleared_audio, gr.update(interactive=True)
    return player_update, message, gr.update(), gr.update(interactive=bool(audio))


def clear_on_audio_input(audio, youtube_url=None):
    """Handle a change of the audio input.

    Args:
        audio: Current audio value (a file path) or ``None``.
        youtube_url: Current content of the YouTube textbox. When the audio
            was cleared, the run button stays enabled only if a URL is
            present.

    Returns:
        Updates for ``(result, video_player, youtube_input, run_button)``.
    """
    if audio is not None:
        return "", gr.update(value="", visible=False), "", gr.update(interactive=True)
    return gr.update(), gr.update(), gr.update(), gr.update(interactive=bool(youtube_url))


# Create the Gradio interface with improved aesthetics
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Audio Transcription Service")
    gr.Markdown("Upload an audio file, record your voice, or paste a YouTube URL to get an English transcription.")

    with gr.Row():
        with gr.Column(scale=2):
            audio_input = gr.Audio(sources=['microphone', 'upload'], type="filepath", label="Audio Input")
            youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
            video_player = gr.HTML(visible=False)
            timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
            model_choice = gr.Radio(["local_whisper", "whisper_v3"], label="Model Selection", value="local_whisper")
        with gr.Column(scale=3):
            result = gr.Textbox(
                label="Transcription Result",
                placeholder="Your transcription will appear here...",
                lines=10
            )

    run_button = gr.Button("🚀 Transcribe Audio", variant="primary", interactive=False)
    run_button.click(run_asr, inputs=[audio_input, youtube_input, timestamp_toggle, model_choice], outputs=[result])

    # Update video player, clear transcription and audio input, and enable run button when YouTube URL is entered
    youtube_input.change(
        fn=_on_youtube_change,
        inputs=[youtube_input, audio_input],
        outputs=[video_player, result, audio_input, run_button]
    )

    # Clear transcription, YouTube input, video player, and update run button when audio is input
    audio_input.change(
        fn=clear_on_audio_input,
        inputs=[audio_input, youtube_input],
        outputs=[result, video_player, youtube_input, run_button]
    )

    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an audio file or record your voice using the microphone, OR paste a YouTube URL.")
    gr.Markdown("2. If you paste a YouTube URL, the video will be displayed for your reference, and any previous transcription or audio input will be cleared.")
    gr.Markdown("3. If you upload or record audio, any previous transcription, YouTube URL, and video will be cleared.")
    gr.Markdown("4. Click the 'Transcribe Audio' button to start the process.")
    gr.Markdown("5. Wait for a few seconds, and your transcription will appear in the result box.")

# Launch the Gradio interface
demo.launch(server_name='0.0.0.0')