File size: 7,059 Bytes
9d177c3 80c2167 5a33213 f595e63 9d177c3 57c0815 f595e63 9d177c3 e7a545f f595e63 9d177c3 f595e63 6b81e0d f595e63 e7a545f f595e63 9d177c3 0566ea1 9d177c3 0566ea1 f6ffd85 0566ea1 9d177c3 e7a545f 9d177c3 3244f42 9d177c3 9dafffe 9d177c3 75f8860 0566ea1 9d177c3 9dafffe 0566ea1 9d177c3 9dafffe 9d177c3 9dafffe 9d177c3 9dafffe 9d177c3 9dafffe 9d177c3 9dafffe 9d177c3 75f8860 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import gradio as gr
import requests
from typing import Optional
import json
import subprocess
import os
import tempfile # Import tempfile
from pydub import AudioSegment # Import AudioSegment
# Base URL of the FastAPI transcription backend that run_asr posts to
API_URL = "http://astarwiz.com:9998"
# RapidAPI key sent with the YouTube link lookup; None when RAPID_API_KEY is unset
rapid_key = os.getenv("RAPID_API_KEY")
def fetch_youtube_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Supported shapes are watch URLs (``...?v=<id>``), Shorts URLs
    (``.../shorts/<id>``) and ``youtu.be/<id>`` short links. Extra query
    parameters, fragments and trailing slashes are ignored, so the returned
    value is the bare ID only.

    Args:
        youtube_url: URL pasted by the user, with or without a scheme.

    Returns:
        The video ID, made only of letters, digits, ``-`` and ``_``.

    Raises:
        Exception: ("Unsupported URL format") when no valid ID can be found.
    """
    import re
    from urllib.parse import parse_qs, urlparse

    text = youtube_url.strip()
    # urlparse only recognises the host when a scheme is present.
    if not re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", text):
        text = "https://" + text
    parsed = urlparse(text)
    segments = [part for part in parsed.path.split("/") if part]
    host = parsed.hostname or ""

    candidate = None
    # parse_qs drops blank values, so "v=" with nothing after it is not a match.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        candidate = query_ids[0]
    elif "shorts" in segments[:-1]:
        # The ID is the path segment right after "shorts".
        candidate = segments[segments.index("shorts") + 1]
    elif host in ("youtu.be", "www.youtu.be") and segments:
        candidate = segments[0]

    # The ID is later used in file names and HTML, so reject anything that is
    # not a plain ID token instead of passing arbitrary text along.
    if candidate and re.fullmatch(r"[A-Za-z0-9_-]+", candidate):
        return candidate
    raise Exception("Unsupported URL format")
def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -> Optional[str]:
    """Download a YouTube video's audio as a 16 kHz MP3 and return its path.

    Download links are looked up through the youtube86 RapidAPI endpoint. The
    first entry flagged ``isBundle`` that downloads successfully is converted
    to MP3 with pydub.

    Args:
        youtube_url: URL of the video to fetch.
        output_dir: Directory for the MP3; defaults to the system temp dir.

    Returns:
        Path of the MP3 (an existing file for the same video ID is reused),
        or None when the link lookup fails or no entry could be downloaded.

    Raises:
        Exception: propagated from fetch_youtube_id for unsupported URLs, and
            from requests / pydub on network or conversion errors.
    """
    import re

    video_id = fetch_youtube_id(youtube_url)
    # The ID comes from user input; keep only file-name-safe characters so it
    # cannot point outside output_dir.
    safe_id = re.sub(r"[^A-Za-z0-9_-]", "_", video_id)
    if output_dir is None:
        output_dir = tempfile.gettempdir()
    output_filename = os.path.join(output_dir, f"{safe_id}.mp3")
    if os.path.exists(output_filename):
        return output_filename  # Reuse an earlier download of the same video

    api_endpoint = "https://youtube86.p.rapidapi.com/api/youtube/links"
    headers = {
        'Content-Type': 'application/json',
        'x-rapidapi-host': 'youtube86.p.rapidapi.com',
        'x-rapidapi-key': rapid_key  # Read from the RAPID_API_KEY environment variable
    }
    payload = {
        "url": youtube_url
    }
    # Timeouts keep a stalled remote service from hanging the request forever.
    response = requests.post(api_endpoint, headers=headers, json=payload, timeout=30)
    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None  # Return None on failure
    print('Fetched audio links')

    try:
        candidates = response.json()[0]['urls']
    except (IndexError, KeyError, TypeError, ValueError):
        print("Error: unexpected response format from the link API")
        return None

    for link in candidates:
        if not link.get('isBundle'):
            continue
        extension = link['extension']
        audio_response = requests.get(link['url'], timeout=120)
        if audio_response.status_code != 200:
            continue

        # Use a unique temp file: a name derived only from the ID would collide
        # with output_filename when the extension is "mp3" and be deleted below.
        safe_ext = re.sub(r"[^A-Za-z0-9]", "", str(extension))
        fd, temp_filename = tempfile.mkstemp(
            prefix=f"{safe_id}.", suffix=f".{safe_ext}", dir=output_dir
        )
        try:
            with os.fdopen(fd, 'wb') as audio_file:
                audio_file.write(audio_response.content)
            # Convert to MP3 and downsample to 16000 Hz
            audio = AudioSegment.from_file(temp_filename, format=extension)
            audio = audio.set_frame_rate(16000)
            audio.export(output_filename, format="mp3", parameters=["-ar", "16000"])
        except Exception:
            # A partial MP3 would be returned as a cache hit on the next call.
            if os.path.exists(output_filename):
                os.remove(output_filename)
            raise
        finally:
            if os.path.exists(temp_filename):
                os.remove(temp_filename)  # Remove the temporary file
        return output_filename  # Return the final MP3 filename

    return None  # Return None if no successful download occurs
def run_asr(audio_file, youtube_url, with_timestamp, model_choice):
    """Transcribe an audio file or a YouTube video via the ASR backend.

    Args:
        audio_file: Path of an uploaded/recorded audio file, or None.
        youtube_url: YouTube URL; when non-empty it takes precedence over
            audio_file and the audio is downloaded first.
        with_timestamp: Forwarded to the backend as the 'with_timestamp' field.
        model_choice: "whisper_v3" selects the "official-v3" model; any other
            value selects "whisper-large-v2-imda".

    Returns:
        The transcription text on success, otherwise a human-readable message.
        Errors are returned as "Error: ..." strings rather than raised, so the
        message can be shown in the result box.
    """
    temp_file = None
    try:
        if youtube_url:
            # It's a YouTube URL
            audio_file = download_youtube_audio(youtube_url)
            if not audio_file:
                # Without this check the failure surfaces as a cryptic
                # open(None) error.
                return "Error: could not download audio from the YouTube URL."
            temp_file = audio_file
        elif not audio_file:
            return "Please provide either an audio file or a YouTube URL."

        # Update model_name based on the user's choice
        if model_choice == "whisper_v3":
            model_name = "official-v3"
        else:
            model_name = "whisper-large-v2-imda"
        data = {'language': 'en', 'model_name': model_name, 'with_timestamp': with_timestamp}

        # Open the upload in a context manager so the handle is always closed.
        with open(audio_file, 'rb') as audio_handle:
            # Bound only the connect phase; transcription itself may take long.
            response = requests.post(
                f"{API_URL}/asr",
                data=data,
                files={'file': audio_handle},
                timeout=(10, None),
            )
        if response.status_code == 200:
            return response.json().get("text", "")
        return f"Error: {response.status_code}"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Clean up the temporary file if it was a YouTube download
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)
def embed_youtube(youtube_url):
    """Build the Gradio updates that show an embedded player for a YouTube URL.

    Args:
        youtube_url: Text from the YouTube URL box; may be empty.

    Returns:
        A 3-tuple ``(player_update, result_text, audio_value)``:
        the HTML component update (iframe shown, or hidden and emptied),
        the text for the result box ("" or an "Invalid YouTube URL" message),
        and None for the audio input.
    """
    from urllib.parse import quote

    if not youtube_url:
        return gr.update(value="", visible=False), "", None
    try:
        video_id = fetch_youtube_id(youtube_url)
    except Exception as e:
        return gr.update(value="", visible=False), f"Invalid YouTube URL: {str(e)}", None

    # The ID is derived from user input and lands inside an HTML attribute.
    # Percent-encoding leaves only URL-safe characters, so it cannot break out
    # of the src value. Real IDs pass through unchanged.
    safe_id = quote(video_id, safe="")
    embed_html = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{safe_id}" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>'
    return gr.update(value=embed_html, visible=True), "", None
def clear_on_audio_input(audio):
    """Return the component updates to apply when the audio input changes.

    The four values are, in order, for the result box, the video player, the
    YouTube URL box and the run button. With audio present the first three are
    cleared and the button is enabled. With no audio they are left untouched
    and the button is disabled.
    """
    if audio is None:
        return gr.update(), gr.update(), gr.update(), gr.update(interactive=False)

    hidden_player = gr.update(value="", visible=False)
    enabled_button = gr.update(interactive=True)
    return "", hidden_player, "", enabled_button
# Create the Gradio interface with improved aesthetics
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page heading and one-line description.
    # NOTE(review): the characters before "Audio Transcription Service" look
    # like a mis-decoded emoji — confirm the file encoding before editing.
    gr.Markdown("# ποΈ Audio Transcription Service")
    gr.Markdown("Upload an audio file, record your voice, or paste a YouTube URL to get an English transcription.")
    with gr.Row():
        # Left column: the two alternative inputs plus transcription options.
        with gr.Column(scale=2):
            # type="filepath" hands run_asr a path on disk, which it opens and uploads.
            audio_input = gr.Audio(sources=['microphone', 'upload'], type="filepath", label="Audio Input")
            youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
            # Hidden until embed_youtube supplies an iframe for a valid URL.
            video_player = gr.HTML(visible=False)
            timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
            # run_asr maps "whisper_v3" to one backend model and any other value to the default.
            model_choice = gr.Radio(["local_whisper", "whisper_v3"], label="Model Selection", value="local_whisper")
        # Right column: transcription output.
        with gr.Column(scale=3):
            result = gr.Textbox(
                label="Transcription Result",
                placeholder="Your transcription will appear here...",
                lines=10
            )
    # Starts disabled; the change handlers below enable it once an input exists.
    # NOTE(review): placed below the row, outside both columns — confirm the
    # intended layout. The leading character of the label also looks like a
    # mis-decoded emoji.
    run_button = gr.Button("π Transcribe Audio", variant="primary", interactive=False)
    run_button.click(run_asr, inputs=[audio_input, youtube_input, timestamp_toggle, model_choice], outputs=[result])
    # When the YouTube URL changes: embed_youtube supplies the first three
    # outputs (player update, result text, None for the audio input) and the
    # lambda appends the button update, enabled only for a non-empty URL.
    youtube_input.change(
        fn=lambda url: (*embed_youtube(url), gr.update(interactive=bool(url))),
        inputs=[youtube_input],
        outputs=[video_player, result, audio_input, run_button]
    )
    # When audio is provided: clear the transcription, hide the video player,
    # clear the YouTube URL and enable the run button (see clear_on_audio_input).
    audio_input.change(
        fn=clear_on_audio_input,
        inputs=[audio_input],
        outputs=[result, video_player, youtube_input, run_button]
    )
    # Usage instructions shown beneath the controls.
    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an audio file or record your voice using the microphone, OR paste a YouTube URL.")
    gr.Markdown("2. If you paste a YouTube URL, the video will be displayed for your reference, and any previous transcription or audio input will be cleared.")
    gr.Markdown("3. If you upload or record audio, any previous transcription, YouTube URL, and video will be cleared.")
    gr.Markdown("4. Click the 'Transcribe Audio' button to start the process.")
    gr.Markdown("5. Wait for a few seconds, and your transcription will appear in the result box.")
# Launch the Gradio interface; '0.0.0.0' makes the server listen on all
# network interfaces rather than only on localhost.
demo.launch(server_name='0.0.0.0')
|