|
import os
import tempfile

import gradio as gr
import replicate
from pydub import AudioSegment
|
|
|
|
|
# Read the Replicate API token from the environment and abort at import
# time when the variable is missing or empty.
replicate_token = os.environ.get("REPLICATE_API_TOKEN")
if replicate_token is None or replicate_token == "":
    raise ValueError("No se ha encontrado el token de API de Replicate.")
|
|
|
|
|
def dividir_audio(audio_path, segment_duration_ms=60000, output_dir=None):
    """Split an audio file into consecutive fixed-length WAV segments.

    Each segment is exported to its own uniquely named file, so concurrent
    or repeated calls never overwrite each other's output.

    Args:
        audio_path: Path of the audio file to split (loaded with pydub).
        segment_duration_ms: Length of every segment in milliseconds. The
            last segment is shorter when the audio length is not an exact
            multiple of this value.
        output_dir: Directory that receives the segment files. ``None``
            (the default) uses the system temporary directory.

    Returns:
        A list with the paths of the exported WAV files, in playback order.
        The caller is responsible for deleting them.

    Raises:
        ValueError: If ``segment_duration_ms`` is zero or negative.
    """
    # Validate before decoding: range() would raise an opaque error for 0
    # and silently produce no segments at all for negative steps.
    if segment_duration_ms <= 0:
        raise ValueError(
            "segment_duration_ms debe ser un número positivo de milisegundos."
        )

    audio = AudioSegment.from_file(audio_path)
    audio_length = len(audio)  # pydub measures length in milliseconds

    segments = []
    for start_ms in range(0, audio_length, segment_duration_ms):
        segment = audio[start_ms:start_ms + segment_duration_ms]

        # mkstemp guarantees a unique name; keep the start second in the
        # prefix so the files remain easy to identify.
        fd, segment_path = tempfile.mkstemp(
            prefix=f"segment_{start_ms // 1000}_",
            suffix=".wav",
            dir=output_dir,
        )
        # Only the reserved path is needed; export() reopens it itself.
        os.close(fd)

        segment.export(segment_path, format="wav")
        segments.append(segment_path)

    return segments
|
|
|
|
|
def transcribe_audio(audio_file):
    """Transcribe an audio file with the Whisper model hosted on Replicate.

    Audio longer than ten minutes is split into one-minute segments with
    ``dividir_audio``; every segment is transcribed separately and the
    partial texts are joined with newlines. Shorter audio is sent as a
    single file.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty string means that no file was provided.

    Returns:
        The complete transcription as a single string, or ``""`` when no
        audio file was provided.
    """
    # Nothing was uploaded: return an empty transcription instead of
    # failing inside the audio decoder.
    if not audio_file:
        return ""

    # Only the duration is needed here, so the decoded audio is not kept.
    audio_duration_minutes = len(AudioSegment.from_file(audio_file)) / (1000 * 60)

    was_split = audio_duration_minutes > 10
    if was_split:
        segments = dividir_audio(audio_file, segment_duration_ms=60000)
    else:
        segments = [audio_file]

    all_transcriptions = []
    try:
        for segment_path in segments:
            with open(segment_path, "rb") as segment_file:
                output = replicate.run(
                    "vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c",
                    input={
                        "task": "transcribe",
                        "audio": segment_file,
                        "language": "None",
                        "timestamp": "chunk",
                        "batch_size": 64,
                        "diarise_audio": False,
                    },
                )
            all_transcriptions.append(output["text"])
    finally:
        # Delete the intermediate segment files even when a request fails;
        # the caller's original file is never removed.
        if was_split:
            for segment_path in segments:
                try:
                    os.remove(segment_path)
                except OSError:
                    # Best-effort cleanup: a file that is already gone must
                    # not mask the transcription result or a real error.
                    pass

    full_transcription = "\n".join(all_transcriptions)
    return full_transcription
|
|
|
|
|
# Gradio interface: an audio upload, a button that runs transcribe_audio on
# the uploaded file path, and a textbox that shows the resulting text.
with gr.Blocks() as demo:
    gr.Markdown("# Transcripción de Audio usando Whisper")
    # type="filepath" makes the component pass a path string to the callback.
    audio_input = gr.Audio(type="filepath", label="Sube tu archivo de audio")
    output_text = gr.Textbox(label="Transcripción")

    transcribe_button = gr.Button("Transcribir")
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text,
    )

demo.launch()
|
|