# Source: Hugging Face Space ychenNLP/just4fun (status when captured: "Runtime error"; file size: 4,756 bytes).

import gradio as gr
import openai
import yt_dlp
import os
import io
import tempfile
from pydub import AudioSegment

def split_audio(file_path, chunk_length_ms):
    """Load an audio file and cut it into consecutive fixed-length chunks.

    Args:
        file_path: Path of the audio file; handed to ``AudioSegment.from_file``.
        chunk_length_ms: Length of every chunk, in milliseconds. Must be
            greater than zero.

    Returns:
        A list of audio slices in playback order. Each slice covers
        ``chunk_length_ms`` except the last one, which holds whatever
        remains.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative. Such a value
            would never advance the cursor past the end of the audio, so the
            loop below would not terminate.
    """
    # Validate before touching the file so a bad argument fails fast.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    # The segment's len() is used as its total duration on the same scale as
    # chunk_length_ms.
    duration = len(audio)

    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp the final slice to the end of the audio.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks

def split_string_by_tokens(text, max_tokens=500):
    """Break *text* into pieces of at most ``max_tokens`` whitespace-separated words.

    Despite the name, the unit counted here is a word produced by
    ``str.split()``, not a model token. Runs of whitespace collapse, and the
    words of each piece are re-joined with single spaces.

    Args:
        text: The string to split.
        max_tokens: Number of words after which a piece is closed.

    Returns:
        A list of strings; empty when *text* contains no words.
    """
    words = text.split()
    pieces = []
    piece_start = 0

    # Walk the word list by 1-based position and cut a slice as soon as the
    # open piece has reached the requested size.
    for consumed in range(1, len(words) + 1):
        if consumed - piece_start >= max_tokens:
            pieces.append(' '.join(words[piece_start:consumed]))
            piece_start = consumed

    # Whatever is left over forms the final, shorter piece.
    if piece_start < len(words):
        pieces.append(' '.join(words[piece_start:]))

    return pieces

# Configure the OpenAI client from the environment at import time; if
# OPENAI_API_KEY is not set, this lookup raises KeyError and the module
# fails to load.
openai.api_key = os.environ['OPENAI_API_KEY']

def _download_audio(url, target_dir):
    """Download the best available audio stream of *url* into *target_dir*.

    Returns the path of the downloaded file, or ``None`` when yt-dlp could
    not fetch the URL or the expected file is not present afterwards.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(target_dir, 'audio_downloaded.%(ext)s'),
        # The Python API option for "do not resume partial downloads" is
        # `continuedl`; `no_continue` is the command-line spelling.
        'continuedl': False,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
    except yt_dlp.utils.DownloadError:
        return None

    extension = info_dict.get("ext") if info_dict else None
    if extension is None:
        return None

    audio_path = os.path.join(target_dir, "audio_downloaded.{}".format(extension))
    return audio_path if os.path.exists(audio_path) else None


def _transcribe_chunk(chunk):
    """Send one audio chunk to Whisper via a temporary WAV file and return its text."""
    # Reserve a unique file name, then close the handle so the exporter can
    # open the path itself.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        # export() hands back the output file handle; close it right away so
        # the file can be reopened for reading and deleted afterwards.
        chunk.export(temp_file_path, format="wav").close()
        with open(temp_file_path, "rb") as audio_fh:
            transcript = openai.Audio.transcribe("whisper-1", audio_fh)
    finally:
        # Remove the temporary file even when the export or API call fails.
        os.remove(temp_file_path)

    return transcript["text"]


def _delta_text(event):
    """Return the text fragment carried by one streamed chat-completion event ('' if none)."""
    delta = event["choices"][0].get("delta") or {}
    return delta.get("content") or ""


def asr(url):
    """Download a video's audio, transcribe it, and translate the transcript to Chinese.

    This is a generator: it yields ``(first_box, second_box)`` string pairs so
    the UI can show progress. While work is in progress the first element
    carries a status message (plus the partial translation during the
    translation phase) and the second carries the transcript so far. The last
    pair yielded is ``(full_translation, full_transcript)``.

    Args:
        url: Link of the video to process; passed to yt-dlp.

    Yields:
        Tuples of two strings as described above. If the download fails, a
        single pair with an error message in each element is yielded and the
        generator stops.
    """
    # A private scratch directory keeps requests from sharing one file name
    # and is removed automatically, including when an exception is raised or
    # the generator is closed early.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_file_name = _download_audio(url, work_dir)
        if audio_file_name is None:
            # The message must be yielded: a value passed to `return` inside
            # a generator is never delivered to the consumer.
            yield "下载音频发生错误，请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
            return

        yield "下载视频完成. 开始分割视频...", ""
        chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)
        transcripts = []

        for idx, chunk in enumerate(chunks):
            transcripts.append(_transcribe_chunk(chunk))
            yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)

    # The downloaded audio is gone at this point; only text is needed below.
    translations = []
    full_transcript = " ".join(transcripts)
    # Translate in pieces of at most 500 words so each request stays short.
    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript

    for idx, transcript in enumerate(transcript_chunks):
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user",
                 "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
            ],
            stream=True,
        )
        # Stream the answer: emit the growing translation after every event.
        for event in output:
            translations.append(_delta_text(event))
            yield "请耐心等候翻译：({}/{})...".format(idx+1, len(transcript_chunks)) + "".join(translations), full_transcript

    full_translation = "".join(translations)
    yield full_translation, full_transcript

# Page title passed to gr.Interface (the string starts with a newline).
title = """
轻声细译"""
# HTML snippet passed to gr.Interface as the description: a bordered box with
# a one-line pitch and the usage instructions.
instruction = """
<div style="border: 2px solid #000; padding: 10px; border-radius: 5px;">
一键输入视频链接，轻松中文翻译，视频无障碍 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>

1.将视频链接（支持Twitter、YouTube）复制粘贴至输入框，点击提交（Submit）即可;
</div>"""

# Custom CSS passed to gr.Interface. Both the dark and the light
# color-scheme branches apply the same white-on-black textbox styling.
# NOTE(review): identical rules in both branches may be a copy-paste; confirm
# the light-mode colours are intended.
css = """
@media (prefers-color-scheme: dark) {
    [data-testid='textbox'] {
        color: white !important;
        background-color: black !important;
    }
}
@media (prefers-color-scheme: light) {
    [data-testid='textbox'] {
        color: white !important;
        background-color: black !important;
    }
}
"""

# Build the web UI: one text input for the video link and two text outputs
# that receive the pairs yielded by asr (first box labelled Chinese, second
# labelled English).
# The components use gr.Textbox directly; the gr.inputs / gr.outputs
# namespaces are deprecated and no longer exist in newer Gradio releases.
demo = gr.Interface(
    fn=asr,
    inputs=gr.Textbox(label="粘贴视频链接"),
    outputs=[
        gr.Textbox(label="中文"),
        gr.Textbox(label="英文"),
    ],
    title=title,
    description=instruction,
    theme="JohnSmith9982/small_and_pretty",
    css=css,
)

# asr is a generator, so the request queue is enabled before launching.
# NOTE(review): presumably required for streaming intermediate yields on the
# installed Gradio version; confirm against its documentation.
demo.queue()
demo.launch()