File size: 4,756 Bytes
bf9542e 2446386 1b12140 922050b 1b12140 bf9542e 922050b bf9542e 6aeab44 51a9e4f bf9542e 2446386 bf9542e 0648a36 922050b 7e868b7 922050b 7e868b7 6aeab44 922050b e6882db 922050b e6882db 922050b e6882db 922050b e6882db 922050b bf9542e 2446386 5f6bd2c 2446386 db145e2 2446386 cc37f03 7321e01 cc37f03 2922952 cc37f03 7321e01 2b5b523 2922952 cc37f03 2446386 bf9542e 2446386 5f6bd2c 2446386 db145e2 cc37f03 922050b cc37f03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
import openai
import yt_dlp
import os
import io
import tempfile
from pydub import AudioSegment
def split_audio(file_path, chunk_length_ms):
    """Split an audio file into consecutive chunks of a fixed length.

    Args:
        file_path: Path of the audio file; handed to ``AudioSegment.from_file``.
        chunk_length_ms: Length of every chunk in milliseconds. Must be
            greater than zero.

    Returns:
        A list of slices of the loaded audio, in playback order. The final
        slice is shorter when the duration is not a multiple of
        ``chunk_length_ms``; the list is empty for zero-length audio.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    # A non-positive step would never move the cursor forward, so the loop
    # below would spin forever while growing the chunk list.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    duration = len(audio)  # pydub reports the length in milliseconds

    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp the last window to the end of the recording.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks
def split_string_by_tokens(text, max_tokens=500):
    """Break *text* into pieces of at most *max_tokens* words each.

    "Tokens" here are simply whitespace-separated words (``str.split()``),
    and the words of each piece are re-joined with single spaces.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per piece. Values below 1 behave
            like 1 (every word becomes its own piece).

    Returns:
        A list of strings in original word order; empty when *text* contains
        no words.
    """
    tokens = text.split()
    # A limit below 1 closes a piece after every single word, i.e. step 1.
    step = max(1, max_tokens)
    return [
        ' '.join(tokens[offset:offset + step])
        for offset in range(0, len(tokens), step)
    ]
# Configure the OpenAI client from the environment at import time. A missing
# OPENAI_API_KEY raises KeyError here, before the Gradio UI below is built.
openai.api_key = os.environ['OPENAI_API_KEY']
def _transcribe_chunk(chunk):
    """Export one audio chunk to a temporary WAV file and return its Whisper transcript text."""
    # Reserve a file name, then close the handle so the exporter can reopen
    # the path itself (an open handle blocks that on some platforms).
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as reserved:
        wav_path = reserved.name
    try:
        chunk.export(wav_path, format="wav")
        with open(wav_path, "rb") as wav_file:
            transcript = openai.Audio.transcribe("whisper-1", wav_file)
        return transcript["text"]
    finally:
        # Remove the temp file even when export or the API call fails.
        os.remove(wav_path)


def asr(url):
    """Download a video's audio, transcribe it, and translate it into Chinese.

    This is a generator meant to be used as a streaming Gradio handler. Every
    yielded value is a ``(status_or_translation, transcript)`` tuple matching
    the two output text boxes: the first element carries progress messages
    and, at the end, the full Chinese translation; the second carries the
    transcript accumulated so far.

    Args:
        url: Link of the video to process; passed to yt-dlp.

    Yields:
        Tuples of two strings as described above. When the download fails, a
        single tuple with an error message is yielded and the generator ends.
    """
    # Work inside a private directory so that concurrent requests cannot
    # overwrite each other's download and no shell `rm` is needed afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        # '%' is the template marker in yt-dlp output templates; escape any
        # literal one that may appear in the directory path.
        escaped_dir = workdir.replace('%', '%%')
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': os.path.join(escaped_dir, 'audio_downloaded.%(ext)s'),
            # NOTE(review): kept from the original; confirm that yt-dlp's
            # Python API honours this key (it mirrors a CLI flag name).
            'no_continue': True,
        }

        # By default yt-dlp raises DownloadError instead of returning None,
        # so both failure modes are funnelled into the same branch.
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info_dict = ydl.extract_info(url, download=True)
        except yt_dlp.utils.DownloadError:
            info_dict = None

        if info_dict is None:
            # Inside a generator a `return value` is never delivered to the
            # consumer, so the error has to be yielded before stopping.
            yield "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
            return

        audio_file_name = os.path.join(
            workdir, "audio_downloaded.{}".format(info_dict["ext"])
        )

        yield "下载视频完成. 开始分割视频...", ""
        # The chunks are held in memory, so the downloaded file can be
        # discarded as soon as the split is done.
        chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)

    transcripts = []
    for idx, chunk in enumerate(chunks):
        transcripts.append(_transcribe_chunk(chunk))
        yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)

    full_transcript = " ".join(transcripts)
    # Translate in pieces of at most 500 words to keep each request small.
    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript

    translations = []
    for idx, transcript in enumerate(transcript_chunks):
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user",
                 "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
            ],
            stream=True,
        )
        for event in output:
            # Streamed events may lack "delta" or "content"; fall back to
            # empty values so a bare event cannot raise.
            delta = event["choices"][0].get("delta") or {}
            translations.append(delta.get("content") or "")
            yield "请耐心等候翻译:({}/{})...".format(idx + 1, len(transcript_chunks)) + "".join(translations), full_transcript

    full_translation = "".join(translations)
    yield full_translation, full_transcript
# Page title shown above the interface.
title = """
轻声细译"""

# HTML instructions rendered as the interface description.
instruction = """
<div style="border: 2px solid #000; padding: 10px; border-radius: 5px;">
一键输入视频链接,轻松中文翻译,视频无障碍 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
</div>"""

# Custom CSS for the text boxes. Both colour schemes currently apply the same
# white-on-black styling.
css = """
@media (prefers-color-scheme: dark) {
[data-testid='textbox'] {
color: white !important;
background-color: black !important;
}
}
@media (prefers-color-scheme: light) {
[data-testid='textbox'] {
color: white !important;
background-color: black !important;
}
}
"""

# Build the UI: one URL input, two text outputs fed by the `asr` generator
# (first output: status / Chinese translation, second: transcript).
# `gr.Textbox` replaces the deprecated `gr.inputs.Textbox` /
# `gr.outputs.Textbox` namespaces, which newer Gradio releases removed.
demo = gr.Interface(
    fn=asr,
    inputs=gr.Textbox(label="粘贴视频链接"),
    outputs=[
        gr.Textbox(label="中文"),
        gr.Textbox(label="英文"),
    ],
    title=title,
    description=instruction,
    theme="JohnSmith9982/small_and_pretty",
    css=css,
)

# Queuing is enabled before launch; `asr` is a generator that yields
# intermediate results.
demo.queue()
demo.launch()
|