Spaces:
Paused
Paused
# import cv2 | |
# import numpy as np | |
# import os | |
# # 添加更多常见的视频编码器 | |
# codecs = { | |
# 'X264': 'H.264 / AVC', | |
# 'MP4V': 'MPEG-4', | |
# 'AVC1': 'H.264 / AVC', | |
# 'DIVX': 'MPEG-4', | |
# 'XVID': 'MPEG-4', | |
# 'MJPG': 'Motion JPEG', | |
# 'VP80': 'VP8', | |
# 'VP90': 'VP9', | |
# 'HEVC': 'H.265 / HEVC', | |
# 'PIM1': 'MPEG-1', | |
# 'MPG1': 'MPEG-1', | |
# 'MPG2': 'MPEG-2' | |
# } | |
# # 输出MP4视频文件路径 | |
# output_file = 'test_video.mp4' | |
# # 定义一个函数来测试是否支持某种编码器并输出 mp4 文件 | |
# def test_codec(codec_fourcc, codec_name): | |
# # 使用 cv2.VideoWriter 创建视频文件对象 | |
# fourcc = cv2.VideoWriter_fourcc(*codec_fourcc) | |
# out = cv2.VideoWriter(output_file, fourcc, 25.0, (640, 480)) | |
# # 检查是否成功初始化 | |
# if out.isOpened(): | |
# print(f"[SUPPORTED] Codec {codec_name} ({codec_fourcc}) is supported.") | |
# # 写入一些帧,生成简单的测试视频 | |
# for i in range(100): | |
# # Create a solid test frame that alternates between black (0) and white (255) each frame
# frame = (255 * (i % 2) * np.ones((480, 640, 3), dtype=np.uint8)) | |
# out.write(frame) | |
# out.release() # 关闭视频写入器 | |
# # 检查文件是否生成 | |
# if os.path.exists(output_file): | |
# print(f"Video file {output_file} successfully created with codec {codec_fourcc}.") | |
# else: | |
# print(f"Failed to create video file {output_file}.") | |
# # 删除测试视频文件 | |
# os.remove(output_file) | |
# # else: | |
# # print(f"[NOT SUPPORTED] Codec {codec_name} ({codec_fourcc}) is not supported.") | |
# # 测试所有编码器 | |
# for fourcc, name in codecs.items(): | |
# test_codec(fourcc, name) | |
import os

import azure.cognitiveservices.speech as speechsdk
def ms_tts_gen(text, audio_path, voice="zh-HK-HiuGaaiNeural",
               region="eastasia", subscription=None):
    """Synthesize *text* with Azure Speech and write it to *audio_path* as MP3.

    Args:
        text: Plain text to be spoken.
        audio_path: Destination file path. The audio is requested in the
            ``Audio16Khz32KBitRateMonoMp3`` output format.
        voice: Azure voice name. Defaults to ``zh-HK-HiuGaaiNeural``
            (Cantonese female voice), the value previously hard-coded.
        region: Azure Speech resource region. Defaults to ``eastasia``,
            the value previously hard-coded.
        subscription: Azure Speech subscription key. When omitted, the
            ``SPEECH_KEY`` environment variable is used if set; otherwise
            the legacy key embedded below is used.

    Returns:
        The result object returned by ``speak_text_async(text).get()`` when
        synthesis completed.

    Raises:
        RuntimeError: If the service reports anything other than a
            completed synthesis (for example, a cancelled request).
    """
    if subscription is None:
        # NOTE(review): the fallback literal is a credential committed to
        # source control. It is kept only so existing deployments keep
        # working; rotate it and supply the key via SPEECH_KEY instead.
        subscription = os.environ.get(
            "SPEECH_KEY", "ef47bc3073b74e4e9837e886be2fb6f7"
        )

    # Build the Speech configuration: credentials, voice, and MP3 output.
    speech_config = speechsdk.SpeechConfig(
        subscription=subscription, region=region
    )
    speech_config.speech_synthesis_voice_name = voice
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    # Route the synthesized audio to the requested file.
    audio_config = speechsdk.audio.AudioOutputConfig(filename=audio_path)

    synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=audio_config
    )
    # .get() blocks until the asynchronous synthesis has finished.
    result = synthesizer.speak_text_async(text).get()

    # Previously the result was discarded, so failures went unnoticed and
    # callers were left with a missing or empty output file.
    if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        message = f"Speech synthesis failed: {result.reason}"
        if result.reason == speechsdk.ResultReason.Canceled:
            details = result.cancellation_details
            message = (
                f"Speech synthesis canceled: {details.reason}; "
                f"{details.error_details}"
            )
        raise RuntimeError(message)

    return result