Spaces:

ZiyuG
/

SignLanguage

Running on Zero

App Files Files Community

SignLanguage / ms_tts.py

ZiyuG

Update ms_tts.py

3a1aaa2 verified 4 days ago

raw

history blame contribute delete

2.73 kB

	# import cv2
	# import numpy as np
	# import os

	# # Add more common video codecs
	# codecs = {
	# 'X264': 'H.264 / AVC',
	# 'MP4V': 'MPEG-4',
	# 'AVC1': 'H.264 / AVC',
	# 'DIVX': 'MPEG-4',
	# 'XVID': 'MPEG-4',
	# 'MJPG': 'Motion JPEG',
	# 'VP80': 'VP8',
	# 'VP90': 'VP9',
	# 'HEVC': 'H.265 / HEVC',
	# 'PIM1': 'MPEG-1',
	# 'MPG1': 'MPEG-1',
	# 'MPG2': 'MPEG-2'
	# }

	# # Output path of the MP4 test video
	# output_file = 'test_video.mp4'

	# # Define a function that tests whether a codec is supported by writing an mp4 file with it
	# def test_codec(codec_fourcc, codec_name):
	# # Create the video writer object with cv2.VideoWriter
	# fourcc = cv2.VideoWriter_fourcc(*codec_fourcc)
	# out = cv2.VideoWriter(output_file, fourcc, 25.0, (640, 480))

	# # Check whether the writer was initialized successfully
	# if out.isOpened():
	# print(f"[SUPPORTED] Codec {codec_name} ({codec_fourcc}) is supported.")

	# # Write a few frames to produce a simple test video
	# for i in range(100):
	# # Create a simple test frame (alternates between all-black and all-white, not blue)
	# frame = (255 * (i % 2) * np.ones((480, 640, 3), dtype=np.uint8))
	# out.write(frame)

	# out.release() # Close the video writer

	# # Check whether the file was created
	# if os.path.exists(output_file):
	# print(f"Video file {output_file} successfully created with codec {codec_fourcc}.")
	# else:
	# print(f"Failed to create video file {output_file}.")

	# # Delete the test video file
	# os.remove(output_file)
	# # else:
	# # print(f"[NOT SUPPORTED] Codec {codec_name} ({codec_fourcc}) is not supported.")

	# # Test every codec
	# for fourcc, name in codecs.items():
	# test_codec(fourcc, name)



	import azure.cognitiveservices.speech as speechsdk

	def ms_tts_gen(text, audio_path):
	    """Synthesize ``text`` to an MP3 file with Azure Speech.

	    The voice is fixed to ``zh-HK-HiuGaaiNeural`` (Cantonese, female) and the
	    output format to 16 kHz / 32 kbit/s mono MP3.

	    Credentials are read from the ``SPEECH_KEY`` and ``SPEECH_REGION``
	    environment variables when they are set; otherwise the values that were
	    previously hard-coded here are used so existing deployments keep working.

	    Args:
	        text: Plain text to speak.
	        audio_path: Destination file name handed to the SDK's audio output
	            config; the synthesized MP3 data is written there.

	    Raises:
	        RuntimeError: If the service does not report
	            ``SynthesizingAudioCompleted`` (for example the request was
	            canceled because of a bad key, quota or network error).
	    """
	    # Local import so this block does not depend on the file's import section.
	    import os

	    # NOTE(review): the fallback key is committed to the repository and should
	    # be rotated; set SPEECH_KEY in the deployment and drop the fallback.
	    subscription = os.environ.get("SPEECH_KEY") or "3OdH2yIHGxZWNPR2hxmlNiv8tr6neOOZbee5EttR8Zu4OBrYCLGtJQQJ99BBAC3pKaRXJ3w3AAAYACOGcpOD"
	    region = os.environ.get("SPEECH_REGION") or "eastasia"

	    # Build the Speech configuration.
	    speech_config = speechsdk.SpeechConfig(subscription=subscription, region=region)
	    speech_config.speech_synthesis_voice_name = 'zh-HK-HiuGaaiNeural'  # Cantonese female voice

	    # Request MP3 output instead of the SDK's default format.
	    speech_config.set_speech_synthesis_output_format(
	        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
	    )

	    # Send the synthesized audio to the requested file.
	    audio_config = speechsdk.audio.AudioOutputConfig(filename=audio_path)

	    # Synthesize the speech and block until the service has answered.
	    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
	    result = synthesizer.speak_text_async(text).get()

	    # The SDK reports failures through the result object rather than by
	    # raising, so the outcome has to be inspected explicitly.
	    if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
	        detail = ""
	        if result.reason == speechsdk.ResultReason.Canceled:
	            cancellation = result.cancellation_details
	            detail = f": {cancellation.reason} ({cancellation.error_details})"
	        raise RuntimeError(f"Speech synthesis to {audio_path!r} failed{detail}")