# Gradio demo: microphone ASR via the hosted Whisper Inference API,
# plus a streaming Llama-3 chat interface.
import gradio as gr
import numpy as np
from huggingface_hub import InferenceClient
import os
import requests
import scipy.io.wavfile
# Shared Inference API client for the chat model. The Hugging Face access
# token is read from the `hf_token` environment variable (None if unset,
# in which case the API call will fail with an auth error at request time).
client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.getenv('hf_token')
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply from the Llama-3 Inference API.

    Args:
        message: The new user message.
        history: Prior ``(user, assistant)`` turns; empty entries are skipped.
        system_message: System prompt placed first in the conversation.
        max_tokens: Generation cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Yields:
        The accumulated response text after each streamed chunk
        (the incremental-display convention ``gr.ChatInterface`` expects).
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # Distinct loop variable: the original reused `message`, shadowing the
    # user's input parameter inside the loop.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # The final streamed delta can carry content=None; guard against
        # `str + None` raising TypeError at the end of generation.
        if token:
            response += token
        yield response
def process_audio(audio_data):
    """Transcribe recorded audio with the hosted Whisper model.

    Args:
        audio_data: ``(sample_rate, data)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was recorded.

    Returns:
        The recognized text on success, otherwise a short English error
        message string (callers display the return value directly, so this
        function never raises for expected failure modes).
    """
    if audio_data is None:
        return "No audio provided"
    print("audio_data:", audio_data)  # debug trace

    # gr.Audio(type="numpy") delivers a (sample_rate, data) tuple; anything
    # else means the component was misconfigured.
    if not isinstance(audio_data, tuple):
        return "Invalid audio data format"
    sample_rate, data = audio_data
    print("Sample rate:", sample_rate)
    print("Data type:", type(data))

    # Persist the capture as a WAV file for upload.
    local_wav_file = "converted_audio.wav"
    scipy.io.wavfile.write(local_wav_file, sample_rate, data)

    api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}

    try:
        with open(local_wav_file, "rb") as f:
            # Explicit timeout: requests has none by default, and a stuck
            # inference endpoint would otherwise hang the UI forever.
            response = requests.post(
                api_url, headers=headers, data=f.read(), timeout=120
            )
        output = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a non-JSON body (e.g. HTML error page).
        return "Error in processing audio"
    print(output)

    if "text" in output:
        return output["text"]
    return "Error in processing audio"
# Flip the UI into its "busy" state while ASR runs.
def disable_components():
    """Return updates that show a processing hint, freeze the button,
    and reveal the loading indicator."""
    busy_hint = gr.update(value='正在处理,请稍候...')
    frozen_button = gr.update(interactive=False)
    visible_spinner = gr.update(visible=True)
    return busy_hint, frozen_button, visible_spinner
# Restore the UI once processing has finished.
def enable_components(recognized_text):
    """Pass the recognized text through unchanged, re-enable the button,
    and hide the loading indicator."""
    return (
        recognized_text,
        gr.update(interactive=True),
        gr.update(visible=False),
    )
# Build the user interface.
def create_interface():
    """Assemble the Gradio Blocks app: a mic-ASR section (audio -> text)
    followed by a Llama-3 chat interface.

    Returns:
        The constructed ``gr.Blocks`` demo, ready for ``.launch()``.
    """
    with gr.Blocks() as demo:
        # Page title
        gr.Markdown("# 语音识别与聊天系统")
        # Audio input section
        with gr.Row():
            audio_input = gr.Audio(
                # NOTE(review): Gradio 4 typically expects a list here,
                # e.g. sources=["microphone"] — confirm against the
                # installed gradio version.
                sources="microphone",
                type="numpy",  # deliver (sample_rate, data) to process_audio
                label="上传音频"
            )
        # Recognized-text output section
        with gr.Row():
            recognized_text = gr.Textbox(label="识别文本")
        # Button that triggers audio processing
        process_button = gr.Button("处理音频")
        # Loading indicator, hidden until processing starts
        loading_animation = gr.HTML(
            value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
            visible=False
        )
        # Wire the click as a three-step chain: disable the UI and show the
        # spinner, run ASR into the textbox, then re-enable everything.
        process_button.click(
            fn=disable_components,
            inputs=[],
            outputs=[recognized_text, process_button, loading_animation]
        ).then(
            fn=process_audio,
            inputs=[audio_input],
            outputs=recognized_text
        ).then(
            fn=enable_components,
            inputs=[recognized_text],
            outputs=[recognized_text, process_button, loading_animation]
        )
        # Chatbot interface backed by the streaming `respond` generator
        chatbot = gr.ChatInterface(
            fn=respond,
            additional_inputs=[
                gr.Textbox(value="You are a helpful chatbot that answers questions.", label="系统消息")
            ]
        )
        # Layout row containing the chatbot
        with gr.Row():
            chatbot_output = chatbot
        return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    create_interface().launch()