Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
|
|
12 |
# 加载 Visual Question Answering 模型 microsoft/git-base-vqav2
|
13 |
vqa_pipeline = pipeline("text2text-generation", model="microsoft/git-base-vqav2")
|
14 |
|
15 |
-
#
|
16 |
text_to_speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
|
17 |
|
18 |
def main():
|
@@ -23,7 +23,10 @@ def main():
|
|
23 |
|
24 |
if st.button("Get Answer"):
|
25 |
answer = vqa_pipeline(question, image_path)[0]['generated_text']
|
26 |
-
|
|
|
|
|
|
|
27 |
|
28 |
st.write("Answer:", answer)
|
29 |
st.audio(audio_data[0]["audio"], format='audio/wav')
|
|
|
12 |
# 加载 Visual Question Answering 模型 microsoft/git-base-vqav2
|
13 |
vqa_pipeline = pipeline("text2text-generation", model="microsoft/git-base-vqav2")
|
14 |
|
15 |
+
# 加载文本到语音模型
|
16 |
text_to_speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
|
17 |
|
18 |
def main():
|
|
|
23 |
|
24 |
if st.button("Get Answer"):
|
25 |
answer = vqa_pipeline(question, image_path)[0]['generated_text']
|
26 |
+
|
27 |
+
# 将说话者的嵌入向量作为文本的一部分传递给文本到语音模型
|
28 |
+
text_with_speaker = f"{answer} Speaker Embeddings: {speaker_embeddings}"
|
29 |
+
audio_data = text_to_speech_pipeline(text_with_speaker)
|
30 |
|
31 |
st.write("Answer:", answer)
|
32 |
st.audio(audio_data[0]["audio"], format='audio/wav')
|