Steven-GU-Yu-Di committed on
Commit
16f7c2d
·
verified ·
1 Parent(s): 3eb6296

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -3
app.py CHANGED
@@ -9,13 +9,11 @@ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validat
9
  speaker_embeddings = embeddings_dataset[7306]["xvector"]
10
  speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
11
 
12
-
13
  # 加载 Visual Question Answering 模型 microsoft/git-base-vqav2
14
  vqa_pipeline = pipeline("text2text-generation", model="microsoft/git-base-vqav2")
15
 
16
  # 加载文本到语音模型并手动指定
17
  text_to_speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
18
- audio_data = text_to_speech_pipeline(answer, speaker_embeddings=speaker_embeddings)
19
 
20
  def main():
21
  st.title("Visual Question Answering with Text-to-Speech")
@@ -25,7 +23,7 @@ def main():
25
 
26
  if st.button("Get Answer"):
27
  answer = vqa_pipeline(question, image_path)[0]['generated_text']
28
- audio_data = text_to_speech_pipeline(answer)
29
 
30
  st.write("Answer:", answer)
31
  st.audio(audio_data[0]["audio"], format='audio/wav')
 
9
  speaker_embeddings = embeddings_dataset[7306]["xvector"]
10
  speaker_embeddings = torch.tensor(speaker_embeddings).unsqueeze(0)
11
 
 
12
  # 加载 Visual Question Answering 模型 microsoft/git-base-vqav2
13
  vqa_pipeline = pipeline("text2text-generation", model="microsoft/git-base-vqav2")
14
 
15
  # 加载文本到语音模型并手动指定
16
  text_to_speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
 
17
 
18
  def main():
19
  st.title("Visual Question Answering with Text-to-Speech")
 
23
 
24
  if st.button("Get Answer"):
25
  answer = vqa_pipeline(question, image_path)[0]['generated_text']
26
+ audio_data = text_to_speech_pipeline(answer, speaker_embeddings=speaker_embeddings)
27
 
28
  st.write("Answer:", answer)
29
  st.audio(audio_data[0]["audio"], format='audio/wav')