andgrt committed on
Commit 09be984
1 Parent(s): 2891eba
Files changed (1)
  1. app.py +12 -5
app.py CHANGED
@@ -88,9 +88,9 @@ def text_to_speech(text):
     return text, (16000, audio.squeeze())
 
 
-def transcribe(image, audio):
+def transcribe_pipeline(image, audio):
     if not image or not audio:
-        return
+        return None, None
 
     sr, y = audio
 
@@ -102,11 +102,18 @@ def transcribe(image, audio):
 
     transcription_text = transcriber({"sampling_rate": sr, "raw": y})["text"]
 
-    return generate_answer(image, transcription_text)
+    return text_to_speech(generate_answer(image, transcription_text))
+
+
+def text_pipeline(image, question):
+    if not image or not question:
+        return None, None
+
+    return text_to_speech(generate_answer(image, question))
 
 
 qa_interface = gr.Interface(
-    fn=generate_answer,
+    fn=text_pipeline,
     inputs=[
         gr.Image(type="pil"),
         gr.Textbox(label="Вопрос (на русском)", placeholder="Ваш вопрос"),
@@ -120,7 +127,7 @@ qa_interface = gr.Interface(
 )
 
 speech_interface = gr.Interface(
-    fn=transcribe,
+    fn=transcribe_pipeline,
     inputs=[
         gr.Image(type="pil"),
         gr.Audio(sources="microphone", label="Голосовой ввод"),
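
For readers following the change, here is a minimal, self-contained sketch of how the reworked wiring could look end to end. The stubbed generate_answer / text_to_speech bodies and the outputs=[...] components are assumptions for illustration (the commit only shows the inputs side of each interface); they mirror the fact that both pipelines now return the (text, (sample_rate, waveform)) pair produced by text_to_speech.

import numpy as np
import gradio as gr


def generate_answer(image, question):
    # Stub for the sketch: the real app.py runs a VQA model over (image, question).
    return f"answer to: {question}"


def text_to_speech(text):
    # Stub for the sketch: the real app.py synthesizes speech; the return shape
    # matches the diff's (text, (16000, audio.squeeze())).
    return text, (16000, np.zeros(16000, dtype=np.float32))


def text_pipeline(image, question):
    # Same guard-and-chain structure as the committed text_pipeline.
    if not image or not question:
        return None, None
    return text_to_speech(generate_answer(image, question))


qa_interface = gr.Interface(
    fn=text_pipeline,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    # Assumed output components: one for the answer text, one for the synthesized audio.
    outputs=[gr.Textbox(label="Answer"), gr.Audio(label="Spoken answer")],
)

if __name__ == "__main__":
    qa_interface.launch()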