ronniet committed on
Commit
000c2c2
1 Parent(s): 395a762

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -16
app.py CHANGED
@@ -1,25 +1,26 @@
1
- # import gradio as gr
2
- # from transformers import pipeline
3
- # from TTS.api import TTS
4
 
5
- # captioner = pipeline(model="microsoft/git-base")
6
- # tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
7
 
8
- # def predict(image):
9
- # text = captioner(image)[0]["generated_text"]
10
 
11
- # audio_output = "output.wav"
12
- # tts.tts_to_file(text, speaker=tts.speakers[0], language="en", file_path=audio_output)
 
 
 
13
 
14
- # return text, audio_output
15
 
16
- # demo = gr.Interface(
17
- # fn=predict,
18
- # inputs=gr.Image(type="pil"),
19
- # outputs=['text', gr.Audio()]
20
- # )
21
 
22
- # demo.launch()
23
 
24
  # gr.Interface.load("models/ronniet/git-base-env").launch()
25
  # gr.Interface.load("models/microsoft/git-base").launch()
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from TTS.api import TTS
4
 
5
+ captioner = pipeline(model="microsoft/git-base")
6
+ tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
7
 
 
 
8
 
9
+ def predict(image):
10
+ text = captioner(image)[0]["generated_text"]
11
+
12
+ audio_output = "output.wav"
13
+ tts.tts_to_file(text, speaker=tts.speakers[0], language="en", file_path=audio_output)
14
 
15
+ return text, audio_output
16
 
17
+ demo = gr.Interface(
18
+ fn=predict,
19
+ inputs=gr.Image(type="pil"),
20
+ outputs=['text', gr.Audio()]
21
+ )
22
 
23
+ demo.launch()
24
 
25
  # gr.Interface.load("models/ronniet/git-base-env").launch()
26
  # gr.Interface.load("models/microsoft/git-base").launch()