Spaces:
Running
on
Zero
Running
on
Zero
Ngaima Sandiman
committed on
Commit
•
332a6dc
1
Parent(s):
390e0ad
Updated license and added images.
Browse files
- README.md +7 -1
- app.py +9 -8
- media/images/1.jpeg +0 -0
- media/images/2.jpg +0 -0
- media/images/3.jpg +0 -0
- media/images/4.jpeg +0 -0
- media/images/5.jpeg +0 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🏢
|
4 |
colorFrom: gray
|
5 |
colorTo: yellow
|
@@ -8,6 +8,12 @@ sdk_version: 4.44.1
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
short_description: Image to speech
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Imagecraft
|
3 |
emoji: 🏢
|
4 |
colorFrom: gray
|
5 |
colorTo: yellow
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
short_description: Image to speech
|
11 |
+
license: cc-by-nc-sa-4.0
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
15 |
+
|
16 |
+
```bash
|
17 |
+
conda create -n imagecraft_gradio python=3.10.13
|
18 |
+
pip install -r requirements.txt
|
19 |
+
```
|
app.py
CHANGED
@@ -12,26 +12,27 @@ from src.model.modules.imagecraft import ImageCraft
|
|
12 |
|
13 |
model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")
|
14 |
|
|
|
|
|
15 |
|
16 |
@spaces.GPU
|
17 |
-
def imagecraft_interface(image_path):
|
18 |
"""Process image inputs and generate audio response."""
|
19 |
transcript, audio_buffer = model.generate(image_path, output_type="buffer")
|
20 |
|
21 |
return audio_buffer, transcript
|
22 |
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
fn=imagecraft_interface,
|
27 |
inputs=[
|
28 |
-
gr.Image(type="filepath", label="Upload an image"),
|
29 |
],
|
30 |
-
outputs=[gr.Audio(label="Speech"), gr.Textbox(label="
|
31 |
title="ImageCraft",
|
32 |
description="Upload an image and get the speech responses.",
|
33 |
allow_flagging="never",
|
34 |
)
|
35 |
|
36 |
-
|
37 |
-
|
|
|
12 |
|
13 |
model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")
|
14 |
|
15 |
+
default_image = "media/images/3.jpg"
|
16 |
+
|
17 |
|
18 |
@spaces.GPU
|
19 |
+
def generate(image_path):
|
20 |
"""Process image inputs and generate audio response."""
|
21 |
transcript, audio_buffer = model.generate(image_path, output_type="buffer")
|
22 |
|
23 |
return audio_buffer, transcript
|
24 |
|
25 |
|
26 |
+
imagecraft_app = gr.Interface(
|
27 |
+
fn=generate,
|
|
|
28 |
inputs=[
|
29 |
+
gr.Image(type="filepath", label="Upload an image", value=default_image),
|
30 |
],
|
31 |
+
outputs=[gr.Audio(label="Speech"), gr.Textbox(label="Text")],
|
32 |
title="ImageCraft",
|
33 |
description="Upload an image and get the speech responses.",
|
34 |
allow_flagging="never",
|
35 |
)
|
36 |
|
37 |
+
if __name__ == "__main__":
|
38 |
+
imagecraft_app.launch()
|
media/images/1.jpeg
ADDED
media/images/2.jpg
ADDED
media/images/3.jpg
ADDED
media/images/4.jpeg
ADDED
media/images/5.jpeg
ADDED