Ngaima Sandiman committed on
Commit 332a6dc
1 Parent(s): 390e0ad

Updated license and added images.

README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Uarizona Msis Capstone Group5 Imagecraft
+title: Imagecraft
 emoji: 🏢
 colorFrom: gray
 colorTo: yellow
@@ -8,6 +8,12 @@ sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 short_description: Image to speech
+license: cc-by-nc-sa-4.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+```bash
+conda create -n imagecraft_gradio python=3.10.13
+pip install -r requirements.txt
+```
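
The new README snippet covers environment creation and dependencies but not a first run. As a minimal sketch (not part of the commit), the model call that app.py wraps can be exercised directly on one of the sample images added here; treating the returned audio buffer as an in-memory bytes buffer, and the output filename, are assumptions:

```python
# Sketch only: run the same call that app.py wraps in Gradio, on a sample
# image added in this commit, and save the generated speech to disk.
from src.model.modules.imagecraft import ImageCraft

model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")

# app.py shows generate(..., output_type="buffer") returning (transcript, audio_buffer).
transcript, audio_buffer = model.generate("media/images/3.jpg", output_type="buffer")

with open("speech.wav", "wb") as f:   # output filename is illustrative
    f.write(audio_buffer.getvalue())  # assumes a BytesIO-like buffer

print(transcript)
```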
app.py CHANGED
@@ -12,26 +12,27 @@ from src.model.modules.imagecraft import ImageCraft
 
 model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")
 
+default_image = "media/images/3.jpg"
+
 
 @spaces.GPU
-def imagecraft_interface(image_path):
+def generate(image_path):
     """Process image inputs and generate audio response."""
     transcript, audio_buffer = model.generate(image_path, output_type="buffer")
 
     return audio_buffer, transcript
 
 
-# Define Gradio interface
-gradio_interface = gr.Interface(
-    fn=imagecraft_interface,
+imagecraft_app = gr.Interface(
+    fn=generate,
     inputs=[
-        gr.Image(type="filepath", label="Upload an image"),
+        gr.Image(type="filepath", label="Upload an image", value=default_image),
     ],
-    outputs=[gr.Audio(label="Speech"), gr.Textbox(label="Transcript")],
+    outputs=[gr.Audio(label="Speech"), gr.Textbox(label="Text")],
     title="ImageCraft",
     description="Upload an image and get the speech responses.",
     allow_flagging="never",
 )
 
-# Launch the Gradio app
-gradio_interface.launch()
+if __name__ == "__main__":
+    imagecraft_app.launch()
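
With launch() now behind an `if __name__ == "__main__":` guard, the Interface can also be driven remotely once the Space is deployed. A minimal sketch (not part of the commit) using gradio_client; the Space ID and the default /predict endpoint are assumptions:

```python
# Sketch only: call the deployed Space from another machine via gradio_client.
from gradio_client import Client, handle_file

client = Client("nsandiman/imagecraft")  # hypothetical Space ID; replace with the real one
audio_path, text = client.predict(
    handle_file("media/images/3.jpg"),   # uploads the local image to the Space
    api_name="/predict",                 # default endpoint for a single gr.Interface
)
print(text)        # generated transcript
print(audio_path)  # local path to the downloaded speech audio
```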
media/images/1.jpeg ADDED
media/images/2.jpg ADDED
media/images/3.jpg ADDED
media/images/4.jpeg ADDED
media/images/5.jpeg ADDED