rdezwart committed on
Commit
26352f5
1 Parent(s): 3b11883

Tidy up some loose ends

Browse files
Files changed (3) hide show
  1. README.md +8 -3
  2. app.py +12 -7
  3. requirements.txt +4 -4
README.md CHANGED
@@ -9,15 +9,20 @@ sdk_version: 4.26.0
9
  app_file: app.py
10
  short_description: Final project for IAT 481 at SFU, Spring 2024.
11
  models:
12
- - vikhyatk/moondream2
13
  - hustvl/yolos-small-300
 
14
  pinned: true
15
  license: apache-2.0
16
  preload_from_hub:
17
- - vikhyatk/moondream2
18
  - hustvl/yolos-small-300
 
19
  ---
20
 
21
  # Food Identifier
22
 
23
- Final project for IAT 481 at Simon Fraser University, Spring 2024.
 
 
 
 
 
 
9
  app_file: app.py
10
  short_description: Final project for IAT 481 at SFU, Spring 2024.
11
  models:
 
12
  - hustvl/yolos-small-300
13
+ - vikhyatk/moondream2
14
  pinned: true
15
  license: apache-2.0
16
  preload_from_hub:
 
17
  - hustvl/yolos-small-300
18
+ - vikhyatk/moondream2
19
  ---
20
 
21
  # Food Identifier
22
 
23
+ Final project for IAT 481 at Simon Fraser University, Spring 2024.
24
+
25
+ **Models used:**
26
+
27
+ - [hustvl/yolos-small-300](https://huggingface.co/hustvl/yolos-small-300)
28
+ - [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2)
app.py CHANGED
@@ -7,6 +7,11 @@ from transformers import PreTrainedModel # for type hint
7
  from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer # Moondream
8
  from transformers import YolosImageProcessor, YolosForObjectDetection # YOLOS-small-300
9
 
 
 
 
 
 
10
  # --- Moondream --- #
11
  # Moondream does not support the HuggingFace pipeline system, so we have to do it manually
12
  moondream_id = "vikhyatk/moondream2"
@@ -17,11 +22,6 @@ moondream_model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
17
  )
18
  moondream_model.eval()
19
 
20
- # --- YOLOS --- #
21
- yolos_id = "hustvl/yolos-small-300"
22
- yolos_processor: YolosImageProcessor = YolosImageProcessor.from_pretrained(yolos_id)
23
- yolos_model: YolosForObjectDetection = YolosForObjectDetection.from_pretrained(yolos_id)
24
-
25
 
26
  def answer_question(img, prompt):
27
  """
@@ -123,6 +123,11 @@ if __name__ == "__main__":
123
  # Food Identifier
124
 
125
  Final project for IAT 481 at Simon Fraser University, Spring 2024.
 
 
 
 
 
126
  """
127
  )
128
  selected_image = gr.Number(visible=False, precision=0)
@@ -135,12 +140,12 @@ if __name__ == "__main__":
135
  yolos_submit = gr.Button("Detect Objects", interactive=False)
136
  yolos_input = gr.Image(label="Input Image", type="pil", interactive=True, mirror_webcam=False)
137
  with gr.Column():
138
- proceed_button = gr.Button("To Moondream", interactive=False)
139
  yolos_gallery = gr.Gallery(label="Detected Objects", object_fit="scale-down", columns=3,
140
  show_share_button=False, selected_index=None, allow_preview=False,
141
  type="pil", interactive=False)
142
 
143
- with gr.Tab("Inference", id='moondream'):
144
  with gr.Row(equal_height=False):
145
  with gr.Column():
146
  with gr.Group():
 
7
  from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer # Moondream
8
  from transformers import YolosImageProcessor, YolosForObjectDetection # YOLOS-small-300
9
 
10
+ # --- YOLOS --- #
11
+ yolos_id = "hustvl/yolos-small-300"
12
+ yolos_processor: YolosImageProcessor = YolosImageProcessor.from_pretrained(yolos_id)
13
+ yolos_model: YolosForObjectDetection = YolosForObjectDetection.from_pretrained(yolos_id)
14
+
15
  # --- Moondream --- #
16
  # Moondream does not support the HuggingFace pipeline system, so we have to do it manually
17
  moondream_id = "vikhyatk/moondream2"
 
22
  )
23
  moondream_model.eval()
24
 
 
 
 
 
 
25
 
26
  def answer_question(img, prompt):
27
  """
 
123
  # Food Identifier
124
 
125
  Final project for IAT 481 at Simon Fraser University, Spring 2024.
126
+
127
+ **Models used:**
128
+
129
+ - [hustvl/yolos-small-300](https://huggingface.co/hustvl/yolos-small-300)
130
+ - [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2)
131
  """
132
  )
133
  selected_image = gr.Number(visible=False, precision=0)
 
140
  yolos_submit = gr.Button("Detect Objects", interactive=False)
141
  yolos_input = gr.Image(label="Input Image", type="pil", interactive=True, mirror_webcam=False)
142
  with gr.Column():
143
+ proceed_button = gr.Button("Select for Captioning", interactive=False)
144
  yolos_gallery = gr.Gallery(label="Detected Objects", object_fit="scale-down", columns=3,
145
  show_share_button=False, selected_index=None, allow_preview=False,
146
  type="pil", interactive=False)
147
 
148
+ with gr.Tab("Captioning", id='moondream'):
149
  with gr.Row(equal_height=False):
150
  with gr.Column():
151
  with gr.Group():
requirements.txt CHANGED
@@ -3,9 +3,9 @@ gradio==4.26.0
3
  torch==2.2.2
4
  timm==0.9.16
5
 
6
- # Moondream
7
  transformers==4.39.3
8
- einops==0.7.0
9
 
10
- # YOLOS
11
- pillow==10.3.0
 
3
  torch==2.2.2
4
  timm==0.9.16
5
 
6
+ # YOLOS
7
  transformers==4.39.3
8
+ pillow==10.3.0
9
 
10
+ # Moondream
11
+ einops==0.7.0