Spaces:

rdezwart
/

FoodIdentifier

Running

App Files Community

rdezwart committed on Apr 18

Commit

26352f5

•

1 Parent(s): 3b11883

Tidy up some loose ends

Browse files

Files changed (3) hide show

README.md +8 -3
app.py +12 -7
requirements.txt +4 -4

README.md CHANGED Viewed

@@ -9,15 +9,20 @@ sdk_version: 4.26.0
 app_file: app.py
 short_description: Final project for IAT 481 at SFU, Spring 2024.
 models:
-  - vikhyatk/moondream2
   - hustvl/yolos-small-300
 pinned: true
 license: apache-2.0
 preload_from_hub:
-  - vikhyatk/moondream2
   - hustvl/yolos-small-300
 ---
 # Food Identifier
-Final project for IAT 481 at Simon Fraser University, Spring 2024.

 app_file: app.py
 short_description: Final project for IAT 481 at SFU, Spring 2024.
 models:
   - hustvl/yolos-small-300
+  - vikhyatk/moondream2
 pinned: true
 license: apache-2.0
 preload_from_hub:
   - hustvl/yolos-small-300
+  - vikhyatk/moondream2
 ---
 # Food Identifier
+Final project for IAT 481 at Simon Fraser University, Spring 2024.
+**Models used:**
+- [hustvl/yolos-small-300](https://huggingface.co/hustvl/yolos-small-300)
+- [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2)

app.py CHANGED Viewed

@@ -7,6 +7,11 @@ from transformers import PreTrainedModel  # for type hint
 from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer  # Moondream
 from transformers import YolosImageProcessor, YolosForObjectDetection  # YOLOS-small-300
 # --- Moondream --- #
 # Moondream does not support the HuggingFace pipeline system, so we have to do it manually
 moondream_id = "vikhyatk/moondream2"
@@ -17,11 +22,6 @@ moondream_model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
 )
 moondream_model.eval()
-# --- YOLOS --- #
-yolos_id = "hustvl/yolos-small-300"
-yolos_processor: YolosImageProcessor = YolosImageProcessor.from_pretrained(yolos_id)
-yolos_model: YolosForObjectDetection = YolosForObjectDetection.from_pretrained(yolos_id)
 def answer_question(img, prompt):
     """
@@ -123,6 +123,11 @@ if __name__ == "__main__":
             # Food Identifier
             Final project for IAT 481 at Simon Fraser University, Spring 2024.
             """
         )
         selected_image = gr.Number(visible=False, precision=0)
@@ -135,12 +140,12 @@ if __name__ == "__main__":
                         yolos_submit = gr.Button("Detect Objects", interactive=False)
                         yolos_input = gr.Image(label="Input Image", type="pil", interactive=True, mirror_webcam=False)
                     with gr.Column():
-                        proceed_button = gr.Button("To Moondream", interactive=False)
                         yolos_gallery = gr.Gallery(label="Detected Objects", object_fit="scale-down", columns=3,
                                                    show_share_button=False, selected_index=None, allow_preview=False,
                                                    type="pil", interactive=False)
-            with gr.Tab("Inference", id='moondream'):
                 with gr.Row(equal_height=False):
                     with gr.Column():
                         with gr.Group():

 from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer  # Moondream
 from transformers import YolosImageProcessor, YolosForObjectDetection  # YOLOS-small-300
+# --- YOLOS --- #
+yolos_id = "hustvl/yolos-small-300"
+yolos_processor: YolosImageProcessor = YolosImageProcessor.from_pretrained(yolos_id)
+yolos_model: YolosForObjectDetection = YolosForObjectDetection.from_pretrained(yolos_id)
 # --- Moondream --- #
 # Moondream does not support the HuggingFace pipeline system, so we have to do it manually
 moondream_id = "vikhyatk/moondream2"
 )
 moondream_model.eval()
 def answer_question(img, prompt):
     """
             # Food Identifier
             Final project for IAT 481 at Simon Fraser University, Spring 2024.
+            **Models used:**
+            - [hustvl/yolos-small-300](https://huggingface.co/hustvl/yolos-small-300)
+            - [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2)
             """
         )
         selected_image = gr.Number(visible=False, precision=0)
                         yolos_submit = gr.Button("Detect Objects", interactive=False)
                         yolos_input = gr.Image(label="Input Image", type="pil", interactive=True, mirror_webcam=False)
                     with gr.Column():
+                        proceed_button = gr.Button("Select for Captioning", interactive=False)
                         yolos_gallery = gr.Gallery(label="Detected Objects", object_fit="scale-down", columns=3,
                                                    show_share_button=False, selected_index=None, allow_preview=False,
                                                    type="pil", interactive=False)
+            with gr.Tab("Captioning", id='moondream'):
                 with gr.Row(equal_height=False):
                     with gr.Column():
                         with gr.Group():

requirements.txt CHANGED Viewed

@@ -3,9 +3,9 @@ gradio==4.26.0
 torch==2.2.2
 timm==0.9.16
-# Moondream
 transformers==4.39.3
-einops==0.7.0
-# YOLOS
-pillow==10.3.0

 torch==2.2.2
 timm==0.9.16
+# YOLOS
 transformers==4.39.3
+pillow==10.3.0
+# Moondream
+einops==0.7.0