onuralpszr committed on
Commit
49d986a
•
1 Parent(s): 990acce

docs: πŸ“ better description for intro text

Browse files

Signed-off-by: Onuralp SEZER <thunderbirdtr@gmail.com>

Files changed (2) hide show
  1. app.py +43 -14
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,6 +1,3 @@
1
- import os
2
- import PIL.Image
3
- import transformers
4
  from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
5
  import torch
6
  import supervision as sv
@@ -20,6 +17,7 @@ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).eval().to(DE
20
  processor = PaliGemmaProcessor.from_pretrained(model_id)
21
 
22
 
 
23
  @spaces.GPU
24
  def process_image(input_image,input_text,class_names):
25
  class_list = class_names.split(',')
@@ -57,20 +55,51 @@ def process_image(input_image,input_text,class_names):
57
 
58
  return annotated_image, result
59
 
 
 
 
60
 
61
- app = gr.Interface(
62
- fn=process_image,
63
- inputs=[
64
- gr.Image(type="pil", label="Input Image"),
65
- gr.Textbox(lines=2, placeholder="Enter text here...", label="Enter prompt for example 'detect person;dog"),
66
- gr.Textbox(lines=1, placeholder="Enter class names separated by commas...", label="Class Names")
67
- ],
68
- outputs=[gr.Image(type="pil", label="Annotated Image"), gr.Textbox(label="Detection Result")],
69
- title="PaliGemma2 Image Detection with Supervision",
70
- description="Detect objects in an image using PaliGemma2 model."
71
- )
 
 
 
 
 
 
72
 
 
 
 
 
 
 
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  if __name__ == "__main__":
76
  app.launch()
 
 
 
 
1
  from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
2
  import torch
3
  import supervision as sv
 
17
  processor = PaliGemmaProcessor.from_pretrained(model_id)
18
 
19
 
20
+
21
  @spaces.GPU
22
  def process_image(input_image,input_text,class_names):
23
  class_list = class_names.split(',')
 
55
 
56
  return annotated_image, result
57
 
58
+ with gr.Blocks() as app:
59
+ gr.Markdown( """
60
+ ## PaliGemma 2 Detection with Supervision - Demo \n\n
61
 
62
+ <div style="display: flex; gap: 10px;">
63
+ <a href="https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md">
64
+ <img src="https://img.shields.io/badge/Github-100000?style=flat&logo=github&logoColor=white" alt="Github">
65
+ </a>
66
+ <a href="https://huggingface.co/blog/paligemma">
67
+ <img src="https://img.shields.io/badge/Huggingface-FFD21E?style=flat&logo=Huggingface&logoColor=black" alt="Huggingface">
68
+ </a>
69
+ <a href="https://github.com/merveenoyan/smol-vision/blob/main/Fine_tune_PaliGemma.ipynb">
70
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab">
71
+ </a>
72
+ <a href="https://arxiv.org/abs/2412.03555">
73
+ <img src="https://img.shields.io/badge/Arvix-B31B1B?style=flat&logo=arXiv&logoColor=white" alt="Paper">
74
+ </a>
75
+ <a href="https://supervision.roboflow.com/">
76
+ <img src="https://img.shields.io/badge/Supervision-6706CE?style=flat&logo=Roboflow&logoColor=white" alt="Supervision">
77
+ </a>
78
+ </div>
79
 
80
+ \n\n
81
+ PaliGemma 2 is an open vision-language model by Google, inspired by [PaLI-3](https://arxiv.org/abs/2310.09199) and
82
+ built with open components such as the [SigLIP](https://arxiv.org/abs/2303.15343)
83
+ vision model and the [Gemma 2](https://arxiv.org/abs/2408.00118) language model. PaliGemma 2 is designed as a versatile
84
+ model for transfer to a wide range of vision-language tasks such as image and short video caption, visual question
85
+ answering, text reading, object detection and object segmentation.
86
 
87
+ This space show how to use PaliGemma 2 for object detection with supervision.
88
+ You can input an image and a text prompt
89
+ """)
90
+ with gr.Row():
91
+ with gr.Column():
92
+ input_image = gr.Image(type="pil", label="Input Image")
93
+ input_text = gr.Textbox(lines=2, placeholder="Enter text here...", label="Enter prompt for example 'detect person;dog")
94
+ class_names = gr.Textbox(lines=1, placeholder="Enter class names separated by commas...", label="Class Names")
95
+ with gr.Column():
96
+ annotated_image = gr.Image(type="pil", label="Annotated Image")
97
+ detection_result = gr.Textbox(label="Detection Result")
98
+ gr.Button("Submit").click(
99
+ fn=process_image,
100
+ inputs=[input_image, input_text, class_names],
101
+ outputs=[annotated_image, detection_result]
102
+ )
103
 
104
  if __name__ == "__main__":
105
  app.launch()
requirements.txt CHANGED
@@ -2,6 +2,5 @@ supervision
2
  transformers==4.47.0
3
  requests
4
  tqdm
5
- gradio
6
  spaces
7
  torch
 
2
  transformers==4.47.0
3
  requests
4
  tqdm
 
5
  spaces
6
  torch