Spaces:
Running
on
Zero
Running
on
Zero
onuralpszr
committed on
Commit
•
49d986a
1
Parent(s):
990acce
docs: 📝 better description for intro text
Browse files
Signed-off-by: Onuralp SEZER <thunderbirdtr@gmail.com>
- app.py +43 -14
- requirements.txt +0 -1
app.py
CHANGED
@@ -1,6 +1,3 @@
|
|
1 |
-
import os
|
2 |
-
import PIL.Image
|
3 |
-
import transformers
|
4 |
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
|
5 |
import torch
|
6 |
import supervision as sv
|
@@ -20,6 +17,7 @@ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).eval().to(DE
|
|
20 |
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
21 |
|
22 |
|
|
|
23 |
@spaces.GPU
|
24 |
def process_image(input_image,input_text,class_names):
|
25 |
class_list = class_names.split(',')
|
@@ -57,20 +55,51 @@ def process_image(input_image,input_text,class_names):
|
|
57 |
|
58 |
return annotated_image, result
|
59 |
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
if __name__ == "__main__":
|
76 |
app.launch()
|
|
|
|
|
|
|
|
|
1 |
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
|
2 |
import torch
|
3 |
import supervision as sv
|
|
|
17 |
processor = PaliGemmaProcessor.from_pretrained(model_id)
|
18 |
|
19 |
|
20 |
+
|
21 |
@spaces.GPU
|
22 |
def process_image(input_image,input_text,class_names):
|
23 |
class_list = class_names.split(',')
|
|
|
55 |
|
56 |
return annotated_image, result
|
57 |
|
58 |
+
with gr.Blocks() as app:
|
59 |
+
gr.Markdown( """
|
60 |
+
## PaliGemma 2 Detection with Supervision - Demo \n\n
|
61 |
|
62 |
+
<div style="display: flex; gap: 10px;">
|
63 |
+
<a href="https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md">
|
64 |
+
<img src="https://img.shields.io/badge/Github-100000?style=flat&logo=github&logoColor=white" alt="Github">
|
65 |
+
</a>
|
66 |
+
<a href="https://huggingface.co/blog/paligemma">
|
67 |
+
<img src="https://img.shields.io/badge/Huggingface-FFD21E?style=flat&logo=Huggingface&logoColor=black" alt="Huggingface">
|
68 |
+
</a>
|
69 |
+
<a href="https://github.com/merveenoyan/smol-vision/blob/main/Fine_tune_PaliGemma.ipynb">
|
70 |
+
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab">
|
71 |
+
</a>
|
72 |
+
<a href="https://arxiv.org/abs/2412.03555">
|
73 |
+
<img src="https://img.shields.io/badge/Arvix-B31B1B?style=flat&logo=arXiv&logoColor=white" alt="Paper">
|
74 |
+
</a>
|
75 |
+
<a href="https://supervision.roboflow.com/">
|
76 |
+
<img src="https://img.shields.io/badge/Supervision-6706CE?style=flat&logo=Roboflow&logoColor=white" alt="Supervision">
|
77 |
+
</a>
|
78 |
+
</div>
|
79 |
|
80 |
+
\n\n
|
81 |
+
PaliGemma 2 is an open vision-language model by Google, inspired by [PaLI-3](https://arxiv.org/abs/2310.09199) and
|
82 |
+
built with open components such as the [SigLIP](https://arxiv.org/abs/2303.15343)
|
83 |
+
vision model and the [Gemma 2](https://arxiv.org/abs/2408.00118) language model. PaliGemma 2 is designed as a versatile
|
84 |
+
model for transfer to a wide range of vision-language tasks such as image and short video caption, visual question
|
85 |
+
answering, text reading, object detection and object segmentation.
|
86 |
|
87 |
+
This space show how to use PaliGemma 2 for object detection with supervision.
|
88 |
+
You can input an image and a text prompt
|
89 |
+
""")
|
90 |
+
with gr.Row():
|
91 |
+
with gr.Column():
|
92 |
+
input_image = gr.Image(type="pil", label="Input Image")
|
93 |
+
input_text = gr.Textbox(lines=2, placeholder="Enter text here...", label="Enter prompt for example 'detect person;dog")
|
94 |
+
class_names = gr.Textbox(lines=1, placeholder="Enter class names separated by commas...", label="Class Names")
|
95 |
+
with gr.Column():
|
96 |
+
annotated_image = gr.Image(type="pil", label="Annotated Image")
|
97 |
+
detection_result = gr.Textbox(label="Detection Result")
|
98 |
+
gr.Button("Submit").click(
|
99 |
+
fn=process_image,
|
100 |
+
inputs=[input_image, input_text, class_names],
|
101 |
+
outputs=[annotated_image, detection_result]
|
102 |
+
)
|
103 |
|
104 |
if __name__ == "__main__":
|
105 |
app.launch()
|
requirements.txt
CHANGED
@@ -2,6 +2,5 @@ supervision
|
|
2 |
transformers==4.47.0
|
3 |
requests
|
4 |
tqdm
|
5 |
-
gradio
|
6 |
spaces
|
7 |
torch
|
|
|
2 |
transformers==4.47.0
|
3 |
requests
|
4 |
tqdm
|
|
|
5 |
spaces
|
6 |
torch
|