IbrahimHasani committed on
Commit
057d2a5
1 Parent(s): a6bb994

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import ImageFilter, Image
3
+ from transformers import AutoModelForZeroShotImageClassification, AutoProcessor
4
+ import torch
5
+ import requests
6
+
7
# Run on CUDA when torch reports it as available; otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Checkpoint identifier shared by the model and its processor so they stay in sync.
checkpoint = "openai/clip-vit-large-patch14-336"

# Load the CLIP-ViT zero-shot classifier and move its weights to the chosen device.
model = AutoModelForZeroShotImageClassification.from_pretrained(checkpoint).to(device)

# The processor prepares both the image and the text labels for the model.
processor = AutoProcessor.from_pretrained(checkpoint)
15
+
16
def classify_image(image, candidate_labels):
    """Blur an image, score it against the labels, and apply threshold rules.

    The user-supplied labels are split on commas, stripped, and extended with
    a catch-all ``"other"`` label. The first label is treated as the primary
    label and the second (if any) as its accepted alternative by the rules
    below.

    Args:
        image: PIL image to classify.
        candidate_labels: Comma-separated label string. Blank entries (for
            example from a trailing comma) are ignored.

    Returns:
        A 4-tuple ``(result, top_label, results, messages)`` where ``result``
        is ``True`` when one of the threshold rules fired, ``top_label`` is
        the highest-scoring label, ``results`` is a list of
        ``{"score": float, "label": str}`` dicts sorted by descending score,
        and ``messages`` is a list of human-readable explanation strings.

    Raises:
        ValueError: If ``image`` is ``None`` or no non-blank label was given.
    """
    if image is None:
        raise ValueError("An image is required for classification.")

    # Drop blank entries so an empty string is never scored as a real label.
    user_labels = [label.strip() for label in (candidate_labels or "").split(",")]
    user_labels = [label for label in user_labels if label]
    if not user_labels:
        raise ValueError("Provide at least one non-empty candidate label.")
    candidate_labels = user_labels + ["other"]

    messages = []

    # Blur the image
    image = image.filter(ImageFilter.GaussianBlur(radius=5))

    # Process the image and candidate labels
    inputs = processor(images=image, text=candidate_labels, return_tensors="pt", padding=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Get model's output
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits_per_image[0]
    # tolist() yields plain Python floats rather than NumPy scalars, which
    # keeps the returned scores simple to format and serialize.
    probs = logits.softmax(dim=-1).cpu().tolist()

    # Organize results, best score first (sorted() keeps ties in input order).
    ranked = sorted(zip(probs, candidate_labels), key=lambda pair: pair[0], reverse=True)
    results = [{"score": score, "label": label} for score, label in ranked]

    # Decision-making logic
    top_label = results[0]["label"]
    second_label = results[1]["label"]
    top_score = results[0]["score"]
    combined_score = top_score + results[1]["score"]

    # Add messages to understand the scores
    messages.append(f"Top label: {top_label} with score: {top_score:.2f}")
    messages.append(f"Second label: {second_label} with score: {results[1]['score']:.2f}")

    primary_label = candidate_labels[0]
    # Only a user-supplied second label may take part in the reverse-order
    # rule. With a single user label, index 1 is the appended "other", and
    # the two scores always sum to 1.0, so the rule would fire whenever
    # "other" won.
    alternate_label = user_labels[1] if len(user_labels) > 1 else None

    # Example decision logic for specific scenarios (can be customized further)
    if top_label == primary_label and top_score >= 0.58 and second_label != "other":
        messages.append("Triggered the new 0.58 check!")
        result = True
    elif (
        top_label == primary_label
        and second_label in candidate_labels[:-1]
        and combined_score >= 0.90
    ):
        messages.append("Triggered the 90% combined check!")
        result = True
    elif (
        alternate_label is not None
        and top_label == alternate_label
        and second_label == primary_label
        and combined_score >= 0.95
    ):
        # The message now reports the threshold this branch actually applies.
        messages.append("Triggered the 95% reverse order check!")
        result = True
    else:
        result = False

    return result, top_label, results, messages
62
+
63
# Gradio UI: an image plus a comma-separated label string go into
# classify_image, whose four return values map onto the four outputs in order.
iface = gr.Interface(
    title="General Action Classifier",
    description="Upload an image and specify candidate labels to check if an action is present in the image.",
    fn=classify_image,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Candidate Labels (comma separated)"),
    ],
    outputs=[
        gr.Label(label="Result"),
        gr.Textbox(label="Top Label"),
        gr.Dataframe(label="Details"),
        gr.Textbox(label="Messages"),
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()