kotimalla committed on
Commit
4f0b54e
1 Parent(s): 03698c1
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python image as base
# NOTE(review): Python 3.8 and Debian buster are both end-of-life; consider a
# newer tag after confirming the apt package names below still exist there.
FROM python:3.8-slim-buster

# Set the working directory
WORKDIR /code

# System libraries required by the opencv-python wheel (libGL, GLib).
# --no-install-recommends and removing the apt lists keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgl1-mesa-glx libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file on its own so the dependency layer stays cached
# when only application code changes
COPY requirements.txt requirements.txt

# Install dependencies without keeping pip's download cache in the image
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY . .

# Document the port the app listens on (matches --port in CMD below)
EXPOSE 7860

# Define the command to run the app when the container starts
CMD ["python", "-m", "flask", "run", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from flask import Flask, render_template, request, redirect, url_for,send_from_directory
3
+ import cv2
4
+ import numpy as np
5
+ from transformers import DetrImageProcessor, DetrForObjectDetection
6
+ from torchvision.transforms import functional as F
7
+ from ultralytics import YOLO
8
+ import torch
9
+
10
+
11
+
12
+ app = Flask(__name__)
13
+ UPLOAD_FOLDER = 'uploads'
14
+ ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
15
+
16
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
17
+
18
def allowed_file(filename):
    """Return True when *filename* ends in one of ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. Names without a dot are rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
20
+
21
+
22
+
23
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve *filename* from the configured upload folder."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename)
26
+
27
+
28
# Drawing style shared by the YOLO and DETR annotations (colors are BGR).
_BOX_COLOR = (0, 0, 255)
_LABEL_COLOR = (255, 255, 255)
_FONT = cv2.FONT_HERSHEY_SIMPLEX
_FONT_SCALE = 0.5
_FONT_THICKNESS = 1

# Minimum DETR confidence for a detection to be drawn.
_DETR_SCORE_THRESHOLD = 0.3

# Filled on first use so the weights are read from disk once per process
# instead of on every request.
_MODEL_CACHE = {}


def _get_models():
    """Return ``(yolo_model, detr_processor, detr_model)``, loading them once."""
    if not _MODEL_CACHE:
        # Load everything into locals first so a failure part-way through
        # does not leave a half-populated cache behind.
        yolo_model = YOLO('yolo/yolov8s.pt')
        processor = DetrImageProcessor.from_pretrained("detr")
        detr_model = DetrForObjectDetection.from_pretrained("detr")
        _MODEL_CACHE.update(
            yolo=yolo_model, processor=processor, detr=detr_model
        )
    return _MODEL_CACHE['yolo'], _MODEL_CACHE['processor'], _MODEL_CACHE['detr']


def _draw_labeled_box(canvas, box, label_text):
    """Draw a red rectangle for *box* on *canvas* with *label_text* above it.

    *box* is ``(x1, y1, x2, y2)`` in integer pixel coordinates. *canvas* is
    modified in place.
    """
    x1, y1, x2, y2 = box
    cv2.rectangle(canvas, (x1, y1), (x2, y2), _BOX_COLOR, thickness=2)

    text_width, text_height = cv2.getTextSize(
        label_text, _FONT, _FONT_SCALE, _FONT_THICKNESS
    )[0]
    label_bottom_left = (x1, y1 - 5)
    label_top_right = (x1 + text_width, label_bottom_left[1] - text_height)
    # Filled background so the white text stays readable on any image.
    cv2.rectangle(canvas, label_bottom_left, label_top_right, _BOX_COLOR, cv2.FILLED)
    cv2.putText(
        canvas, label_text, label_bottom_left, _FONT, _FONT_SCALE,
        _LABEL_COLOR, _FONT_THICKNESS, cv2.LINE_AA,
    )


def _save_annotated(canvas, annotated_filename):
    """Write *canvas* into the upload folder and return the URL that serves it."""
    annotated_filepath = os.path.join(app.config['UPLOAD_FOLDER'], annotated_filename)
    cv2.imwrite(annotated_filepath, canvas)
    return url_for('uploaded_file', filename=annotated_filename)


@app.route('/', methods=['GET', 'POST'])
def index():
    """Show the upload form and, on POST, the YOLO and DETR annotated images.

    GET renders the empty form. POST saves the uploaded image under a fixed
    name, runs both detectors on it, writes one annotated copy per model into
    the upload folder and renders the page with the two image URLs.

    Returns:
        The rendered ``index.html`` page, or a redirect back to the form when
        the request has no ``image`` part or the saved file cannot be decoded.
    """
    if request.method != 'POST':
        return render_template('index.html', image1=None, image2=None)

    # Check if a file is selected
    if 'image' not in request.files:
        return redirect(request.url)

    upload = request.files['image']

    # Check if the file has a valid extension
    if not (upload and allowed_file(upload.filename)):
        return render_template('index.html', image1=None, image2=None)

    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)
    # Every upload is stored under the same constant name, so a new upload
    # overwrites the previous one.
    source_path = os.path.join(upload_dir, 'my_uploaded_image.jpg')
    upload.save(source_path)

    # cv2.imread returns None (rather than raising) when it cannot decode the
    # file, e.g. a corrupt upload or a format OpenCV does not support.
    image = cv2.imread(source_path)
    if image is None:
        return redirect(request.url)

    yolo_model, processor, detr_model = _get_models()

    # Perform YOLO object detection and annotation (YOLO takes the BGR array
    # produced by cv2.imread as-is).
    yolo_result = yolo_model(image, save=False)[0]
    yolo_names = yolo_result.names
    yolo_image = image.copy()
    for row in yolo_result.boxes.data:
        x1, y1, x2, y2, score, class_id = row.tolist()
        class_name = yolo_names.get(int(class_id), 'Unknown')
        _draw_labeled_box(
            yolo_image,
            (int(x1), int(y1), int(x2), int(y2)),
            f"Class: {class_name}, Score: {score:.2f}",
        )
    annotated_image_url = _save_annotated(
        yolo_image, 'annotated_my_uploaded_image.jpg'
    )

    # The DETR processor expects RGB channel order; OpenCV loads BGR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    inputs = processor(images=rgb_image, return_tensors="pt")
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = detr_model(**inputs)

    # Convert outputs (bounding boxes and class logits) to pixel coordinates,
    # keeping only detections scoring above _DETR_SCORE_THRESHOLD.
    target_sizes = torch.tensor([image.shape[:2]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=_DETR_SCORE_THRESHOLD
    )[0]

    # Annotate a copy of the original BGR image so cv2.imwrite saves correct colors.
    detr_image = image.copy()
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        corners = tuple(int(round(value, 2)) for value in box.tolist())
        label_text = f"{detr_model.config.id2label[label.item()]}: {round(score.item(), 3)}"
        _draw_labeled_box(detr_image, corners, label_text)
    dertannotated_image_url = _save_annotated(
        detr_image, 'dert_annotated_my_uploaded_image.jpg'
    )

    return render_template(
        'index.html', image1=annotated_image_url, image2=dertannotated_image_url
    )
146
+
147
+
148
+
149
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so debug mode is opt-in via FLASK_DEBUG instead of
    # being hard-coded on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, port=7860)
detr/config.json ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/detr-resnet-50",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "DetrForObjectDetection"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "auxiliary_loss": false,
10
+ "backbone": "resnet50",
11
+ "backbone_config": null,
12
+ "bbox_cost": 5,
13
+ "bbox_loss_coefficient": 5,
14
+ "class_cost": 1,
15
+ "classifier_dropout": 0.0,
16
+ "d_model": 256,
17
+ "decoder_attention_heads": 8,
18
+ "decoder_ffn_dim": 2048,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 6,
21
+ "dice_loss_coefficient": 1,
22
+ "dilation": false,
23
+ "dropout": 0.1,
24
+ "encoder_attention_heads": 8,
25
+ "encoder_ffn_dim": 2048,
26
+ "encoder_layerdrop": 0.0,
27
+ "encoder_layers": 6,
28
+ "eos_coefficient": 0.1,
29
+ "giou_cost": 2,
30
+ "giou_loss_coefficient": 2,
31
+ "id2label": {
32
+ "0": "N/A",
33
+ "1": "person",
34
+ "2": "bicycle",
35
+ "3": "car",
36
+ "4": "motorcycle",
37
+ "5": "airplane",
38
+ "6": "bus",
39
+ "7": "train",
40
+ "8": "truck",
41
+ "9": "boat",
42
+ "10": "traffic light",
43
+ "11": "fire hydrant",
44
+ "12": "street sign",
45
+ "13": "stop sign",
46
+ "14": "parking meter",
47
+ "15": "bench",
48
+ "16": "bird",
49
+ "17": "cat",
50
+ "18": "dog",
51
+ "19": "horse",
52
+ "20": "sheep",
53
+ "21": "cow",
54
+ "22": "elephant",
55
+ "23": "bear",
56
+ "24": "zebra",
57
+ "25": "giraffe",
58
+ "26": "hat",
59
+ "27": "backpack",
60
+ "28": "umbrella",
61
+ "29": "shoe",
62
+ "30": "eye glasses",
63
+ "31": "handbag",
64
+ "32": "tie",
65
+ "33": "suitcase",
66
+ "34": "frisbee",
67
+ "35": "skis",
68
+ "36": "snowboard",
69
+ "37": "sports ball",
70
+ "38": "kite",
71
+ "39": "baseball bat",
72
+ "40": "baseball glove",
73
+ "41": "skateboard",
74
+ "42": "surfboard",
75
+ "43": "tennis racket",
76
+ "44": "bottle",
77
+ "45": "plate",
78
+ "46": "wine glass",
79
+ "47": "cup",
80
+ "48": "fork",
81
+ "49": "knife",
82
+ "50": "spoon",
83
+ "51": "bowl",
84
+ "52": "banana",
85
+ "53": "apple",
86
+ "54": "sandwich",
87
+ "55": "orange",
88
+ "56": "broccoli",
89
+ "57": "carrot",
90
+ "58": "hot dog",
91
+ "59": "pizza",
92
+ "60": "donut",
93
+ "61": "cake",
94
+ "62": "chair",
95
+ "63": "couch",
96
+ "64": "potted plant",
97
+ "65": "bed",
98
+ "66": "mirror",
99
+ "67": "dining table",
100
+ "68": "window",
101
+ "69": "desk",
102
+ "70": "toilet",
103
+ "71": "door",
104
+ "72": "tv",
105
+ "73": "laptop",
106
+ "74": "mouse",
107
+ "75": "remote",
108
+ "76": "keyboard",
109
+ "77": "cell phone",
110
+ "78": "microwave",
111
+ "79": "oven",
112
+ "80": "toaster",
113
+ "81": "sink",
114
+ "82": "refrigerator",
115
+ "83": "blender",
116
+ "84": "book",
117
+ "85": "clock",
118
+ "86": "vase",
119
+ "87": "scissors",
120
+ "88": "teddy bear",
121
+ "89": "hair drier",
122
+ "90": "toothbrush"
123
+ },
124
+ "init_std": 0.02,
125
+ "init_xavier_std": 1.0,
126
+ "is_encoder_decoder": true,
127
+ "label2id": {
128
+ "N/A": 0,
129
+ "airplane": 5,
130
+ "apple": 53,
131
+ "backpack": 27,
132
+ "banana": 52,
133
+ "baseball bat": 39,
134
+ "baseball glove": 40,
135
+ "bear": 23,
136
+ "bed": 65,
137
+ "bench": 15,
138
+ "bicycle": 2,
139
+ "bird": 16,
140
+ "blender": 83,
141
+ "boat": 9,
142
+ "book": 84,
143
+ "bottle": 44,
144
+ "bowl": 51,
145
+ "broccoli": 56,
146
+ "bus": 6,
147
+ "cake": 61,
148
+ "car": 3,
149
+ "carrot": 57,
150
+ "cat": 17,
151
+ "cell phone": 77,
152
+ "chair": 62,
153
+ "clock": 85,
154
+ "couch": 63,
155
+ "cow": 21,
156
+ "cup": 47,
157
+ "desk": 69,
158
+ "dining table": 67,
159
+ "dog": 18,
160
+ "donut": 60,
161
+ "door": 71,
162
+ "elephant": 22,
163
+ "eye glasses": 30,
164
+ "fire hydrant": 11,
165
+ "fork": 48,
166
+ "frisbee": 34,
167
+ "giraffe": 25,
168
+ "hair drier": 89,
169
+ "handbag": 31,
170
+ "hat": 26,
171
+ "horse": 19,
172
+ "hot dog": 58,
173
+ "keyboard": 76,
174
+ "kite": 38,
175
+ "knife": 49,
176
+ "laptop": 73,
177
+ "microwave": 78,
178
+ "mirror": 66,
179
+ "motorcycle": 4,
180
+ "mouse": 74,
181
+ "orange": 55,
182
+ "oven": 79,
183
+ "parking meter": 14,
184
+ "person": 1,
185
+ "pizza": 59,
186
+ "plate": 45,
187
+ "potted plant": 64,
188
+ "refrigerator": 82,
189
+ "remote": 75,
190
+ "sandwich": 54,
191
+ "scissors": 87,
192
+ "sheep": 20,
193
+ "shoe": 29,
194
+ "sink": 81,
195
+ "skateboard": 41,
196
+ "skis": 35,
197
+ "snowboard": 36,
198
+ "spoon": 50,
199
+ "sports ball": 37,
200
+ "stop sign": 13,
201
+ "street sign": 12,
202
+ "suitcase": 33,
203
+ "surfboard": 42,
204
+ "teddy bear": 88,
205
+ "tennis racket": 43,
206
+ "tie": 32,
207
+ "toaster": 80,
208
+ "toilet": 70,
209
+ "toothbrush": 90,
210
+ "traffic light": 10,
211
+ "train": 7,
212
+ "truck": 8,
213
+ "tv": 72,
214
+ "umbrella": 28,
215
+ "vase": 86,
216
+ "window": 68,
217
+ "wine glass": 46,
218
+ "zebra": 24
219
+ },
220
+ "mask_loss_coefficient": 1,
221
+ "max_position_embeddings": 1024,
222
+ "model_type": "detr",
223
+ "num_channels": 3,
224
+ "num_hidden_layers": 6,
225
+ "num_queries": 100,
226
+ "position_embedding_type": "sine",
227
+ "scale_embedding": false,
228
+ "torch_dtype": "float32",
229
+ "transformers_version": "4.33.0",
230
+ "use_pretrained_backbone": true,
231
+ "use_timm_backbone": true
232
+ }
detr/preprocessor_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_pad": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "feature_extractor_type": "DetrFeatureExtractor",
7
+ "format": "coco_detection",
8
+ "image_mean": [
9
+ 0.485,
10
+ 0.456,
11
+ 0.406
12
+ ],
13
+ "image_processor_type": "DetrImageProcessor",
14
+ "image_std": [
15
+ 0.229,
16
+ 0.224,
17
+ 0.225
18
+ ],
19
+ "resample": 2,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "longest_edge": 1333,
23
+ "shortest_edge": 800
24
+ }
25
+ }
detr/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c560657017d19d334bb6f0d0f51eee92b7a3d873b4603f4e4f08f90050ae9efa
3
+ size 166699733
detr/yolov8s.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268e5bb54c640c96c3510224833bc2eeacab4135c6deb41502156e39986b562d
3
+ size 22573363
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ flask
3
+ torch
4
+ ultralytics
5
+ opencv-python
6
+ timm
templates/index.html ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Image Upload and Display</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            text-align: center;
        }

        h1 {
            color: #333;
        }

        form {
            margin: 20px auto;
            padding: 20px;
            border: 1px solid #ccc;
            max-width: 400px;
        }

        input[type="file"] {
            margin-bottom: 10px;
        }

        .image-container {
            display: flex;
            justify-content: center;
            align-items: flex-start;
        }

        .image-column {
            flex: 1;
            padding: 10px;
        }

        img {
            max-width: 100%;
            max-height: 500px;
        }
    </style>
</head>
<body>
    <h1>Upload an Image</h1>
    <form method="POST" enctype="multipart/form-data">
        <!-- accept mirrors the extensions the server allows; the server still validates -->
        <input type="file" name="image" accept=".png,.jpg,.jpeg,.gif" required>
        <input type="submit" value="Upload">
    </form>

    <div class="image-container">
        <div class="image-column">
            {% if image1 %}
            <h2>YOLO model Image:</h2>
            <img src="{{ image1 }}" alt="YOLO Annotated Image">
            {% endif %}
        </div>

        <div class="image-column">
            {% if image2 %}
            <h2>DETR Model Image:</h2>
            <img src="{{ image2 }}" alt="DETR Annotated Image">
            {% endif %}
        </div>
    </div>
</body>
</html>
uploads/annotated_my_uploaded_image.jpg ADDED
uploads/dert_annotated_my_uploaded_image.jpg ADDED
uploads/my_uploaded_image.jpg ADDED
yolo/yolov8s.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268e5bb54c640c96c3510224833bc2eeacab4135c6deb41502156e39986b562d
3
+ size 22573363