Gordon Li committed on
Commit 8b51c66 · 1 Parent(s): ee48f73

remove obs file

cronjob/abstract_traffic_image_analyzer.py DELETED
@@ -1,362 +0,0 @@
1
- # Traffic Image Analysis Module for HKUST BNB+ Platform
2
-
3
- # This module provides functionality for analyzing traffic camera images to detect and count vehicles.
4
- # It downloads images from traffic cameras, processes them using computer vision models, and records
5
- # traffic data that is used for traffic-based discount calculations in the BNB+ platform.
6
-
7
- # The analyzer connects to a database to retrieve camera locations, downloads and processes images,
8
- # detects vehicles, and stores the results for visualization and analysis.
9
-
10
- # Author: Gordon Li (20317033)
11
- # Date: March 2025
12
-
13
-
14
- import requests
15
- import oracledb
16
- from PIL import Image, ImageDraw, ImageFont
17
- import signal
18
- import io
19
- from datetime import datetime
20
- import logging
21
- import json
22
- import os
23
- import random
24
-
25
-
26
-
27
- class AbstractTrafficImageAnalyzer:
28
- # Initializes the traffic image analyzer with database connection, signal handlers, and directories.
29
-
30
- # Sets up:
31
- # - Database connection parameters
32
- # - Signal handlers for graceful shutdown
33
- # - Vehicle class identifiers for detection
34
- # - Directory structure for storing downloaded images
35
- # - Logging configuration
36
-
37
- def __init__(self):
38
- self.connection_params = {
39
- 'user': 'slliac',
40
- 'password': '7033',
41
- 'dsn': 'imz409.ust.hk:1521/imz409'
42
- }
43
- self.running = True
44
-
45
- signal.signal(signal.SIGINT, self.signal_handler)
46
- signal.signal(signal.SIGTERM, self.signal_handler)
47
-
48
- self.vehicle_classes = {2, 3, 4, 5, 6, 7, 8}
49
-
50
- self.dataset_dir = "traffic_dataset"
51
- self.images_dir = os.path.join(self.dataset_dir, "images")
52
-
53
- os.makedirs(self.images_dir, exist_ok=True)
54
-
55
- random.seed(42)
56
-
57
- self.setup_logging()
58
-
59
- # Handles termination signals to ensure graceful shutdown.
60
-
61
- # Parameters:
62
- # signum: Signal number
63
- # frame: Current stack frame
64
-
65
- def signal_handler(self, signum, frame):
66
- print("\nShutdown signal received. Completing current task...")
67
- self.running = False
68
-
69
- # Sets up logging configuration for the analyzer.
70
- # Creates log files with timestamps and configures console output.
71
-
72
- def setup_logging(self):
73
- logging.basicConfig(
74
- level=logging.INFO,
75
- format='%(asctime)s - %(levelname)s - %(message)s',
76
- handlers=[
77
- logging.FileHandler(f'traffic_analysis_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
78
- logging.StreamHandler()
79
- ]
80
- )
81
-
82
- # Retrieves traffic camera locations and URLs from the database.
83
-
84
- # Returns:
85
- # List of tuples containing camera location key and URL
86
-
87
- def get_camera_locations(self):
88
- try:
89
- with oracledb.connect(**self.connection_params) as conn:
90
- cursor = conn.cursor()
91
- cursor.execute("SELECT KEY, URL FROM TD_TRAFFIC_CAMERA_LOCATION")
92
- return cursor.fetchall()
93
- except Exception as e:
94
- logging.error(f"Error fetching camera locations: {str(e)}")
95
- raise
96
-
97
- # Downloads an image from a given URL.
98
-
99
- # Parameters:
100
- # url: URL of the traffic camera image
101
-
102
- # Returns:
103
- # PIL Image object
104
-
105
- def download_image(self, url):
106
- try:
107
- response = requests.get(url)
108
- response.raise_for_status()
109
- return Image.open(io.BytesIO(response.content))
110
- except Exception as e:
111
- logging.error(f"Error downloading image from {url}: {str(e)}")
112
- raise
113
-
114
- # Detects vehicles in an image using a computer vision model.
115
-
116
- # Parameters:
117
- # image: PIL Image object to analyze
118
- # confidence_threshold: Minimum confidence score for detections (default: 0.7)
119
-
120
- # Returns:
121
- # List of vehicle detection dictionaries with bounding boxes and scores
122
-
123
- def detect_vehicles(self, image, confidence_threshold=0.7):
124
- try:
125
- if image.mode == 'RGBA':
126
- image = image.convert('RGB')
127
-
128
- width, height = image.size
129
- inputs = self.processor(images=image, return_tensors="pt")
130
- outputs = self.model(**inputs)
131
-
132
- probas = outputs.logits.softmax(-1)[0, :, :-1]
133
- keep = probas.max(-1).values > confidence_threshold
134
-
135
- probas_to_keep = probas[keep]
136
- boxes_to_keep = outputs.pred_boxes[0][keep]
137
-
138
- scores = probas_to_keep.max(-1)
139
- labels = probas_to_keep.argmax(-1)
140
-
141
- vehicle_detections = []
142
- for score, label, box in zip(scores.values, labels, boxes_to_keep):
143
- x_c, y_c, w, h = box.tolist()
144
-
145
- w_abs = w * width
146
- h_abs = h * height
147
-
148
- x = (x_c - w / 2) * width
149
- y = (y_c - h / 2) * height
150
-
151
- category_id = label.item()
152
-
153
- if category_id in self.vehicle_classes:
154
- vehicle_detections.append({
155
- 'bbox': [float(x), float(y), float(w_abs), float(h_abs)],
156
- 'category_id': int(category_id),
157
- 'area': float(w_abs * h_abs),
158
- 'iscrowd': 0,
159
- 'score': float(score.item())
160
- })
161
-
162
- return vehicle_detections
163
- except Exception as e:
164
- logging.error(f"Error detecting vehicles: {str(e)}")
165
- raise
166
-
167
- # Draws vehicle detection bounding boxes and labels on the image.
168
-
169
- # Parameters:
170
- # image: Original PIL Image
171
- # detections: List of vehicle detection dictionaries
172
-
173
- # Returns:
174
- # New PIL Image with bounding boxes and labels drawn
175
-
176
- def draw_detections(self, image, detections):
177
- try:
178
- draw_image = image.copy()
179
- draw = ImageDraw.Draw(draw_image)
180
-
181
- category_names = {
182
- 2: "bicycle",
183
- 3: "car",
184
- 4: "motorcycle",
185
- 5: "airplane",
186
- 6: "bus",
187
- 7: "train",
188
- 8: "truck"
189
- }
190
-
191
- try:
192
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 20)
193
- except:
194
- font = ImageFont.load_default()
195
-
196
- for detection in detections:
197
- x, y, w, h = detection['bbox']
198
- score = detection['score']
199
- category_id = detection['category_id']
200
-
201
- category_name = category_names.get(category_id, f"Vehicle-{category_id}")
202
-
203
- draw.rectangle(
204
- [(x, y), (x + w, y + h)],
205
- outline='red',
206
- width=3
207
- )
208
-
209
- label_text = f"{category_name}: {score:.2f}"
210
- label_bbox = draw.textbbox((x, y - 25), label_text, font=font)
211
- draw.rectangle(
212
- [label_bbox[0] - 5, label_bbox[1] - 5, label_bbox[2] + 5, label_bbox[3] + 5],
213
- fill='red'
214
- )
215
-
216
- draw.text(
217
- (x, y - 25),
218
- label_text,
219
- fill='white',
220
- font=font
221
- )
222
-
223
- return draw_image
224
- except Exception as e:
225
- logging.error(f"Error drawing detections: {str(e)}")
226
- raise
227
-
228
- # Processes all traffic cameras, detects vehicles, and prepares data for storage.
229
-
230
- # This method:
231
- # 1. Gets all camera locations
232
- # 2. Downloads images from each camera
233
- # 3. Detects vehicles in each image
234
- # 4. Processes images to visualize detections
235
- # 5. Prepares data for storage
236
-
237
- def process_traffic_cameras(self):
238
- try:
239
- current_timestamp = datetime.now()
240
- timestamp_str = current_timestamp.strftime("%Y%m%d_%H%M%S")
241
-
242
- logging.info(f"Starting traffic image analysis for all cameras at {timestamp_str}")
243
- camera_locations = self.get_camera_locations()
244
-
245
- batch_data = {
246
- 'capture_time': [],
247
- 'location_id': [],
248
- 'image_id': [],
249
- 'original_image': [],
250
- 'vehicle_count': [],
251
- 'processed_image': [],
252
- 'coco_annotations': []
253
- }
254
-
255
- for image_id, (key, url) in enumerate(camera_locations, start=1):
256
- if not self.running:
257
- break
258
-
259
- try:
260
- logging.info(f"Processing camera at location {key}")
261
-
262
- img_timestamp = datetime.now()
263
- img_timestamp_str = img_timestamp.strftime("%Y%m%d_%H%M%S")
264
- filename = f"{key}_{img_timestamp_str}.jpg"
265
- file_path = os.path.join(self.images_dir, filename)
266
-
267
- image = self.download_image(url)
268
-
269
- image.save(file_path)
270
-
271
- orig_img_byte_arr = io.BytesIO()
272
- image.save(orig_img_byte_arr, format='JPEG')
273
- original_image_data = orig_img_byte_arr.getvalue()
274
-
275
- vehicle_detections = self.detect_vehicles(image)
276
-
277
- processed_image = self.draw_detections(image, vehicle_detections)
278
-
279
- processed_file_path = os.path.join(self.images_dir, f"processed_{filename}")
280
- processed_image.save(processed_file_path)
281
-
282
- proc_img_byte_arr = io.BytesIO()
283
- processed_image.save(proc_img_byte_arr, format='JPEG')
284
- processed_image_data = proc_img_byte_arr.getvalue()
285
-
286
- simplified_annotations = []
287
- for detection in vehicle_detections:
288
- simplified_annotations.append({
289
- "bbox": detection['bbox'],
290
- "category_id": detection['category_id'],
291
- "area": detection['area'],
292
- "iscrowd": detection['iscrowd']
293
- })
294
-
295
- coco_annotation = {
296
- "image_id": image_id,
297
- "annotations": simplified_annotations,
298
- "date": img_timestamp.strftime("%Y-%m-%d"),
299
- "timestamp": img_timestamp.strftime("%Y-%m-%d %H:%M:%S")
300
- }
301
-
302
- batch_data['capture_time'].append(img_timestamp.isoformat())
303
- batch_data['location_id'].append(key)
304
- batch_data['image_id'].append(image_id)
305
- batch_data['original_image'].append(original_image_data)
306
- batch_data['vehicle_count'].append(len(vehicle_detections))
307
- batch_data['processed_image'].append(processed_image_data)
308
- batch_data['coco_annotations'].append(json.dumps(coco_annotation))
309
-
310
- logging.info(f"Completed analysis for location {key}, detected {len(vehicle_detections)} vehicles")
311
- except Exception as e:
312
- logging.error(f"Error processing location {key}: {str(e)}")
313
- continue
314
-
315
- self.update_huggingface_dataset(batch_data, timestamp_str)
316
-
317
- logging.info(f"Completed traffic image analysis for all cameras. Data saved to {self.dataset_dir}")
318
-
319
- except Exception as e:
320
- logging.error(f"Error in process_traffic_cameras: {str(e)}")
321
- raise
322
-
323
- # Updates the HuggingFace dataset with new traffic data.
324
- # This method must be implemented by subclasses.
325
-
326
- # Parameters:
327
- # batch_data: Dictionary containing the batch data to add
328
- # timestamp_str: Timestamp string for the current batch
329
-
330
- def update_huggingface_dataset(self, batch_data, timestamp_str):
331
- raise NotImplementedError("Subclasses must implement update_huggingface_dataset method")
332
-
333
- # Creates COCO annotation files for the dataset.
334
- # This method must be implemented by subclasses.
335
-
336
- # Parameters:
337
- # dataset_dict: Dictionary containing the dataset
338
- # timestamp_str: Timestamp string for the current batch
339
-
340
- def create_coco_annotation_files(self, dataset_dict, timestamp_str):
341
- raise NotImplementedError("Subclasses must implement create_coco_annotation_files method")
342
-
343
- # Updates the README file for the dataset.
344
- # This method must be implemented by subclasses.
345
-
346
- # Parameters:
347
- # dataset_dict: Dictionary containing the dataset
348
- # timestamp_str: Timestamp string for the current batch
349
-
350
- def update_readme(self, dataset_dict, timestamp_str):
351
- raise NotImplementedError("Subclasses must implement update_readme method")
352
-
353
- # Runs the traffic image analyzer, processing all cameras and updating the dataset.
354
-
355
- def run(self):
356
- try:
357
- self.process_traffic_cameras()
358
- logging.info(f"Analysis completed and dataset updated.")
359
- except Exception as e:
360
- logging.error(f"Scheduler error: {str(e)}")
361
- finally:
362
- print("\nScheduler stopped")
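For context on the `detect_vehicles` logic deleted above: the manual softmax threshold and centre-to-corner box conversion roughly correspond to what the DETR processor's built-in post-processing does. Below is a minimal illustrative sketch (not the removed code) of an equivalent helper, assuming a recent `transformers` release; it reuses the 0.7 confidence threshold and COCO vehicle ids from the class and converts to the COCO-style `[x, y, w, h]` boxes the analyzer stores. The checkpoint here is the Facebook one loaded by the training subclass; in the deleted design, `self.processor` and `self.model` are supplied by whichever subclass is used.

```python
import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

# Checkpoint choice follows the training subclass; the application subclass loads its own.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
VEHICLE_CLASSES = {2, 3, 4, 5, 6, 7, 8}  # COCO ids: bicycle, car, motorcycle, airplane, bus, train, truck


def detect_vehicles_via_postprocess(image: Image.Image, threshold: float = 0.7):
    inputs = processor(images=image.convert("RGB"), return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # target_sizes is (height, width); boxes come back as absolute [x0, y0, x1, y1]
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes
    )[0]

    detections = []
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        if label.item() not in VEHICLE_CLASSES:
            continue
        x0, y0, x1, y1 = box.tolist()
        detections.append({
            "bbox": [x0, y0, x1 - x0, y1 - y0],  # COCO-style [x, y, w, h]
            "category_id": int(label.item()),
            "area": float((x1 - x0) * (y1 - y0)),
            "iscrowd": 0,
            "score": float(score.item()),
        })
    return detections
```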
 
cronjob/application_traffic_image_analyzer.py DELETED
@@ -1,275 +0,0 @@
1
- # Application Traffic Image Analyzer Module
2
-
3
- # This module extends the AbstractTrafficImageAnalyzer to provide specific implementation for
4
- # application-specific traffic analysis. It handles the processing of traffic camera images,
5
- # vehicle detection using the DETR model, and updating a HuggingFace dataset with the results.
6
-
7
- # The analyzer is used in the HKUST BNB+ platform to collect and analyze traffic data for
8
- # determining eco-friendly discounts based on traffic conditions.
9
-
10
- # Author: Gordon Li (20317033)
11
- # Date: March 2025
12
-
13
- from transformers import DetrImageProcessor, DetrForObjectDetection
14
- from datasets import Dataset, Features, Value, load_dataset, DatasetDict, concatenate_datasets
15
- from PIL import Image
16
- from datetime import datetime
17
- from abstract_traffic_image_analyzer import AbstractTrafficImageAnalyzer
18
- import io
19
- import json
20
- import os
21
- import logging
22
-
23
-
24
- class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
25
- # Initializes the application traffic analyzer with the DETR model and processor.
26
-
27
- # Sets up:
28
- # - DETR image processor and model for vehicle detection
29
- # - Application-specific directory for storing results
30
-
31
- def __init__(self):
32
- super().__init__()
33
- self.processor = DetrImageProcessor.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50",
34
- revision="main")
35
- self.model = DetrForObjectDetection.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50")
36
-
37
- self.application_dir = os.path.join(self.dataset_dir, "application")
38
- os.makedirs(self.application_dir, exist_ok=True)
39
-
40
- # Updates the HuggingFace dataset with new traffic data.
41
-
42
- # Parameters:
43
- # batch_data: Dictionary containing batch data including capture time, location, images, and vehicle counts
44
- # timestamp_str: Timestamp string for the current batch
45
-
46
- def update_huggingface_dataset(self, batch_data, timestamp_str):
47
- try:
48
- features = Features({
49
- 'capture_time': Value(dtype='string'),
50
- 'location_id': Value(dtype='string'),
51
- 'image_id': Value(dtype='int32'),
52
- 'original_image': Value(dtype='binary'),
53
- 'vehicle_count': Value(dtype='int32'),
54
- 'processed_image': Value(dtype='binary'),
55
- 'coco_annotations': Value(dtype='string')
56
- })
57
-
58
- valid_indices = [i for i, count in enumerate(batch_data['vehicle_count']) if count > 0]
59
- if not valid_indices:
60
- logging.info("No vehicles detected in any images. Skipping dataset update.")
61
- return
62
-
63
- filtered_data = {
64
- 'capture_time': [batch_data['capture_time'][i] for i in valid_indices],
65
- 'location_id': [batch_data['location_id'][i] for i in valid_indices],
66
- 'image_id': [batch_data['image_id'][i] for i in valid_indices],
67
- 'original_image': [batch_data['original_image'][i] for i in valid_indices],
68
- 'vehicle_count': [batch_data['vehicle_count'][i] for i in valid_indices],
69
- 'processed_image': [batch_data['processed_image'][i] for i in valid_indices],
70
- 'coco_annotations': [batch_data['coco_annotations'][i] for i in valid_indices]
71
- }
72
-
73
- new_dataset = Dataset.from_dict(filtered_data, features=features)
74
-
75
- try:
76
- try:
77
- existing_dataset = load_dataset(
78
- "slliac/isom5240-td-application-traffic-analysis",
79
- revision="main"
80
- )
81
- logging.info(f"Found existing dataset in 'application' branch")
82
-
83
- if 'application' in existing_dataset:
84
- combined_dataset = concatenate_datasets([existing_dataset['application'], new_dataset])
85
-
86
- dataset_dict = DatasetDict({
87
- "application": combined_dataset
88
- })
89
- else:
90
- dataset_dict = DatasetDict({
91
- "application": new_dataset
92
- })
93
- except Exception as e:
94
- logging.info(f"Error loading existing dataset: {str(e)}")
95
- dataset_dict = DatasetDict({
96
- "application": new_dataset
97
- })
98
-
99
- dataset_dict.push_to_hub(
100
- "slliac/isom5240-td-application-traffic-analysis",
101
- private=False
102
- )
103
-
104
- logging.info(f"Successfully updated dataset on 'application' branch.")
105
- logging.info(f"Application split: {len(dataset_dict['application'])} records")
106
-
107
- self.create_coco_annotation_files(dataset_dict, timestamp_str)
108
-
109
- except Exception as e:
110
- logging.error(f"Error updating Hugging Face dataset: {str(e)}")
111
- raise
112
-
113
- except Exception as e:
114
- logging.error(f"Error in update_huggingface_dataset: {str(e)}")
115
- raise
116
-
117
- # Creates COCO annotation files for the dataset, which are standard format for object detection.
118
-
119
- # Parameters:
120
- # dataset_dict: Dictionary containing the dataset with traffic observations
121
- # timestamp_str: Timestamp string for the current batch
122
-
123
- def create_coco_annotation_files(self, dataset_dict, timestamp_str):
124
- try:
125
- categories = [
126
- {"id": 2, "name": "bicycle", "supercategory": "vehicle"},
127
- {"id": 3, "name": "car", "supercategory": "vehicle"},
128
- {"id": 4, "name": "motorcycle", "supercategory": "vehicle"},
129
- {"id": 5, "name": "airplane", "supercategory": "vehicle"},
130
- {"id": 6, "name": "bus", "supercategory": "vehicle"},
131
- {"id": 7, "name": "train", "supercategory": "vehicle"},
132
- {"id": 8, "name": "truck", "supercategory": "vehicle"}
133
- ]
134
-
135
- current_datetime = datetime.now()
136
- current_date = current_datetime.strftime("%Y-%m-%d")
137
- current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
138
-
139
- images_data = []
140
- annotations_data = []
141
- annotation_id = 1
142
-
143
- for i, record in enumerate(dataset_dict['application']):
144
- image_id = record['image_id']
145
-
146
- coco_data = json.loads(record['coco_annotations'])
147
-
148
- img = Image.open(io.BytesIO(record['original_image']))
149
- width, height = img.size
150
-
151
- image_entry = {
152
- "id": image_id,
153
- "width": width,
154
- "height": height,
155
- "file_name": f"{record['location_id']}_{image_id}.jpg",
156
- "license": 1,
157
- "date_captured": record['capture_time'],
158
- "capture_date": datetime.fromisoformat(record['capture_time']).strftime('%Y-%m-%d'),
159
- "capture_timestamp": datetime.fromisoformat(record['capture_time']).strftime(
160
- '%Y-%m-%d %H:%M:%S')
161
- }
162
- images_data.append(image_entry)
163
-
164
- for ann in coco_data['annotations']:
165
- annotation_entry = {
166
- "id": annotation_id,
167
- "image_id": image_id,
168
- "category_id": ann['category_id'],
169
- "bbox": ann['bbox'],
170
- "area": ann['area'],
171
- "iscrowd": ann['iscrowd'],
172
- "segmentation": []
173
- }
174
- annotations_data.append(annotation_entry)
175
- annotation_id += 1
176
-
177
- coco_output = {
178
- "info": {
179
- "year": current_datetime.year,
180
- "version": "1.0",
181
- "description": "Hong Kong Traffic Camera Dataset - Application data",
182
- "contributor": "ISOM5240 Group 37",
183
- "url": "",
184
- "date_created": current_datetime.isoformat(),
185
- "collection_date": current_date,
186
- "collection_timestamp": current_datetime_str,
187
- "batch_timestamp": timestamp_str
188
- },
189
- "licenses": [
190
- {
191
- "id": 1,
192
- "name": "Attribution-NonCommercial",
193
- "url": "http://creativecommons.org/licenses/by-nc/2.0/"
194
- }
195
- ],
196
- "images": images_data,
197
- "annotations": annotations_data,
198
- "categories": categories
199
- }
200
-
201
- annotation_file = os.path.join(self.application_dir, f"application_labels_{timestamp_str}.json")
202
- with open(annotation_file, 'w') as f:
203
- json.dump(coco_output, f, indent=2)
204
-
205
- standard_annotation_file = os.path.join(self.application_dir, "application_labels.json")
206
- with open(standard_annotation_file, 'w') as f:
207
- json.dump(coco_output, f, indent=2)
208
-
209
- logging.info(f"Created COCO annotation files for application data in {self.application_dir}")
210
-
211
- try:
212
- from huggingface_hub import HfApi
213
- api = HfApi()
214
-
215
- api.upload_file(
216
- path_or_fileobj=annotation_file,
217
- path_in_repo=f"application/application_labels_{timestamp_str}.json",
218
- repo_id="slliac/isom5240-td-application-traffic-analysis",
219
- repo_type="dataset",
220
- revision="main"
221
- )
222
-
223
- api.upload_file(
224
- path_or_fileobj=standard_annotation_file,
225
- path_in_repo=f"application/application_labels.json",
226
- repo_id="slliac/isom5240-td-application-traffic-analysis",
227
- repo_type="dataset",
228
- revision="main"
229
- )
230
-
231
- logging.info(
232
- f"Uploaded COCO annotation files to Hugging Face repository in 'application/' directory")
233
- except Exception as e:
234
- logging.error(f"Error uploading COCO annotations file: {str(e)}")
235
-
236
- except Exception as e:
237
- logging.error(f"Error creating COCO annotation files: {str(e)}")
238
-
239
-
240
- # Main function to execute the traffic image analysis process.
241
- # Initializes the analyzer, loads existing data if available, runs the analysis,
242
- # and displays dataset information before and after the process.
243
-
244
-
245
- def main():
246
- analyzer = ApplicationTrafficImageAnalyzer()
247
- try:
248
- try:
249
- initial_dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", revision="main")
250
- print("\nInitial Dataset Info (from 'application' branch):")
251
- for split in initial_dataset:
252
- print(f"Number of {split} records: {len(initial_dataset[split])}")
253
- except Exception as e:
254
- print(f"\nNo existing dataset found in 'application' branch: {str(e)}")
255
- print("Will create new dataset with 'application' split in 'application' branch.")
256
-
257
- analyzer.run()
258
-
259
- try:
260
- final_dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", revision="main")
261
- print("\nFinal Dataset Info (from 'application' branch):")
262
- for split in final_dataset:
263
- print(f"Number of {split} records: {len(final_dataset[split])}")
264
- except Exception as e:
265
- print(f"\nError accessing final dataset from 'application' branch: {str(e)}")
266
-
267
- except Exception as e:
268
- logging.error(f"Main execution error: {str(e)}")
269
- finally:
270
- print("\nProgram terminated")
271
-
272
-
273
- # Entry point for the script.
274
- if __name__ == "__main__":
275
- main()
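A consumer-side sketch (not part of the deleted cronjob) of how records pushed by `update_huggingface_dataset` above could be read back, assuming the `application` split exists with the column layout shown in the file: images stored as JPEG bytes and COCO annotations as a JSON string. The dataset id and column names come from the deleted code; everything else is illustrative.

```python
import io
import json

from datasets import load_dataset
from PIL import Image

ds = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")

record = ds[0]
original = Image.open(io.BytesIO(record["original_image"]))    # raw camera frame (JPEG bytes)
annotated = Image.open(io.BytesIO(record["processed_image"]))  # frame with detection boxes drawn
coco = json.loads(record["coco_annotations"])                  # {"image_id", "annotations", "date", "timestamp"}

print(record["location_id"], record["capture_time"], record["vehicle_count"])
for ann in coco["annotations"]:
    x, y, w, h = ann["bbox"]  # COCO [x, y, w, h] in pixels
    print(f"category {ann['category_id']}: ({x:.0f}, {y:.0f}, {w:.0f}, {h:.0f})")
```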
 
cronjob/readme.md DELETED
@@ -1,5 +0,0 @@
1
- Here are the dataset cron jobs that simulate collection of the training and application usage datasets:
2
-
3
- 0 */3 * * * /Users/gordonli/Desktop/test/.venv3.12/bin/python /Users/gordonli/Desktop/5240-frontend/cronjob/train_detr_traffic_image_analyzer.py >> /Users/gordonli/Desktop/test/train.log 2>&1
4
-
5
- 0 */1 * * * /Users/gordonli/Desktop/test/.venv3.12/bin/python /Users/gordonli/Desktop/5240-frontend/cronjob/application_traffic_image_analyzer.py >> /Users/gordonli/Desktop/test/app.log 2>&1
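For illustration only, an in-process Python equivalent of the hourly application entry above; the repo itself relies on cron rather than a long-running loop, and the module and class names are taken from the deleted files.

```python
import time

from application_traffic_image_analyzer import ApplicationTrafficImageAnalyzer

# Rough stand-in for "0 */1 * * *": run the application analyzer once per hour.
if __name__ == "__main__":
    while True:
        ApplicationTrafficImageAnalyzer().run()
        time.sleep(60 * 60)
```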
 
cronjob/train_detr_traffic_image_analyzer.py DELETED
@@ -1,260 +0,0 @@
1
- # Traffic Image Analyzer for DETR Model Training
2
-
3
- # This module extends the AbstractTrafficImageAnalyzer to provide implementation for training
4
- # data collection for the DETR object detection model. It processes traffic camera images,
5
- # detects vehicles using the pretrained Facebook DETR ResNet-50 model, and organizes the data
6
- # for model training purposes.
7
-
8
- # The data collected by this analyzer is used to train custom DETR models that improve vehicle
9
- # detection accuracy, which ultimately enhances the traffic analysis component of the HKUST BNB+
10
- # platform's eco-friendly discount system.
11
-
12
- # Author: Gordon Li (20317033)
13
- # Date: March 2025
14
-
15
- from transformers import DetrImageProcessor, DetrForObjectDetection
16
- from datasets import Dataset, Features, Value, load_dataset, concatenate_datasets, DatasetDict
17
- from PIL import Image
18
- from datetime import datetime
19
- from abstract_traffic_image_analyzer import AbstractTrafficImageAnalyzer
20
- import io
21
- import json
22
- import os
23
- import logging
24
-
25
-
26
- class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
27
-
28
-
29
- # Initializes the DETR training data collector with the Facebook pretrained model.
30
-
31
- # Sets up:
32
- # - Facebook DETR ResNet-50 image processor and model
33
- # - Directory structure for storing DETR training data
34
-
35
- def __init__(self):
36
- super().__init__()
37
-
38
- self.processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
39
- self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
40
-
41
- self.fb_detr_dir = os.path.join(self.dataset_dir, "fb_detr_res_50")
42
- os.makedirs(self.fb_detr_dir, exist_ok=True)
43
-
44
- # Updates the HuggingFace dataset with new traffic data for DETR model training.
45
-
46
- # Parameters:
47
- # batch_data: Dictionary containing traffic image data and annotations
48
- # timestamp_str: Timestamp string for the current batch
49
-
50
- def update_huggingface_dataset(self, batch_data, timestamp_str):
51
- try:
52
- features = Features({
53
- 'capture_time': Value(dtype='string'),
54
- 'location_id': Value(dtype='string'),
55
- 'image_id': Value(dtype='int32'),
56
- 'original_image': Value(dtype='binary'),
57
- 'vehicle_count': Value(dtype='int32'),
58
- 'processed_image': Value(dtype='binary'),
59
- 'coco_annotations': Value(dtype='string')
60
- })
61
-
62
- # Use all data without filtering by vehicle_count
63
- new_dataset = Dataset.from_dict(batch_data, features=features)
64
- logging.info(f"New dataset has {len(new_dataset)} records")
65
-
66
- try:
67
- existing_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
68
- logging.info(f"Found existing dataset")
69
-
70
- combined_data = []
71
-
72
- if 'fbDetr50Train' in existing_dataset:
73
- combined_data.append(existing_dataset['fbDetr50Train'])
74
- logging.info(f"Existing fbDetr50Train size: {len(existing_dataset['fbDetr50Train'])}")
75
-
76
- combined_data.append(new_dataset)
77
-
78
- combined_dataset = concatenate_datasets(combined_data)
79
- logging.info(f"Combined dataset has {len(combined_dataset)} records")
80
-
81
- # Create dataset dict with only fbDetr50Train
82
- dataset_dict = DatasetDict({
83
- "fbDetr50Train": combined_dataset.shuffle(seed=42)
84
- })
85
-
86
- except Exception as e:
87
- logging.info(f"Creating new dataset with fb-detr-res-50 splits: {str(e)}")
88
- # If no existing dataset, use new dataset as the full dataset
89
- dataset_dict = DatasetDict({
90
- "fbDetr50Train": new_dataset.shuffle(seed=42)
91
- })
92
-
93
- # Push to hub
94
- dataset_dict.push_to_hub(
95
- "slliac/isom5240-td-traffic-analysis",
96
- private=True
97
- )
98
-
99
- logging.info(f"Successfully updated dataset with fb-detr-res-50 splits.")
100
- logging.info(f"fbDetr50Train split: {len(dataset_dict['fbDetr50Train'])} records")
101
-
102
- self.create_coco_annotation_files(dataset_dict, timestamp_str)
103
-
104
- except Exception as e:
105
- logging.error(f"Error updating Hugging Face dataset: {str(e)}")
106
- raise
107
-
108
- # Creates COCO annotation files for the DETR training dataset.
109
-
110
- # Parameters:
111
- # dataset_dict: Dictionary containing the dataset splits
112
- # timestamp_str: Timestamp string for the current batch
113
-
114
- def create_coco_annotation_files(self, dataset_dict, timestamp_str):
115
- try:
116
- categories = [
117
- {"id": 2, "name": "bicycle", "supercategory": "vehicle"},
118
- {"id": 3, "name": "car", "supercategory": "vehicle"},
119
- {"id": 4, "name": "motorcycle", "supercategory": "vehicle"},
120
- {"id": 5, "name": "airplane", "supercategory": "vehicle"},
121
- {"id": 6, "name": "bus", "supercategory": "vehicle"},
122
- {"id": 7, "name": "train", "supercategory": "vehicle"},
123
- {"id": 8, "name": "truck", "supercategory": "vehicle"}
124
- ]
125
-
126
- current_datetime = datetime.now()
127
- current_date = current_datetime.strftime("%Y-%m-%d")
128
- current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
129
-
130
- # Create annotations only for train split
131
- for split in ['fbDetr50Train']:
132
- images_data = []
133
- annotations_data = []
134
- annotation_id = 1
135
-
136
- for i, record in enumerate(dataset_dict[split]):
137
- image_id = record['image_id']
138
- coco_data = json.loads(record['coco_annotations'])
139
- img = Image.open(io.BytesIO(record['original_image']))
140
- width, height = img.size
141
-
142
- image_entry = {
143
- "id": image_id,
144
- "width": width,
145
- "height": height,
146
- "file_name": f"{record['location_id']}_{image_id}.jpg",
147
- "license": 1,
148
- "date_captured": record['capture_time'],
149
- "capture_date": datetime.fromisoformat(record['capture_time']).strftime('%Y-%m-%d'),
150
- "capture_timestamp": datetime.fromisoformat(record['capture_time']).strftime(
151
- '%Y-%m-%d %H:%M:%S')
152
- }
153
- images_data.append(image_entry)
154
-
155
- for ann in coco_data['annotations']:
156
- annotation_entry = {
157
- "id": annotation_id,
158
- "image_id": image_id,
159
- "category_id": ann['category_id'],
160
- "bbox": ann['bbox'],
161
- "area": ann['area'],
162
- "iscrowd": ann['iscrowd'],
163
- "segmentation": []
164
- }
165
- annotations_data.append(annotation_entry)
166
- annotation_id += 1
167
-
168
- coco_output = {
169
- "info": {
170
- "year": current_datetime.year,
171
- "version": "1.0",
172
- "description": f"Hong Kong Traffic Camera Dataset - {split} split using Facebook DETR ResNet-50",
173
- "contributor": "ISOM5240 Group 37",
174
- "url": "",
175
- "date_created": current_datetime.isoformat(),
176
- "collection_date": current_date,
177
- "collection_timestamp": current_datetime_str,
178
- "batch_timestamp": timestamp_str
179
- },
180
- "licenses": [
181
- {
182
- "id": 1,
183
- "name": "Attribution-NonCommercial",
184
- "url": "http://creativecommons.org/licenses/by-nc/2.0/"
185
- }
186
- ],
187
- "images": images_data,
188
- "annotations": annotations_data,
189
- "categories": categories
190
- }
191
-
192
- split_filename = split.replace("-", "_")
193
- annotation_file = os.path.join(self.fb_detr_dir, f"{split_filename}_labels_{timestamp_str}.json")
194
- with open(annotation_file, 'w') as f:
195
- json.dump(coco_output, f, indent=2)
196
-
197
- logging.info(f"Created COCO annotation file for {split} split: {annotation_file}")
198
-
199
- try:
200
- from huggingface_hub import HfApi
201
- api = HfApi()
202
-
203
- api.upload_file(
204
- path_or_fileobj=annotation_file,
205
- path_in_repo=f"fb_detr_res_50/{split_filename}_labels_{timestamp_str}.json",
206
- repo_id="slliac/isom5240-td-traffic-analysis",
207
- repo_type="dataset"
208
- )
209
-
210
- api.upload_file(
211
- path_or_fileobj=annotation_file,
212
- path_in_repo=f"fb_detr_res_50/{split_filename}_labels.json",
213
- repo_id="slliac/isom5240-td-traffic-analysis",
214
- repo_type="dataset"
215
- )
216
-
217
- logging.info(
218
- f"Uploaded FB DETR annotation files for {split} to Hugging Face repository")
219
- except Exception as e:
220
- logging.error(f"Error uploading COCO annotations file: {str(e)}")
221
-
222
- except Exception as e:
223
- logging.error(f"Error creating COCO annotation files: {str(e)}")
224
-
225
-
226
- # Main function to execute the DETR training data collection process.
227
- # Initializes the analyzer, loads existing data if available, runs the analysis,
228
- # and displays dataset information before and after the process.
229
-
230
-
231
- def main():
232
- analyzer = TrainDETRTrafficImageAnalyzer()
233
- try:
234
- try:
235
- initial_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
236
- print("\nInitial Dataset Info:")
237
- for split in initial_dataset:
238
- print(f"Number of {split} records: {len(initial_dataset[split])}")
239
- except Exception as e:
240
- print("\nNo existing dataset found. Will create new dataset with fb-detr-res-50 splits.")
241
-
242
- analyzer.run()
243
-
244
- try:
245
- final_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
246
- print("\nFinal Dataset Info:")
247
- for split in final_dataset:
248
- print(f"Number of {split} records: {len(final_dataset[split])}")
249
- except Exception as e:
250
- print("\nError accessing final dataset:", str(e))
251
-
252
- except Exception as e:
253
- logging.error(f"Main execution error: {str(e)}")
254
- finally:
255
- print("\nProgram terminated")
256
-
257
-
258
- # Entry point for the script.
259
- if __name__ == "__main__":
260
- main()
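A training-side sketch of how one record from the `fbDetr50Train` split collected above might be turned into DETR training inputs, assuming the stored `coco_annotations` dict (`image_id` plus `annotations`) matches the COCO-detection annotation format that `DetrImageProcessor` accepts. This is illustrative and not part of the deleted script.

```python
import io
import json

from datasets import load_dataset
from PIL import Image
from transformers import DetrImageProcessor

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
ds = load_dataset("slliac/isom5240-td-traffic-analysis", split="fbDetr50Train")

record = ds[0]
image = Image.open(io.BytesIO(record["original_image"])).convert("RGB")
coco = json.loads(record["coco_annotations"])
target = {"image_id": coco["image_id"], "annotations": coco["annotations"]}

# The processor resizes/normalizes the image and converts the COCO boxes into DETR targets.
encoding = processor(images=image, annotations=target, return_tensors="pt")
pixel_values = encoding["pixel_values"]  # model input
labels = encoding["labels"][0]           # dict with class_labels, boxes, area, iscrowd, ...
print(pixel_values.shape, labels["class_labels"])
```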
 
visualiser/hkust_bnb_visualiser.py CHANGED
@@ -6,7 +6,7 @@
6
  # to provide eco-friendly discount calculations based on traffic conditions.
7
 
8
  # Key capabilities:
9
- # - Semantic search functionality using sentence transformers
9
+ # - Text search functionality using sentence transformers
10
  # - Traffic spot integration for eco-friendly discount calculations
11
 
12
  # Author: Gordon Li (20317033)
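The comment change above renames "Semantic search" to "Text search" while keeping sentence transformers. Below is a minimal sketch of the kind of sentence-transformer lookup that comment describes; the model name and listing strings are assumptions for illustration and are not taken from the visualiser code.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model; the repo may use a different one

listings = [
    "Cozy room near HKUST with sea view",
    "Budget studio in Hang Hau, five minutes to the MTR",
    "Spacious flat in Tseung Kwan O with parking",
]
listing_embeddings = model.encode(listings, convert_to_tensor=True)

query_embedding = model.encode("quiet place close to campus", convert_to_tensor=True)
scores = util.cos_sim(query_embedding, listing_embeddings)[0]

best = scores.argmax().item()
print(f"Best match: {listings[best]} (score={scores[best].item():.3f})")
```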