Gordon Li committed on
Commit 8b51c66 · 1 Parent(s): ee48f73

remove obs file

cronjob/abstract_traffic_image_analyzer.py DELETED
@@ -1,362 +0,0 @@
1
- # Traffic Image Analysis Module for HKUST BNB+ Platform
2
-
3
- # This module provides functionality for analyzing traffic camera images to detect and count vehicles.
4
- # It downloads images from traffic cameras, processes them using computer vision models, and records
5
- # traffic data that is used for traffic-based discount calculations in the BNB+ platform.
6
-
7
- # The analyzer connects to a database to retrieve camera locations, downloads and processes images,
8
- # detects vehicles, and stores the results for visualization and analysis.
9
-
10
- # Author: Gordon Li (20317033)
11
- # Date: March 2025
12
-
13
-
14
- import requests
15
- import oracledb
16
- from PIL import Image, ImageDraw, ImageFont
17
- import signal
18
- import io
19
- from datetime import datetime
20
- import logging
21
- import json
22
- import os
23
- import random
24
-
25
-
26
-
27
- class AbstractTrafficImageAnalyzer:
28
- # Initializes the traffic image analyzer with database connection, signal handlers, and directories.
29
-
30
- # Sets up:
31
- # - Database connection parameters
32
- # - Signal handlers for graceful shutdown
33
- # - Vehicle class identifiers for detection
34
- # - Directory structure for storing downloaded images
35
- # - Logging configuration
36
-
37
- def __init__(self):
38
- self.connection_params = {
39
- 'user': 'slliac',
40
- 'password': '7033',
41
- 'dsn': 'imz409.ust.hk:1521/imz409'
42
- }
43
- self.running = True
44
-
45
- signal.signal(signal.SIGINT, self.signal_handler)
46
- signal.signal(signal.SIGTERM, self.signal_handler)
47
-
48
- self.vehicle_classes = {2, 3, 4, 5, 6, 7, 8}
49
-
50
- self.dataset_dir = "traffic_dataset"
51
- self.images_dir = os.path.join(self.dataset_dir, "images")
52
-
53
- os.makedirs(self.images_dir, exist_ok=True)
54
-
55
- random.seed(42)
56
-
57
- self.setup_logging()
58
-
59
- # Handles termination signals to ensure graceful shutdown.
60
-
61
- # Parameters:
62
- # signum: Signal number
63
- # frame: Current stack frame
64
-
65
- def signal_handler(self, signum, frame):
66
- print("\nShutdown signal received. Completing current task...")
67
- self.running = False
68
-
69
- # Sets up logging configuration for the analyzer.
70
- # Creates log files with timestamps and configures console output.
71
-
72
- def setup_logging(self):
73
- logging.basicConfig(
74
- level=logging.INFO,
75
- format='%(asctime)s - %(levelname)s - %(message)s',
76
- handlers=[
77
- logging.FileHandler(f'traffic_analysis_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
78
- logging.StreamHandler()
79
- ]
80
- )
81
-
82
- # Retrieves traffic camera locations and URLs from the database.
83
-
84
- # Returns:
85
- # List of tuples containing camera location key and URL
86
-
87
- def get_camera_locations(self):
88
- try:
89
- with oracledb.connect(**self.connection_params) as conn:
90
- cursor = conn.cursor()
91
- cursor.execute("SELECT KEY, URL FROM TD_TRAFFIC_CAMERA_LOCATION")
92
- return cursor.fetchall()
93
- except Exception as e:
94
- logging.error(f"Error fetching camera locations: {str(e)}")
95
- raise
96
-
97
- # Downloads an image from a given URL.
98
-
99
- # Parameters:
100
- # url: URL of the traffic camera image
101
-
102
- # Returns:
103
- # PIL Image object
104
-
105
- def download_image(self, url):
106
- try:
107
- response = requests.get(url)
108
- response.raise_for_status()
109
- return Image.open(io.BytesIO(response.content))
110
- except Exception as e:
111
- logging.error(f"Error downloading image from {url}: {str(e)}")
112
- raise
113
-
114
- # Detects vehicles in an image using a computer vision model.
115
-
116
- # Parameters:
117
- # image: PIL Image object to analyze
118
- # confidence_threshold: Minimum confidence score for detections (default: 0.7)
119
-
120
- # Returns:
121
- # List of vehicle detection dictionaries with bounding boxes and scores
122
-
123
- def detect_vehicles(self, image, confidence_threshold=0.7):
124
- try:
125
- if image.mode == 'RGBA':
126
- image = image.convert('RGB')
127
-
128
- width, height = image.size
129
- inputs = self.processor(images=image, return_tensors="pt")
130
- outputs = self.model(**inputs)
131
-
132
- probas = outputs.logits.softmax(-1)[0, :, :-1]
133
- keep = probas.max(-1).values > confidence_threshold
134
-
135
- probas_to_keep = probas[keep]
136
- boxes_to_keep = outputs.pred_boxes[0][keep]
137
-
138
- scores = probas_to_keep.max(-1)
139
- labels = probas_to_keep.argmax(-1)
140
-
141
- vehicle_detections = []
142
- for score, label, box in zip(scores.values, labels, boxes_to_keep):
143
- x_c, y_c, w, h = box.tolist()
144
-
145
- w_abs = w * width
146
- h_abs = h * height
147
-
148
- x = (x_c - w / 2) * width
149
- y = (y_c - h / 2) * height
150
-
151
- category_id = label.item()
152
-
153
- if category_id in self.vehicle_classes:
154
- vehicle_detections.append({
155
- 'bbox': [float(x), float(y), float(w_abs), float(h_abs)],
156
- 'category_id': int(category_id),
157
- 'area': float(w_abs * h_abs),
158
- 'iscrowd': 0,
159
- 'score': float(score.item())
160
- })
161
-
162
- return vehicle_detections
163
- except Exception as e:
164
- logging.error(f"Error detecting vehicles: {str(e)}")
165
- raise
166
-
167
- # Draws vehicle detection bounding boxes and labels on the image.
168
-
169
- # Parameters:
170
- # image: Original PIL Image
171
- # detections: List of vehicle detection dictionaries
172
-
173
- # Returns:
174
- # New PIL Image with bounding boxes and labels drawn
175
-
176
- def draw_detections(self, image, detections):
177
- try:
178
- draw_image = image.copy()
179
- draw = ImageDraw.Draw(draw_image)
180
-
181
- category_names = {
182
- 2: "bicycle",
183
- 3: "car",
184
- 4: "motorcycle",
185
- 5: "airplane",
186
- 6: "bus",
187
- 7: "train",
188
- 8: "truck"
189
- }
190
-
191
- try:
192
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 20)
193
- except:
194
- font = ImageFont.load_default()
195
-
196
- for detection in detections:
197
- x, y, w, h = detection['bbox']
198
- score = detection['score']
199
- category_id = detection['category_id']
200
-
201
- category_name = category_names.get(category_id, f"Vehicle-{category_id}")
202
-
203
- draw.rectangle(
204
- [(x, y), (x + w, y + h)],
205
- outline='red',
206
- width=3
207
- )
208
-
209
- label_text = f"{category_name}: {score:.2f}"
210
- label_bbox = draw.textbbox((x, y - 25), label_text, font=font)
211
- draw.rectangle(
212
- [label_bbox[0] - 5, label_bbox[1] - 5, label_bbox[2] + 5, label_bbox[3] + 5],
213
- fill='red'
214
- )
215
-
216
- draw.text(
217
- (x, y - 25),
218
- label_text,
219
- fill='white',
220
- font=font
221
- )
222
-
223
- return draw_image
224
- except Exception as e:
225
- logging.error(f"Error drawing detections: {str(e)}")
226
- raise
227
-
228
- # Processes all traffic cameras, detects vehicles, and prepares data for storage.
229
-
230
- # This method:
231
- # 1. Gets all camera locations
232
- # 2. Downloads images from each camera
233
- # 3. Detects vehicles in each image
234
- # 4. Processes images to visualize detections
235
- # 5. Prepares data for storage
236
-
237
- def process_traffic_cameras(self):
238
- try:
239
- current_timestamp = datetime.now()
240
- timestamp_str = current_timestamp.strftime("%Y%m%d_%H%M%S")
241
-
242
- logging.info(f"Starting traffic image analysis for all cameras at {timestamp_str}")
243
- camera_locations = self.get_camera_locations()
244
-
245
- batch_data = {
246
- 'capture_time': [],
247
- 'location_id': [],
248
- 'image_id': [],
249
- 'original_image': [],
250
- 'vehicle_count': [],
251
- 'processed_image': [],
252
- 'coco_annotations': []
253
- }
254
-
255
- for image_id, (key, url) in enumerate(camera_locations, start=1):
256
- if not self.running:
257
- break
258
-
259
- try:
260
- logging.info(f"Processing camera at location {key}")
261
-
262
- img_timestamp = datetime.now()
263
- img_timestamp_str = img_timestamp.strftime("%Y%m%d_%H%M%S")
264
- filename = f"{key}_{img_timestamp_str}.jpg"
265
- file_path = os.path.join(self.images_dir, filename)
266
-
267
- image = self.download_image(url)
268
-
269
- image.save(file_path)
270
-
271
- orig_img_byte_arr = io.BytesIO()
272
- image.save(orig_img_byte_arr, format='JPEG')
273
- original_image_data = orig_img_byte_arr.getvalue()
274
-
275
- vehicle_detections = self.detect_vehicles(image)
276
-
277
- processed_image = self.draw_detections(image, vehicle_detections)
278
-
279
- processed_file_path = os.path.join(self.images_dir, f"processed_{filename}")
280
- processed_image.save(processed_file_path)
281
-
282
- proc_img_byte_arr = io.BytesIO()
283
- processed_image.save(proc_img_byte_arr, format='JPEG')
284
- processed_image_data = proc_img_byte_arr.getvalue()
285
-
286
- simplified_annotations = []
287
- for detection in vehicle_detections:
288
- simplified_annotations.append({
289
- "bbox": detection['bbox'],
290
- "category_id": detection['category_id'],
291
- "area": detection['area'],
292
- "iscrowd": detection['iscrowd']
293
- })
294
-
295
- coco_annotation = {
296
- "image_id": image_id,
297
- "annotations": simplified_annotations,
298
- "date": img_timestamp.strftime("%Y-%m-%d"),
299
- "timestamp": img_timestamp.strftime("%Y-%m-%d %H:%M:%S")
300
- }
301
-
302
- batch_data['capture_time'].append(img_timestamp.isoformat())
303
- batch_data['location_id'].append(key)
304
- batch_data['image_id'].append(image_id)
305
- batch_data['original_image'].append(original_image_data)
306
- batch_data['vehicle_count'].append(len(vehicle_detections))
307
- batch_data['processed_image'].append(processed_image_data)
308
- batch_data['coco_annotations'].append(json.dumps(coco_annotation))
309
-
310
- logging.info(f"Completed analysis for location {key}, detected {len(vehicle_detections)} vehicles")
311
- except Exception as e:
312
- logging.error(f"Error processing location {key}: {str(e)}")
313
- continue
314
-
315
- self.update_huggingface_dataset(batch_data, timestamp_str)
316
-
317
- logging.info(f"Completed traffic image analysis for all cameras. Data saved to {self.dataset_dir}")
318
-
319
- except Exception as e:
320
- logging.error(f"Error in process_traffic_cameras: {str(e)}")
321
- raise
322
-
323
- # Updates the HuggingFace dataset with new traffic data.
324
- # This method must be implemented by subclasses.
325
-
326
- # Parameters:
327
- # batch_data: Dictionary containing the batch data to add
328
- # timestamp_str: Timestamp string for the current batch
329
-
330
- def update_huggingface_dataset(self, batch_data, timestamp_str):
331
- raise NotImplementedError("Subclasses must implement update_huggingface_dataset method")
332
-
333
- # Creates COCO annotation files for the dataset.
334
- # This method must be implemented by subclasses.
335
-
336
- # Parameters:
337
- # dataset_dict: Dictionary containing the dataset
338
- # timestamp_str: Timestamp string for the current batch
339
-
340
- def create_coco_annotation_files(self, dataset_dict, timestamp_str):
341
- raise NotImplementedError("Subclasses must implement create_coco_annotation_files method")
342
-
343
- # Updates the README file for the dataset.
344
- # This method must be implemented by subclasses.
345
-
346
- # Parameters:
347
- # dataset_dict: Dictionary containing the dataset
348
- # timestamp_str: Timestamp string for the current batch
349
-
350
- def update_readme(self, dataset_dict, timestamp_str):
351
- raise NotImplementedError("Subclasses must implement update_readme method")
352
-
353
- # Runs the traffic image analyzer, processing all cameras and updating the dataset.
354
-
355
- def run(self):
356
- try:
357
- self.process_traffic_cameras()
358
- logging.info(f"Analysis completed and dataset updated.")
359
- except Exception as e:
360
- logging.error(f"Scheduler error: {str(e)}")
361
- finally:
362
- print("\nScheduler stopped")
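For context on the `detect_vehicles` logic deleted above: the manual softmax threshold and centre-to-corner box conversion roughly correspond to what the DETR processor's built-in post-processing does. Below is a minimal illustrative sketch (not the removed code) of an equivalent helper, assuming a recent `transformers` release; it reuses the 0.7 confidence threshold and COCO vehicle ids from the class and converts to the COCO-style `[x, y, w, h]` boxes the analyzer stores. The checkpoint here is the Facebook one loaded by the training subclass; in the deleted design, `self.processor` and `self.model` are supplied by whichever subclass is used.

```python
import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

# Checkpoint choice follows the training subclass; the application subclass loads its own.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
VEHICLE_CLASSES = {2, 3, 4, 5, 6, 7, 8}  # COCO ids: bicycle, car, motorcycle, airplane, bus, train, truck


def detect_vehicles_via_postprocess(image: Image.Image, threshold: float = 0.7):
    inputs = processor(images=image.convert("RGB"), return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # target_sizes is (height, width); boxes come back as absolute [x0, y0, x1, y1]
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes
    )[0]

    detections = []
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        if label.item() not in VEHICLE_CLASSES:
            continue
        x0, y0, x1, y1 = box.tolist()
        detections.append({
            "bbox": [x0, y0, x1 - x0, y1 - y0],  # COCO-style [x, y, w, h]
            "category_id": int(label.item()),
            "area": float((x1 - x0) * (y1 - y0)),
            "iscrowd": 0,
            "score": float(score.item()),
        })
    return detections
```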
 
cronjob/application_traffic_image_analyzer.py DELETED
@@ -1,275 +0,0 @@
1
- # Application Traffic Image Analyzer Module
2
-
3
- # This module extends the AbstractTrafficImageAnalyzer to provide specific implementation for
4
- # application-specific traffic analysis. It handles the processing of traffic camera images,
5
- # vehicle detection using the DETR model, and updating a HuggingFace dataset with the results.
6
-
7
- # The analyzer is used in the HKUST BNB+ platform to collect and analyze traffic data for
8
- # determining eco-friendly discounts based on traffic conditions.
9
-
10
- # Author: Gordon Li (20317033)
11
- # Date: March 2025
12
-
13
- from transformers import DetrImageProcessor, DetrForObjectDetection
14
- from datasets import Dataset, Features, Value, load_dataset, DatasetDict, concatenate_datasets
15
- from PIL import Image
16
- from datetime import datetime
17
- from abstract_traffic_image_analyzer import AbstractTrafficImageAnalyzer
18
- import io
19
- import json
20
- import os
21
- import logging
22
-
23
-
24
- class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
25
- # Initializes the application traffic analyzer with the DETR model and processor.
26
-
27
- # Sets up:
28
- # - DETR image processor and model for vehicle detection
29
- # - Application-specific directory for storing results
30
-
31
- def __init__(self):
32
- super().__init__()
33
- self.processor = DetrImageProcessor.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50",
34
- revision="main")
35
- self.model = DetrForObjectDetection.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50")
36
-
37
- self.application_dir = os.path.join(self.dataset_dir, "application")
38
- os.makedirs(self.application_dir, exist_ok=True)
39
-
40
- # Updates the HuggingFace dataset with new traffic data.
41
-
42
- # Parameters:
43
- # batch_data: Dictionary containing batch data including capture time, location, images, and vehicle counts
44
- # timestamp_str: Timestamp string for the current batch
45
-
46
- def update_huggingface_dataset(self, batch_data, timestamp_str):
47
- try:
48
- features = Features({
49
- 'capture_time': Value(dtype='string'),
50
- 'location_id': Value(dtype='string'),
51
- 'image_id': Value(dtype='int32'),
52
- 'original_image': Value(dtype='binary'),
53
- 'vehicle_count': Value(dtype='int32'),
54
- 'processed_image': Value(dtype='binary'),
55
- 'coco_annotations': Value(dtype='string')
56
- })
57
-
58
- valid_indices = [i for i, count in enumerate(batch_data['vehicle_count']) if count > 0]
59
- if not valid_indices:
60
- logging.info("No vehicles detected in any images. Skipping dataset update.")
61
- return
62
-
63
- filtered_data = {
64
- 'capture_time': [batch_data['capture_time'][i] for i in valid_indices],
65
- 'location_id': [batch_data['location_id'][i] for i in valid_indices],
66
- 'image_id': [batch_data['image_id'][i] for i in valid_indices],
67
- 'original_image': [batch_data['original_image'][i] for i in valid_indices],
68
- 'vehicle_count': [batch_data['vehicle_count'][i] for i in valid_indices],
69
- 'processed_image': [batch_data['processed_image'][i] for i in valid_indices],
70
- 'coco_annotations': [batch_data['coco_annotations'][i] for i in valid_indices]
71
- }
72
-
73
- new_dataset = Dataset.from_dict(filtered_data, features=features)
74
-
75
- try:
76
- try:
77
- existing_dataset = load_dataset(
78
- "slliac/isom5240-td-application-traffic-analysis",
79
- revision="main"
80
- )
81
- logging.info(f"Found existing dataset in 'application' branch")
82
-
83
- if 'application' in existing_dataset:
84
- combined_dataset = concatenate_datasets([existing_dataset['application'], new_dataset])
85
-
86
- dataset_dict = DatasetDict({
87
- "application": combined_dataset
88
- })
89
- else:
90
- dataset_dict = DatasetDict({
91
- "application": new_dataset
92
- })
93
- except Exception as e:
94
- logging.info(f"Error loading existing dataset: {str(e)}")
95
- dataset_dict = DatasetDict({
96
- "application": new_dataset
97
- })
98
-
99
- dataset_dict.push_to_hub(
100
- "slliac/isom5240-td-application-traffic-analysis",
101
- private=False
102
- )
103
-
104
- logging.info(f"Successfully updated dataset on 'application' branch.")
105
- logging.info(f"Application split: {len(dataset_dict['application'])} records")
106
-
107
- self.create_coco_annotation_files(dataset_dict, timestamp_str)
108
-
109
- except Exception as e:
110
- logging.error(f"Error updating Hugging Face dataset: {str(e)}")
111
- raise
112
-
113
- except Exception as e:
114
- logging.error(f"Error in update_huggingface_dataset: {str(e)}")
115
- raise
116
-
117
- # Creates COCO annotation files for the dataset, which are standard format for object detection.
118
-
119
- # Parameters:
120
- # dataset_dict: Dictionary containing the dataset with traffic observations
121
- # timestamp_str: Timestamp string for the current batch
122
-
123
- def create_coco_annotation_files(self, dataset_dict, timestamp_str):
124
- try:
125
- categories = [
126
- {"id": 2, "name": "bicycle", "supercategory": "vehicle"},
127
- {"id": 3, "name": "car", "supercategory": "vehicle"},
128
- {"id": 4, "name": "motorcycle", "supercategory": "vehicle"},
129
- {"id": 5, "name": "airplane", "supercategory": "vehicle"},
130
- {"id": 6, "name": "bus", "supercategory": "vehicle"},
131
- {"id": 7, "name": "train", "supercategory": "vehicle"},
132
- {"id": 8, "name": "truck", "supercategory": "vehicle"}
133
- ]
134
-
135
- current_datetime = datetime.now()
136
- current_date = current_datetime.strftime("%Y-%m-%d")
137
- current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
138
-
139
- images_data = []
140
- annotations_data = []
141
- annotation_id = 1
142
-
143
- for i, record in enumerate(dataset_dict['application']):
144
- image_id = record['image_id']
145
-
146
- coco_data = json.loads(record['coco_annotations'])
147
-
148
- img = Image.open(io.BytesIO(record['original_image']))
149
- width, height = img.size
150
-
151
- image_entry = {
152
- "id": image_id,
153
- "width": width,
154
- "height": height,
155
- "file_name": f"{record['location_id']}_{image_id}.jpg",
156
- "license": 1,
157
- "date_captured": record['capture_time'],
158
- "capture_date": datetime.fromisoformat(record['capture_time']).strftime('%Y-%m-%d'),
159
- "capture_timestamp": datetime.fromisoformat(record['capture_time']).strftime(
160
- '%Y-%m-%d %H:%M:%S')
161
- }
162
- images_data.append(image_entry)
163
-
164
- for ann in coco_data['annotations']:
165
- annotation_entry = {
166
- "id": annotation_id,
167
- "image_id": image_id,
168
- "category_id": ann['category_id'],
169
- "bbox": ann['bbox'],
170
- "area": ann['area'],
171
- "iscrowd": ann['iscrowd'],
172
- "segmentation": []
173
- }
174
- annotations_data.append(annotation_entry)
175
- annotation_id += 1
176
-
177
- coco_output = {
178
- "info": {
179
- "year": current_datetime.year,
180
- "version": "1.0",
181
- "description": "Hong Kong Traffic Camera Dataset - Application data",
182
- "contributor": "ISOM5240 Group 37",
183
- "url": "",
184
- "date_created": current_datetime.isoformat(),
185
- "collection_date": current_date,
186
- "collection_timestamp": current_datetime_str,
187
- "batch_timestamp": timestamp_str
188
- },
189
- "licenses": [
190
- {
191
- "id": 1,
192
- "name": "Attribution-NonCommercial",
193
- "url": "http://creativecommons.org/licenses/by-nc/2.0/"
194
- }
195
- ],
196
- "images": images_data,
197
- "annotations": annotations_data,
198
- "categories": categories
199
- }
200
-
201
- annotation_file = os.path.join(self.application_dir, f"application_labels_{timestamp_str}.json")
202
- with open(annotation_file, 'w') as f:
203
- json.dump(coco_output, f, indent=2)
204
-
205
- standard_annotation_file = os.path.join(self.application_dir, "application_labels.json")
206
- with open(standard_annotation_file, 'w') as f:
207
- json.dump(coco_output, f, indent=2)
208
-
209
- logging.info(f"Created COCO annotation files for application data in {self.application_dir}")
210
-
211
- try:
212
- from huggingface_hub import HfApi
213
- api = HfApi()
214
-
215
- api.upload_file(
216
- path_or_fileobj=annotation_file,
217
- path_in_repo=f"application/application_labels_{timestamp_str}.json",
218
- repo_id="slliac/isom5240-td-application-traffic-analysis",
219
- repo_type="dataset",
220
- revision="main"
221
- )
222
-
223
- api.upload_file(
224
- path_or_fileobj=standard_annotation_file,
225
- path_in_repo=f"application/application_labels.json",
226
- repo_id="slliac/isom5240-td-application-traffic-analysis",
227
- repo_type="dataset",
228
- revision="main"
229
- )
230
-
231
- logging.info(
232
- f"Uploaded COCO annotation files to Hugging Face repository in 'application/' directory")
233
- except Exception as e:
234
- logging.error(f"Error uploading COCO annotations file: {str(e)}")
235
-
236
- except Exception as e:
237
- logging.error(f"Error creating COCO annotation files: {str(e)}")
238
-
239
-
240
- # Main function to execute the traffic image analysis process.
241
- # Initializes the analyzer, loads existing data if available, runs the analysis,
242
- # and displays dataset information before and after the process.
243
-
244
-
245
- def main():
246
- analyzer = ApplicationTrafficImageAnalyzer()
247
- try:
248
- try:
249
- initial_dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", revision="main")
250
- print("\nInitial Dataset Info (from 'application' branch):")
251
- for split in initial_dataset:
252
- print(f"Number of {split} records: {len(initial_dataset[split])}")
253
- except Exception as e:
254
- print(f"\nNo existing dataset found in 'application' branch: {str(e)}")
255
- print("Will create new dataset with 'application' split in 'application' branch.")
256
-
257
- analyzer.run()
258
-
259
- try:
260
- final_dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", revision="main")
261
- print("\nFinal Dataset Info (from 'application' branch):")
262
- for split in final_dataset:
263
- print(f"Number of {split} records: {len(final_dataset[split])}")
264
- except Exception as e:
265
- print(f"\nError accessing final dataset from 'application' branch: {str(e)}")
266
-
267
- except Exception as e:
268
- logging.error(f"Main execution error: {str(e)}")
269
- finally:
270
- print("\nProgram terminated")
271
-
272
-
273
- # Entry point for the script.
274
- if __name__ == "__main__":
275
- main()
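A consumer-side sketch (not part of the deleted cronjob) of how records pushed by `update_huggingface_dataset` above could be read back, assuming the `application` split exists with the column layout shown in the file: images stored as JPEG bytes and COCO annotations as a JSON string. The dataset id and column names come from the deleted code; everything else is illustrative.

```python
import io
import json

from datasets import load_dataset
from PIL import Image

ds = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")

record = ds[0]
original = Image.open(io.BytesIO(record["original_image"]))    # raw camera frame (JPEG bytes)
annotated = Image.open(io.BytesIO(record["processed_image"]))  # frame with detection boxes drawn
coco = json.loads(record["coco_annotations"])                  # {"image_id", "annotations", "date", "timestamp"}

print(record["location_id"], record["capture_time"], record["vehicle_count"])
for ann in coco["annotations"]:
    x, y, w, h = ann["bbox"]  # COCO [x, y, w, h] in pixels
    print(f"category {ann['category_id']}: ({x:.0f}, {y:.0f}, {w:.0f}, {h:.0f})")
```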
 
cronjob/readme.md DELETED
@@ -1,5 +0,0 @@
1
- Here are the dataset cron jobs that simulate collection of the training and application usage datasets:
2
-
3
- 0 */3 * * * /Users/gordonli/Desktop/test/.venv3.12/bin/python /Users/gordonli/Desktop/5240-frontend/cronjob/train_detr_traffic_image_analyzer.py >> /Users/gordonli/Desktop/test/train.log 2>&1
4
-
5
- 0 */1 * * * /Users/gordonli/Desktop/test/.venv3.12/bin/python /Users/gordonli/Desktop/5240-frontend/cronjob/application_traffic_image_analyzer.py >> /Users/gordonli/Desktop/test/app.log 2>&1
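For illustration only, an in-process Python equivalent of the hourly application entry above; the repo itself relies on cron rather than a long-running loop, and the module and class names are taken from the deleted files.

```python
import time

from application_traffic_image_analyzer import ApplicationTrafficImageAnalyzer

# Rough stand-in for "0 */1 * * *": run the application analyzer once per hour.
if __name__ == "__main__":
    while True:
        ApplicationTrafficImageAnalyzer().run()
        time.sleep(60 * 60)
```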
 
cronjob/train_detr_traffic_image_analyzer.py DELETED
@@ -1,260 +0,0 @@
1
- # Traffic Image Analyzer for DETR Model Training
2
-
3
- # This module extends the AbstractTrafficImageAnalyzer to provide implementation for training
4
- # data collection for the DETR object detection model. It processes traffic camera images,
5
- # detects vehicles using the pretrained Facebook DETR ResNet-50 model, and organizes the data
6
- # for model training purposes.
7
-
8
- # The data collected by this analyzer is used to train custom DETR models that improve vehicle
9
- # detection accuracy, which ultimately enhances the traffic analysis component of the HKUST BNB+
10
- # platform's eco-friendly discount system.
11
-
12
- # Author: Gordon Li (20317033)
13
- # Date: March 2025
14
-
15
- from transformers import DetrImageProcessor, DetrForObjectDetection
16
- from datasets import Dataset, Features, Value, load_dataset, concatenate_datasets, DatasetDict
17
- from PIL import Image
18
- from datetime import datetime
19
- from abstract_traffic_image_analyzer import AbstractTrafficImageAnalyzer
20
- import io
21
- import json
22
- import os
23
- import logging
24
-
25
-
26
- class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
27
-
28
-
29
- # Initializes the DETR training data collector with the Facebook pretrained model.
30
-
31
- # Sets up:
32
- # - Facebook DETR ResNet-50 image processor and model
33
- # - Directory structure for storing DETR training data
34
-
35
- def __init__(self):
36
- super().__init__()
37
-
38
- self.processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
39
- self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
40
-
41
- self.fb_detr_dir = os.path.join(self.dataset_dir, "fb_detr_res_50")
42
- os.makedirs(self.fb_detr_dir, exist_ok=True)
43
-
44
- # Updates the HuggingFace dataset with new traffic data for DETR model training.
45
-
46
- # Parameters:
47
- # batch_data: Dictionary containing traffic image data and annotations
48
- # timestamp_str: Timestamp string for the current batch
49
-
50
- def update_huggingface_dataset(self, batch_data, timestamp_str):
51
- try:
52
- features = Features({
53
- 'capture_time': Value(dtype='string'),
54
- 'location_id': Value(dtype='string'),
55
- 'image_id': Value(dtype='int32'),
56
- 'original_image': Value(dtype='binary'),
57
- 'vehicle_count': Value(dtype='int32'),
58
- 'processed_image': Value(dtype='binary'),
59
- 'coco_annotations': Value(dtype='string')
60
- })
61
-
62
- # Use all data without filtering by vehicle_count
63
- new_dataset = Dataset.from_dict(batch_data, features=features)
64
- logging.info(f"New dataset has {len(new_dataset)} records")
65
-
66
- try:
67
- existing_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
68
- logging.info(f"Found existing dataset")
69
-
70
- combined_data = []
71
-
72
- if 'fbDetr50Train' in existing_dataset:
73
- combined_data.append(existing_dataset['fbDetr50Train'])
74
- logging.info(f"Existing fbDetr50Train size: {len(existing_dataset['fbDetr50Train'])}")
75
-
76
- combined_data.append(new_dataset)
77
-
78
- combined_dataset = concatenate_datasets(combined_data)
79
- logging.info(f"Combined dataset has {len(combined_dataset)} records")
80
-
81
- # Create dataset dict with only fbDetr50Train
82
- dataset_dict = DatasetDict({
83
- "fbDetr50Train": combined_dataset.shuffle(seed=42)
84
- })
85
-
86
- except Exception as e:
87
- logging.info(f"Creating new dataset with fb-detr-res-50 splits: {str(e)}")
88
- # If no existing dataset, use new dataset as the full dataset
89
- dataset_dict = DatasetDict({
90
- "fbDetr50Train": new_dataset.shuffle(seed=42)
91
- })
92
-
93
- # Push to hub
94
- dataset_dict.push_to_hub(
95
- "slliac/isom5240-td-traffic-analysis",
96
- private=True
97
- )
98
-
99
- logging.info(f"Successfully updated dataset with fb-detr-res-50 splits.")
100
- logging.info(f"fbDetr50Train split: {len(dataset_dict['fbDetr50Train'])} records")
101
-
102
- self.create_coco_annotation_files(dataset_dict, timestamp_str)
103
-
104
- except Exception as e:
105
- logging.error(f"Error updating Hugging Face dataset: {str(e)}")
106
- raise
107
-
108
- # Creates COCO annotation files for the DETR training dataset.
109
-
110
- # Parameters:
111
- # dataset_dict: Dictionary containing the dataset splits
112
- # timestamp_str: Timestamp string for the current batch
113
-
114
- def create_coco_annotation_files(self, dataset_dict, timestamp_str):
115
- try:
116
- categories = [
117
- {"id": 2, "name": "bicycle", "supercategory": "vehicle"},
118
- {"id": 3, "name": "car", "supercategory": "vehicle"},
119
- {"id": 4, "name": "motorcycle", "supercategory": "vehicle"},
120
- {"id": 5, "name": "airplane", "supercategory": "vehicle"},
121
- {"id": 6, "name": "bus", "supercategory": "vehicle"},
122
- {"id": 7, "name": "train", "supercategory": "vehicle"},
123
- {"id": 8, "name": "truck", "supercategory": "vehicle"}
124
- ]
125
-
126
- current_datetime = datetime.now()
127
- current_date = current_datetime.strftime("%Y-%m-%d")
128
- current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
129
-
130
- # Create annotations only for train split
131
- for split in ['fbDetr50Train']:
132
- images_data = []
133
- annotations_data = []
134
- annotation_id = 1
135
-
136
- for i, record in enumerate(dataset_dict[split]):
137
- image_id = record['image_id']
138
- coco_data = json.loads(record['coco_annotations'])
139
- img = Image.open(io.BytesIO(record['original_image']))
140
- width, height = img.size
141
-
142
- image_entry = {
143
- "id": image_id,
144
- "width": width,
145
- "height": height,
146
- "file_name": f"{record['location_id']}_{image_id}.jpg",
147
- "license": 1,
148
- "date_captured": record['capture_time'],
149
- "capture_date": datetime.fromisoformat(record['capture_time']).strftime('%Y-%m-%d'),
150
- "capture_timestamp": datetime.fromisoformat(record['capture_time']).strftime(
151
- '%Y-%m-%d %H:%M:%S')
152
- }
153
- images_data.append(image_entry)
154
-
155
- for ann in coco_data['annotations']:
156
- annotation_entry = {
157
- "id": annotation_id,
158
- "image_id": image_id,
159
- "category_id": ann['category_id'],
160
- "bbox": ann['bbox'],
161
- "area": ann['area'],
162
- "iscrowd": ann['iscrowd'],
163
- "segmentation": []
164
- }
165
- annotations_data.append(annotation_entry)
166
- annotation_id += 1
167
-
168
- coco_output = {
169
- "info": {
170
- "year": current_datetime.year,
171
- "version": "1.0",
172
- "description": f"Hong Kong Traffic Camera Dataset - {split} split using Facebook DETR ResNet-50",
173
- "contributor": "ISOM5240 Group 37",
174
- "url": "",
175
- "date_created": current_datetime.isoformat(),
176
- "collection_date": current_date,
177
- "collection_timestamp": current_datetime_str,
178
- "batch_timestamp": timestamp_str
179
- },
180
- "licenses": [
181
- {
182
- "id": 1,
183
- "name": "Attribution-NonCommercial",
184
- "url": "http://creativecommons.org/licenses/by-nc/2.0/"
185
- }
186
- ],
187
- "images": images_data,
188
- "annotations": annotations_data,
189
- "categories": categories
190
- }
191
-
192
- split_filename = split.replace("-", "_")
193
- annotation_file = os.path.join(self.fb_detr_dir, f"{split_filename}_labels_{timestamp_str}.json")
194
- with open(annotation_file, 'w') as f:
195
- json.dump(coco_output, f, indent=2)
196
-
197
- logging.info(f"Created COCO annotation file for {split} split: {annotation_file}")
198
-
199
- try:
200
- from huggingface_hub import HfApi
201
- api = HfApi()
202
-
203
- api.upload_file(
204
- path_or_fileobj=annotation_file,
205
- path_in_repo=f"fb_detr_res_50/{split_filename}_labels_{timestamp_str}.json",
206
- repo_id="slliac/isom5240-td-traffic-analysis",
207
- repo_type="dataset"
208
- )
209
-
210
- api.upload_file(
211
- path_or_fileobj=annotation_file,
212
- path_in_repo=f"fb_detr_res_50/{split_filename}_labels.json",
213
- repo_id="slliac/isom5240-td-traffic-analysis",
214
- repo_type="dataset"
215
- )
216
-
217
- logging.info(
218
- f"Uploaded FB DETR annotation files for {split} to Hugging Face repository")
219
- except Exception as e:
220
- logging.error(f"Error uploading COCO annotations file: {str(e)}")
221
-
222
- except Exception as e:
223
- logging.error(f"Error creating COCO annotation files: {str(e)}")
224
-
225
-
226
- # Main function to execute the DETR training data collection process.
227
- # Initializes the analyzer, loads existing data if available, runs the analysis,
228
- # and displays dataset information before and after the process.
229
-
230
-
231
- def main():
232
- analyzer = TrainDETRTrafficImageAnalyzer()
233
- try:
234
- try:
235
- initial_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
236
- print("\nInitial Dataset Info:")
237
- for split in initial_dataset:
238
- print(f"Number of {split} records: {len(initial_dataset[split])}")
239
- except Exception as e:
240
- print("\nNo existing dataset found. Will create new dataset with fb-detr-res-50 splits.")
241
-
242
- analyzer.run()
243
-
244
- try:
245
- final_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
246
- print("\nFinal Dataset Info:")
247
- for split in final_dataset:
248
- print(f"Number of {split} records: {len(final_dataset[split])}")
249
- except Exception as e:
250
- print("\nError accessing final dataset:", str(e))
251
-
252
- except Exception as e:
253
- logging.error(f"Main execution error: {str(e)}")
254
- finally:
255
- print("\nProgram terminated")
256
-
257
-
258
- # Entry point for the script.
259
- if __name__ == "__main__":
260
- main()
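A training-side sketch of how one record from the `fbDetr50Train` split collected above might be turned into DETR training inputs, assuming the stored `coco_annotations` dict (`image_id` plus `annotations`) matches the COCO-detection annotation format that `DetrImageProcessor` accepts. This is illustrative and not part of the deleted script.

```python
import io
import json

from datasets import load_dataset
from PIL import Image
from transformers import DetrImageProcessor

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
ds = load_dataset("slliac/isom5240-td-traffic-analysis", split="fbDetr50Train")

record = ds[0]
image = Image.open(io.BytesIO(record["original_image"])).convert("RGB")
coco = json.loads(record["coco_annotations"])
target = {"image_id": coco["image_id"], "annotations": coco["annotations"]}

# The processor resizes/normalizes the image and converts the COCO boxes into DETR targets.
encoding = processor(images=image, annotations=target, return_tensors="pt")
pixel_values = encoding["pixel_values"]  # model input
labels = encoding["labels"][0]           # dict with class_labels, boxes, area, iscrowd, ...
print(pixel_values.shape, labels["class_labels"])
```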
 
visualiser/hkust_bnb_visualiser.py CHANGED
@@ -6,7 +6,7 @@
6
  # to provide eco-friendly discount calculations based on traffic conditions.
7
 
8
  # Key capabilities:
9
- # - Semantic search functionality using sentence transformers
9
+ # - Text search functionality using sentence transformers
10
  # - Traffic spot integration for eco-friendly discount calculations
11
 
12
  # Author: Gordon Li (20317033)
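The comment change above renames "Semantic search" to "Text search" while keeping sentence transformers. Below is a minimal sketch of the kind of sentence-transformer lookup that comment describes; the model name and listing strings are assumptions for illustration and are not taken from the visualiser code.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model; the repo may use a different one

listings = [
    "Cozy room near HKUST with sea view",
    "Budget studio in Hang Hau, five minutes to the MTR",
    "Spacious flat in Tseung Kwan O with parking",
]
listing_embeddings = model.encode(listings, convert_to_tensor=True)

query_embedding = model.encode("quiet place close to campus", convert_to_tensor=True)
scores = util.cos_sim(query_embedding, listing_embeddings)[0]

best = scores.argmax().item()
print(f"Best match: {listings[best]} (score={scores[best].item():.3f})")
```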