Gordon Li committed
Commit ae72b3f · 1 Parent(s): ecf5538

comment reformat

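The change applies one mechanical pattern across all five files: triple-quoted string blocks that sat above def statements, plus the module headers, are rewritten as # comment blocks. A minimal sketch of the pattern, using a hypothetical helper function rather than code from this repository:

# Before: a bare triple-quoted string above the def. It is not the first
# statement inside the function body, so it was never attached as
# helper.__doc__; it was just an unused string expression.
"""
Adds two numbers.
"""
def helper(a, b):
    return a + b


# After: the same text as a comment block, which is equivalent at runtime
# and no longer looks like a misplaced docstring.
# Adds two numbers.
def helper(a, b):
    return a + b

One nuance: the block at the very top of each file (for example the """app.py ...""" header) was the first statement and therefore a real module docstring, so after this commit those modules' __doc__ is None.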
app.py CHANGED
@@ -1,26 +1,24 @@
-"""
-app.py
-
-This application provides a user interface for HKUST students to browse, search,
-and find accommodations in different neighborhoods of Hong Kong. It features an interactive map
-visualization, listing cards with pricing information, traffic-based discounts, and smart search
-functionality to match user preferences with available properties.
-
-Key features:
-- Interactive map displaying BNB listings with location markers
-- Neighborhood-based filtering of available accommodations
-- Smart search system that highlights matching terms in descriptions and reviews
-- Traffic-based discount system promoting eco-friendly housing options
-- Detailed view of property reviews with highlighted search terms
-- Responsive pagination for browsing through large sets of listings
-- Loading animations and informative UI elements for better user experience
-
-The application uses Folium for map visualization, Streamlit for the web interface
-
-Author: Gordon Li (20317033)
-Company : HKUST Sustainability
-Date: March 2025
-"""
+# app.py
+
+# This application provides a user interface for HKUST students to browse, search,
+# and find accommodations in different neighborhoods of Hong Kong. It features an interactive map
+# visualization, listing cards with pricing information, traffic-based discounts, and smart search
+# functionality to match user preferences with available properties.
+
+# Key features:
+# - Interactive map displaying BNB listings with location markers
+# - Neighborhood-based filtering of available accommodations
+# - Smart search system that highlights matching terms in descriptions and reviews
+# - Traffic-based discount system promoting eco-friendly housing options
+# - Detailed view of property reviews with highlighted search terms
+# - Responsive pagination for browsing through large sets of listings
+# - Loading animations and informative UI elements for better user experience
+
+# The application uses Folium for map visualization, Streamlit for the web interface
+
+# Author: Gordon Li (20317033)
+# Company : HKUST Sustainability
+# Date: March 2025
 
 import os
 import re
@@ -46,24 +44,24 @@ from constant.hkust_bnb_constant import (
 )
 
 
-"""
-Loads CSS styles from a file and applies them to the Streamlit application.
-Parameters:
-    css_file: Path to the CSS file to be loaded
-"""
+
+# Loads CSS styles from a file and applies them to the Streamlit application.
+# Parameters:
+#     css_file: Path to the CSS file to be loaded
+
+
 def load_css(css_file):
     with open(css_file) as f:
         st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
 
 
-"""
-Highlights search terms within text by wrapping them in a span with highlight class.
-Parameters:
-    text: The original text to process
-    search_query: The search terms to highlight within the text
-Returns:
-    Text with highlighted search terms
-"""
+# Highlights search terms within text by wrapping them in a span with highlight class.
+# Parameters:
+#     text: The original text to process
+#     search_query: The search terms to highlight within the text
+# Returns:
+#     Text with highlighted search terms
+
 def highlight_search_terms(text, search_query):
     if not search_query:
         return text
@@ -80,17 +78,15 @@ def highlight_search_terms(text, search_query):
     return highlighted_text
 
 
-"""
-Renders a loading animation using Lottie animation in HTML format.
-"""
+# Renders a loading animation using Lottie animation in HTML format.
+
 def render_lottie_loading_animation():
     components.html(LOTTIE_HTML, height=750)
 
 
-"""
-Renders a dialog containing reviews for the currently selected listing.
-Displays reviewer name, review date, and comments with search terms highlighted.
-"""
+# Renders a dialog containing reviews for the currently selected listing.
+# Displays reviewer name, review date, and comments with search terms highlighted.
+
 def render_review_dialog():
     with st.container():
         col_title = st.columns([5, 1])
@@ -122,10 +118,9 @@ def render_review_dialog():
         st.info("No reviews available for this listing.")
 
 
-"""
-Initializes the session state with default values for various application parameters.
-Sets up the visualizer and loads required resources for the application.
-"""
+# Initializes the session state with default values for various application parameters.
+# Sets up the visualizer and loads required resources for the application.
+
 def initialize_session_state():
     default_states = {
         'center_lat': None,
@@ -156,11 +151,9 @@ def initialize_session_state():
     st.session_state.loading_complete = True
 
 
-"""
-Main function that sets up the Streamlit application interface.
-Handles page configuration, sidebar setup, map rendering, listing display,
-pagination, and user interactions with the application elements.
-"""
+# Main function that sets up the Streamlit application interface.
+# Handles page configuration, sidebar setup, map rendering, listing display,
+# pagination, and user interactions with the application elements.
 def main():
     st.set_page_config(
         layout="wide",
@@ -405,10 +398,8 @@ def main():
     render_review_dialog()
 
 
-"""
-Main entry point for the application. Authenticates with Hugging Face if a token is available,
-then calls the main function to start the application.
-"""
+# Main entry point for the application. Authenticates with Hugging Face if a token is available,
+# then calls the main function to start the application.
 if __name__ == "__main__":
     token = os.environ.get("HF_TOKEN")
     if token:
cronjob/abstract_traffic_image_analyzer.py CHANGED
@@ -1,16 +1,14 @@
-"""
-Traffic Image Analysis Module for HKUST BNB+ Platform
+# Traffic Image Analysis Module for HKUST BNB+ Platform
 
-This module provides functionality for analyzing traffic camera images to detect and count vehicles.
-It downloads images from traffic cameras, processes them using computer vision models, and records
-traffic data that is used for traffic-based discount calculations in the BNB+ platform.
+# This module provides functionality for analyzing traffic camera images to detect and count vehicles.
+# It downloads images from traffic cameras, processes them using computer vision models, and records
+# traffic data that is used for traffic-based discount calculations in the BNB+ platform.
 
-The analyzer connects to a database to retrieve camera locations, downloads and processes images,
-detects vehicles, and stores the results for visualization and analysis.
+# The analyzer connects to a database to retrieve camera locations, downloads and processes images,
+# detects vehicles, and stores the results for visualization and analysis.
 
-Author: Gordon Li (20317033)
-Date: March 2025
-"""
+# Author: Gordon Li (20317033)
+# Date: March 2025
 
 
 import requests
@@ -27,16 +25,14 @@ import random
 
 
 class AbstractTrafficImageAnalyzer:
-    """
-    Initializes the traffic image analyzer with database connection, signal handlers, and directories.
+    # Initializes the traffic image analyzer with database connection, signal handlers, and directories.
 
-    Sets up:
-    - Database connection parameters
-    - Signal handlers for graceful shutdown
-    - Vehicle class identifiers for detection
-    - Directory structure for storing downloaded images
-    - Logging configuration
-    """
+    # Sets up:
+    # - Database connection parameters
+    # - Signal handlers for graceful shutdown
+    # - Vehicle class identifiers for detection
+    # - Directory structure for storing downloaded images
+    # - Logging configuration
 
     def __init__(self):
         self.connection_params = {
@@ -60,22 +56,18 @@ class AbstractTrafficImageAnalyzer:
 
         self.setup_logging()
 
-    """
-    Handles termination signals to ensure graceful shutdown.
+    # Handles termination signals to ensure graceful shutdown.
 
-    Parameters:
-    signum: Signal number
-    frame: Current stack frame
-    """
+    # Parameters:
+    #     signum: Signal number
+    #     frame: Current stack frame
 
    def signal_handler(self, signum, frame):
        print("\nShutdown signal received. Completing current task...")
        self.running = False
 
-    """
-    Sets up logging configuration for the analyzer.
-    Creates log files with timestamps and configures console output.
-    """
+    # Sets up logging configuration for the analyzer.
+    # Creates log files with timestamps and configures console output.
 
    def setup_logging(self):
        logging.basicConfig(
@@ -87,12 +79,10 @@
             ]
         )
 
-    """
-    Retrieves traffic camera locations and URLs from the database.
+    # Retrieves traffic camera locations and URLs from the database.
 
-    Returns:
-    List of tuples containing camera location key and URL
-    """
+    # Returns:
+    #     List of tuples containing camera location key and URL
 
     def get_camera_locations(self):
         try:
@@ -104,15 +94,13 @@
             logging.error(f"Error fetching camera locations: {str(e)}")
             raise
 
-    """
-    Downloads an image from a given URL.
+    # Downloads an image from a given URL.
 
-    Parameters:
-    url: URL of the traffic camera image
+    # Parameters:
+    #     url: URL of the traffic camera image
 
-    Returns:
-    PIL Image object
-    """
+    # Returns:
+    #     PIL Image object
 
     def download_image(self, url):
         try:
@@ -123,16 +111,14 @@
             logging.error(f"Error downloading image from {url}: {str(e)}")
             raise
 
-    """
-    Detects vehicles in an image using a computer vision model.
+    # Detects vehicles in an image using a computer vision model.
 
-    Parameters:
-    image: PIL Image object to analyze
-    confidence_threshold: Minimum confidence score for detections (default: 0.7)
+    # Parameters:
+    #     image: PIL Image object to analyze
+    #     confidence_threshold: Minimum confidence score for detections (default: 0.7)
 
-    Returns:
-    List of vehicle detection dictionaries with bounding boxes and scores
-    """
+    # Returns:
+    #     List of vehicle detection dictionaries with bounding boxes and scores
 
     def detect_vehicles(self, image, confidence_threshold=0.7):
         try:
@@ -178,16 +164,14 @@
             logging.error(f"Error detecting vehicles: {str(e)}")
             raise
 
-    """
-    Draws vehicle detection bounding boxes and labels on the image.
+    # Draws vehicle detection bounding boxes and labels on the image.
 
-    Parameters:
-    image: Original PIL Image
-    detections: List of vehicle detection dictionaries
+    # Parameters:
+    #     image: Original PIL Image
+    #     detections: List of vehicle detection dictionaries
 
-    Returns:
-    New PIL Image with bounding boxes and labels drawn
-    """
+    # Returns:
+    #     New PIL Image with bounding boxes and labels drawn
 
     def draw_detections(self, image, detections):
         try:
@@ -241,16 +225,14 @@
             logging.error(f"Error drawing detections: {str(e)}")
             raise
 
-    """
-    Processes all traffic cameras, detects vehicles, and prepares data for storage.
+    # Processes all traffic cameras, detects vehicles, and prepares data for storage.
 
-    This method:
-    1. Gets all camera locations
-    2. Downloads images from each camera
-    3. Detects vehicles in each image
-    4. Processes images to visualize detections
-    5. Prepares data for storage
-    """
+    # This method:
+    # 1. Gets all camera locations
+    # 2. Downloads images from each camera
+    # 3. Detects vehicles in each image
+    # 4. Processes images to visualize detections
+    # 5. Prepares data for storage
 
     def process_traffic_cameras(self):
         try:
@@ -338,45 +320,37 @@
             logging.error(f"Error in process_traffic_cameras: {str(e)}")
             raise
 
-    """
-    Updates the HuggingFace dataset with new traffic data.
-    This method must be implemented by subclasses.
+    # Updates the HuggingFace dataset with new traffic data.
+    # This method must be implemented by subclasses.
 
-    Parameters:
-    batch_data: Dictionary containing the batch data to add
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     batch_data: Dictionary containing the batch data to add
+    #     timestamp_str: Timestamp string for the current batch
 
     def update_huggingface_dataset(self, batch_data, timestamp_str):
         raise NotImplementedError("Subclasses must implement update_huggingface_dataset method")
 
-    """
-    Creates COCO annotation files for the dataset.
-    This method must be implemented by subclasses.
+    # Creates COCO annotation files for the dataset.
+    # This method must be implemented by subclasses.
 
-    Parameters:
-    dataset_dict: Dictionary containing the dataset
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     dataset_dict: Dictionary containing the dataset
+    #     timestamp_str: Timestamp string for the current batch
 
     def create_coco_annotation_files(self, dataset_dict, timestamp_str):
         raise NotImplementedError("Subclasses must implement create_coco_annotation_files method")
 
-    """
-    Updates the README file for the dataset.
-    This method must be implemented by subclasses.
+    # Updates the README file for the dataset.
+    # This method must be implemented by subclasses.
 
-    Parameters:
-    dataset_dict: Dictionary containing the dataset
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     dataset_dict: Dictionary containing the dataset
+    #     timestamp_str: Timestamp string for the current batch
 
     def update_readme(self, dataset_dict, timestamp_str):
         raise NotImplementedError("Subclasses must implement update_readme method")
 
-    """
-    Runs the traffic image analyzer, processing all cameras and updating the dataset.
-    """
+    # Runs the traffic image analyzer, processing all cameras and updating the dataset.
 
     def run(self):
         try:
cronjob/application_traffic_image_analyzer.py CHANGED
@@ -1,16 +1,14 @@
-"""
-Application Traffic Image Analyzer Module
+# Application Traffic Image Analyzer Module
 
-This module extends the AbstractTrafficImageAnalyzer to provide specific implementation for
-application-specific traffic analysis. It handles the processing of traffic camera images,
-vehicle detection using the DETR model, and updating a HuggingFace dataset with the results.
+# This module extends the AbstractTrafficImageAnalyzer to provide specific implementation for
+# application-specific traffic analysis. It handles the processing of traffic camera images,
+# vehicle detection using the DETR model, and updating a HuggingFace dataset with the results.
 
-The analyzer is used in the HKUST BNB+ platform to collect and analyze traffic data for
-determining eco-friendly discounts based on traffic conditions.
+# The analyzer is used in the HKUST BNB+ platform to collect and analyze traffic data for
+# determining eco-friendly discounts based on traffic conditions.
 
-Author: Gordon Li (20317033)
-Date: March 2025
-"""
+# Author: Gordon Li (20317033)
+# Date: March 2025
 
 from transformers import DetrImageProcessor, DetrForObjectDetection
 from datasets import Dataset, Features, Value, load_dataset, DatasetDict, concatenate_datasets
@@ -24,13 +22,12 @@ import logging
 
 
 class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
-    """
-    Initializes the application traffic analyzer with the DETR model and processor.
+    # Initializes the application traffic analyzer with the DETR model and processor.
+
+    # Sets up:
+    # - DETR image processor and model for vehicle detection
+    # - Application-specific directory for storing results
 
-    Sets up:
-    - DETR image processor and model for vehicle detection
-    - Application-specific directory for storing results
-    """
     def __init__(self):
         super().__init__()
         self.processor = DetrImageProcessor.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50",
@@ -40,13 +37,11 @@ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
         self.application_dir = os.path.join(self.dataset_dir, "application")
         os.makedirs(self.application_dir, exist_ok=True)
 
-    """
-    Updates the HuggingFace dataset with new traffic data.
+    # Updates the HuggingFace dataset with new traffic data.
 
-    Parameters:
-    batch_data: Dictionary containing batch data including capture time, location, images, and vehicle counts
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     batch_data: Dictionary containing batch data including capture time, location, images, and vehicle counts
+    #     timestamp_str: Timestamp string for the current batch
 
     def update_huggingface_dataset(self, batch_data, timestamp_str):
         try:
@@ -119,13 +114,11 @@ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
             logging.error(f"Error in update_huggingface_dataset: {str(e)}")
             raise
 
-    """
-    Creates COCO annotation files for the dataset, which are standard format for object detection.
+    # Creates COCO annotation files for the dataset, which are standard format for object detection.
 
-    Parameters:
-    dataset_dict: Dictionary containing the dataset with traffic observations
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     dataset_dict: Dictionary containing the dataset with traffic observations
+    #     timestamp_str: Timestamp string for the current batch
 
     def create_coco_annotation_files(self, dataset_dict, timestamp_str):
         try:
@@ -244,11 +237,9 @@ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
             logging.error(f"Error creating COCO annotation files: {str(e)}")
 
 
-"""
-Main function to execute the traffic image analysis process.
-Initializes the analyzer, loads existing data if available, runs the analysis,
-and displays dataset information before and after the process.
-"""
+# Main function to execute the traffic image analysis process.
+# Initializes the analyzer, loads existing data if available, runs the analysis,
+# and displays dataset information before and after the process.
 
 
 def main():
@@ -279,8 +270,6 @@ def main():
     print("\nProgram terminated")
 
 
-"""
-Entry point for the script.
-"""
+# Entry point for the script.
 if __name__ == "__main__":
     main()
cronjob/train_detr_traffic_image_analyzer.py CHANGED
@@ -1,18 +1,16 @@
-"""
-Traffic Image Analyzer for DETR Model Training
+# Traffic Image Analyzer for DETR Model Training
 
-This module extends the AbstractTrafficImageAnalyzer to provide implementation for training
-data collection for the DETR object detection model. It processes traffic camera images,
-detects vehicles using the pretrained Facebook DETR ResNet-50 model, and organizes the data
-for model training purposes.
+# This module extends the AbstractTrafficImageAnalyzer to provide implementation for training
+# data collection for the DETR object detection model. It processes traffic camera images,
+# detects vehicles using the pretrained Facebook DETR ResNet-50 model, and organizes the data
+# for model training purposes.
 
-The data collected by this analyzer is used to train custom DETR models that improve vehicle
-detection accuracy, which ultimately enhances the traffic analysis component of the HKUST BNB+
-platform's eco-friendly discount system.
+# The data collected by this analyzer is used to train custom DETR models that improve vehicle
+# detection accuracy, which ultimately enhances the traffic analysis component of the HKUST BNB+
+# platform's eco-friendly discount system.
 
-Author: Gordon Li (20317033)
-Date: March 2025
-"""
+# Author: Gordon Li (20317033)
+# Date: March 2025
 
 from transformers import DetrImageProcessor, DetrForObjectDetection
 from datasets import Dataset, Features, Value, load_dataset, concatenate_datasets, DatasetDict
@@ -28,13 +26,12 @@ import logging
 class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
 
 
-    """
-    Initializes the DETR training data collector with the Facebook pretrained model.
+    # Initializes the DETR training data collector with the Facebook pretrained model.
+
+    # Sets up:
+    # - Facebook DETR ResNet-50 image processor and model
+    # - Directory structure for storing DETR training data
 
-    Sets up:
-    - Facebook DETR ResNet-50 image processor and model
-    - Directory structure for storing DETR training data
-    """
     def __init__(self):
         super().__init__()
 
@@ -44,13 +41,11 @@ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
         self.fb_detr_dir = os.path.join(self.dataset_dir, "fb_detr_res_50")
         os.makedirs(self.fb_detr_dir, exist_ok=True)
 
-    """
-    Updates the HuggingFace dataset with new traffic data for DETR model training.
+    # Updates the HuggingFace dataset with new traffic data for DETR model training.
 
-    Parameters:
-    batch_data: Dictionary containing traffic image data and annotations
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     batch_data: Dictionary containing traffic image data and annotations
+    #     timestamp_str: Timestamp string for the current batch
 
     def update_huggingface_dataset(self, batch_data, timestamp_str):
         try:
@@ -110,13 +105,11 @@ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
             logging.error(f"Error updating Hugging Face dataset: {str(e)}")
             raise
 
-    """
-    Creates COCO annotation files for the DETR training dataset.
+    # Creates COCO annotation files for the DETR training dataset.
 
-    Parameters:
-    dataset_dict: Dictionary containing the dataset splits
-    timestamp_str: Timestamp string for the current batch
-    """
+    # Parameters:
+    #     dataset_dict: Dictionary containing the dataset splits
+    #     timestamp_str: Timestamp string for the current batch
 
     def create_coco_annotation_files(self, dataset_dict, timestamp_str):
         try:
@@ -230,11 +223,9 @@ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
             logging.error(f"Error creating COCO annotation files: {str(e)}")
 
 
-"""
-Main function to execute the DETR training data collection process.
-Initializes the analyzer, loads existing data if available, runs the analysis,
-and displays dataset information before and after the process.
-"""
+# Main function to execute the DETR training data collection process.
+# Initializes the analyzer, loads existing data if available, runs the analysis,
+# and displays dataset information before and after the process.
 
 
 def main():
@@ -264,8 +255,6 @@ def main():
     print("\nProgram terminated")
 
 
-"""
-Entry point for the script.
-"""
+# Entry point for the script.
 if __name__ == "__main__":
     main()
visualiser/hkust_bnb_visualiser.py CHANGED
@@ -1,18 +1,16 @@
-"""
-hkust_bnb_visualiser.py
+# hkust_bnb_visualiser.py
 
-This module provides the main visualization for the HKUST BNB+ platform.
-It handles database connections, data retrieval, search relevance calculation, and map visualization
-for BNB listings across different neighborhoods in Hong Kong. The class integrates with traffic data
-to provide eco-friendly discount calculations based on traffic conditions.
+# This module provides the main visualization for the HKUST BNB+ platform.
+# It handles database connections, data retrieval, search relevance calculation, and map visualization
+# for BNB listings across different neighborhoods in Hong Kong. The class integrates with traffic data
+# to provide eco-friendly discount calculations based on traffic conditions.
 
-Key capabilities:
-- Semantic search functionality using sentence transformers
-- Traffic spot integration for eco-friendly discount calculations
+# Key capabilities:
+# - Semantic search functionality using sentence transformers
+# - Traffic spot integration for eco-friendly discount calculations
 
-Author: Gordon Li (20317033)
-Date: March 2025
-"""
+# Author: Gordon Li (20317033)
+# Date: March 2025
 
 import oracledb
 import pandas as pd
@@ -36,16 +34,12 @@ from constant.hkust_bnb_constant import (
 )
 
 class HKUSTBNBVisualiser:
-    """
-    Main class for BNB data visualization and management.
-    Handles database connections, data retrieval, and rendering of interactive maps.
-    """
+    # Main class for BNB data visualization and management.
+    # Handles database connections, data retrieval, and rendering of interactive maps.
 
-    """
-    Initializes the BNB visualizer with database connection, traffic spot manager, and NLP model.
-    Sets up connection pool, loads traffic data, initializes sentence transformer model,
-    and prepares neighborhood data with caching structures.
-    """
+    # Initializes the BNB visualizer with database connection, traffic spot manager, and NLP model.
+    # Sets up connection pool, loads traffic data, initializes sentence transformer model,
+    # and prepares neighborhood data with caching structures.
 
     def __init__(self):
         self.connection_params = {
@@ -81,17 +75,15 @@
         self.cached_listings = {}
         self.cached_embeddings = {}
 
-    """
-    Finds the nearest traffic spot to a given BNB listing location.
+    # Finds the nearest traffic spot to a given BNB listing location.
 
-    Parameters:
-    airbnb_lat: The latitude of the BNB listing
-    airbnb_lng: The longitude of the BNB listing
-    max_distance_km: Maximum distance in kilometers to consider a traffic spot (default: 0.7)
+    # Parameters:
+    #     airbnb_lat: The latitude of the BNB listing
+    #     airbnb_lng: The longitude of the BNB listing
+    #     max_distance_km: Maximum distance in kilometers to consider a traffic spot (default: 0.7)
 
-    Returns:
-    Tuple containing (nearest_traffic_spot, distance_in_km) or (None, None) if no spot is found
-    """
+    # Returns:
+    #     Tuple containing (nearest_traffic_spot, distance_in_km) or (None, None) if no spot is found
 
     def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
         nearest_spot = None
@@ -111,12 +103,10 @@
         else:
             return None, None
 
-    """
-    Retrieves all available neighborhoods from the database.
+    # Retrieves all available neighborhoods from the database.
 
-    Returns:
-    List of neighborhood names as strings
-    """
+    # Returns:
+    #     List of neighborhood names as strings
 
     def get_all_neighborhoods(self):
         connection = self.pool.acquire()
@@ -133,16 +123,14 @@
         finally:
             self.pool.release(connection)
 
-    """
-    Retrieves BNB listings for a specific neighborhood with caching.
+    # Retrieves BNB listings for a specific neighborhood with caching.
 
-    Parameters:
-    neighborhood: The neighborhood name to retrieve listings for
-    limit: Maximum number of listings to retrieve (default: 10)
+    # Parameters:
+    #     neighborhood: The neighborhood name to retrieve listings for
+    #     limit: Maximum number of listings to retrieve (default: 10)
 
-    Returns:
-    List of listing data rows from the database
-    """
+    # Returns:
+    #     List of listing data rows from the database
 
     def get_neighborhood_listings(self, neighborhood, limit=10):
         if limit not in [10, 20, 30, 40, 50]:
@@ -174,15 +162,13 @@
         finally:
             self.pool.release(connection)
 
-    """
-    Retrieves reviews for a specific listing ID.
+    # Retrieves reviews for a specific listing ID.
 
-    Parameters:
-    listing_id: The ID of the listing to get reviews for
+    # Parameters:
+    #     listing_id: The ID of the listing to get reviews for
 
-    Returns:
-    List of tuples containing (review_date, reviewer_name, comments)
-    """
+    # Returns:
+    #     List of tuples containing (review_date, reviewer_name, comments)
 
     def get_listing_reviews(self, listing_id):
         connection = self.pool.acquire()
@@ -211,15 +197,13 @@
         finally:
             self.pool.release(connection)
 
-    """
-    Retrieves review content for search functionality.
+    # Retrieves review content for search functionality.
 
-    Parameters:
-    listing_id: The ID of the listing to get reviews for
+    # Parameters:
+    #     listing_id: The ID of the listing to get reviews for
 
-    Returns:
-    List of review comment strings for semantic search
-    """
+    # Returns:
+    #     List of review comment strings for semantic search
 
     def get_listing_reviews_for_search(self, listing_id):
         connection = self.pool.acquire()
@@ -246,16 +230,14 @@
         finally:
             self.pool.release(connection)
 
-    """
-    Computes cosine similarity between two embeddings.
+    # Computes cosine similarity between two embeddings.
 
-    Parameters:
-    query_embedding: Embedding tensor for the search query
-    target_embedding: Embedding tensor for the target text
+    # Parameters:
+    #     query_embedding: Embedding tensor for the search query
+    #     target_embedding: Embedding tensor for the target text
 
-    Returns:
-    Float value representing similarity (0.0-1.0)
-    """
+    # Returns:
+    #     Float value representing similarity (0.0-1.0)
 
     def compute_similarity(self, query_embedding, target_embedding):
         if query_embedding is None or target_embedding is None:
@@ -267,16 +249,14 @@
             print(f"Error computing similarity: {str(e)}")
             return 0.0
 
-    """
-    Computes relevance scores for listings based on search query.
+    # Computes relevance scores for listings based on search query.
 
-    Parameters:
-    df: DataFrame containing listing data
-    search_query: User's search query string
+    # Parameters:
+    #     df: DataFrame containing listing data
+    #     search_query: User's search query string
 
-    Returns:
-    List of relevance scores for each listing in the DataFrame
-    """
+    # Returns:
+    #     List of relevance scores for each listing in the DataFrame
 
     def compute_search_scores(self, df, search_query):
         if not search_query or self.model is None:
@@ -317,16 +297,14 @@
             print(f"Error in search scoring: {str(e)}")
             return [0.0] * len(df)
 
-    """
-    Sorts a DataFrame of listings by their relevance to a search query.
+    # Sorts a DataFrame of listings by their relevance to a search query.
 
-    Parameters:
-    df: DataFrame containing listing data
-    search_query: User's search query string
+    # Parameters:
+    #     df: DataFrame containing listing data
+    #     search_query: User's search query string
 
-    Returns:
-    DataFrame sorted by relevance to the search query
-    """
+    # Returns:
+    #     DataFrame sorted by relevance to the search query
 
     def sort_by_relevance(self, df, search_query):
         if not search_query:
@@ -336,23 +314,21 @@
         df['relevance_percentage'] = df['relevance_score'] * 100
         return df.sort_values('relevance_score', ascending=False)
 
-    """
-    Creates an interactive map and DataFrame for display in the UI.
-
-    Parameters:
-    neighborhood: The neighborhood to display listings for (default: "Sha Tin")
-    show_traffic: Whether to show traffic spots on the map (default: True)
-    center_lat: Latitude to center the map on (default: None, will use mean of listings)
-    center_lng: Longitude to center the map on (default: None, will use mean of listings)
-    selected_id: ID of the currently selected listing (default: None)
-    search_query: User's search query string (default: None)
-    current_page: Current page number for pagination (default: 1)
-    items_per_page: Number of items to show per page (default: 3)
-    listings_limit: Maximum number of listings to retrieve (default: 10)
-
-    Returns:
-    Tuple containing (folium_map, listings_dataframe)
-    """
+    # Creates an interactive map and DataFrame for display in the UI.
+
+    # Parameters:
+    #     neighborhood: The neighborhood to display listings for (default: "Sha Tin")
+    #     show_traffic: Whether to show traffic spots on the map (default: True)
+    #     center_lat: Latitude to center the map on (default: None, will use mean of listings)
+    #     center_lng: Longitude to center the map on (default: None, will use mean of listings)
+    #     selected_id: ID of the currently selected listing (default: None)
+    #     search_query: User's search query string (default: None)
+    #     current_page: Current page number for pagination (default: 1)
+    #     items_per_page: Number of items to show per page (default: 3)
+    #     listings_limit: Maximum number of listings to retrieve (default: 10)
+
+    # Returns:
+    #     Tuple containing (folium_map, listings_dataframe)
 
     def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
                             selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
visualiser/td_traffic_spot_visualiser.py CHANGED
@@ -1,17 +1,15 @@
-"""
-td_traffic_spot_visualiser.py
+# td_traffic_spot_visualiser.py
 
-This module handles traffic data integration for the BNB+ platform, providing traffic-based
-discount calculations and map visualization of traffic spots. It includes classes for
-individual traffic spots and a manager to handle collections of spots.
+# This module handles traffic data integration for the BNB+ platform, providing traffic-based
+# discount calculations and map visualization of traffic spots. It includes classes for
+# individual traffic spots and a manager to handle collections of spots.
 
-The module integrates with a dataset of traffic observations to determine traffic conditions
-and calculate eco-friendly discounts for BNB listings based on local traffic volume.
+# The module integrates with a dataset of traffic observations to determine traffic conditions
+# and calculate eco-friendly discounts for BNB listings based on local traffic volume.
 
-Author: Gordon Li (20317033)
-Date: March 2025
+# Author: Gordon Li (20317033)
+# Date: March 2025
 
-"""
 import folium
 import oracledb
 import logging
@@ -31,15 +29,13 @@ from constant.hkust_bnb_constant import (
 
 
 class TDTrafficSpot:
-    """
-    Initializes a traffic spot with location and historical traffic data.
+    # Initializes a traffic spot with location and historical traffic data.
 
-    Parameters:
-    key: Unique identifier for the traffic spot
-    latitude: Geographic latitude of the traffic spot
-    longitude: Geographic longitude of the traffic spot
-    dataset_rows: List of dictionaries containing historical traffic observations (default: None)
-    """
+    # Parameters:
+    #     key: Unique identifier for the traffic spot
+    #     latitude: Geographic latitude of the traffic spot
+    #     longitude: Geographic longitude of the traffic spot
+    #     dataset_rows: List of dictionaries containing historical traffic observations (default: None)
 
     def __init__(self, key, latitude, longitude, dataset_rows=None):
         self.key = key
@@ -49,25 +45,21 @@
         self.avg_vehicle_count = self.calculate_avg_vehicle_count()
         self.recent_display_rows = self.get_recent_display_rows()
 
-    """
-    Checks if the traffic spot has valid geographic coordinates.
+    # Checks if the traffic spot has valid geographic coordinates.
 
-    Returns:
-    Boolean indicating whether latitude and longitude are valid
-    """
+    # Returns:
+    #     Boolean indicating whether latitude and longitude are valid
 
     def is_valid(self):
         return self.latitude is not None and self.longitude is not None
 
-    """
-    Gets the most recent traffic observations for display purposes.
+    # Gets the most recent traffic observations for display purposes.
 
-    Parameters:
-    max_display: Maximum number of recent records to return (default: 2)
+    # Parameters:
+    #     max_display: Maximum number of recent records to return (default: 2)
 
-    Returns:
-    List of the most recent traffic observation records
-    """
+    # Returns:
+    #     List of the most recent traffic observation records
 
     def get_recent_display_rows(self, max_display=2):
         if not self.dataset_rows:
@@ -76,12 +68,10 @@
         sorted_rows = sorted(self.dataset_rows, key=lambda x: x['capture_time'], reverse=True)
         return sorted_rows[:max_display]
 
-    """
-    Calculates the average vehicle count based on historical traffic observations.
+    # Calculates the average vehicle count based on historical traffic observations.
 
-    Returns:
-    Float representing the average number of vehicles observed
-    """
+    # Returns:
+    #     Float representing the average number of vehicles observed
 
     def calculate_avg_vehicle_count(self):
         if not self.dataset_rows:
@@ -94,12 +84,10 @@
 
         return np.mean(vehicle_counts)
 
-    """
-    Determines the discount rate based on average traffic volume.
+    # Determines the discount rate based on average traffic volume.
 
-    Returns:
-    Float representing the discount rate (0.0 to 0.20)
-    """
+    # Returns:
+    #     Float representing the discount rate (0.0 to 0.20)
 
     def get_discount_rate(self):
         if self.avg_vehicle_count < 2:
@@ -109,12 +97,10 @@
         else:
             return 0.0
 
-    """
-    Generates a human-readable description of the traffic-based discount.
+    # Generates a human-readable description of the traffic-based discount.
 
-    Returns:
-    String describing the discount, if any
-    """
+    # Returns:
+    #     String describing the discount, if any
 
     def get_discount_info(self):
         discount_rate = self.get_discount_rate()
@@ -124,12 +110,10 @@
 
         return f"{int(discount_rate * 100)}% discount! Low traffic area"
 
-    """
-    Creates HTML content for the traffic spot's popup on the map.
+    # Creates HTML content for the traffic spot's popup on the map.
 
-    Returns:
-    HTML string for the Folium popup
-    """
+    # Returns:
+    #     HTML string for the Folium popup
 
     def create_popup_content(self):
         discount_info = self.get_discount_info()
@@ -177,12 +161,10 @@
         html += "</div>"
         return html
 
-    """
-    Adds the traffic spot to a Folium map with appropriate styling.
+    # Adds the traffic spot to a Folium map with appropriate styling.
 
-    Parameters:
-    folium_map: Folium map object to add the marker to
-    """
+    # Parameters:
+    #     folium_map: Folium map object to add the marker to
 
     def add_to_map(self, folium_map):
         if self.is_valid():
@@ -202,16 +184,12 @@
 
 
 class TrafficSpotManager:
-    """
-    Manages a collection of traffic spots, handling data loading and map integration.
-    """
+    # Manages a collection of traffic spots, handling data loading and map integration.
 
-    """
-    Initializes the manager with database connection parameters and loads initial traffic spots.
+    # Initializes the manager with database connection parameters and loads initial traffic spots.
 
-    Parameters:
-    connection_params: Dictionary containing Oracle database connection parameters
-    """
+    # Parameters:
+    #     connection_params: Dictionary containing Oracle database connection parameters
 
     def __init__(self, connection_params):
         self.connection_params = connection_params
@@ -219,12 +197,10 @@
         self.spot_dict = {}
         self.load_limited_traffic_spots()
 
-    """
-    Loads a limited number of traffic spots for initial display.
+    # Loads a limited number of traffic spots for initial display.
 
-    Parameters:
-    limit: Maximum number of traffic spots to load initially (default: 10)
-    """
+    # Parameters:
+    #     limit: Maximum number of traffic spots to load initially (default: 10)
 
     def load_limited_traffic_spots(self, limit=10):
         try:
@@ -286,12 +262,10 @@
             self.traffic_spots = []
             self.spot_dict = {}
 
-    """
-    Loads specific traffic spots by their keys when needed.
+    # Loads specific traffic spots by their keys when needed.
 
-    Parameters:
-    keys: List of traffic spot keys to load
-    """
+    # Parameters:
+    #     keys: List of traffic spot keys to load
 
     def load_specific_traffic_spots(self, keys):
         needed_keys = [key for key in keys if key not in self.spot_dict]
@@ -341,13 +315,11 @@
         except Exception as e:
             logging.error(f"Error loading specific traffic spots: {str(e)}")
 
-    """
-    Adds traffic spots to a Folium map.
+    # Adds traffic spots to a Folium map.
 
-    Parameters:
-    folium_map: Folium map object to add markers to
-    spot_keys: Optional list of specific spot keys to add (default: None, adds all spots)
-    """
+    # Parameters:
+    #     folium_map: Folium map object to add markers to
+    #     spot_keys: Optional list of specific spot keys to add (default: None, adds all spots)
 
     def add_spots_to_map(self, folium_map, spot_keys=None):
         if spot_keys is None:
@@ -358,15 +330,13 @@
             if key in self.spot_dict:
                 self.spot_dict[key].add_to_map(folium_map)
 
-    """
-    Retrieves a traffic spot by its key, loading it if necessary.
+    # Retrieves a traffic spot by its key, loading it if necessary.
 
-    Parameters:
-    key: The unique identifier of the traffic spot
+    # Parameters:
+    #     key: The unique identifier of the traffic spot
 
-    Returns:
-    TDTrafficSpot object or None if not found
-    """
+    # Returns:
+    #     TDTrafficSpot object or None if not found
 
     def get_spot_by_key(self, key):
         if key in self.spot_dict: