Correct a lot of bugs and allow an automatic resize value
- modules/dataset_loader.py +500 -0
- modules/display.py +1 -1
- modules/eval.py +91 -10
- modules/streamlit_utils.py +41 -3
- modules/toWizard.py +2 -2
- modules/toXML.py +20 -12
- modules/train.py +28 -21
- modules/utils.py +56 -473
modules/dataset_loader.py
ADDED
@@ -0,0 +1,500 @@
from torchvision.models.detection import keypointrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor
from torchvision.models.detection import KeypointRCNN_ResNet50_FPN_Weights
import random
import torch
from torch.utils.data import Dataset
import torchvision.transforms.functional as F
import numpy as np
from torch.utils.data.dataloader import default_collate
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset, ConcatDataset
import streamlit as st
from modules.utils import object_dict, arrow_dict, resize_boxes, resize_keypoints


class RandomCrop:
    def __init__(self, new_size=(1333, 800), crop_fraction=0.5, min_objects=4):
        self.crop_fraction = crop_fraction
        self.min_objects = min_objects
        self.new_size = new_size

    def __call__(self, image, target):
        new_w1, new_h1 = self.new_size
        w, h = image.size
        new_w = int(w * self.crop_fraction)
        new_h = int(new_w * new_h1 / new_w1)

        i = 0
        for i in range(4):
            if new_h >= h:
                i += 0.05
                new_w = int(w * (self.crop_fraction - i))
                new_h = int(new_w * new_h1 / new_w1)
            if new_h < h:
                continue

        if new_h >= h:
            return image, target

        boxes = target["boxes"]
        if 'keypoints' in target:
            keypoints = target["keypoints"]
        else:
            keypoints = []
            for i in range(len(boxes)):
                keypoints.append(torch.zeros((2, 3)))

        # Attempt to find a suitable crop region
        success = False
        for _ in range(100):  # Max 100 attempts to find a valid crop
            top = random.randint(0, h - new_h)
            left = random.randint(0, w - new_w)
            crop_region = [left, top, left + new_w, top + new_h]

            # Check how many objects are fully contained in this region
            contained_boxes = []
            contained_keypoints = []
            for box, kp in zip(boxes, keypoints):
                if box[0] >= crop_region[0] and box[1] >= crop_region[1] and box[2] <= crop_region[2] and box[3] <= crop_region[3]:
                    # Adjust box and keypoints coordinates
                    new_box = box - torch.tensor([crop_region[0], crop_region[1], crop_region[0], crop_region[1]])
                    new_kp = kp - torch.tensor([crop_region[0], crop_region[1], 0])
                    contained_boxes.append(new_box)
                    contained_keypoints.append(new_kp)

            if len(contained_boxes) >= self.min_objects:
                success = True
                break

        if success:
            # Perform the actual crop
            image = F.crop(image, top, left, new_h, new_w)
            target["boxes"] = torch.stack(contained_boxes) if contained_boxes else torch.zeros((0, 4))
            if 'keypoints' in target:
                target["keypoints"] = torch.stack(contained_keypoints) if contained_keypoints else torch.zeros((0, 2, 4))

        return image, target


class RandomFlip:
    def __init__(self, h_flip_prob=0.5, v_flip_prob=0.5):
        """
        Initializes the RandomFlip with probabilities for flipping.

        Parameters:
        - h_flip_prob (float): Probability of applying a horizontal flip to the image.
        - v_flip_prob (float): Probability of applying a vertical flip to the image.
        """
        self.h_flip_prob = h_flip_prob
        self.v_flip_prob = v_flip_prob

    def __call__(self, image, target):
        """
        Applies random horizontal and/or vertical flip to the image and updates target data accordingly.

        Parameters:
        - image (PIL Image): The image to be flipped.
        - target (dict): The target dictionary containing 'boxes' and 'keypoints'.

        Returns:
        - PIL Image, dict: The flipped image and its updated target dictionary.
        """
        if random.random() < self.h_flip_prob:
            image = F.hflip(image)
            w, _ = image.size  # Get the new width of the image after flip for bounding box adjustment
            # Adjust bounding boxes for horizontal flip
            for i, box in enumerate(target['boxes']):
                xmin, ymin, xmax, ymax = box
                target['boxes'][i] = torch.tensor([w - xmax, ymin, w - xmin, ymax], dtype=torch.float32)

            # Adjust keypoints for horizontal flip
            if 'keypoints' in target:
                new_keypoints = []
                for keypoints_for_object in target['keypoints']:
                    flipped_keypoints_for_object = []
                    for kp in keypoints_for_object:
                        x, y = kp[:2]
                        new_x = w - x
                        flipped_keypoints_for_object.append(torch.tensor([new_x, y] + list(kp[2:])))
                    new_keypoints.append(torch.stack(flipped_keypoints_for_object))
                target['keypoints'] = torch.stack(new_keypoints)

        if random.random() < self.v_flip_prob:
            image = F.vflip(image)
            _, h = image.size  # Get the new height of the image after flip for bounding box adjustment
            # Adjust bounding boxes for vertical flip
            for i, box in enumerate(target['boxes']):
                xmin, ymin, xmax, ymax = box
                target['boxes'][i] = torch.tensor([xmin, h - ymax, xmax, h - ymin], dtype=torch.float32)

            # Adjust keypoints for vertical flip
            if 'keypoints' in target:
                new_keypoints = []
                for keypoints_for_object in target['keypoints']:
                    flipped_keypoints_for_object = []
                    for kp in keypoints_for_object:
                        x, y = kp[:2]
                        new_y = h - y
                        flipped_keypoints_for_object.append(torch.tensor([x, new_y] + list(kp[2:])))
                    new_keypoints.append(torch.stack(flipped_keypoints_for_object))
                target['keypoints'] = torch.stack(new_keypoints)

        return image, target


class RandomRotate:
    def __init__(self, max_rotate_deg=20, rotate_proba=0.3):
        """
        Initializes the RandomRotate with a maximum rotation angle and probability of rotating.

        Parameters:
        - max_rotate_deg (int): Maximum degree to rotate the image.
        - rotate_proba (float): Probability of applying rotation to the image.
        """
        self.max_rotate_deg = max_rotate_deg
        self.rotate_proba = rotate_proba

    def __call__(self, image, target):
        """
        Randomly rotates the image and updates the target data accordingly.

        Parameters:
        - image (PIL Image): The image to be rotated.
        - target (dict): The target dictionary containing 'boxes', 'labels', and 'keypoints'.

        Returns:
        - PIL Image, dict: The rotated image and its updated target dictionary.
        """
        if random.random() < self.rotate_proba:
            angle = random.uniform(-self.max_rotate_deg, self.max_rotate_deg)
            image = F.rotate(image, angle, expand=False, fill=200)

            # Rotate bounding boxes
            w, h = image.size
            cx, cy = w / 2, h / 2
            boxes = target["boxes"]
            new_boxes = []
            for box in boxes:
                new_box = self.rotate_box(box, angle, cx, cy)
                new_boxes.append(new_box)
            target["boxes"] = torch.stack(new_boxes)

            # Rotate keypoints
            if 'keypoints' in target:
                new_keypoints = []
                for keypoints in target["keypoints"]:
                    new_kp = self.rotate_keypoints(keypoints, angle, cx, cy)
                    new_keypoints.append(new_kp)
                target["keypoints"] = torch.stack(new_keypoints)

        return image, target

    def rotate_box(self, box, angle, cx, cy):
        """
        Rotates a bounding box by a given angle around the center of the image.
        """
        x1, y1, x2, y2 = box
        corners = torch.tensor([
            [x1, y1],
            [x2, y1],
            [x2, y2],
            [x1, y2]
        ])
        corners = torch.cat((corners, torch.ones(corners.shape[0], 1)), dim=1)
        M = cv2.getRotationMatrix2D((cx, cy), angle, 1)
        corners = torch.matmul(torch.tensor(M, dtype=torch.float32), corners.T).T
        x_ = corners[:, 0]
        y_ = corners[:, 1]
        x_min, x_max = torch.min(x_), torch.max(x_)
        y_min, y_max = torch.min(y_), torch.max(y_)
        return torch.tensor([x_min, y_min, x_max, y_max], dtype=torch.float32)

    def rotate_keypoints(self, keypoints, angle, cx, cy):
        """
        Rotates keypoints by a given angle around the center of the image.
        """
        new_keypoints = []
        for kp in keypoints:
            x, y, v = kp
            point = torch.tensor([x, y, 1])
            M = cv2.getRotationMatrix2D((cx, cy), angle, 1)
            new_point = torch.matmul(torch.tensor(M, dtype=torch.float32), point)
            new_keypoints.append(torch.tensor([new_point[0], new_point[1], v], dtype=torch.float32))
        return torch.stack(new_keypoints)


def rotate_90_box(box, angle, w, h):
    x1, y1, x2, y2 = box
    if angle == 90:
        return torch.tensor([y1, h - x2, y2, h - x1])
    elif angle == 270 or angle == -90:
        return torch.tensor([w - y2, x1, w - y1, x2])
    else:
        print("angle not supported")


def rotate_90_keypoints(kp, angle, w, h):
    # Extract coordinates and visibility from each keypoint tensor
    x1, y1, v1 = kp[0][0], kp[0][1], kp[0][2]
    x2, y2, v2 = kp[1][0], kp[1][1], kp[1][2]
    # Swap x and y coordinates for each keypoint
    if angle == 90:
        new = [[y1, h - x1, v1], [y2, h - x2, v2]]
    elif angle == 270 or angle == -90:
        new = [[w - y1, x1, v1], [w - y2, x2, v2]]

    return torch.tensor(new, dtype=torch.float32)


def rotate_vertical(image, target):
    # Rotate the image and target if the image is vertical
    new_boxes = []
    angle = random.choice([-90, 90])
    image = F.rotate(image, angle, expand=True, fill=200)
    for box in target["boxes"]:
        new_box = rotate_90_box(box, angle, image.size[0], image.size[1])
        new_boxes.append(new_box)
    target["boxes"] = torch.stack(new_boxes)

    if 'keypoints' in target:
        new_kp = []
        for kp in target['keypoints']:
            new_key = rotate_90_keypoints(kp, angle, image.size[0], image.size[1])
            new_kp.append(new_key)
        target['keypoints'] = torch.stack(new_kp)
    return image, target


import torchvision.transforms.functional as F
import torch

def resize_and_pad(image, target, new_size=(1333, 800)):
    original_size = image.size
    # Calculate scale to fit the new size while maintaining aspect ratio
    scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
    new_scaled_size = (int(original_size[0] * scale), int(original_size[1] * scale))

    # Resize image to new scaled size
    image = F.resize(image, (new_scaled_size[1], new_scaled_size[0]))

    # Calculate padding to center the image
    pad_left = (new_size[0] - new_scaled_size[0]) // 2
    pad_top = (new_size[1] - new_scaled_size[1]) // 2
    pad_right = new_size[0] - new_scaled_size[0] - pad_left
    pad_bottom = new_size[1] - new_scaled_size[1] - pad_top

    # Pad the resized image to make it exactly the desired size
    image = F.pad(image, (pad_left, pad_top, pad_right, pad_bottom), fill=0, padding_mode='constant')

    # Adjust bounding boxes
    target['boxes'] = resize_boxes(target['boxes'], original_size, new_scaled_size)
    target['boxes'][:, 0::2] += pad_left
    target['boxes'][:, 1::2] += pad_top

    # Adjust keypoints if they exist in the target
    if 'keypoints' in target:
        for i in range(len(target['keypoints'])):
            target['keypoints'][i] = resize_keypoints(target['keypoints'][i], original_size, new_scaled_size)
            target['keypoints'][i][:, 0] += pad_left
            target['keypoints'][i][:, 1] += pad_top

    return image, target


class BPMN_Dataset(Dataset):
    def __init__(self, annotations, transform=None, crop_transform=None, crop_prob=0.3, rotate_90_proba=0.2,
                 flip_transform=None, rotate_transform=None, new_size=(1333, 1333), keep_ratio=0.1, resize=True, model_type='object'):
        self.annotations = annotations
        print(f"Loaded {len(self.annotations)} annotations.")
        self.transform = transform
        self.crop_transform = crop_transform
        self.crop_prob = crop_prob
        self.flip_transform = flip_transform
        self.rotate_transform = rotate_transform
        self.resize = resize
        self.new_size = new_size
        self.keep_ratio = keep_ratio
        self.model_type = model_type
        if model_type == 'object':
            self.dict = object_dict
        elif model_type == 'arrow':
            self.dict = arrow_dict
        self.rotate_90_proba = rotate_90_proba

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotation = self.annotations[idx]
        image = annotation.img.convert("RGB")
        boxes = torch.tensor(np.array(annotation.boxes_ltrb), dtype=torch.float32)
        labels_names = [ann for ann in annotation.categories]

        # Only keep the labels, boxes and keypoints that are in the class_dict
        kept_indices = [i for i, ann in enumerate(annotation.categories) if ann in self.dict.values()]
        boxes = boxes[kept_indices]
        labels_names = [ann for i, ann in enumerate(labels_names) if i in kept_indices]
        # Replace any subprocess by task
        labels_names = ['task' if ann == 'subProcess' else ann for ann in labels_names]

        labels_id = torch.tensor([(list(self.dict.values()).index(ann)) for ann in labels_names], dtype=torch.int64)

        # Initialize keypoints tensor
        max_keypoints = 2
        keypoints = torch.zeros((len(labels_id), max_keypoints, 3), dtype=torch.float32)

        ii = 0
        for i, ann in enumerate(annotation.annotations):
            # Only keep the keypoints that are in the kept indices
            if i not in kept_indices:
                continue
            if ann.category in ["sequenceFlow", "messageFlow", "dataAssociation"]:
                # Fill the keypoints tensor for this annotation, mark as visible (1)
                kp = np.array(ann.keypoints, dtype=np.float32).reshape(-1, 3)
                kp = kp[:, :2]
                visible = np.ones((kp.shape[0], 1), dtype=np.float32)
                kp = np.hstack([kp, visible])
                keypoints[ii, :kp.shape[0], :] = torch.tensor(kp, dtype=torch.float32)
            ii += 1

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        if self.model_type == 'object':
            target = {
                "boxes": boxes,
                "labels": labels_id,
                #"area": area,
            }
        elif self.model_type == 'arrow':
            target = {
                "boxes": boxes,
                "labels": labels_id,
                #"area": area,
                "keypoints": keypoints,
            }

        # Randomly apply flip transform
        if self.flip_transform:
            image, target = self.flip_transform(image, target)

        # Randomly apply rotate transform
        if self.rotate_transform:
            image, target = self.rotate_transform(image, target)

        # Randomly apply the custom cropping transform
        if self.crop_transform and random.random() < self.crop_prob:
            image, target = self.crop_transform(image, target)

        # Rotate vertical image
        if random.random() < self.rotate_90_proba:
            image, target = rotate_vertical(image, target)

        if self.resize:
            if random.random() < self.keep_ratio:
                # Center and pad the image while keeping the aspect ratio
                image, target = resize_and_pad(image, target, self.new_size)
            else:
                target['boxes'] = resize_boxes(target['boxes'], (image.size[0], image.size[1]), self.new_size)
                if 'area' in target:
                    target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
                if 'keypoints' in target:
                    for i in range(len(target['keypoints'])):
                        target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0], image.size[1]), self.new_size)
                image = F.resize(image, (self.new_size[1], self.new_size[0]))

        return self.transform(image), target


def collate_fn(batch):
    """
    Custom collation function for DataLoader that handles batches of images and targets.

    This function ensures that images are properly batched together using PyTorch's default collation,
    while keeping the targets (such as bounding boxes and labels) in a list of dictionaries,
    as each image might have a different number of objects detected.

    Parameters:
    - batch (list): A list of tuples, where each tuple contains an image and its corresponding target dictionary.

    Returns:
    - Tuple containing:
      - Tensor: Batched images.
      - List of dicts: Targets corresponding to each image in the batch.
    """
    images, targets = zip(*batch)  # Unzip the batch into separate lists for images and targets.

    # Batch images using the default collate function which handles tensors, numpy arrays, numbers, etc.
    images = default_collate(images)

    return images, targets


def create_loader(new_size, transformation, annotations1, annotations2=None,
                  batch_size=4, crop_prob=0.2, crop_fraction=0.7, min_objects=3,
                  h_flip_prob=0.3, v_flip_prob=0.3, max_rotate_deg=20, rotate_90_proba=0.2, rotate_proba=0.3,
                  seed=42, resize=True, keep_ratio=0.1, model_type='object'):
    """
    Creates a DataLoader for BPMN datasets with optional transformations and concatenation of two datasets.

    Parameters:
    - transformation (callable): Transformation function to apply to each image (e.g., normalization).
    - annotations1 (list): Primary list of annotations.
    - annotations2 (list, optional): Secondary list of annotations to concatenate with the first.
    - batch_size (int): Number of images per batch.
    - crop_prob (float): Probability of applying the crop transformation.
    - crop_fraction (float): Fraction of the original width to use when cropping.
    - min_objects (int): Minimum number of objects required to be within the crop.
    - h_flip_prob (float): Probability of applying horizontal flip.
    - v_flip_prob (float): Probability of applying vertical flip.
    - seed (int): Seed for random number generators for reproducibility.
    - resize (bool): Flag indicating whether to resize images after transformations.

    Returns:
    - DataLoader: Configured data loader for the dataset.
    """

    # Initialize custom transformations for cropping and flipping
    custom_crop_transform = RandomCrop(new_size, crop_fraction, min_objects)
    custom_flip_transform = RandomFlip(h_flip_prob, v_flip_prob)
    custom_rotate_transform = RandomRotate(max_rotate_deg, rotate_proba)

    # Create the primary dataset
    dataset = BPMN_Dataset(
        annotations=annotations1,
        transform=transformation,
        crop_transform=custom_crop_transform,
        crop_prob=crop_prob,
        rotate_90_proba=rotate_90_proba,
        flip_transform=custom_flip_transform,
        rotate_transform=custom_rotate_transform,
        new_size=new_size,
        keep_ratio=keep_ratio,
        model_type=model_type,
        resize=resize
    )

    # Optionally concatenate a second dataset
    if annotations2:
        dataset2 = BPMN_Dataset(
            annotations=annotations2,
            transform=transformation,
            crop_transform=custom_crop_transform,
            crop_prob=crop_prob,
            rotate_90_proba=rotate_90_proba,
            flip_transform=custom_flip_transform,
            new_size=new_size,
            keep_ratio=keep_ratio,
            model_type=model_type,
            resize=resize
        )
        dataset = ConcatDataset([dataset, dataset2])  # Concatenate the two datasets

    # Set the seed for reproducibility in random operations within transformations and data loading
    random.seed(seed)
    torch.manual_seed(seed)

    # Create the DataLoader with the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

    return data_loader
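For context, a minimal sketch of how the new loader might be driven from a training script is shown below. It assumes a torchvision ToTensor transform and a list of annotation objects exposing .img, .boxes_ltrb, .categories and .annotations; the name annotations_train is a placeholder and not part of this commit.

# Hypothetical usage sketch (not part of this commit).
import torchvision.transforms as T
from modules.dataset_loader import create_loader

transformation = T.Compose([T.ToTensor()])
train_loader = create_loader(
    new_size=(1333, 1333),
    transformation=transformation,
    annotations1=annotations_train,   # placeholder list of annotation objects
    batch_size=4,
    crop_prob=0.2,
    h_flip_prob=0.3,
    v_flip_prob=0.3,
    model_type='arrow',               # 'object' or 'arrow'
)
for images, targets in train_loader:
    # images: batched tensor; targets: tuple of per-image dicts (boxes, labels, keypoints)
    break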
modules/display.py
CHANGED
@@ -1,4 +1,4 @@
-from modules.utils import
+from modules.utils import class_dict, resize_boxes, resize_keypoints, find_other_keypoint
 import cv2
 import numpy as np
 import torch
modules/eval.py
CHANGED
@@ -3,8 +3,9 @@ import torch
 from modules.utils import class_dict, object_dict, arrow_dict, find_closest_object, find_other_keypoint, filter_overlap_boxes, iou
 from tqdm import tqdm
 from modules.toXML import get_size_elements, calculate_pool_bounds, create_BPMN_id
-from modules.utils import is_vertical
+from modules.utils import is_vertical, proportion_inside
 import streamlit as st
+from builtins import dict


 def non_maximum_suppression(boxes, scores, labels=None, iou_threshold=0.5):
@@ -101,10 +102,27 @@ def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
     scores = scores[selected_boxes]
     labels = labels[selected_boxes]

+    #find the outlier object that are too small by the area
+    obj_not_too_small = find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, element_ref = ['event', 'messageEvent'], mode = "lower")
+    obj_not_too_big = find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=2, element_ref = ['task'], mode = "upper")
+
+    selected_object = [i for i in range(len(labels)) if i in obj_not_too_small and i in obj_not_too_big]
+
+    #selected_object = obj_not_too_small
+
+    boxes = boxes[selected_object]
+    scores = scores[selected_object]
+    labels = labels[selected_object]
+
     #modify the label of the sub-process to task
     for i in range(len(labels)):
         if labels[i] == list(object_dict.values()).index('subProcess'):
             labels[i] = list(object_dict.values()).index('task')
+    #delete all lane and also the value in the labels and scores
+    lane_index = [i for i in range(len(labels)) if labels[i] == list(object_dict.values()).index('lane')]
+    boxes = np.delete(boxes, lane_index, axis=0)
+    labels = np.delete(labels, lane_index)
+    scores = np.delete(scores, lane_index)

     prediction = {
         'boxes': boxes,
@@ -180,7 +198,7 @@ def mix_predictions(objects_pred, arrow_pred):
     return boxes, labels, scores, keypoints


-def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_threshold=0.
+def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_threshold=0.6):
     pool_dict = {}

     # Filter out pools with IoU greater than the threshold
@@ -188,7 +206,7 @@ def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_t
     for i in range(len(boxes)):
         for j in range(i + 1, len(boxes)):
             if labels[i] == labels[j] and labels[i] == list(class_dict.values()).index('pool'):
-                if
+                if proportion_inside(boxes[i], boxes[j]) > iou_threshold:
                     to_delete.append(j)

     boxes = np.delete(boxes, to_delete, axis=0)
@@ -210,8 +228,7 @@ def regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict, iou_t
         if i in pool_indices or class_dict[labels[i]] in ['messageFlow', 'pool']:
             continue
         for j, pool_box in enumerate(pool_boxes):
-            if (box
-                box[2] <= pool_box[2] and box[3] <= pool_box[3]):
+            if proportion_inside(box, pool_box) > iou_threshold:
                 pool_index = pool_indices[j]
                 pool_dict[pool_index].append(i)
                 assigned_to_pool = True
@@ -322,6 +339,53 @@ def correction_labels(boxes, labels, class_dict, pool_dict, flow_links):
     return labels, flow_links


+def find_outlier_objects_by_area(boxes, labels, class_dict, std_factor=1.5, element_ref = ['event', 'messageEvent'], mode = "lower"):
+    # Filter out the sizes of events, data objects, and message events
+    event_indices = [i for i, label in enumerate(labels) if class_dict[label] in element_ref]
+    event_boxes = [boxes[i] for i in event_indices]
+
+    # Calculate the areas of these typical objects
+    event_areas = np.array([(box[2] - box[0]) * (box[3] - box[1]) for box in event_boxes])
+
+    # Compute the mean and standard deviation for areas
+    mean_area = np.mean(event_areas)
+    std_area = np.std(event_areas)
+
+    # Define thresholds for outliers
+    area_lower_threshold = mean_area - std_factor * std_area
+    area_upper_threshold = mean_area + std_factor * std_area
+
+    # Identify indices of outliers and the ones to keep
+    outlier_indices = []
+    kept_indices = []
+
+    if mode == "lower" or mode == 'both':
+        #check for object that could be too small
+        for idx, (box, label) in enumerate(zip(boxes, labels)):
+            area = (box[2] - box[0]) * (box[3] - box[1])
+            if not (area_lower_threshold <= area):
+                outlier_indices.append(idx)
+                print(f"Element {idx} is an outlier with area {area} that is too small")
+            else:
+                kept_indices.append(idx)
+
+    if mode == "upper" or mode == 'both':
+        #check for object that could be too big
+        for idx, (box, label) in enumerate(zip(boxes, labels)):
+            if label == list(class_dict.values()).index('pool') or label == list(class_dict.values()).index('lane'):
+                kept_indices.append(idx)
+                continue
+            area = (box[2] - box[0]) * (box[3] - box[1])
+            if not (area_upper_threshold >= area):
+                outlier_indices.append(idx)
+                print(f"Element {idx} is an outlier with area {area} that is too big")
+            else:
+                kept_indices.append(idx)
+
+    return kept_indices
+
+

 def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict, limit_area=10000):

@@ -368,6 +432,16 @@ def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict, limit_area=10000):
             print('delete element', i)
             delete_elements.append(i)

+    #filter box that are inside a text box
+    """tex_pred = st.session_state.text_pred
+    for i in range(len(boxes)):
+        for j in range(len(tex_pred[0])):
+            #check if the box is inside the text box but if the text box is inside the box then it is not a problem
+            if proportion_inside(boxes[i], tex_pred[0][j]) > 0.1:
+                #delete_elements.append(i)
+                print('delete element', i)"""
+
+
     #concatenate the delete_elements and the delete_pool
     delete_elements = delete_elements + delete_pool
     #delete double value in delete_elements
@@ -377,13 +451,21 @@ def last_correction(boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict, limit_area=10000):
     labels = np.delete(labels, delete_elements)
     scores = np.delete(scores, delete_elements)
     keypoints = np.delete(keypoints, delete_elements, axis=0)
-
+
     links = np.delete(links, delete_elements, axis=0)
     best_points = [point for i, point in enumerate(best_points) if i not in delete_elements]

+    for i in range(len(delete_pool)):
+        #find the bpmn_id of the pool
+        pool_index = bpmn_id[delete_pool[i]]
+        #delete the pool_index in pool_dict
+        del pool_dict[pool_index]
+
+    bpmn_id = [point for i, point in enumerate(bpmn_id) if i not in delete_elements]
+
     #also delete the element in the pool_dict
     for pool_index, elements in pool_dict.items():
-        pool_dict[pool_index] = [i for i in elements if i not in delete_elements]
+        pool_dict[pool_index] = [i for i in elements if i not in delete_elements]

     return boxes, labels, scores, keypoints, bpmn_id, links, best_points, pool_dict

@@ -420,7 +502,7 @@ def generate_data(image, boxes, labels, scores, keypoints, bpmn_id, flow_links,

     return data

-def develop_prediction(boxes, labels, scores, keypoints, class_dict
+def develop_prediction(boxes, labels, scores, keypoints, class_dict):

     pool_dict, boxes, labels, scores, keypoints = regroup_elements_by_pool(boxes, labels, scores, keypoints, class_dict)

@@ -430,8 +512,7 @@ def develop_prediction(boxes, labels, scores, keypoints, class_dict, correction=
     flow_links, best_points = create_links(keypoints, boxes, labels, class_dict)

     #Correct the labels of some sequenceflow that cross multiple pool
-
-    labels, flow_links = correction_labels(boxes, labels, class_dict, pool_dict, flow_links)
+    labels, flow_links = correction_labels(boxes, labels, class_dict, pool_dict, flow_links)

     #give a link to event to allow the creation of the BPMN id with start, indermediate and end event
     flow_links = give_link_to_element(flow_links, labels)
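The new find_outlier_objects_by_area helper keeps a detection only if its area lies within mean ± std_factor·std of the areas of a reference class. A small standalone sketch of the same thresholding rule, with made-up boxes rather than real model predictions:

# Toy illustration of the area-based outlier rule introduced above (assumed data, not app output).
import numpy as np

# five detected boxes in [x1, y1, x2, y2] format; the last one is suspiciously small
boxes = np.array([[0, 0, 40, 40],
                  [0, 0, 42, 42],
                  [50, 50, 90, 90],
                  [10, 10, 51, 51],
                  [0, 0, 5, 5]], dtype=float)

areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
lower = areas.mean() - 1.5 * areas.std()      # same rule as std_factor=1.5, mode="lower"
kept = [i for i, a in enumerate(areas) if a >= lower]
print(kept)  # [0, 1, 2, 3] -- the 5x5 box falls below the threshold and is dropped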
modules/streamlit_utils.py
CHANGED
@@ -30,6 +30,8 @@ from modules.toWizard import create_wizard_file
 from huggingface_hub import hf_hub_download
 import time

+from modules.toXML import get_size_elements
+


@@ -440,12 +442,13 @@ def modify_results(percentage_text_dist_thresh=0.5):
     new_keypoints = np.concatenate((object_keypoints, arrow_keypoints))


-    boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = develop_prediction(new_bbox, new_lab, new_scores, new_keypoints, class_dict
+    boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict = develop_prediction(new_bbox, new_lab, new_scores, new_keypoints, class_dict)

     st.session_state.prediction = generate_data(st.session_state.prediction['image'], boxes, labels, scores, keypoints, bpmn_id, flow_links, best_points, pool_dict)
     st.session_state.text_mapping = mapping_text(st.session_state.prediction, st.session_state.text_pred, print_sentences=False, percentage_thresh=percentage_text_dist_thresh)

     if changes:
+        changes = False
         st.rerun()

     return True
@@ -460,14 +463,49 @@ def display_bpmn_modeler(is_mobile, screen_width):
         st.session_state.size_scale, st.session_state.scale
     )
     st.session_state.vizi_file = create_wizard_file(st.session_state.prediction.copy(), st.session_state.text_mapping)
-
+
+    display_bpmn_xml(st.session_state.bpmn_xml, st.session_state.vizi_file, is_mobile=is_mobile, screen_width=int(4/5 * screen_width))
+
+
+def find_best_scale(pred, size_elements):
+    boxes = pred['boxes']
+    labels = pred['labels']
+
+    # Find average size of the tasks in pred
+    avg_size = 0
+    count = 0
+    for i in range(len(boxes)):
+        if class_dict[labels[i]] == 'task':
+            avg_size += (boxes[i][2] - boxes[i][0]) * (boxes[i][3] - boxes[i][1])
+            count += 1
+
+    if count == 0:
+        raise ValueError("No tasks found in the provided prediction.")
+
+    avg_size /= count
+
+    # Get the size of a task element from size_elements dictionary
+    task_size = size_elements['task']
+    task_area = task_size[0] * task_size[1]
+
+    # Find the best scale
+    best_scale = (avg_size / task_area) ** 0.5
+
+    if best_scale < 0.5:
+        best_scale = 0.5
+    elif best_scale > 1:
+        best_scale = 1
+
+    return best_scale

 def modeler_options(is_mobile):
     if not is_mobile:
         with st.expander("Options for BPMN modeler"):
             col1, col2 = st.columns(2)
             with col1:
-                st.session_state.
+                st.session_state.best_scale = find_best_scale(st.session_state.prediction, get_size_elements())
+                print(f"Best scale: {st.session_state.best_scale}")
+                st.session_state.scale = st.slider("Set distance scale for XML file", min_value=0.1, max_value=2.0, value=1/st.session_state.best_scale, step=0.1)
                 st.session_state.size_scale = st.slider("Set size object scale for XML file", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
     else:
         st.session_state.scale = 1.0
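The "automatic resize value" mentioned in the commit title comes from the new find_best_scale: it compares the average detected task area against the nominal task size returned by get_size_elements and clamps the square root of that ratio to [0.5, 1], which then seeds the distance-scale slider as 1/best_scale. A worked example with toy numbers (not real app data):

# Worked example of the automatic scale computation added above (assumed numbers).
nominal_task = (120, 96)                 # matches get_size_elements(size_scale=1)
avg_detected_task_area = 60 * 48         # suppose detected tasks are roughly 60x48 px

best_scale = (avg_detected_task_area / (nominal_task[0] * nominal_task[1])) ** 0.5
best_scale = min(max(best_scale, 0.5), 1.0)
print(best_scale)                        # 0.5, so the slider default becomes 1/best_scale = 2.0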
modules/toWizard.py
CHANGED
@@ -131,7 +131,7 @@ def create_wizard_file(data, text_mapping):
     ET.SubElement(activity, 'subActivityFlows')
     ET.SubElement(activity, 'messageFlows')

-    activityFlows = ET.SubElement(root, 'activityFlows')
+    """activityFlows = ET.SubElement(root, 'activityFlows')
     i=0
     for i, link in enumerate(data['links']):
         if link[0] is None and link[1] is not None and (data['BPMN_id'][i].split('_')[0] == 'event' or data['BPMN_id'][i].split('_')[0] == 'message'):
@@ -145,7 +145,7 @@ def create_wizard_file(data, text_mapping):
         if current_text is None or next_text is None:
             continue
         ET.SubElement(activityFlows, 'activityFlow', attrib={'activity': current_text, 'endState': '---', 'target': next_text, 'isMerging': 'False', 'isPredefined': 'True'})
-        i+=1
+        i+=1"""

     ET.SubElement(root, 'participants')

modules/toXML.py
CHANGED
@@ -113,8 +113,8 @@ def expand_pool_bounding_boxes(modified_pred, pred, size_elements):
         if pool_width < 300 or pool_height < 30:
             error("The pool is maybe too small, please add more elements or increase the scale by zooming on the image.")
             continue
-
-        modified_pred['boxes'][position] = [min_x - marge, min_y - marge
+
+        modified_pred['boxes'][position] = [min_x - marge//20, min_y - marge, min_x + pool_width + marge//20, min_y + pool_height + marge]

 # Adjust left and right boundaries of all pools
 def adjust_pool_boundaries(modified_pred, pred):
@@ -148,9 +148,9 @@ def align_boxes(pred, size, class_dict):
     pool_groups = calculate_centers_and_group_by_pool(pred, class_dict)
     align_elements_within_pool(modified_pred, pool_groups, class_dict, size)

-
-
-
+    if len(pred['pool_dict']) > 1:
+        expand_pool_bounding_boxes(modified_pred, pred, size)
+    adjust_pool_boundaries(modified_pred, pred)

     return modified_pred['boxes']

@@ -176,10 +176,11 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
         'id': "simpleExample"
     })

+
     size_elements = get_size_elements(size_scale)

     #if there is no pool or lane, create a pool with all elements
-    if len(full_pred['pool_dict'])==0 or (len(full_pred['pool_dict'])==1 and len(full_pred['pool_dict']
+    if len(full_pred['pool_dict']) == 0 or (len(full_pred['pool_dict']) == 1 and len(next(iter(full_pred['pool_dict'].values()))) == len(full_pred['labels'])):
         full_pred, text_mapping = create_big_pool(full_pred, text_mapping)

     #modify the boxes positions
@@ -249,13 +250,13 @@ def create_XML(full_pred, text_mapping, size_scale, scale):
     return pretty_xml_as_string

 # Function that creates a single pool with all elements
-def create_big_pool(full_pred, text_mapping):
+def create_big_pool(full_pred, text_mapping, marge=50):
     # If no pools or lanes are detected, create a single pool with all elements
     new_pool_index = 'pool_1'
     size_elements = get_size_elements(st.session_state.size_scale)
     elements_pool = list(range(len(full_pred['boxes'])))
     min_x, min_y, max_x, max_y = calculate_pool_bounds(full_pred['boxes'],full_pred['labels'], elements_pool, size_elements)
-    box = [min_x, min_y, max_x, max_y]
+    box = [min_x-marge, min_y-marge, max_x+marge, max_y+marge]
     full_pred['boxes'] = np.append(full_pred['boxes'], [box], axis=0)
     full_pred['pool_dict'][new_pool_index] = elements_pool
     full_pred['BPMN_id'].append('pool_1')
@@ -264,7 +265,7 @@ def create_big_pool(full_pred, text_mapping):
     return full_pred, text_mapping

 # Function that gives the size of the elements
-def get_size_elements(size_scale):
+def get_size_elements(size_scale=1):
     size_elements = {
         'event': (size_scale*43.2, size_scale*43.2),
         'task': (size_scale*120, size_scale*96),
@@ -400,8 +401,9 @@ def check_data_association(i, links, labels, keep_elements):

 def create_data_Association(bpmn,data,size,element_id,current_idx,source_id,target_id):
     waypoints = calculate_waypoints(data, size, current_idx, source_id, target_id)
-
-
+    if waypoints is not None:
+        add_diagram_edge(bpmn, element_id, waypoints)
+
 def check_eventBasedGateway(i, links, labels):
     status, links_idx = [], []
     for j, (k,l) in enumerate(links):
@@ -582,7 +584,7 @@ def calculate_pool_bounds(boxes, labels, keep_elements, size):
         max_x = max(max_x, x + element_width)
         max_y = max(max_y, y + element_height)

-    return min_x
+    return min_x, min_y, max_x, max_y


@@ -680,10 +682,16 @@ def calculate_waypoints(data, size, current_idx, source_id, target_id):
     if source_idx is None or target_idx is None:
         warning()
         return None
+

     name_source = source_id.split('_')[0]
     name_target = target_id.split('_')[0]

+    avoid_element = ['pool', 'sequenceFlow', 'messageFlow', 'dataAssociation']
+    if name_target in avoid_element or name_source in avoid_element:
+        warning()
+        return None
+
     # Get the position of the source and target
     source_x, source_y = data['boxes'][source_idx][:2]
     target_x, target_y = data['boxes'][target_idx][:2]
modules/train.py
CHANGED
@@ -100,7 +100,14 @@ def prepare_model(dict,opti,learning_rate= 0.0003,model_to_load=None, model_type
|
|
100 |
return model, optimizer, device
|
101 |
|
102 |
|
|
|
|
|
|
|
|
|
103 |
|
|
|
|
|
|
|
104 |
|
105 |
def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=False):
|
106 |
model.train() # Set the model to evaluation mode
|
@@ -178,13 +185,14 @@ def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=Fal
|
|
178 |
|
179 |
|
180 |
def training_model(num_epochs, model, data_loader, subset_test_loader,
|
181 |
-
optimizer, model_to_load=None, change_learning_rate=
|
182 |
-
|
183 |
-
max_rotate_deg=20, rotate_proba=0.2, blur_prob=0.2,
|
184 |
score_threshold=0.7, iou_threshold=0.5, early_stop_f1_score=0.97,
|
185 |
information_training='training', start_epoch=0, loss_config=None, model_type = 'object',
|
186 |
eval_metric='f1_score', device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')):
|
187 |
|
|
|
|
|
188 |
|
189 |
if loss_config is None:
|
190 |
print('No loss config found, all losses will be used.')
|
@@ -219,14 +227,20 @@ def training_model(num_epochs, model, data_loader, subset_test_loader,
|
|
219 |
bad_test_loss = 0
|
220 |
previous_test_loss = 1000
|
221 |
|
|
|
|
|
|
|
222 |
print(f"Let's go training {model_type} model with {num_epochs} epochs!")
|
223 |
-
|
|
|
224 |
|
225 |
for epoch in range(num_epochs):
|
226 |
|
227 |
-
if (epoch>0 and (epoch)%change_learning_rate == 0) or bad_test_loss
|
228 |
learning_rate = 0.7*learning_rate
|
229 |
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=learning_rate, eps=1e-08, betas=(0.9, 0.999))
|
|
|
|
|
230 |
print(f'Learning rate changed to {learning_rate:.4} and the best epoch for now is {best_epoch}')
|
231 |
bad_test_loss = 0
|
232 |
if epoch>0 and (epoch)==start_key:
|
@@ -315,24 +329,19 @@ def training_model(num_epochs, model, data_loader, subset_test_loader,
|
|
315 |
|
316 |
|
317 |
# Evaluate the model on the test set
|
318 |
-
if eval_metric != 'loss':
|
319 |
-
avg_test_loss = 0
|
320 |
-
labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = main_evaluation(model, subset_test_loader,score_threshold=0.5, iou_threshold=0.5, distance_threshold=10, key_correction=False, model_type=model_type)
|
321 |
-
print(f"Epoch {epoch+1+start_epoch}, Average Loss: {avg_loss:.4f}, Labels_precision: {labels_precision:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f} ", end=", ")
|
322 |
-
if eval_metric == 'all':
|
323 |
-
avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
|
324 |
-
print(f"Epoch {epoch+1+start_epoch}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
|
325 |
if eval_metric == 'loss':
|
326 |
labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = 0,0,0,0,0,0
|
327 |
avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
|
328 |
print(f"Epoch {epoch+1+start_epoch}, Average Training Loss: {avg_loss:.4f}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
|
330 |
print(f"Time: {time.time() - start:.2f} [s]")
|
331 |
|
332 |
-
|
333 |
-
if epoch>0 and (epoch)%start_key == 0:
|
334 |
-
print(f"Keypoints Accuracy: {key_accuracy:.4f}", end=", ")
|
335 |
-
|
336 |
if eval_metric == 'f1_score':
|
337 |
metric_used = f1_score
|
338 |
elif eval_metric == 'precision':
|
@@ -357,15 +366,14 @@ def training_model(num_epochs, model, data_loader, subset_test_loader,
|
|
357 |
epoch_test_loss.append(avg_test_loss)
|
358 |
|
359 |
name_model = f"model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}"
|
|
|
360 |
|
361 |
if same >=1 :
|
362 |
-
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
363 |
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
364 |
write_results(name_model,metrics_list,start_epoch)
|
365 |
break
|
366 |
|
367 |
if (epoch+1+start_epoch) % 5 == 0:
|
368 |
-
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
369 |
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
370 |
model.load_state_dict(best_model_state)
|
371 |
write_results(name_model,metrics_list,start_epoch)
|
@@ -375,12 +383,11 @@ def training_model(num_epochs, model, data_loader, subset_test_loader,
|
|
375 |
previous_test_loss = avg_test_loss
|
376 |
|
377 |
|
378 |
-
print(f"\n Total time: {(time.time() - start_tot)/60} minutes, Best Epoch is {best_epoch} with an
|
379 |
if best_model_state:
|
380 |
-
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
381 |
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
382 |
model.load_state_dict(best_model_state)
|
383 |
write_results(name_model,metrics_list,start_epoch)
|
384 |
print(f"Name of the best model: model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}")
|
385 |
|
386 |
-
return model
|
|
|
100 |
return model, optimizer, device
|
101 |
|
102 |
|
103 |
+
import copy
|
104 |
+
from torch.optim import AdamW
|
105 |
+
import time
|
106 |
+
from modules.train import write_results
|
107 |
|
108 |
+
import torch
|
109 |
+
import numpy as np
|
110 |
+
from tqdm import tqdm
|
111 |
|
112 |
def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=False):
|
113 |
model.train() # Set the model to evaluation mode
|
|
|
185 |
|
186 |
|
187 |
def training_model(num_epochs, model, data_loader, subset_test_loader,
|
188 |
+
optimizer, model_to_load=None, change_learning_rate=100, start_key=100,
|
189 |
+
parameters=None, blur_prob=0.02,
|
|
|
190 |
score_threshold=0.7, iou_threshold=0.5, early_stop_f1_score=0.97,
|
191 |
information_training='training', start_epoch=0, loss_config=None, model_type = 'object',
|
192 |
eval_metric='f1_score', device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')):
|
193 |
|
194 |
+
# Set the model to training mode
|
195 |
+
model.train()
|
196 |
|
197 |
if loss_config is None:
|
198 |
print('No loss config found, all losses will be used.')
|
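The new signature is easiest to read from a call site. The sketch below uses placeholder values, and the dictionary key names are hypothetical; the only hard requirement, visible further down in the function, is that parameters.values() is unpacked positionally in the order batch_size, crop_prob, rotate_90_proba, h_flip_prob, v_flip_prob, max_rotate_deg, rotate_proba, keep_ratio.

```python
# Placeholder values for illustration; key names are hypothetical, but the order must match
# the positional unpacking of parameters.values() inside training_model.
params = {
    'batch_size': 4, 'crop_prob': 0.2, 'rotate_90_proba': 0.2, 'h_flip_prob': 0.3,
    'v_flip_prob': 0.3, 'max_rotate_deg': 20, 'rotate_proba': 0.3, 'keep_ratio': 0.1,
}

# model, optimizer, train_loader and test_loader are assumed to have been built earlier
# (e.g. with prepare_model and create_loader).
model = training_model(
    num_epochs=50, model=model, data_loader=train_loader, subset_test_loader=test_loader,
    optimizer=optimizer,
    change_learning_rate=15,   # decay the learning rate every 15 epochs (or after repeated bad test losses)
    start_key=30,              # epoch from which keypoint accuracy starts being reported
    parameters=params,
    blur_prob=0.02,
    eval_metric='f1_score',
    model_type='arrow',
)
```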
|
|
227 |
bad_test_loss = 0
|
228 |
previous_test_loss = 1000
|
229 |
|
230 |
+
if parameters is not None:
|
231 |
+
batch_size, crop_prob, rotate_90_proba, h_flip_prob, v_flip_prob, max_rotate_deg, rotate_proba, keep_ratio = parameters.values()
|
232 |
+
|
233 |
print(f"Let's go training {model_type} model with {num_epochs} epochs!")
|
234 |
+
if parameters is not None:
|
235 |
+
print(f"Learning rate: {learning_rate}, Batch size: {batch_size}, Crop prob: {crop_prob}, H flip prob: {h_flip_prob}, V flip prob: {v_flip_prob}, Max rotate deg: {max_rotate_deg}, Rotate proba: {rotate_proba}, Rotate 90 proba: {rotate_90_proba}, Keep ratio: {keep_ratio}")
|
236 |
|
237 |
for epoch in range(num_epochs):
|
238 |
|
239 |
+
if (epoch>0 and (epoch)%change_learning_rate == 0) or bad_test_loss>=3:
|
240 |
learning_rate = 0.7*learning_rate
|
241 |
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=learning_rate, eps=1e-08, betas=(0.9, 0.999))
|
242 |
+
if best_model_state is not None:
|
243 |
+
model.load_state_dict(best_model_state)
|
244 |
print(f'Learning rate changed to {learning_rate:.4} and the best epoch for now is {best_epoch}')
|
245 |
bad_test_loss = 0
|
246 |
if epoch>0 and (epoch)==start_key:
|
|
|
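With the schedule above, the learning rate shrinks geometrically: each trigger (every change_learning_rate epochs, or after three consecutive epochs where the test loss gets worse) multiplies it by 0.7, rebuilds AdamW with weight_decay tied to the new rate, and restarts from the best weights seen so far. For instance, assuming an initial rate of 1e-3:

```python
# Illustrative decay of the learning rate after successive triggers (initial value assumed).
learning_rate = 1e-3
for n_triggers in range(4):
    print(n_triggers, round(learning_rate, 6))
    learning_rate = 0.7 * learning_rate
# 0 0.001
# 1 0.0007
# 2 0.00049
# 3 0.000343
```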
329 |
|
330 |
|
331 |
# Evaluate the model on the test set
|
332 |
if eval_metric == 'loss':
|
333 |
labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = 0,0,0,0,0,0
|
334 |
avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
|
335 |
print(f"Epoch {epoch+1+start_epoch}, Average Training Loss: {avg_loss:.4f}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
|
336 |
+
else:
|
337 |
+
avg_test_loss = 0
|
338 |
+
labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = main_evaluation(model, subset_test_loader,score_threshold=0.5, iou_threshold=0.5, distance_threshold=10, key_correction=False, model_type=model_type)
|
339 |
+
print(f"Epoch {epoch+1+start_epoch}, Average Loss: {avg_loss:.4f}, Labels_precision: {labels_precision:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f} ", end=", ")
|
340 |
+
avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
|
341 |
+
print(f"Epoch {epoch+1+start_epoch}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
|
342 |
|
343 |
print(f"Time: {time.time() - start:.2f} [s]")
|
344 |
|
345 |
if eval_metric == 'f1_score':
|
346 |
metric_used = f1_score
|
347 |
elif eval_metric == 'precision':
|
|
|
366 |
epoch_test_loss.append(avg_test_loss)
|
367 |
|
368 |
name_model = f"model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}"
|
369 |
+
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
370 |
|
371 |
if same >=1 :
|
|
|
372 |
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
373 |
write_results(name_model,metrics_list,start_epoch)
|
374 |
break
|
375 |
|
376 |
if (epoch+1+start_epoch) % 5 == 0:
|
|
|
377 |
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
378 |
model.load_state_dict(best_model_state)
|
379 |
write_results(name_model,metrics_list,start_epoch)
|
|
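The checkpoint name built a few lines above encodes the optimizer, the epoch count and the augmentation probabilities (each probability is multiplied by 10 and truncated to an integer, so the default blur_prob of 0.02 shows up as "blur00"). A worked example with illustrative values:

```python
# Illustrative values only (not the Space's actual training configuration).
optimizer_name = "AdamW"          # stands in for type(optimizer).__name__
epoch, start_epoch, batch_size = 49, 0, 4
blur_prob, crop_prob, h_flip_prob, rotate_proba = 0.02, 0.2, 0.3, 0.3
information_training = 'training'

name_model = (f"model_{optimizer_name}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval"
              f"_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}"
              f"_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}")
print(name_model)
# model_AdamW_50ep_4batch_trainval_blur00_crop02_flip03_rotate03_training
```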
|
383 |
previous_test_loss = avg_test_loss
|
384 |
|
385 |
|
386 |
+
print(f"\n Total time: {(time.time() - start_tot)/60} minutes, Best Epoch is {best_epoch} with an {eval_metric} of {best_metrics:.4f}")
|
387 |
if best_model_state:
|
|
|
388 |
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
389 |
model.load_state_dict(best_model_state)
|
390 |
write_results(name_model,metrics_list,start_epoch)
|
391 |
print(f"Name of the best model: model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}")
|
392 |
|
393 |
+
return model
|
modules/utils.py
CHANGED
@@ -14,6 +14,46 @@ from torch.utils.data import DataLoader, Subset, ConcatDataset
|
|
14 |
import streamlit as st
|
15 |
|
16 |
|
17 |
object_dict = {
|
18 |
0: 'background',
|
19 |
1: 'task',
|
@@ -90,17 +130,26 @@ def iou(box1, box2):
|
|
90 |
return inter_area / union_area
|
91 |
|
92 |
def proportion_inside(box1, box2):
|
|
93 |
# Calculate the intersection of the two bounding boxes
|
94 |
-
inter_box = [max(box1[0], box2[0]), max(box1[1], box2[1]), min(box1[2], box2[2]), min(box1[3], box2[3])]
|
95 |
inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
|
96 |
|
97 |
-
# Calculate the
|
98 |
-
|
99 |
-
|
100 |
-
# Calculate the proportion of box1 inside box2
|
101 |
-
if box1_area == 0:
|
102 |
return 0
|
103 |
-
proportion = inter_area / box1_area
|
104 |
|
105 |
# Ensure the proportion is at most 100%
|
106 |
return min(proportion, 1.0)
|
@@ -164,472 +213,6 @@ def resize_keypoints(keypoints: np.ndarray, original_size: tuple, target_size: t
|
|
164 |
return keypoints
|
165 |
|
166 |
|
167 |
-
|
168 |
-
class RandomCrop:
|
169 |
-
def __init__(self, new_size=(1333,800),crop_fraction=0.5, min_objects=4):
|
170 |
-
self.crop_fraction = crop_fraction
|
171 |
-
self.min_objects = min_objects
|
172 |
-
self.new_size = new_size
|
173 |
-
|
174 |
-
def __call__(self, image, target):
|
175 |
-
new_w1, new_h1 = self.new_size
|
176 |
-
w, h = image.size
|
177 |
-
new_w = int(w * self.crop_fraction)
|
178 |
-
new_h = int(new_w*new_h1/new_w1)
i=0
|
181 |
-
for i in range(4):
|
182 |
-
if new_h >= h:
|
183 |
-
i += 0.05
|
184 |
-
new_w = int(w * (self.crop_fraction - i))
|
185 |
-
new_h = int(new_w*new_h1/new_w1)
|
186 |
-
if new_h < h:
|
187 |
-
continue
|
188 |
-
|
189 |
-
if new_h >= h:
|
190 |
-
return image, target
|
191 |
-
|
192 |
-
boxes = target["boxes"]
|
193 |
-
if 'keypoints' in target:
|
194 |
-
keypoints = target["keypoints"]
|
195 |
-
else:
|
196 |
-
keypoints = []
|
197 |
-
for i in range(len(boxes)):
|
198 |
-
keypoints.append(torch.zeros((2,3)))
|
199 |
-
|
200 |
-
|
201 |
-
# Attempt to find a suitable crop region
|
202 |
-
success = False
|
203 |
-
for _ in range(100): # Max 100 attempts to find a valid crop
|
204 |
-
top = random.randint(0, h - new_h)
|
205 |
-
left = random.randint(0, w - new_w)
|
206 |
-
crop_region = [left, top, left + new_w, top + new_h]
|
207 |
-
|
208 |
-
# Check how many objects are fully contained in this region
|
209 |
-
contained_boxes = []
|
210 |
-
contained_keypoints = []
|
211 |
-
for box, kp in zip(boxes, keypoints):
|
212 |
-
if box[0] >= crop_region[0] and box[1] >= crop_region[1] and box[2] <= crop_region[2] and box[3] <= crop_region[3]:
|
213 |
-
# Adjust box and keypoints coordinates
|
214 |
-
new_box = box - torch.tensor([crop_region[0], crop_region[1], crop_region[0], crop_region[1]])
|
215 |
-
new_kp = kp - torch.tensor([crop_region[0], crop_region[1], 0])
|
216 |
-
contained_boxes.append(new_box)
|
217 |
-
contained_keypoints.append(new_kp)
|
218 |
-
|
219 |
-
if len(contained_boxes) >= self.min_objects:
|
220 |
-
success = True
|
221 |
-
break
|
222 |
-
|
223 |
-
if success:
|
224 |
-
# Perform the actual crop
|
225 |
-
image = F.crop(image, top, left, new_h, new_w)
|
226 |
-
target["boxes"] = torch.stack(contained_boxes) if contained_boxes else torch.zeros((0, 4))
|
227 |
-
if 'keypoints' in target:
|
228 |
-
target["keypoints"] = torch.stack(contained_keypoints) if contained_keypoints else torch.zeros((0, 2, 4))
|
229 |
-
|
230 |
-
return image, target
|
231 |
-
|
232 |
-
|
233 |
-
class RandomFlip:
|
234 |
-
def __init__(self, h_flip_prob=0.5, v_flip_prob=0.5):
|
235 |
-
"""
|
236 |
-
Initializes the RandomFlip with probabilities for flipping.
|
237 |
-
|
238 |
-
Parameters:
|
239 |
-
- h_flip_prob (float): Probability of applying a horizontal flip to the image.
|
240 |
-
- v_flip_prob (float): Probability of applying a vertical flip to the image.
|
241 |
-
"""
|
242 |
-
self.h_flip_prob = h_flip_prob
|
243 |
-
self.v_flip_prob = v_flip_prob
|
244 |
-
|
245 |
-
def __call__(self, image, target):
|
246 |
-
"""
|
247 |
-
Applies random horizontal and/or vertical flip to the image and updates target data accordingly.
|
248 |
-
|
249 |
-
Parameters:
|
250 |
-
- image (PIL Image): The image to be flipped.
|
251 |
-
- target (dict): The target dictionary containing 'boxes' and 'keypoints'.
|
252 |
-
|
253 |
-
Returns:
|
254 |
-
- PIL Image, dict: The flipped image and its updated target dictionary.
|
255 |
-
"""
|
256 |
-
if random.random() < self.h_flip_prob:
|
257 |
-
image = F.hflip(image)
|
258 |
-
w, _ = image.size # Get the new width of the image after flip for bounding box adjustment
|
259 |
-
# Adjust bounding boxes for horizontal flip
|
260 |
-
for i, box in enumerate(target['boxes']):
|
261 |
-
xmin, ymin, xmax, ymax = box
|
262 |
-
target['boxes'][i] = torch.tensor([w - xmax, ymin, w - xmin, ymax], dtype=torch.float32)
|
263 |
-
|
264 |
-
# Adjust keypoints for horizontal flip
|
265 |
-
if 'keypoints' in target:
|
266 |
-
new_keypoints = []
|
267 |
-
for keypoints_for_object in target['keypoints']:
|
268 |
-
flipped_keypoints_for_object = []
|
269 |
-
for kp in keypoints_for_object:
|
270 |
-
x, y = kp[:2]
|
271 |
-
new_x = w - x
|
272 |
-
flipped_keypoints_for_object.append(torch.tensor([new_x, y] + list(kp[2:])))
|
273 |
-
new_keypoints.append(torch.stack(flipped_keypoints_for_object))
|
274 |
-
target['keypoints'] = torch.stack(new_keypoints)
|
275 |
-
|
276 |
-
if random.random() < self.v_flip_prob:
|
277 |
-
image = F.vflip(image)
|
278 |
-
_, h = image.size # Get the new height of the image after flip for bounding box adjustment
|
279 |
-
# Adjust bounding boxes for vertical flip
|
280 |
-
for i, box in enumerate(target['boxes']):
|
281 |
-
xmin, ymin, xmax, ymax = box
|
282 |
-
target['boxes'][i] = torch.tensor([xmin, h - ymax, xmax, h - ymin], dtype=torch.float32)
|
283 |
-
|
284 |
-
# Adjust keypoints for vertical flip
|
285 |
-
if 'keypoints' in target:
|
286 |
-
new_keypoints = []
|
287 |
-
for keypoints_for_object in target['keypoints']:
|
288 |
-
flipped_keypoints_for_object = []
|
289 |
-
for kp in keypoints_for_object:
|
290 |
-
x, y = kp[:2]
|
291 |
-
new_y = h - y
|
292 |
-
flipped_keypoints_for_object.append(torch.tensor([x, new_y] + list(kp[2:])))
|
293 |
-
new_keypoints.append(torch.stack(flipped_keypoints_for_object))
|
294 |
-
target['keypoints'] = torch.stack(new_keypoints)
|
295 |
-
|
296 |
-
return image, target
|
297 |
-
|
298 |
-
|
299 |
-
class RandomRotate:
|
300 |
-
def __init__(self, max_rotate_deg=20, rotate_proba=0.3):
|
301 |
-
"""
|
302 |
-
Initializes the RandomRotate with a maximum rotation angle and probability of rotating.
|
303 |
-
|
304 |
-
Parameters:
|
305 |
-
- max_rotate_deg (int): Maximum degree to rotate the image.
|
306 |
-
- rotate_proba (float): Probability of applying rotation to the image.
|
307 |
-
"""
|
308 |
-
self.max_rotate_deg = max_rotate_deg
|
309 |
-
self.rotate_proba = rotate_proba
|
310 |
-
|
311 |
-
def __call__(self, image, target):
|
312 |
-
"""
|
313 |
-
Randomly rotates the image and updates the target data accordingly.
|
314 |
-
|
315 |
-
Parameters:
|
316 |
-
- image (PIL Image): The image to be rotated.
|
317 |
-
- target (dict): The target dictionary containing 'boxes', 'labels', and 'keypoints'.
|
318 |
-
|
319 |
-
Returns:
|
320 |
-
- PIL Image, dict: The rotated image and its updated target dictionary.
|
321 |
-
"""
|
322 |
-
if random.random() < self.rotate_proba:
|
323 |
-
angle = random.uniform(-self.max_rotate_deg, self.max_rotate_deg)
|
324 |
-
image = F.rotate(image, angle, expand=False, fill=200)
|
325 |
-
|
326 |
-
# Rotate bounding boxes
|
327 |
-
w, h = image.size
|
328 |
-
cx, cy = w / 2, h / 2
|
329 |
-
boxes = target["boxes"]
|
330 |
-
new_boxes = []
|
331 |
-
for box in boxes:
|
332 |
-
new_box = self.rotate_box(box, angle, cx, cy)
|
333 |
-
new_boxes.append(new_box)
|
334 |
-
target["boxes"] = torch.stack(new_boxes)
|
335 |
-
|
336 |
-
# Rotate keypoints
|
337 |
-
if 'keypoints' in target:
|
338 |
-
new_keypoints = []
|
339 |
-
for keypoints in target["keypoints"]:
|
340 |
-
new_kp = self.rotate_keypoints(keypoints, angle, cx, cy)
|
341 |
-
new_keypoints.append(new_kp)
|
342 |
-
target["keypoints"] = torch.stack(new_keypoints)
|
343 |
-
|
344 |
-
return image, target
|
345 |
-
|
346 |
-
def rotate_box(self, box, angle, cx, cy):
|
347 |
-
"""
|
348 |
-
Rotates a bounding box by a given angle around the center of the image.
|
349 |
-
"""
|
350 |
-
x1, y1, x2, y2 = box
|
351 |
-
corners = torch.tensor([
|
352 |
-
[x1, y1],
|
353 |
-
[x2, y1],
|
354 |
-
[x2, y2],
|
355 |
-
[x1, y2]
|
356 |
-
])
|
357 |
-
corners = torch.cat((corners, torch.ones(corners.shape[0], 1)), dim=1)
|
358 |
-
M = cv2.getRotationMatrix2D((cx, cy), angle, 1)
|
359 |
-
corners = torch.matmul(torch.tensor(M, dtype=torch.float32), corners.T).T
|
360 |
-
x_ = corners[:, 0]
|
361 |
-
y_ = corners[:, 1]
|
362 |
-
x_min, x_max = torch.min(x_), torch.max(x_)
|
363 |
-
y_min, y_max = torch.min(y_), torch.max(y_)
|
364 |
-
return torch.tensor([x_min, y_min, x_max, y_max], dtype=torch.float32)
|
365 |
-
|
366 |
-
def rotate_keypoints(self, keypoints, angle, cx, cy):
|
367 |
-
"""
|
368 |
-
Rotates keypoints by a given angle around the center of the image.
|
369 |
-
"""
|
370 |
-
new_keypoints = []
|
371 |
-
for kp in keypoints:
|
372 |
-
x, y, v = kp
|
373 |
-
point = torch.tensor([x, y, 1])
|
374 |
-
M = cv2.getRotationMatrix2D((cx, cy), angle, 1)
|
375 |
-
new_point = torch.matmul(torch.tensor(M, dtype=torch.float32), point)
|
376 |
-
new_keypoints.append(torch.tensor([new_point[0], new_point[1], v], dtype=torch.float32))
|
377 |
-
return torch.stack(new_keypoints)
|
378 |
-
|
379 |
-
def rotate_90_box(box, angle, w, h):
|
380 |
-
x1, y1, x2, y2 = box
|
381 |
-
if angle == 90:
|
382 |
-
return torch.tensor([y1,h-x2,y2,h-x1])
|
383 |
-
elif angle == 270 or angle == -90:
|
384 |
-
return torch.tensor([w-y2,x1,w-y1,x2])
|
385 |
-
else:
|
386 |
-
print("angle not supported")
|
387 |
-
|
388 |
-
def rotate_90_keypoints(kp, angle, w, h):
|
389 |
-
# Extract coordinates and visibility from each keypoint tensor
|
390 |
-
x1, y1, v1 = kp[0][0], kp[0][1], kp[0][2]
|
391 |
-
x2, y2, v2 = kp[1][0], kp[1][1], kp[1][2]
|
392 |
-
# Swap x and y coordinates for each keypoint
|
393 |
-
if angle == 90:
|
394 |
-
new = [[y1, h-x1, v1], [y2, h-x2, v2]]
|
395 |
-
elif angle == 270 or angle == -90:
|
396 |
-
new = [[w-y1, x1, v1], [w-y2, x2, v2]]
|
397 |
-
|
398 |
-
return torch.tensor(new, dtype=torch.float32)
|
399 |
-
|
400 |
-
|
401 |
-
def rotate_vertical(image, target):
|
402 |
-
# Rotate the image and target if the image is vertical
|
403 |
-
new_boxes = []
|
404 |
-
angle = random.choice([-90,90])
|
405 |
-
image = F.rotate(image, angle, expand=True, fill=200)
|
406 |
-
for box in target["boxes"]:
|
407 |
-
new_box = rotate_90_box(box, angle, image.size[0], image.size[1])
|
408 |
-
new_boxes.append(new_box)
|
409 |
-
target["boxes"] = torch.stack(new_boxes)
|
410 |
-
|
411 |
-
if 'keypoints' in target:
|
412 |
-
new_kp = []
|
413 |
-
for kp in target['keypoints']:
|
414 |
-
new_key = rotate_90_keypoints(kp, angle, image.size[0], image.size[1])
|
415 |
-
new_kp.append(new_key)
|
416 |
-
target['keypoints'] = torch.stack(new_kp)
|
417 |
-
return image, target
|
418 |
-
|
419 |
-
class BPMN_Dataset(Dataset):
|
420 |
-
def __init__(self, annotations, transform=None, crop_transform=None, crop_prob=0.3, rotate_90_proba=0.2, flip_transform=None, rotate_transform=None, new_size=(1333,800),keep_ratio=0.1,resize=True, model_type='object'):
|
421 |
-
self.annotations = annotations
|
422 |
-
print(f"Loaded {len(self.annotations)} annotations.")
|
423 |
-
self.transform = transform
|
424 |
-
self.crop_transform = crop_transform
|
425 |
-
self.crop_prob = crop_prob
|
426 |
-
self.flip_transform = flip_transform
|
427 |
-
self.rotate_transform = rotate_transform
|
428 |
-
self.resize = resize
|
429 |
-
self.new_size = new_size
|
430 |
-
self.keep_ratio = keep_ratio
|
431 |
-
self.model_type = model_type
|
432 |
-
if model_type == 'object':
|
433 |
-
self.dict = object_dict
|
434 |
-
elif model_type == 'arrow':
|
435 |
-
self.dict = arrow_dict
|
436 |
-
self.rotate_90_proba = rotate_90_proba
|
437 |
-
|
438 |
-
def __len__(self):
|
439 |
-
return len(self.annotations)
|
440 |
-
|
441 |
-
def __getitem__(self, idx):
|
442 |
-
annotation = self.annotations[idx]
|
443 |
-
image = annotation.img.convert("RGB")
|
444 |
-
boxes = torch.tensor(np.array(annotation.boxes_ltrb), dtype=torch.float32)
|
445 |
-
labels_names = [ann for ann in annotation.categories]
|
446 |
-
|
447 |
-
#only keep the labels, boxes and keypoints that are in the class_dict
|
448 |
-
kept_indices = [i for i, ann in enumerate(annotation.categories) if ann in self.dict.values()]
|
449 |
-
boxes = boxes[kept_indices]
|
450 |
-
labels_names = [ann for i, ann in enumerate(labels_names) if i in kept_indices]
|
451 |
-
|
452 |
-
labels_id = torch.tensor([(list(self.dict.values()).index(ann)) for ann in labels_names], dtype=torch.int64)
|
453 |
-
|
454 |
-
# Initialize keypoints tensor
|
455 |
-
max_keypoints = 2
|
456 |
-
keypoints = torch.zeros((len(labels_id), max_keypoints, 3), dtype=torch.float32)
|
457 |
-
|
458 |
-
ii=0
|
459 |
-
for i, ann in enumerate(annotation.annotations):
|
460 |
-
#only keep the keypoints that are in the kept indices
|
461 |
-
if i not in kept_indices:
|
462 |
-
continue
|
463 |
-
if ann.category in ["sequenceFlow", "messageFlow", "dataAssociation"]:
|
464 |
-
# Fill the keypoints tensor for this annotation, mark as visible (1)
|
465 |
-
kp = np.array(ann.keypoints, dtype=np.float32).reshape(-1, 3)
|
466 |
-
kp = kp[:,:2]
|
467 |
-
visible = np.ones((kp.shape[0], 1), dtype=np.float32)
|
468 |
-
kp = np.hstack([kp, visible])
|
469 |
-
keypoints[ii, :kp.shape[0], :] = torch.tensor(kp, dtype=torch.float32)
|
470 |
-
ii += 1
|
471 |
-
|
472 |
-
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
|
473 |
-
|
474 |
-
if self.model_type == 'object':
|
475 |
-
target = {
|
476 |
-
"boxes": boxes,
|
477 |
-
"labels": labels_id,
|
478 |
-
#"area": area,
|
479 |
-
#"keypoints": keypoints,
|
480 |
-
}
|
481 |
-
elif self.model_type == 'arrow':
|
482 |
-
target = {
|
483 |
-
"boxes": boxes,
|
484 |
-
"labels": labels_id,
|
485 |
-
#"area": area,
|
486 |
-
"keypoints": keypoints,
|
487 |
-
}
|
488 |
-
|
489 |
-
# Randomly apply flip transform
|
490 |
-
if self.flip_transform:
|
491 |
-
image, target = self.flip_transform(image, target)
|
492 |
-
|
493 |
-
# Randomly apply rotate transform
|
494 |
-
if self.rotate_transform:
|
495 |
-
image, target = self.rotate_transform(image, target)
|
496 |
-
|
497 |
-
# Randomly apply the custom cropping transform
|
498 |
-
if self.crop_transform and random.random() < self.crop_prob:
|
499 |
-
image, target = self.crop_transform(image, target)
|
500 |
-
|
501 |
-
# Rotate vertical image
|
502 |
-
if random.random() < self.rotate_90_proba:
|
503 |
-
image, target = rotate_vertical(image, target)
|
504 |
-
|
505 |
-
if self.resize:
|
506 |
-
if random.random() < self.keep_ratio:
|
507 |
-
original_size = image.size
|
508 |
-
# Calculate scale to fit the new size while maintaining aspect ratio
|
509 |
-
scale = min(self.new_size[0] / original_size[0], self.new_size[1] / original_size[1])
|
510 |
-
new_scaled_size = (int(original_size[0] * scale), int(original_size[1] * scale))
|
511 |
-
|
512 |
-
target['boxes'] = resize_boxes(target['boxes'], (image.size[0],image.size[1]), (new_scaled_size))
|
513 |
-
if 'area' in target:
|
514 |
-
target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
|
515 |
-
|
516 |
-
if 'keypoints' in target:
|
517 |
-
for i in range(len(target['keypoints'])):
|
518 |
-
target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0],image.size[1]), (new_scaled_size))
|
519 |
-
|
520 |
-
# Resize image to new scaled size
|
521 |
-
image = F.resize(image, (new_scaled_size[1], new_scaled_size[0]))
|
522 |
-
|
523 |
-
# Pad the resized image to make it exactly the desired size
|
524 |
-
padding = [0, 0, self.new_size[0] - new_scaled_size[0], self.new_size[1] - new_scaled_size[1]]
|
525 |
-
image = F.pad(image, padding, fill=200, padding_mode='constant')
|
526 |
-
else:
|
527 |
-
target['boxes'] = resize_boxes(target['boxes'], (image.size[0],image.size[1]), self.new_size)
|
528 |
-
if 'area' in target:
|
529 |
-
target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
|
530 |
-
if 'keypoints' in target:
|
531 |
-
for i in range(len(target['keypoints'])):
|
532 |
-
target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0],image.size[1]), self.new_size)
|
533 |
-
image = F.resize(image, (self.new_size[1], self.new_size[0]))
|
534 |
-
|
535 |
-
return self.transform(image), target
|
536 |
-
|
537 |
-
def collate_fn(batch):
|
538 |
-
"""
|
539 |
-
Custom collation function for DataLoader that handles batches of images and targets.
|
540 |
-
|
541 |
-
This function ensures that images are properly batched together using PyTorch's default collation,
|
542 |
-
while keeping the targets (such as bounding boxes and labels) in a list of dictionaries,
|
543 |
-
as each image might have a different number of objects detected.
|
544 |
-
|
545 |
-
Parameters:
|
546 |
-
- batch (list): A list of tuples, where each tuple contains an image and its corresponding target dictionary.
|
547 |
-
|
548 |
-
Returns:
|
549 |
-
- Tuple containing:
|
550 |
-
- Tensor: Batched images.
|
551 |
-
- List of dicts: Targets corresponding to each image in the batch.
|
552 |
-
"""
|
553 |
-
images, targets = zip(*batch) # Unzip the batch into separate lists for images and targets.
|
554 |
-
|
555 |
-
# Batch images using the default collate function which handles tensors, numpy arrays, numbers, etc.
|
556 |
-
images = default_collate(images)
|
557 |
-
|
558 |
-
return images, targets
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
def create_loader(new_size,transformation, annotations1, annotations2=None,
|
563 |
-
batch_size=4, crop_prob=0.2, crop_fraction=0.7, min_objects=3,
|
564 |
-
h_flip_prob=0.3, v_flip_prob=0.3, max_rotate_deg=20, rotate_90_proba=0.2, rotate_proba=0.3,
|
565 |
-
seed=42, resize=True, keep_ratio=0.1, model_type = 'object'):
|
566 |
-
"""
|
567 |
-
Creates a DataLoader for BPMN datasets with optional transformations and concatenation of two datasets.
|
568 |
-
|
569 |
-
Parameters:
|
570 |
-
- transformation (callable): Transformation function to apply to each image (e.g., normalization).
|
571 |
-
- annotations1 (list): Primary list of annotations.
|
572 |
-
- annotations2 (list, optional): Secondary list of annotations to concatenate with the first.
|
573 |
-
- batch_size (int): Number of images per batch.
|
574 |
-
- crop_prob (float): Probability of applying the crop transformation.
|
575 |
-
- crop_fraction (float): Fraction of the original width to use when cropping.
|
576 |
-
- min_objects (int): Minimum number of objects required to be within the crop.
|
577 |
-
- h_flip_prob (float): Probability of applying horizontal flip.
|
578 |
-
- v_flip_prob (float): Probability of applying vertical flip.
|
579 |
-
- seed (int): Seed for random number generators for reproducibility.
|
580 |
-
- resize (bool): Flag indicating whether to resize images after transformations.
|
581 |
-
|
582 |
-
Returns:
|
583 |
-
- DataLoader: Configured data loader for the dataset.
|
584 |
-
"""
|
585 |
-
|
586 |
-
# Initialize custom transformations for cropping and flipping
|
587 |
-
custom_crop_transform = RandomCrop(new_size,crop_fraction, min_objects)
|
588 |
-
custom_flip_transform = RandomFlip(h_flip_prob, v_flip_prob)
|
589 |
-
custom_rotate_transform = RandomRotate(max_rotate_deg, rotate_proba)
|
590 |
-
|
591 |
-
# Create the primary dataset
|
592 |
-
dataset = BPMN_Dataset(
|
593 |
-
annotations=annotations1,
|
594 |
-
transform=transformation,
|
595 |
-
crop_transform=custom_crop_transform,
|
596 |
-
crop_prob=crop_prob,
|
597 |
-
rotate_90_proba=rotate_90_proba,
|
598 |
-
flip_transform=custom_flip_transform,
|
599 |
-
rotate_transform=custom_rotate_transform,
|
600 |
-
new_size=new_size,
|
601 |
-
keep_ratio=keep_ratio,
|
602 |
-
model_type=model_type,
|
603 |
-
resize=resize
|
604 |
-
)
|
605 |
-
|
606 |
-
# Optionally concatenate a second dataset
|
607 |
-
if annotations2:
|
608 |
-
dataset2 = BPMN_Dataset(
|
609 |
-
annotations=annotations2,
|
610 |
-
transform=transformation,
|
611 |
-
crop_transform=custom_crop_transform,
|
612 |
-
crop_prob=crop_prob,
|
613 |
-
rotate_90_proba=rotate_90_proba,
|
614 |
-
flip_transform=custom_flip_transform,
|
615 |
-
new_size=new_size,
|
616 |
-
keep_ratio=keep_ratio,
|
617 |
-
model_type=model_type,
|
618 |
-
resize=resize
|
619 |
-
)
|
620 |
-
dataset = ConcatDataset([dataset, dataset2]) # Concatenate the two datasets
|
621 |
-
|
622 |
-
# Set the seed for reproducibility in random operations within transformations and data loading
|
623 |
-
random.seed(seed)
|
624 |
-
torch.manual_seed(seed)
|
625 |
-
|
626 |
-
# Create the DataLoader with the dataset
|
627 |
-
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
|
628 |
-
|
629 |
-
return data_loader
|
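The augmentation classes, BPMN_Dataset and create_loader removed here now live in modules/dataset_loader.py. The piece most relevant to this commit is the keep-ratio branch of the resize step: the image is scaled so it fits inside new_size without distortion, then padded on the right and bottom with the light-grey background value 200 (the real dataset also rescales boxes and keypoints with resize_boxes / resize_keypoints). A standalone sketch of that computation, assuming a PIL image with a (width, height) size attribute:

```python
import torchvision.transforms.functional as F

def resize_keep_ratio_sketch(image, new_size=(1333, 800), fill=200):
    # Largest scale at which the image still fits entirely inside new_size.
    w, h = image.size
    scale = min(new_size[0] / w, new_size[1] / h)
    scaled = (int(w * scale), int(h * scale))
    # torchvision expects (height, width) for resize.
    image = F.resize(image, (scaled[1], scaled[0]))
    # Pad the right and bottom so the output is exactly new_size.
    padding = [0, 0, new_size[0] - scaled[0], new_size[1] - scaled[1]]
    return F.pad(image, padding, fill=fill, padding_mode='constant')
```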
630 |
-
|
631 |
-
|
632 |
-
|
633 |
def write_results(name_model,metrics_list,start_epoch):
|
634 |
with open('./results/'+ name_model+ '.txt', 'w') as f:
|
635 |
for i in range(len(metrics_list[0])):
|
|
|
14 |
import streamlit as st
|
15 |
|
16 |
|
17 |
+
"""object_dict = {
|
18 |
+
0: 'background',
|
19 |
+
1: 'task',
|
20 |
+
2: 'exclusiveGateway',
|
21 |
+
3: 'eventBasedGateway',
|
22 |
+
4: 'event',
|
23 |
+
5: 'messageEvent',
|
24 |
+
6: 'timerEvent',
|
25 |
+
7: 'dataObject',
|
26 |
+
8: 'dataStore',
|
27 |
+
9: 'pool',
|
28 |
+
10: 'lane',
|
29 |
+
}
|
30 |
+
|
31 |
+
|
32 |
+
arrow_dict = {
|
33 |
+
0: 'background',
|
34 |
+
1: 'sequenceFlow',
|
35 |
+
2: 'dataAssociation',
|
36 |
+
3: 'messageFlow',
|
37 |
+
}
|
38 |
+
|
39 |
+
class_dict = {
|
40 |
+
0: 'background',
|
41 |
+
1: 'task',
|
42 |
+
2: 'exclusiveGateway',
|
43 |
+
3: 'eventBasedGateway',
|
44 |
+
4: 'event',
|
45 |
+
5: 'messageEvent',
|
46 |
+
6: 'timerEvent',
|
47 |
+
7: 'dataObject',
|
48 |
+
8: 'dataStore',
|
49 |
+
9: 'pool',
|
50 |
+
10: 'lane',
|
51 |
+
11: 'sequenceFlow',
|
52 |
+
12: 'dataAssociation',
|
53 |
+
13: 'messageFlow',
|
54 |
+
}"""
|
55 |
+
|
56 |
+
|
57 |
object_dict = {
|
58 |
0: 'background',
|
59 |
1: 'task',
|
|
|
130 |
return inter_area / union_area
|
131 |
|
132 |
def proportion_inside(box1, box2):
|
133 |
+
# Calculate the areas of both boxes
|
134 |
+
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
|
135 |
+
box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
|
136 |
+
|
137 |
+
# Determine the bigger and smaller boxes
|
138 |
+
if box1_area > box2_area:
|
139 |
+
big_box = box1
|
140 |
+
small_box = box2
|
141 |
+
else:
|
142 |
+
big_box = box2
|
143 |
+
small_box = box1
|
144 |
+
|
145 |
# Calculate the intersection of the two bounding boxes
|
146 |
+
inter_box = [max(small_box[0], big_box[0]), max(small_box[1], big_box[1]), min(small_box[2], big_box[2]), min(small_box[3], big_box[3])]
|
147 |
inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
|
148 |
|
149 |
+
# Calculate the proportion of the smaller box inside the bigger box
|
150 |
+
if (small_box[2] - small_box[0]) * (small_box[3] - small_box[1]) == 0:
|
|
|
|
|
|
|
151 |
return 0
|
152 |
+
proportion = inter_area / ((small_box[2] - small_box[0]) * (small_box[3] - small_box[1]))
|
153 |
|
154 |
# Ensure the proportion is at most 100%
|
155 |
return min(proportion, 1.0)
|
|
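Compared with the old version, the rewritten proportion_inside no longer assumes that box1 is the inner box: it always measures how much of the smaller box lies inside the bigger one, whichever order the arguments come in. A quick illustration (boxes are [x1, y1, x2, y2]):

```python
big   = [0, 0, 100, 100]
small = [10, 10, 20, 20]          # 10x10 box fully inside the big one
half  = [95, 0, 105, 10]          # 10x10 box that sticks halfway out of the big one

print(proportion_inside(small, big))   # 1.0
print(proportion_inside(big, small))   # 1.0 as well, the argument order no longer matters
print(proportion_inside(half, big))    # 0.5
```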
|
213 |
return keypoints
|
214 |
|
215 |
|
|
|
216 |
def write_results(name_model,metrics_list,start_epoch):
|
217 |
with open('./results/'+ name_model+ '.txt', 'w') as f:
|
218 |
for i in range(len(metrics_list[0])):
|