VikramSingh178 committed on
Commit
c9705bd
•
1 Parent(s): e9caf50

Former-commit-id: 74a3d876eb8af5d012d025eb65b2dd9dc3646558

Files changed (49) hide show
  1. logs/app_debug.log +75 -0
  2. logs/app_info.log +75 -0
  3. masks/Cycle_inverted_mask.jpg +0 -0
  4. masks/Cycle_original_mask.jpg +0 -0
  5. masks/bike_inverted_mask.jpg +0 -0
  6. masks/bike_original_mask.jpg +0 -0
  7. masks/chair_inverted_mask.jpg +0 -0
  8. masks/chair_original_mask.jpg +0 -0
  9. masks/cooker_inverted_mask.jpg +0 -0
  10. masks/cooker_original_mask.jpg +0 -0
  11. masks/office_chair_inverted_mask.jpg +0 -0
  12. masks/office_chair_original_mask.jpg +0 -0
  13. masks/pot_inverted_mask.jpg +0 -0
  14. masks/pot_original_mask.jpg +0 -0
  15. masks/tent _inverted_mask.jpg +0 -0
  16. masks/tent _original_mask.jpg +0 -0
  17. masks/tent_inverted_mask.jpg +0 -0
  18. masks/tent_original_mask.jpg +0 -0
  19. masks/toaster_inverted_mask.jpg +0 -0
  20. masks/toaster_original_mask.jpg +0 -0
  21. notebooks/inpainting.ipynb +0 -0
  22. output/Cycle_extended.jpg +0 -0
  23. output/Cycle_output.jpg +0 -0
  24. output/bike_output.jpg +0 -0
  25. output/chair_extended.jpg +0 -0
  26. output/chair_output.jpg +0 -0
  27. output/cooker_extended.jpg +0 -0
  28. output/cooker_output.jpg +0 -0
  29. output/office_chair_output.jpg +0 -0
  30. output/pot_output.jpg +0 -0
  31. output/tent _output.jpg +0 -0
  32. output/tent_extended.jpg +0 -0
  33. output/tent_output.jpg +0 -0
  34. output/toaster_extended.jpg +0 -0
  35. output/toaster_output.jpg +0 -0
  36. scripts/__pycache__/config.cpython-310.pyc +0 -0
  37. scripts/__pycache__/mask_generator.cpython-310.pyc +0 -0
  38. scripts/__pycache__/models.cpython-310.pyc +0 -0
  39. scripts/__pycache__/pipeline.cpython-310.pyc +0 -0
  40. scripts/__pycache__/segment_everything.cpython-310.pyc +0 -0
  41. scripts/config.py +1 -3
  42. scripts/datautils.py +0 -58
  43. scripts/extended_image.jpg +0 -0
  44. scripts/mask_generator.py +41 -23
  45. scripts/models.py +24 -92
  46. scripts/pipeline.py +20 -50
  47. scripts/run.py +23 -11
  48. scripts/segment_everything.py +51 -0
  49. scripts/yolov8m.pt.REMOVED.git-id +1 -0
logs/app_debug.log CHANGED
@@ -1389,3 +1389,78 @@ speed: {'preprocess': 1.9655227661132812, 'inference': 86.20810508728027, 'postp
1389
  2024-03-22 09:47:05,180 [INFO] models - Kandinsky Inpainting Inference
1390
  2024-03-22 09:51:28,523 [INFO] models - Kandinsky Inpainting Inference
1391
  2024-03-22 09:53:18,039 [INFO] models - Kandinsky Inpainting Inference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1389
  2024-03-22 09:47:05,180 [INFO] models - Kandinsky Inpainting Inference
1390
  2024-03-22 09:51:28,523 [INFO] models - Kandinsky Inpainting Inference
1391
  2024-03-22 09:53:18,039 [INFO] models - Kandinsky Inpainting Inference
1392
+ 2024-03-23 08:26:37,691 [INFO] mask_generator - Mask generation completed successfully
1393
+ 2024-03-23 08:29:23,557 [INFO] mask_generator - Mask generation completed successfully
1394
+ 2024-03-23 08:30:47,545 [INFO] mask_generator - Mask generation completed successfully
1395
+ 2024-03-23 08:35:34,516 [INFO] mask_generator - Mask generation completed successfully
1396
+ 2024-03-23 09:31:44,362 [INFO] mask_generator - Mask generation completed successfully
1397
+ 2024-03-23 09:32:16,521 [INFO] mask_generator - Mask generation completed successfully
1398
+ 2024-03-23 09:32:45,559 [INFO] mask_generator - Mask generation completed successfully
1399
+ 2024-03-23 09:35:33,253 [INFO] mask_generator - Mask generation completed successfully
1400
+ 2024-03-23 09:36:06,844 [INFO] mask_generator - Mask generation completed successfully
1401
+ 2024-03-23 09:43:52,587 [INFO] mask_generator - Mask generation completed successfully
1402
+ 2024-03-23 09:44:30,749 [INFO] mask_generator - Mask generation completed successfully
1403
+ 2024-03-23 09:56:41,476 [INFO] mask_generator - Mask generation completed successfully
1404
+ 2024-03-23 09:57:16,318 [INFO] mask_generator - Mask generation completed successfully
1405
+ 2024-03-23 10:07:32,135 [INFO] mask_generator - Mask generation completed successfully
1406
+ 2024-03-23 11:02:29,843 [INFO] mask_generator - Mask generation completed successfully
1407
+ 2024-03-23 11:05:02,471 [INFO] mask_generator - Mask generation completed successfully
1408
+ 2024-03-23 19:23:38,952 [INFO] clear_memory - Memory Cleared
1409
+ 2024-03-23 19:42:39,347 [INFO] clear_memory - Memory Cleared
1410
+ 2024-03-24 12:51:21,021 [INFO] clear_memory - Memory Cleared
1411
+ 2024-03-24 13:07:46,765 [INFO] clear_memory - Memory Cleared
1412
+ 2024-03-24 13:14:03,557 [INFO] clear_memory - Memory Cleared
1413
+ 2024-03-24 13:17:25,009 [INFO] clear_memory - Memory Cleared
1414
+ 2024-03-24 13:23:11,117 [INFO] clear_memory - Memory Cleared
1415
+ 2024-03-24 13:28:30,138 [INFO] clear_memory - Memory Cleared
1416
+ 2024-03-24 13:32:00,626 [INFO] clear_memory - Memory Cleared
1417
+ 2024-03-24 13:35:18,404 [INFO] clear_memory - Memory Cleared
1418
+ 2024-03-24 13:38:37,096 [INFO] clear_memory - Memory Cleared
1419
+ 2024-03-24 13:41:27,631 [INFO] clear_memory - Memory Cleared
1420
+ 2024-03-24 13:44:00,439 [INFO] clear_memory - Memory Cleared
1421
+ 2024-03-24 13:47:29,728 [INFO] clear_memory - Memory Cleared
1422
+ 2024-03-24 13:51:27,224 [INFO] clear_memory - Memory Cleared
1423
+ 2024-03-24 14:29:41,821 [INFO] clear_memory - Memory Cleared
1424
+ 2024-03-24 14:32:41,074 [INFO] clear_memory - Memory Cleared
1425
+ 2024-03-24 14:40:54,554 [INFO] clear_memory - Memory Cleared
1426
+ 2024-03-24 14:43:46,918 [INFO] clear_memory - Memory Cleared
1427
+ 2024-03-24 14:46:15,513 [INFO] clear_memory - Memory Cleared
1428
+ 2024-03-24 14:49:51,695 [INFO] clear_memory - Memory Cleared
1429
+ 2024-03-24 15:21:03,538 [INFO] clear_memory - Memory Cleared
1430
+ 2024-03-24 15:24:31,062 [INFO] clear_memory - Memory Cleared
1431
+ 2024-03-24 18:15:16,198 [INFO] clear_memory - Memory Cleared
1432
+ 2024-03-24 18:30:54,085 [INFO] clear_memory - Memory Cleared
1433
+ 2024-03-24 18:31:39,719 [INFO] clear_memory - Memory Cleared
1434
+ 2024-03-24 18:56:00,754 [INFO] clear_memory - Memory Cleared
1435
+ 2024-03-24 19:01:30,572 [INFO] clear_memory - Memory Cleared
1436
+ 2024-03-24 19:05:33,502 [INFO] clear_memory - Memory Cleared
1437
+ 2024-03-24 19:08:09,191 [INFO] clear_memory - Memory Cleared
1438
+ 2024-03-24 19:10:09,003 [INFO] clear_memory - Memory Cleared
1439
+ 2024-03-24 19:10:54,562 [INFO] clear_memory - Memory Cleared
1440
+ 2024-03-24 19:15:11,151 [INFO] clear_memory - Memory Cleared
1441
+ 2024-03-24 19:20:54,871 [INFO] clear_memory - Memory Cleared
1442
+ 2024-03-24 19:23:27,620 [INFO] clear_memory - Memory Cleared
1443
+ 2024-03-24 19:26:56,507 [INFO] clear_memory - Memory Cleared
1444
+ 2024-03-24 19:39:00,070 [INFO] clear_memory - Memory Cleared
1445
+ 2024-03-24 19:44:51,106 [INFO] clear_memory - Memory Cleared
1446
+ 2024-03-24 19:52:32,346 [INFO] clear_memory - Memory Cleared
1447
+ 2024-03-24 19:58:14,409 [INFO] clear_memory - Memory Cleared
1448
+ 2024-03-24 20:05:13,208 [INFO] clear_memory - Memory Cleared
1449
+ 2024-03-24 20:11:07,106 [INFO] clear_memory - Memory Cleared
1450
+ 2024-03-24 20:18:40,535 [INFO] clear_memory - Memory Cleared
1451
+ 2024-03-24 20:24:46,394 [INFO] clear_memory - Memory Cleared
1452
+ 2024-03-24 20:30:39,800 [INFO] clear_memory - Memory Cleared
1453
+ 2024-03-24 20:37:12,153 [INFO] clear_memory - Memory Cleared
1454
+ 2024-03-24 20:42:47,710 [INFO] clear_memory - Memory Cleared
1455
+ 2024-03-24 20:48:23,383 [INFO] clear_memory - Memory Cleared
1456
+ 2024-03-24 20:56:31,162 [INFO] clear_memory - Memory Cleared
1457
+ 2024-03-24 21:03:44,503 [INFO] clear_memory - Memory Cleared
1458
+ 2024-03-24 21:09:56,651 [INFO] clear_memory - Memory Cleared
1459
+ 2024-03-24 21:17:23,320 [INFO] clear_memory - Memory Cleared
1460
+ 2024-03-24 21:23:06,580 [INFO] clear_memory - Memory Cleared
1461
+ 2024-03-24 21:29:14,870 [INFO] clear_memory - Memory Cleared
1462
+ 2024-03-24 21:36:09,328 [INFO] clear_memory - Memory Cleared
1463
+ 2024-03-24 21:40:40,507 [INFO] clear_memory - Memory Cleared
1464
+ 2024-03-24 21:44:47,907 [INFO] clear_memory - Memory Cleared
1465
+ 2024-03-24 21:48:43,724 [INFO] clear_memory - Memory Cleared
1466
+ 2024-03-24 21:52:50,583 [INFO] clear_memory - Memory Cleared
logs/app_info.log CHANGED
@@ -1389,3 +1389,78 @@ speed: {'preprocess': 1.9655227661132812, 'inference': 86.20810508728027, 'postp
1389
  2024-03-22 09:47:05,180 [INFO] models - Kandinsky Inpainting Inference
1390
  2024-03-22 09:51:28,523 [INFO] models - Kandinsky Inpainting Inference
1391
  2024-03-22 09:53:18,039 [INFO] models - Kandinsky Inpainting Inference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1389
  2024-03-22 09:47:05,180 [INFO] models - Kandinsky Inpainting Inference
1390
  2024-03-22 09:51:28,523 [INFO] models - Kandinsky Inpainting Inference
1391
  2024-03-22 09:53:18,039 [INFO] models - Kandinsky Inpainting Inference
1392
+ 2024-03-23 08:26:37,691 [INFO] mask_generator - Mask generation completed successfully
1393
+ 2024-03-23 08:29:23,557 [INFO] mask_generator - Mask generation completed successfully
1394
+ 2024-03-23 08:30:47,545 [INFO] mask_generator - Mask generation completed successfully
1395
+ 2024-03-23 08:35:34,516 [INFO] mask_generator - Mask generation completed successfully
1396
+ 2024-03-23 09:31:44,362 [INFO] mask_generator - Mask generation completed successfully
1397
+ 2024-03-23 09:32:16,521 [INFO] mask_generator - Mask generation completed successfully
1398
+ 2024-03-23 09:32:45,559 [INFO] mask_generator - Mask generation completed successfully
1399
+ 2024-03-23 09:35:33,253 [INFO] mask_generator - Mask generation completed successfully
1400
+ 2024-03-23 09:36:06,844 [INFO] mask_generator - Mask generation completed successfully
1401
+ 2024-03-23 09:43:52,587 [INFO] mask_generator - Mask generation completed successfully
1402
+ 2024-03-23 09:44:30,749 [INFO] mask_generator - Mask generation completed successfully
1403
+ 2024-03-23 09:56:41,476 [INFO] mask_generator - Mask generation completed successfully
1404
+ 2024-03-23 09:57:16,318 [INFO] mask_generator - Mask generation completed successfully
1405
+ 2024-03-23 10:07:32,135 [INFO] mask_generator - Mask generation completed successfully
1406
+ 2024-03-23 11:02:29,843 [INFO] mask_generator - Mask generation completed successfully
1407
+ 2024-03-23 11:05:02,471 [INFO] mask_generator - Mask generation completed successfully
1408
+ 2024-03-23 19:23:38,952 [INFO] clear_memory - Memory Cleared
1409
+ 2024-03-23 19:42:39,347 [INFO] clear_memory - Memory Cleared
1410
+ 2024-03-24 12:51:21,021 [INFO] clear_memory - Memory Cleared
1411
+ 2024-03-24 13:07:46,765 [INFO] clear_memory - Memory Cleared
1412
+ 2024-03-24 13:14:03,557 [INFO] clear_memory - Memory Cleared
1413
+ 2024-03-24 13:17:25,009 [INFO] clear_memory - Memory Cleared
1414
+ 2024-03-24 13:23:11,117 [INFO] clear_memory - Memory Cleared
1415
+ 2024-03-24 13:28:30,138 [INFO] clear_memory - Memory Cleared
1416
+ 2024-03-24 13:32:00,626 [INFO] clear_memory - Memory Cleared
1417
+ 2024-03-24 13:35:18,404 [INFO] clear_memory - Memory Cleared
1418
+ 2024-03-24 13:38:37,096 [INFO] clear_memory - Memory Cleared
1419
+ 2024-03-24 13:41:27,631 [INFO] clear_memory - Memory Cleared
1420
+ 2024-03-24 13:44:00,439 [INFO] clear_memory - Memory Cleared
1421
+ 2024-03-24 13:47:29,728 [INFO] clear_memory - Memory Cleared
1422
+ 2024-03-24 13:51:27,224 [INFO] clear_memory - Memory Cleared
1423
+ 2024-03-24 14:29:41,821 [INFO] clear_memory - Memory Cleared
1424
+ 2024-03-24 14:32:41,074 [INFO] clear_memory - Memory Cleared
1425
+ 2024-03-24 14:40:54,554 [INFO] clear_memory - Memory Cleared
1426
+ 2024-03-24 14:43:46,918 [INFO] clear_memory - Memory Cleared
1427
+ 2024-03-24 14:46:15,513 [INFO] clear_memory - Memory Cleared
1428
+ 2024-03-24 14:49:51,695 [INFO] clear_memory - Memory Cleared
1429
+ 2024-03-24 15:21:03,538 [INFO] clear_memory - Memory Cleared
1430
+ 2024-03-24 15:24:31,062 [INFO] clear_memory - Memory Cleared
1431
+ 2024-03-24 18:15:16,198 [INFO] clear_memory - Memory Cleared
1432
+ 2024-03-24 18:30:54,085 [INFO] clear_memory - Memory Cleared
1433
+ 2024-03-24 18:31:39,719 [INFO] clear_memory - Memory Cleared
1434
+ 2024-03-24 18:56:00,754 [INFO] clear_memory - Memory Cleared
1435
+ 2024-03-24 19:01:30,572 [INFO] clear_memory - Memory Cleared
1436
+ 2024-03-24 19:05:33,502 [INFO] clear_memory - Memory Cleared
1437
+ 2024-03-24 19:08:09,191 [INFO] clear_memory - Memory Cleared
1438
+ 2024-03-24 19:10:09,003 [INFO] clear_memory - Memory Cleared
1439
+ 2024-03-24 19:10:54,562 [INFO] clear_memory - Memory Cleared
1440
+ 2024-03-24 19:15:11,151 [INFO] clear_memory - Memory Cleared
1441
+ 2024-03-24 19:20:54,871 [INFO] clear_memory - Memory Cleared
1442
+ 2024-03-24 19:23:27,620 [INFO] clear_memory - Memory Cleared
1443
+ 2024-03-24 19:26:56,507 [INFO] clear_memory - Memory Cleared
1444
+ 2024-03-24 19:39:00,070 [INFO] clear_memory - Memory Cleared
1445
+ 2024-03-24 19:44:51,106 [INFO] clear_memory - Memory Cleared
1446
+ 2024-03-24 19:52:32,346 [INFO] clear_memory - Memory Cleared
1447
+ 2024-03-24 19:58:14,409 [INFO] clear_memory - Memory Cleared
1448
+ 2024-03-24 20:05:13,208 [INFO] clear_memory - Memory Cleared
1449
+ 2024-03-24 20:11:07,106 [INFO] clear_memory - Memory Cleared
1450
+ 2024-03-24 20:18:40,535 [INFO] clear_memory - Memory Cleared
1451
+ 2024-03-24 20:24:46,394 [INFO] clear_memory - Memory Cleared
1452
+ 2024-03-24 20:30:39,800 [INFO] clear_memory - Memory Cleared
1453
+ 2024-03-24 20:37:12,153 [INFO] clear_memory - Memory Cleared
1454
+ 2024-03-24 20:42:47,710 [INFO] clear_memory - Memory Cleared
1455
+ 2024-03-24 20:48:23,383 [INFO] clear_memory - Memory Cleared
1456
+ 2024-03-24 20:56:31,162 [INFO] clear_memory - Memory Cleared
1457
+ 2024-03-24 21:03:44,503 [INFO] clear_memory - Memory Cleared
1458
+ 2024-03-24 21:09:56,651 [INFO] clear_memory - Memory Cleared
1459
+ 2024-03-24 21:17:23,320 [INFO] clear_memory - Memory Cleared
1460
+ 2024-03-24 21:23:06,580 [INFO] clear_memory - Memory Cleared
1461
+ 2024-03-24 21:29:14,870 [INFO] clear_memory - Memory Cleared
1462
+ 2024-03-24 21:36:09,328 [INFO] clear_memory - Memory Cleared
1463
+ 2024-03-24 21:40:40,507 [INFO] clear_memory - Memory Cleared
1464
+ 2024-03-24 21:44:47,907 [INFO] clear_memory - Memory Cleared
1465
+ 2024-03-24 21:48:43,724 [INFO] clear_memory - Memory Cleared
1466
+ 2024-03-24 21:52:50,583 [INFO] clear_memory - Memory Cleared
masks/Cycle_inverted_mask.jpg ADDED
masks/Cycle_original_mask.jpg ADDED
masks/bike_inverted_mask.jpg DELETED
Binary file (18 kB)
 
masks/bike_original_mask.jpg DELETED
Binary file (16.3 kB)
 
masks/chair_inverted_mask.jpg ADDED
masks/chair_original_mask.jpg ADDED
masks/cooker_inverted_mask.jpg ADDED
masks/cooker_original_mask.jpg ADDED
masks/office_chair_inverted_mask.jpg DELETED
Binary file (12.1 kB)
 
masks/office_chair_original_mask.jpg DELETED
Binary file (10.6 kB)
 
masks/pot_inverted_mask.jpg DELETED
Binary file (13.4 kB)
 
masks/pot_original_mask.jpg DELETED
Binary file (11.5 kB)
 
masks/tent _inverted_mask.jpg DELETED
Binary file (12.1 kB)
 
masks/tent _original_mask.jpg DELETED
Binary file (10.6 kB)
 
masks/tent_inverted_mask.jpg ADDED
masks/tent_original_mask.jpg ADDED
masks/toaster_inverted_mask.jpg CHANGED
masks/toaster_original_mask.jpg CHANGED
notebooks/inpainting.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
output/Cycle_extended.jpg ADDED
output/Cycle_output.jpg ADDED
output/bike_output.jpg DELETED
Binary file (64.8 kB)
 
output/chair_extended.jpg ADDED
output/chair_output.jpg ADDED
output/cooker_extended.jpg ADDED
output/cooker_output.jpg ADDED
output/office_chair_output.jpg DELETED
Binary file (32.9 kB)
 
output/pot_output.jpg DELETED
Binary file (25.4 kB)
 
output/tent _output.jpg DELETED
Binary file (33.4 kB)
 
output/tent_extended.jpg ADDED
output/tent_output.jpg ADDED
output/toaster_extended.jpg ADDED
output/toaster_output.jpg CHANGED
scripts/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/config.cpython-310.pyc and b/scripts/__pycache__/config.cpython-310.pyc differ
 
scripts/__pycache__/mask_generator.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/mask_generator.cpython-310.pyc and b/scripts/__pycache__/mask_generator.cpython-310.pyc differ
 
scripts/__pycache__/models.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/models.cpython-310.pyc and b/scripts/__pycache__/models.cpython-310.pyc differ
 
scripts/__pycache__/pipeline.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/pipeline.cpython-310.pyc and b/scripts/__pycache__/pipeline.cpython-310.pyc differ
 
scripts/__pycache__/segment_everything.cpython-310.pyc ADDED
Binary file (1.41 kB). View file
 
scripts/config.py CHANGED
@@ -8,6 +8,4 @@ mask_dir = '../masks'
8
  controlnet_adapter_model_name= 'lllyasviel/control_v11p_sd15_inpaint'
9
  controlnet_base_model_name = "runwayml/stable-diffusion-inpainting"
10
  kandinsky_model_name = 'kandinsky-community/kandinsky-2-2-decoder-inpaint'
11
- width = 512
12
- height = 512
13
- yolo_model = 'yolov8s-seg.pt'
 
8
  controlnet_adapter_model_name= 'lllyasviel/control_v11p_sd15_inpaint'
9
  controlnet_base_model_name = "runwayml/stable-diffusion-inpainting"
10
  kandinsky_model_name = 'kandinsky-community/kandinsky-2-2-decoder-inpaint'
11
+
 
 
scripts/datautils.py DELETED
@@ -1,58 +0,0 @@
1
- from datasets import load_dataset, Image
2
- from config import Dataset_Name, DATA_DIR
3
- from logger import rich_logger as l
4
- import wandb
5
- from config import Project_Name, entity
6
- import pandas as pd
7
- from tqdm import tqdm
8
-
9
-
10
-
11
-
12
-
13
-
14
-
15
-
16
- class DatasetUtils:
17
- """
18
- Utility class for working with datasets.
19
- """
20
- def __init__(self, dataset_name:str,split:str=None):
21
- super().__init__()
22
- """
23
- Initializes a new instance of the DatasetUtils class.
24
-
25
- Args:
26
- dataset_name (str): The name of the dataset to use.
27
- """
28
- self.dataset_name = dataset_name
29
- self.split = split
30
- self.dataset = load_dataset(self.dataset_name,cache_dir=DATA_DIR,split=split)
31
-
32
- self.dataset=self.dataset.remove_columns(['id'])
33
- l.info(f"Initialized dataset: {self.dataset_name}")
34
- l.info(self.dataset.features)
35
-
36
-
37
-
38
-
39
-
40
-
41
-
42
-
43
-
44
-
45
-
46
-
47
-
48
-
49
-
50
-
51
-
52
- if __name__=="__main__":
53
- dataset = DatasetUtils(Dataset_Name,split="train")
54
-
55
-
56
-
57
-
58
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/extended_image.jpg ADDED
scripts/mask_generator.py CHANGED
@@ -1,35 +1,14 @@
1
- from PIL import Image
2
  import numpy as np
3
  from logger import rich_logger as l
4
  from ultralytics import YOLO
5
  import cv2
6
- from config import yolo_model
7
- from pathlib import Path
8
  import PIL.ImageOps
9
 
10
 
11
 
12
 
13
- def generate_mask(image_path: str) -> np.ndarray:
14
- """Method to segment image
15
- Args:
16
- image_path (str): path to input image
17
- Returns:
18
- np.ndarray: segmented image mask
19
- """
20
- model = YOLO(model=yolo_model) # Initialize YOLO model
21
- results = model(image_path) # Perform object detection
22
- for result in results:
23
- orig_img = result.orig_img
24
- masks = result.masks.xy
25
- height, width = result.orig_img.shape[:2]
26
- mask_img = np.ones((height, width), dtype=np.uint8) * 255 # Initialize mask with white background
27
-
28
- for mask in masks:
29
- mask = mask.astype(int)
30
- cv2.fillPoly(mask_img, [mask], 0) # Fill mask with detected object areas
31
-
32
- return mask_img
33
 
34
  def invert_mask(mask_image: Image) -> np.ndarray:
35
  """Method to invert mask
@@ -41,3 +20,42 @@ def invert_mask(mask_image: Image) -> np.ndarray:
41
  inverted_mask_image =PIL.ImageOps.invert(mask_image)
42
  return inverted_mask_image
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image, ImageFilter,ImageDraw
2
  import numpy as np
3
  from logger import rich_logger as l
4
  from ultralytics import YOLO
5
  import cv2
 
 
6
  import PIL.ImageOps
7
 
8
 
9
 
10
 
11
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def invert_mask(mask_image: Image) -> np.ndarray:
14
  """Method to invert mask
 
20
  inverted_mask_image =PIL.ImageOps.invert(mask_image)
21
  return inverted_mask_image
22
 
23
+
24
+
25
+
26
+
27
+ def extend_image(image_path, target_width, target_height, roi_scale=0.5):
28
+ # Open the original image
29
+ original_image = Image.open(image_path)
30
+
31
+ # Get the dimensions of the original image
32
+ original_width, original_height = original_image.size
33
+
34
+ # Calculate the scale to fit the target resolution while keeping the aspect ratio
35
+ scale = min(target_width / original_width, target_height / original_height)
36
+
37
+ # Calculate the new dimensions of the image
38
+ new_width = int(original_width * scale * roi_scale)
39
+ new_height = int(original_height * scale * roi_scale)
40
+
41
+ # Resize the original image with keeping the aspect ratio
42
+ original_image_resized = original_image.resize((new_width, new_height))
43
+
44
+ # Create a new image with white background
45
+ extended_image = Image.new("RGB", (target_width, target_height), "white")
46
+
47
+ # Calculate the position to paste the resized image at the center
48
+ paste_x = (target_width - new_width) // 2
49
+ paste_y = (target_height - new_height) // 2
50
+
51
+ # Paste the resized image onto the new image
52
+ extended_image.paste(original_image_resized, (paste_x, paste_y))
53
+
54
+ return extended_image
55
+
56
+
57
+
58
+
59
+
60
+
61
+
scripts/models.py CHANGED
@@ -2,17 +2,15 @@ from logger import rich_logger as l
2
  from wandb.integration.diffusers import autolog
3
  from config import Project_Name
4
  from clear_memory import clear_memory
5
- from typing import List
6
  import numpy as np
7
  import torch
8
- from PIL import Image,ImageFilter,ImageOps
9
  from mask_generator import invert_mask
10
  from diffusers.utils import load_image
11
- from pipeline import fetch_control_pipeline,fetch_kandinsky_pipeline,fetch_kandinsky_prior_pipeline,fetch_kandinsky_img2img_pipeline
12
  from config import controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name
13
  import cv2
14
-
15
- from transformers import pipeline
16
 
17
 
18
 
@@ -23,39 +21,8 @@ autolog(init=dict(project=Project_Name))
23
 
24
 
25
 
26
- def make_controlnet_condition(image: Image.Image) -> Image.Image:
27
- """
28
- Applies image processing operations to create a controlnet condition image.
29
-
30
- Args:
31
- image (PIL.Image.Image): The input image.
32
-
33
- Returns:
34
- PIL.Image.Image: The controlnet condition image.
35
- """
36
- image = np.array(image)
37
- image = cv2.Canny(image, 100, 200)
38
- image = image[:, :, None]
39
- image = np.concatenate([image, image, image], axis=2)
40
- image = Image.fromarray(image)
41
- return image
42
 
43
  def make_inpaint_condition(init_image, mask_image):
44
- """
45
- Prepare the initial image for inpainting by applying a mask.
46
-
47
- Args:
48
- init_image (PIL.Image.Image): The initial image.
49
- mask_image (PIL.Image.Image): The mask image.
50
-
51
- Returns:
52
- torch.Tensor: The prepared initial image for inpainting.
53
-
54
- Raises:
55
- AssertionError: If the image and mask have different sizes.
56
-
57
- """
58
- # Prepare control image
59
  init_image = np.array(init_image.convert("RGB")).astype(np.float32) / 255.0
60
  mask_image = np.array(mask_image.convert("L")).astype(np.float32) / 255.0
61
 
@@ -66,15 +33,6 @@ def make_inpaint_condition(init_image, mask_image):
66
  return init_image
67
 
68
 
69
- def make_hint(image, depth_estimator):
70
- image = depth_estimator(image)["depth"]
71
- image = np.array(image)
72
- image = image[:, :, None]
73
- image = np.concatenate([image, image, image], axis=2)
74
- detected_map = torch.from_numpy(image).float() / 255.0
75
- hint = detected_map.permute(2, 0, 1)
76
- return hint
77
-
78
 
79
 
80
 
@@ -82,36 +40,7 @@ def make_hint(image, depth_estimator):
82
 
83
 
84
 
85
- def controlnet_inpainting_inference(prompt,
86
- image,
87
- mask_image,
88
- control_image,
89
- num_inference_steps=200,
90
- guidance_scale=1.2,
91
- strength=5.0,
92
- generator=torch.Generator(device="cpu").manual_seed(1)
93
- ) -> List[Image.Image]:
94
- """
95
- Perform inpainting inference on an image using the given parameters.
96
 
97
- Args:
98
- prompt: The prompt for the inpainting inference.
99
- image: The input image to be inpainted.
100
- mask_image: The mask image indicating the regions to be inpainted.
101
- controlnet_conditioning_image: The conditioning image for the controlnet.
102
- num_inference_steps: The number of inference steps to perform (default: 200).
103
- guidance_scale: The scale factor for the guidance loss (default: 1.2).
104
- strength: The strength of the inpainting (default: 5.0).
105
- generator: The random number generator for reproducibility (default: torch.Generator(device="cpu").manual_seed(1)).
106
-
107
- Returns:
108
- A list of inpainted images.
109
-
110
- """
111
- clear_memory()
112
- pipe = fetch_control_pipeline(controlnet_adapter_model_name, controlnet_base_model_name,kandinsky_model_name, control_image)
113
- image = pipe(prompt = prompt,num_inference_steps=num_inference_steps, generator=generator, eta=1.0, image=image, mask_image=mask_image,guidance_scale=guidance_scale,strenght=strength, control_image=control_image).images[0]
114
- return image
115
 
116
  def kandinsky_inpainting_inference(prompt, negative_prompt, image, mask_image):
117
  """
@@ -126,37 +55,40 @@ def kandinsky_inpainting_inference(prompt, negative_prompt, image, mask_image):
126
  Returns:
127
  PIL.Image.Image: The output inpainted image.
128
  """
 
129
  pipe = fetch_kandinsky_pipeline(controlnet_adapter_model_name, controlnet_base_model_name,kandinsky_model_name, image)
130
- output_image = pipe(prompt=prompt,negative_prompt=negative_prompt,image=image,mask_image=mask_image,num_inference_steps=200,strength=1.0).images[0]
131
  return output_image
132
 
133
 
134
 
135
- def kandinsky_controlnet_inpainting_inference(prompt, negative_prompt, image, hint, generator=torch.Generator(device="cuda").manual_seed(43)):
 
 
 
136
  """
137
- Perform inpainting inference using the Kandinsky ControlNet model.
138
 
139
  Args:
140
- prompt (str): The prompt for the inpainting process.
141
- negative_prompt (str): The negative prompt for the inpainting process.
142
- image (torch.Tensor): The input image for inpainting.
143
- hint (torch.Tensor): The hint for guiding the inpainting process.
144
- generator (torch.Generator, optional): The random number generator. Defaults to CUDA generator with seed 43.
145
 
146
  Returns:
147
- torch.Tensor: The inpainted image.
148
-
149
  """
150
- prior_pipe = fetch_kandinsky_prior_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image)
151
- img_embed = prior_pipe(prompt=prompt, image=image, strength=1.0, generator=generator)
152
- negative_embed = prior_pipe(prompt=negative_prompt, image=image, strength=1, generator=generator)
153
- controlnet_pipe = fetch_kandinsky_img2img_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image)
154
- image = controlnet_pipe(image=image, strength=1.0, image_embeds=img_embed.image_embeds, negative_image_embeds=negative_embed.image_embeds, hint=hint, num_inference_steps=200, generator=generator, height=768, width=768).images[0]
 
 
 
155
  return image
156
 
157
-
158
-
159
-
160
 
161
 
162
 
 
2
  from wandb.integration.diffusers import autolog
3
  from config import Project_Name
4
  from clear_memory import clear_memory
 
5
  import numpy as np
6
  import torch
7
+ from PIL import Image
8
  from mask_generator import invert_mask
9
  from diffusers.utils import load_image
10
+ from pipeline import fetch_kandinsky_pipeline
11
  from config import controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name
12
  import cv2
13
+ from diffusers import StableDiffusionInpaintPipeline, DPMSolverMultistepScheduler
 
14
 
15
 
16
 
 
21
 
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def make_inpaint_condition(init_image, mask_image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  init_image = np.array(init_image.convert("RGB")).astype(np.float32) / 255.0
27
  mask_image = np.array(mask_image.convert("L")).astype(np.float32) / 255.0
28
 
 
33
  return init_image
34
 
35
 
 
 
 
 
 
 
 
 
 
36
 
37
 
38
 
 
40
 
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def kandinsky_inpainting_inference(prompt, negative_prompt, image, mask_image):
46
  """
 
55
  Returns:
56
  PIL.Image.Image: The output inpainted image.
57
  """
58
+ clear_memory()
59
  pipe = fetch_kandinsky_pipeline(controlnet_adapter_model_name, controlnet_base_model_name,kandinsky_model_name, image)
60
+ output_image = pipe(prompt=prompt,negative_prompt=negative_prompt,image=image,mask_image=mask_image,num_inference_steps=800,strength=1.0,guidance_scale = 7.8,height = 1472, width = 2560).images[0]
61
  return output_image
62
 
63
 
64
 
65
+
66
+
67
+
68
+ def sd2_inpainting_inference(prompt, img, mask, repo_id="stabilityai/stable-diffusion-2-inpainting", revision="fp16"):
69
  """
70
+ Generate an image based on a prompt using a pretrained model.
71
 
72
  Args:
73
+ prompt (str): The prompt for the image generation.
74
+ img_url (str): The URL of the initial image.
75
+ mask_url (str): The URL of the mask image.
76
+ repo_id (str, optional): The ID of the repository of the pretrained model. Defaults to "stabilityai/stable-diffusion-2-inpainting".
77
+ revision (str, optional): The revision of the pretrained model. Defaults to "fp16".
78
 
79
  Returns:
80
+ Image: The generated image.
 
81
  """
82
+ init_image = load_image(img)
83
+ mask_image = load_image(mask)
84
+ pipe = StableDiffusionInpaintPipeline.from_pretrained(
85
+ repo_id,
86
+ torch_dtype=torch.float16)
87
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
88
+ pipe = pipe.to("cuda")
89
+ image = pipe(prompt=prompt, image=init_image, mask_image=mask_image, num_inference_steps=400,guidence_scale=7.5).images[0]
90
  return image
91
 
 
 
 
92
 
93
 
94
 
scripts/pipeline.py CHANGED
@@ -1,15 +1,11 @@
1
- from diffusers import ControlNetModel,StableDiffusionControlNetInpaintPipeline,AutoPipelineForInpainting,KandinskyV22ControlnetImg2ImgPipeline,KandinskyV22PriorEmb2EmbPipeline
2
- from diffusers.utils import load_image
3
- import torch
4
- from PIL import Image
5
- import numpy as np
6
- import cv2
7
  import torch
8
 
9
 
10
 
11
 
12
 
 
13
  class PipelineFetcher:
14
  """
15
  A class that fetches different pipelines for image processing.
@@ -56,52 +52,28 @@ class PipelineFetcher:
56
  pipe.to('cuda')
57
  return pipe
58
 
59
- def KandinskyPriorPipeline(self):
60
- """
61
- Fetches the Kandinsky prior pipeline.
62
-
63
- Returns:
64
- prior_pipeline (KandinskyV22PriorEmb2EmbPipeline): The Kandinsky prior pipeline.
65
-
66
- """
67
- prior_pipeline = KandinskyV22PriorEmb2EmbPipeline.from_pretrained(
68
- "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16, use_safetensors=False
69
- ).to("cuda")
70
- return prior_pipeline
71
 
72
- def KandinskyImg2ImgPipeline(self):
73
- """
74
- Fetches the Kandinsky img2img pipeline.
75
 
76
- Returns:
77
- img2img_pipeline (KandinskyV22ControlnetImg2ImgPipeline): The Kandinsky img2img pipeline.
 
 
 
78
 
79
- """
80
- img2img_pipeline = KandinskyV22ControlnetImg2ImgPipeline.from_pretrained(
81
- "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16, use_safetensors=False
82
- ).to("cuda")
83
- return img2img_pipeline
84
-
85
-
86
-
87
- def fetch_control_pipeline(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image):
88
- pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image)
89
  pipe = pipe_fetcher.ControlNetInpaintPipeline()
90
  return pipe
91
 
92
- def fetch_kandinsky_pipeline(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image):
93
- pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image)
94
- pipe = pipe_fetcher.KandinskyPipeline()
95
- return pipe
96
 
97
- def fetch_kandinsky_prior_pipeline(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image):
98
- pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image)
99
- pipe = pipe_fetcher.KandinskyPriorPipeline()
100
- return pipe
101
-
102
- def fetch_kandinsky_img2img_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image):
103
  """
104
- Fetches the Kandinsky image-to-image pipeline.
105
 
106
  Args:
107
  controlnet_adapter_model_name (str): The name of the controlnet adapter model.
@@ -110,12 +82,10 @@ def fetch_kandinsky_img2img_pipeline(controlnet_adapter_model_name, controlnet_b
110
  image: The input image.
111
 
112
  Returns:
113
- pipe: The Kandinsky image-to-image pipeline.
114
  """
115
  pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image)
116
- pipe = pipe_fetcher.KandinskyImg2ImgPipeline()
117
- return pipe
118
- def fetch_kandinsky_img2img_pipeline(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image):
119
- pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name,image)
120
- pipe = pipe_fetcher.KandinskyImg2ImgPipeline()
121
  return pipe
 
 
 
1
+ from diffusers import ControlNetModel,StableDiffusionControlNetInpaintPipeline,AutoPipelineForInpainting
 
 
 
 
 
2
  import torch
3
 
4
 
5
 
6
 
7
 
8
+
9
  class PipelineFetcher:
10
  """
11
  A class that fetches different pipelines for image processing.
 
52
  pipe.to('cuda')
53
  return pipe
54
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
def fetch_control_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image):
    """
    Build a PipelineFetcher and return its ControlNet inpainting pipeline.

    Args:
        controlnet_adapter_model_name (str): The name of the controlnet adapter model.
        controlnet_base_model_name (str): The name of the controlnet base model.
        kandinsky_model_name (str): The name of the Kandinsky model.
        image: The input image handed to the fetcher.

    Returns:
        The object produced by ``PipelineFetcher.ControlNetInpaintPipeline()``.
    """
    # All four arguments are forwarded positionally, in the same order the
    # fetcher's constructor is called elsewhere in this module.
    fetcher = PipelineFetcher(
        controlnet_adapter_model_name,
        controlnet_base_model_name,
        kandinsky_model_name,
        image,
    )
    return fetcher.ControlNetInpaintPipeline()
72
 
 
 
 
 
73
 
def fetch_kandinsky_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image):
    """
    Build a PipelineFetcher and return its Kandinsky pipeline.

    Args:
        controlnet_adapter_model_name (str): The name of the controlnet adapter model.
        controlnet_base_model_name (str): The name of the controlnet base model.
        kandinsky_model_name (str): The name of the Kandinsky model.
        image: The input image.

    Returns:
        The object produced by ``PipelineFetcher.KandinskyPipeline()``.
    """
    # Same construction as the ControlNet variant; only the fetcher method differs.
    fetcher = PipelineFetcher(
        controlnet_adapter_model_name,
        controlnet_base_model_name,
        kandinsky_model_name,
        image,
    )
    return fetcher.KandinskyPipeline()
90
+
91
+
scripts/run.py CHANGED
@@ -1,37 +1,49 @@
1
  import argparse
2
  import os
3
- from mask_generator import generate_mask, invert_mask
 
4
  from models import kandinsky_inpainting_inference, load_image
5
  from PIL import Image
 
 
 
 
6
 
7
  def main(args):
8
- # Generate mask
9
- mask = generate_mask(args.image_path)
 
 
 
 
 
 
 
 
 
10
  mask_image = Image.fromarray(mask)
11
 
12
  # Save original mask
13
  original_mask_path = os.path.join(args.mask_dir, f'{args.uid}_original_mask.jpg')
14
  mask_image.save(original_mask_path)
15
-
16
-
17
  # Invert mask
18
  mask_image = load_image(original_mask_path)
19
  inverted_mask = invert_mask(mask_image)
20
  inverted_mask_path = os.path.join(args.mask_dir, f'{args.uid}_inverted_mask.jpg')
21
  inverted_mask.save(inverted_mask_path)
22
 
23
- # Load mask and image
24
- invert_mask_image = load_image(inverted_mask_path)
25
- image = load_image(args.image_path)
26
 
27
- # Perform inpainting
28
- output_image = kandinsky_inpainting_inference(args.prompt, args.negative_prompt, image, mask_image)
29
 
30
  # Save output image
31
  output_image_path = os.path.join(args.output_dir, f'{args.uid}_output.jpg')
32
  output_image.save(output_image_path)
33
 
34
-
35
  if __name__ == "__main__":
36
  parser = argparse.ArgumentParser(description='Perform Kandinsky inpainting on an image.')
37
  parser.add_argument('--image_path', type=str, required=True, help='Path to the input image.')
 
1
  import argparse
2
  import os
3
+ from mask_generator import invert_mask
4
+ from segment_everything import generate_mask_from_bbox
5
  from models import kandinsky_inpainting_inference, load_image
6
  from PIL import Image
7
+ from pipeline import fetch_kandinsky_pipeline
8
+ from config import controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name
9
+ from mask_generator import extend_image
10
+
11
 
def main(args):
    """
    Run the extend -> mask -> invert -> inpaint flow for one input image.

    Reads ``image_path``, ``uid``, ``output_dir``, ``mask_dir``, ``prompt`` and
    ``negative_prompt`` from ``args``. Every intermediate artifact (extended
    image, original mask, inverted mask) and the final result are written to
    disk as ``<uid>_*.jpg`` files.

    Args:
        args: Parsed command-line namespace carrying the attributes listed above.
    """
    # Both destination folders must exist before the first save call.
    for directory in (args.output_dir, args.mask_dir):
        os.makedirs(directory, exist_ok=True)

    # Step 1: place the source image on a 2560x1440 canvas and persist it,
    # because the mask generator below takes a file path, not an image object.
    canvas = extend_image(
        image_path=args.image_path,
        target_width=2560,
        target_height=1440,
        roi_scale=0.6,
    )
    canvas_path = os.path.join(args.output_dir, f'{args.uid}_extended.jpg')
    canvas.save(canvas_path)

    # Step 2: derive the object mask from the extended image and store it.
    raw_mask = Image.fromarray(generate_mask_from_bbox(canvas_path))
    raw_mask_path = os.path.join(args.mask_dir, f'{args.uid}_original_mask.jpg')
    raw_mask.save(raw_mask_path)

    # Step 3: reload the saved mask, invert it, and store the inverted version.
    flipped_mask = invert_mask(load_image(raw_mask_path))
    flipped_mask_path = os.path.join(args.mask_dir, f'{args.uid}_inverted_mask.jpg')
    flipped_mask.save(flipped_mask_path)

    # Step 4: reload both inputs from disk (mask first, then image) and inpaint.
    inpaint_mask = load_image(flipped_mask_path)
    inpaint_source = load_image(canvas_path)
    result = kandinsky_inpainting_inference(
        args.prompt, args.negative_prompt, inpaint_source, inpaint_mask
    )

    # Step 5: write the final image next to the extended one.
    result_path = os.path.join(args.output_dir, f'{args.uid}_output.jpg')
    result.save(result_path)
46
 
 
47
  if __name__ == "__main__":
48
  parser = argparse.ArgumentParser(description='Perform Kandinsky inpainting on an image.')
49
  parser.add_argument('--image_path', type=str, required=True, help='Path to the input image.')
scripts/segment_everything.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from ultralytics import YOLO
3
+ from transformers import SamModel, SamProcessor
4
+ import torch
5
+ from PIL import Image
6
+ from diffusers.utils import load_image
7
+
8
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
9
+
10
+
11
+
12
+
13
+
14
+
def generate_mask_from_bbox(image_path):
    """
    Generate a segmentation mask for the first object YOLO detects in an image.

    YOLO (``yolov8m.pt``) proposes bounding boxes for the image; the first box
    is passed to SAM (``facebook/sam-vit-large``) as a box prompt, and the
    resulting mask is resized back to the original image size.

    Args:
        image_path (str): The path to the input image.

    Returns:
        numpy.ndarray: The generated mask as a NumPy array.
            NOTE(review): presumably a 2-D boolean (H, W) array, since
            ``post_process_masks`` binarizes by default; confirm against the
            installed transformers version.

    Raises:
        ValueError: If YOLO returns no bounding boxes for the image.
    """
    # NOTE(review): the detector and SAM weights are re-instantiated on every
    # call; hoist or cache them if this function is called repeatedly.
    yolo = YOLO('yolov8m.pt')
    processor = SamProcessor.from_pretrained('facebook/sam-vit-large')
    model = SamModel.from_pretrained("facebook/sam-vit-large").to(device)

    # Generate bounding boxes and keep only the first detection.
    results = yolo(image_path)
    bboxes = results[0].boxes.xyxy.tolist()
    if not bboxes:
        # Previously this surfaced as a bare IndexError on ``bboxes[0]``.
        raise ValueError(
            f"No objects detected in {image_path!r}; cannot generate a mask."
        )
    input_boxes = [[[bboxes[0]]]]

    # Move the inputs to the same device as the model. The original hard-coded
    # "cuda" here, which failed on CPU-only machines even though ``device``
    # already falls back to CPU.
    inputs = processor(
        load_image(image_path), input_boxes=input_boxes, return_tensors="pt"
    ).to(device)

    # Perform inference without tracking gradients.
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process back to the original resolution and take the entry at
    # index [0][0][0] of the nested result (first image, first box prompt,
    # first mask candidate; TODO confirm the axis meaning).
    mask = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu()
    )[0][0][0].numpy()
    return mask
50
+
51
+
scripts/yolov8m.pt.REMOVED.git-id ADDED
@@ -0,0 +1 @@
 
 
1
+ cdb22b922d8e59c557a944f4560a5358770baad5