meow2018 committed
Commit e1e4bbe · verified · Parent: a93837d

Update configs/pretrain/yolo_world_s_pretrain_FG_v3.py

configs/pretrain/yolo_world_s_pretrain_FG_v3.py CHANGED
@@ -1,7 +1,7 @@
  # deploy:
  #python deploy/deploy.py /data/taofuyu/models/yolo_world/detection_onnxruntime_static.py /data/taofuyu/models/yolo_world/yolo_world_s_pretrain_FG_v2.py /data/taofuyu/snapshot/yolo_world/fg_pretrain_v2/epoch_1.pth /data/taofuyu/tao_dataset/FG/训练FG_无车顶车窗/192_168_1_123_2_2024-01-15_09-53-05_2024-01-15_09-54-20_0.jpg --test-img /data/taofuyu/tao_dataset/FG/test_wrong/现场问题/第171次车位引导【车头图】识别结果推送_车位2_1_闽A4YY27_None_picture_2023_11_29_18_6_55.jpg --work-dir /data/taofuyu/log/yolo_world/fg_pretrain_v2/
- _base_ = (
-     '/data/taofuyu/repos/YOLO-World/third_party/mmyolo/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py')
+ _base_ = ('../../third_party/mmyolo/configs/yolov8/'
+           'yolov8_s_syncbn_fast_8xb16-500e_coco.py')
  custom_imports = dict(
      imports=['yolo_world'],
      allow_failed_imports=False)
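
Review note: the absolute /data/taofuyu/... base path only resolved on one machine; the relative form is portable because MMEngine resolves _base_ entries against the directory of the config file that declares them. A minimal sketch of loading the merged config, assuming MMEngine is installed and the file sits at its in-repo path:

    # Minimal sketch, assuming mmengine is installed and this config is loaded
    # from its in-repo location. MMEngine resolves the relative _base_ entry
    # against the config file's own directory, so the YOLOv8-S base is found
    # on any machine with the repo checked out, unlike the old absolute path.
    from mmengine.config import Config

    cfg = Config.fromfile('configs/pretrain/yolo_world_s_pretrain_FG_v3.py')
    # Keys this file does not override (model, schedules, ...) are inherited
    # from the YOLOv8-S base config.
    print(cfg.custom_imports)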
@@ -78,56 +78,45 @@ train_pipeline = [
  train_pipeline_stage2 = [*_base_.train_pipeline_stage2[:-1], *text_transform]


- # part_obj365v1_train_dataset = dict(
- #     type='MultiModalDataset',
- #     dataset=dict(
- #         type='YOLOv5Objects365V1Dataset',
- #         data_root='/data/taofuyu/tao_dataset/yolo_world/objects365v1/',
- #         ann_file='annotations/objects365_train.json',
- #         data_prefix=dict(img='train/'),
- #         filter_cfg=dict(filter_empty_gt=False, min_size=32)),
- #     class_text_path='data/texts/obj365v1_class_texts.json',
- #     pipeline=train_pipeline)
+ obj365v1_train_dataset = dict(
+     type='MultiModalDataset',
+     dataset=dict(
+         type='YOLOv5Objects365V1Dataset',
+         data_root='data/objects365v1/',
+         ann_file='annotations/objects365_train.json',
+         data_prefix=dict(img='train/'),
+         filter_cfg=dict(filter_empty_gt=False, min_size=32)),
+     class_text_path='data/texts/obj365v1_class_texts.json',
+     pipeline=train_pipeline)

  mg_train_dataset = dict(type='YOLOv5MixedGroundingDataset',
-     data_root='/data/taofuyu/tao_dataset/yolo_world/mixed_grounding/',
+     data_root='data/mixed_grounding/',
      ann_file='annotations/final_mixed_train_no_coco.json',
-     data_prefix=dict(img='images/'),
+     data_prefix=dict(img='gqa/images/'),
      filter_cfg=dict(filter_empty_gt=False, min_size=32),
      pipeline=train_pipeline)

  flickr_train_dataset = dict(
      type='YOLOv5MixedGroundingDataset',
-     data_root='/data/taofuyu/tao_dataset/yolo_world/flickr/',
+     data_root='data/flickr/',
      ann_file='annotations/final_flickr_separateGT_train.json',
-     data_prefix=dict(img='flickr30k_images/'),
+     data_prefix=dict(img='full_images/'),
      filter_cfg=dict(filter_empty_gt=True, min_size=32),
      pipeline=train_pipeline)

- fg_train_dataset = dict(
-     type='MultiModalDataset',
-     dataset=dict(
-         type='YOLOv5FGDataset',
-         data_root='',
-         ann_file='/data/taofuyu/tao_dataset/井盖检测/jinggai_few_shot_3.json',
-         data_prefix=dict(img=''),
-         filter_cfg=dict(filter_empty_gt=False, min_size=32)),
-     class_text_path='/data/taofuyu/repos/YOLO-World/data/texts/fewshot_class_texts.json',
-     pipeline=train_pipeline)
-
  train_dataloader = dict(batch_size=train_batch_size_per_gpu,
      collate_fn=dict(type='yolow_collate'),
      dataset=dict(_delete_=True,
          type='ConcatDataset',
          datasets=[
-             fg_train_dataset,
+             obj365v1_train_dataset,
              flickr_train_dataset, mg_train_dataset
          ],
          ignore_keys=['classes', 'palette']))

  test_pipeline = [
      *_base_.test_pipeline[:-1],
-     dict(type='LoadText', text_path='/data/taofuyu/repos/YOLO-World/data/texts/fewshot_class_texts.json'),
+     dict(type='LoadText'),
      dict(type='mmdet.PackDetInputs',
          meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
              'scale_factor', 'pad_param', 'texts'))
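
Review note: this hunk retargets training from the private few-shot FG/manhole-cover data back to the public pretraining mix (Objects365v1 + Flickr + mixed grounding) under relative data roots. ConcatDataset chains the three sources into one stream, and ignore_keys=['classes', 'palette'] suppresses metainfo conflicts between them; dropping text_path from LoadText should mean test-time texts come from the dataset side (its class_text_path) rather than the hard-coded few-shot list. A toy sketch of the concatenation behaviour, using plain torch stand-ins rather than the real YOLO-World dataset classes:

    # Toy sketch with stand-in datasets (not the real YOLO-World classes):
    # ConcatDataset chains its sources end to end, so one dataloader epoch
    # walks Objects365v1, then Flickr, then mixed-grounding samples under a
    # single sampler.
    from torch.utils.data import ConcatDataset, Dataset

    class Stub(Dataset):
        def __init__(self, name, n):
            self.name, self.n = name, n

        def __len__(self):
            return self.n

        def __getitem__(self, i):
            return f'{self.name}[{i}]'

    combined = ConcatDataset([Stub('obj365v1', 3), Stub('flickr', 2), Stub('mg', 2)])
    print(len(combined))  # 7
    print(combined[4])    # flickr[1] -- global indices map across sources in order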
@@ -135,23 +124,20 @@ test_pipeline = [
  coco_val_dataset = dict(
      _delete_=True,
      type='MultiModalDataset',
-     dataset=dict(
-         type='YOLOv5FGDataset',
-         data_root='',
-         ann_file='/data/taofuyu/tao_dataset/井盖检测/jinggai_few_shot_3.json',
-         data_prefix=dict(img=''),
-         filter_cfg=dict(filter_empty_gt=False, min_size=32)),
-     class_text_path='/data/taofuyu/repos/YOLO-World/data/texts/fewshot_class_texts.json',
+     dataset=dict(type='YOLOv5LVISV1Dataset',
+         data_root='data/coco/',
+         test_mode=True,
+         ann_file='lvis/lvis_v1_val.json',
+         data_prefix=dict(img=''),
+         batch_shapes_cfg=None),
+     class_text_path='data/texts/lvis_v1_class_texts.json',
      pipeline=test_pipeline)
  val_dataloader = dict(dataset=coco_val_dataset)
  test_dataloader = val_dataloader

- val_evaluator = dict(
-     _delete_=True,
-     type='mmdet.CocoMetric',
-     proposal_nums=(100, 1, 10),
-     ann_file='/data/taofuyu/tao_dataset/井盖检测/jinggai_few_shot_3.json',
-     metric='bbox')
+ val_evaluator = dict(type='mmdet.LVISMetric',
+     ann_file='data/coco/lvis/lvis_v1_val.json',
+     metric='bbox')
  test_evaluator = val_evaluator

  # training settings
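
Review note: validation moves from CocoMetric over the few-shot manhole-cover annotations to the standard LVIS-v1 zero-shot protocol. A hedged sketch of building the same metric directly in Python, assuming mmdet 3.x and the lvis package are installed and the annotation file exists locally; the config reaches the identical object through the registry:

    # Same arguments the config passes via val_evaluator; 'bbox' evaluates
    # box AP over the LVIS v1 validation split.
    from mmdet.evaluation import LVISMetric

    evaluator = LVISMetric(ann_file='data/coco/lvis/lvis_v1_val.json',
                           metric='bbox')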
@@ -170,7 +156,7 @@ custom_hooks = [
      switch_pipeline=train_pipeline_stage2)
  ]
  train_cfg = dict(max_epochs=max_epochs,
-     val_interval=100,
+     val_interval=10,
      dynamic_intervals=[((max_epochs - close_mosaic_epochs),
          _base_.val_interval_stage2)])
  optim_wrapper = dict(optimizer=dict(
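
Review note: val_interval=100 made validation fire at most once in a typical pretraining run; 10 gives periodic feedback, and dynamic_intervals still tightens the interval to _base_.val_interval_stage2 once mosaic augmentation closes. An illustrative approximation of the resulting schedule; the concrete values of max_epochs, close_mosaic_epochs and the stage-2 interval live elsewhere in the config, so the numbers below are assumptions:

    # Illustrative approximation of MMEngine's dynamic_intervals behaviour:
    # validate every `val_interval` epochs, then every `val_interval_stage2`
    # epochs from the close-mosaic milestone on. Values are assumed, not
    # taken from this config.
    max_epochs, close_mosaic_epochs = 100, 2
    val_interval, val_interval_stage2 = 10, 1

    def runs_validation(epoch):
        """1-based epoch index; interval tightens after the milestone."""
        milestone = max_epochs - close_mosaic_epochs
        interval = val_interval if epoch < milestone else val_interval_stage2
        return epoch % interval == 0

    print([e for e in range(1, max_epochs + 1) if runs_validation(e)])
    # -> every 10th epoch, plus every epoch from the close-mosaic window on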