File size: 2,201 Bytes
48fa639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
configuration:
  batch_size: 64
  optimizer: torch.optim.AdamW

  lr: 0.001

  trainer: experiment_setup.train_loop
  scorer: experiment_setup.score
  model: models.clipseg.CLIPDensePredT

  lr_scheduler: cosine
  T_max: 20000
  eta_min: 0.0001

  max_iterations: 20000
  val_interval: null

  # dataset
  dataset: datasets.phrasecut.PhraseCut   # <-----------------
  split_mode: pascal_test
  split: train
  mask: text_and_crop_blur_highlight352
  image_size: 352
  normalize: True
  pre_crop_image_size: [sample, 1, 1.5]
  aug: 1new

  # general
  mix: False # <-----------------
  prompt: shuffle+
  norm_cond: True
  mix_text_min: 0.0
  
  # model
  out: 1
  extract_layers: [3, 7, 9]
  reduce_dim: 64
  depth: 3
  fix_shift: False

  loss: torch.nn.functional.binary_cross_entropy_with_logits
  amp: True

test_configuration_common:
  normalize: True
  image_size: 352
  batch_size: 32
  # max_iterations: 5
  # max_iterations: 150
  
test_configuration: 

  -
    name: pc  # old: phrasecut
    metric: metrics.FixedIntervalMetrics
    test_dataset: phrasecut
    split: test
    mask: text
    label_support: True
    sigmoid: True


columns: [i, name, pc_miou_0.3, pc_fgiou_0.3, pc_fgiou_0.5, pc_ap, duration, date]


individual_configurations:

# important ones


- {name: rd64-uni, version: 'ViT-B/16', reduce_dim: 64, with_visual: True, negative_prob: 0.2, mix: True, mix_text_max: 0.5}

# this was accedentally trained using old mask
- {name: rd128-vit16-phrasecut, version: 'ViT-B/16', reduce_dim: 128, mask: text_and_blur3_highlight01}
- {name: rd64-uni-novis, version: 'ViT-B/16', reduce_dim: 64, with_visual: False, negative_prob: 0.2, mix: False}
# this was accedentally trained using old mask
- {name: baseline3-vit16-phrasecut, model: models.clipseg.CLIPDenseBaseline, version: 'ViT-B/16', reduce_dim: 64, reduce2_dim: 64, mask: text_and_blur3_highlight01}

- {name: vit64-uni, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, reduce_dim: 64, with_visual: True, only_visual: True, negative_prob: 0.2, mask: crop_blur_highlight352, lr: 0.0003}
- {name: vit64-uni-novis, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, with_visual: False, reduce_dim: 64, lr: 0.0001}