training:
dataset = images-damian2; last 2 text encoder layers unfrozen; 30 epochs at 2e-6, then 20 epochs at 1e-6 (step 240 on blue graph); cond_dropout = default
first 30 epochs: resume:
sample: