Spaces:
Running
Running
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +26 -29
- config/train_docvqa.yaml +23 -0
- config/train_invoices.yaml +22 -0
- config/train_rvlcdip.yaml +23 -0
- config/train_zhtrainticket.yaml +22 -0
- dataset/.gitkeep +1 -0
- misc/overview.png +0 -0
- misc/sample_image_cord_test_receipt_00004.png +3 -0
- misc/sample_image_donut_document.png +0 -0
- misc/sample_synthdog.png +3 -0
- misc/screenshot_gradio_demos.png +3 -0
- result/.gitkeep +1 -0
- synthdog/README.md +63 -0
- synthdog/config_en.yaml +119 -0
- synthdog/config_ja.yaml +119 -0
- synthdog/config_ko.yaml +119 -0
- synthdog/config_zh.yaml +119 -0
- synthdog/elements/__init__.py +12 -0
- synthdog/elements/background.py +24 -0
- synthdog/elements/content.py +118 -0
- synthdog/elements/document.py +65 -0
- synthdog/elements/paper.py +17 -0
- synthdog/elements/textbox.py +43 -0
- synthdog/layouts/__init__.py +9 -0
- synthdog/layouts/grid.py +68 -0
- synthdog/layouts/grid_stack.py +74 -0
- synthdog/resources/background/bedroom_83.jpg +0 -0
- synthdog/resources/background/bob+dylan_83.jpg +0 -0
- synthdog/resources/background/coffee_122.jpg +0 -0
- synthdog/resources/background/coffee_18.jpeg +3 -0
- synthdog/resources/background/crater_141.jpg +3 -0
- synthdog/resources/background/cream_124.jpg +3 -0
- synthdog/resources/background/eagle_110.jpg +0 -0
- synthdog/resources/background/farm_25.jpg +0 -0
- synthdog/resources/background/hiking_18.jpg +0 -0
- synthdog/resources/corpus/enwiki.txt +0 -0
- synthdog/resources/corpus/jawiki.txt +0 -0
- synthdog/resources/corpus/kowiki.txt +0 -0
- synthdog/resources/corpus/zhwiki.txt +0 -0
- synthdog/resources/font/en/NotoSans-Regular.ttf +0 -0
- synthdog/resources/font/en/NotoSerif-Regular.ttf +0 -0
- synthdog/resources/font/ja/NotoSansJP-Regular.otf +3 -0
- synthdog/resources/font/ja/NotoSerifJP-Regular.otf +3 -0
- synthdog/resources/font/ko/NotoSansKR-Regular.otf +3 -0
- synthdog/resources/font/ko/NotoSerifKR-Regular.otf +3 -0
- synthdog/resources/font/zh/NotoSansSC-Regular.otf +3 -0
- synthdog/resources/font/zh/NotoSerifSC-Regular.otf +3 -0
- synthdog/resources/paper/paper_1.jpg +3 -0
- synthdog/resources/paper/paper_2.jpg +3 -0
- synthdog/resources/paper/paper_3.jpg +3 -0
app.py
CHANGED
@@ -1,29 +1,26 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import argparse
|
3 |
-
import torch
|
4 |
-
from PIL import Image
|
5 |
-
from donut import DonutModel
|
6 |
-
def demo_process(input_img):
|
7 |
-
global model, task_prompt, task_name
|
8 |
-
input_img = Image.fromarray(input_img)
|
9 |
-
output = model.inference(image=input_img, prompt=task_prompt)["predictions"][0]
|
10 |
-
return output
|
11 |
-
parser = argparse.ArgumentParser()
|
12 |
-
parser.add_argument("--task", type=str, default="Booking")
|
13 |
-
parser.add_argument("--pretrained_path", type=str, default="
|
14 |
-
args, left_argv = parser.parse_known_args()
|
15 |
-
task_name = args.task
|
16 |
-
task_prompt = f"<s_{task_name}>"
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
model.eval()
|
28 |
-
demo = gr.Interface(fn=demo_process,inputs="image",outputs="json", title=f"Donut 🍩 demonstration for `{task_name}` task", examples=[["CMA_sample.jpg"], ["COSCO_sample.jpg"], ["ONEY_sample.jpg"]],)
|
29 |
-
demo.launch()
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import argparse
|
3 |
+
import torch
|
4 |
+
from PIL import Image
|
5 |
+
from donut import DonutModel
|
6 |
+
def demo_process(input_img):
    """Run the Donut model on one uploaded image and return its first prediction.

    Args:
        input_img: Image data as delivered by the Gradio ``"image"`` input; it is
            passed straight to ``PIL.Image.fromarray`` (presumably a NumPy
            array -- confirm against the Gradio version in use).

    Returns:
        The first element of the ``"predictions"`` list returned by
        ``model.inference``.
    """
    # `model` and `task_prompt` are module-level names assigned by this script;
    # they are only read here, so no `global` declaration is required.
    pil_image = Image.fromarray(input_img)
    result = model.inference(image=pil_image, prompt=task_prompt)
    return result["predictions"][0]
|
11 |
+
# ---- Command-line configuration -------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, default="Booking")
parser.add_argument("--pretrained_path", type=str, default="result/train_booking/20241112_150925")
# parse_known_args() leaves unrecognised flags in `left_argv` instead of
# aborting, so extra arguments supplied by the hosting environment are tolerated.
args, left_argv = parser.parse_known_args()

task_name = args.task
# The decoding prompt is the task name wrapped as a start token, e.g. "<s_Booking>".
task_prompt = f"<s_{task_name}>"

# ---- Model loading ----------------------------------------------------------
# Load from the path given on the command line. Previously the checkpoint
# directory was hard-coded here as well, which silently ignored
# --pretrained_path; the argument's default points at the same directory.
model = DonutModel.from_pretrained(args.pretrained_path)

if torch.cuda.is_available():
    # GPU available: switch the whole model via .half() (presumably float16
    # weights -- confirm DonutModel is a torch.nn.Module) and move it to CUDA.
    model.half()
    device = torch.device("cuda")
    model.to(device)
else:
    # CPU only: cast just the encoder to bfloat16.
    model.encoder.to(torch.bfloat16)
model.eval()

# ---- Gradio UI --------------------------------------------------------------
demo = gr.Interface(
    fn=demo_process,
    inputs="image",
    outputs="json",
    title=f"Donut 🍩 demonstration for `{task_name}` task",
)
demo.launch(debug=True)
|
|
|
|
|
|
config/train_docvqa.yaml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
resume_from_checkpoint_path: null
|
2 |
+
result_path: "./result"
|
3 |
+
pretrained_model_name_or_path: "naver-clova-ix/donut-base"
|
4 |
+
dataset_name_or_paths: ["./dataset/docvqa"] # should be prepared from https://rrc.cvc.uab.es/?ch=17
|
5 |
+
sort_json_key: True
|
6 |
+
train_batch_sizes: [2]
|
7 |
+
val_batch_sizes: [4]
|
8 |
+
input_size: [2560, 1920]
|
9 |
+
max_length: 128
|
10 |
+
align_long_axis: False
|
11 |
+
# num_nodes: 8 # memo: donut-base-finetuned-docvqa was trained with 8 nodes
|
12 |
+
num_nodes: 1
|
13 |
+
seed: 2022
|
14 |
+
lr: 3e-5
|
15 |
+
warmup_steps: 10000
|
16 |
+
num_training_samples_per_epoch: 39463
|
17 |
+
max_epochs: 300
|
18 |
+
max_steps: -1
|
19 |
+
num_workers: 8
|
20 |
+
val_check_interval: 1.0
|
21 |
+
check_val_every_n_epoch: 1
|
22 |
+
gradient_clip_val: 0.25
|
23 |
+
verbose: True
|
config/train_invoices.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
resume_from_checkpoint_path: null # only used for resume_from_checkpoint option in PL
|
2 |
+
result_path: "./result"
|
3 |
+
pretrained_model_name_or_path: "naver-clova-ix/donut-base" # loading a pre-trained model (from the model hub or a local path)
|
4 |
+
dataset_name_or_paths: ["./dataset/SGSInvoice"] # loading datasets (from the model hub or a local path)
|
5 |
+
sort_json_key: False # cord dataset is preprocessed, and publicly available at https://huggingface.co/datasets/naver-clova-ix/cord-v2
|
6 |
+
train_batch_sizes: [2]
|
7 |
+
val_batch_sizes: [1]
|
8 |
+
input_size: [1280, 960] # when the input resolution differs from the pre-training setting, some weights will be newly initialized (but the model training would be okay)
|
9 |
+
max_length: 768
|
10 |
+
align_long_axis: False
|
11 |
+
num_nodes: 1
|
12 |
+
seed: 2022
|
13 |
+
lr: 3e-5
|
14 |
+
warmup_steps: 60 # 800/8*30/10, 10%
|
15 |
+
num_training_samples_per_epoch: 800
|
16 |
+
max_epochs: 10
|
17 |
+
max_steps: -1
|
18 |
+
num_workers: 2
|
19 |
+
val_check_interval: 1.0
|
20 |
+
check_val_every_n_epoch: 3
|
21 |
+
gradient_clip_val: 1.0
|
22 |
+
verbose: True
|
config/train_rvlcdip.yaml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
resume_from_checkpoint_path: null
|
2 |
+
result_path: "./result"
|
3 |
+
pretrained_model_name_or_path: "naver-clova-ix/donut-base"
|
4 |
+
dataset_name_or_paths: ["./dataset/rvlcdip"] # should be prepared from https://www.cs.cmu.edu/~aharley/rvl-cdip/
|
5 |
+
sort_json_key: True
|
6 |
+
train_batch_sizes: [2]
|
7 |
+
val_batch_sizes: [4]
|
8 |
+
input_size: [2560, 1920]
|
9 |
+
max_length: 8
|
10 |
+
align_long_axis: False
|
11 |
+
# num_nodes: 8 # memo: donut-base-finetuned-rvlcdip was trained with 8 nodes
|
12 |
+
num_nodes: 1
|
13 |
+
seed: 2022
|
14 |
+
lr: 2e-5
|
15 |
+
warmup_steps: 10000
|
16 |
+
num_training_samples_per_epoch: 320000
|
17 |
+
max_epochs: 100
|
18 |
+
max_steps: -1
|
19 |
+
num_workers: 8
|
20 |
+
val_check_interval: 1.0
|
21 |
+
check_val_every_n_epoch: 1
|
22 |
+
gradient_clip_val: 1.0
|
23 |
+
verbose: True
|
config/train_zhtrainticket.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
resume_from_checkpoint_path: null
|
2 |
+
result_path: "./result"
|
3 |
+
pretrained_model_name_or_path: "naver-clova-ix/donut-base"
|
4 |
+
dataset_name_or_paths: ["./dataset/zhtrainticket"] # should be prepared from https://github.com/beacandler/EATEN
|
5 |
+
sort_json_key: True
|
6 |
+
train_batch_sizes: [8]
|
7 |
+
val_batch_sizes: [1]
|
8 |
+
input_size: [960, 1280]
|
9 |
+
max_length: 256
|
10 |
+
align_long_axis: False
|
11 |
+
num_nodes: 1
|
12 |
+
seed: 2022
|
13 |
+
lr: 3e-5
|
14 |
+
warmup_steps: 300
|
15 |
+
num_training_samples_per_epoch: 1368
|
16 |
+
max_epochs: 10
|
17 |
+
max_steps: -1
|
18 |
+
num_workers: 8
|
19 |
+
val_check_interval: 1.0
|
20 |
+
check_val_every_n_epoch: 1
|
21 |
+
gradient_clip_val: 1.0
|
22 |
+
verbose: True
|
dataset/.gitkeep
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
misc/overview.png
ADDED
misc/sample_image_cord_test_receipt_00004.png
ADDED
Git LFS Details
|
misc/sample_image_donut_document.png
ADDED
misc/sample_synthdog.png
ADDED
Git LFS Details
|
misc/screenshot_gradio_demos.png
ADDED
Git LFS Details
|
result/.gitkeep
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
synthdog/README.md
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SynthDoG 🐶: Synthetic Document Generator
|
2 |
+
|
3 |
+
SynthDoG is a synthetic document generator for visual document understanding (VDU).
|
4 |
+
|
5 |
+
![image](../misc/sample_synthdog.png)
|
6 |
+
|
7 |
+
## Prerequisites
|
8 |
+
|
9 |
+
- python>=3.6
|
10 |
+
- [synthtiger](https://github.com/clovaai/synthtiger) (`pip install synthtiger`)
|
11 |
+
|
12 |
+
## Usage
|
13 |
+
|
14 |
+
```bash
|
15 |
+
# Set environment variable (for macOS)
|
16 |
+
$ export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
|
17 |
+
|
18 |
+
synthtiger -o ./outputs/SynthDoG_en -c 50 -w 4 -v template.py SynthDoG config_en.yaml
|
19 |
+
|
20 |
+
{'config': 'config_en.yaml',
|
21 |
+
'count': 50,
|
22 |
+
'name': 'SynthDoG',
|
23 |
+
'output': './outputs/SynthDoG_en',
|
24 |
+
'script': 'template.py',
|
25 |
+
'verbose': True,
|
26 |
+
'worker': 4}
|
27 |
+
{'aspect_ratio': [1, 2],
|
28 |
+
.
|
29 |
+
.
|
30 |
+
'quality': [50, 95],
|
31 |
+
'short_size': [720, 1024]}
|
32 |
+
Generated 1 data (task 3)
|
33 |
+
Generated 2 data (task 0)
|
34 |
+
Generated 3 data (task 1)
|
35 |
+
.
|
36 |
+
.
|
37 |
+
Generated 49 data (task 48)
|
38 |
+
Generated 50 data (task 49)
|
39 |
+
46.32 seconds elapsed
|
40 |
+
```
|
41 |
+
|
42 |
+
Some important arguments:
|
43 |
+
|
44 |
+
- `-o` : directory path to save data.
|
45 |
+
- `-c` : number of data to generate.
|
46 |
+
- `-w` : number of workers.
|
47 |
+
- `-s` : random seed.
|
48 |
+
- `-v` : print error messages.
|
49 |
+
|
50 |
+
To generate ECJK samples:
|
51 |
+
```bash
|
52 |
+
# english
|
53 |
+
synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_en.yaml
|
54 |
+
|
55 |
+
# chinese
|
56 |
+
synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_zh.yaml
|
57 |
+
|
58 |
+
# japanese
|
59 |
+
synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_ja.yaml
|
60 |
+
|
61 |
+
# korean
|
62 |
+
synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_ko.yaml
|
63 |
+
```
|
synthdog/config_en.yaml
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
quality: [50, 95]
|
2 |
+
landscape: 0.5
|
3 |
+
short_size: [720, 1024]
|
4 |
+
aspect_ratio: [1, 2]
|
5 |
+
|
6 |
+
background:
|
7 |
+
image:
|
8 |
+
paths: [resources/background]
|
9 |
+
weights: [1]
|
10 |
+
|
11 |
+
effect:
|
12 |
+
args:
|
13 |
+
# gaussian blur
|
14 |
+
- prob: 1
|
15 |
+
args:
|
16 |
+
sigma: [0, 10]
|
17 |
+
|
18 |
+
document:
|
19 |
+
fullscreen: 0.5
|
20 |
+
landscape: 0.5
|
21 |
+
short_size: [480, 1024]
|
22 |
+
aspect_ratio: [1, 2]
|
23 |
+
|
24 |
+
paper:
|
25 |
+
image:
|
26 |
+
paths: [resources/paper]
|
27 |
+
weights: [1]
|
28 |
+
alpha: [0, 0.2]
|
29 |
+
grayscale: 1
|
30 |
+
crop: 1
|
31 |
+
|
32 |
+
content:
|
33 |
+
margin: [0, 0.1]
|
34 |
+
text:
|
35 |
+
path: resources/corpus/enwiki.txt
|
36 |
+
font:
|
37 |
+
paths: [resources/font/en]
|
38 |
+
weights: [1]
|
39 |
+
bold: 0
|
40 |
+
layout:
|
41 |
+
text_scale: [0.0334, 0.1]
|
42 |
+
max_row: 10
|
43 |
+
max_col: 3
|
44 |
+
fill: [0.5, 1]
|
45 |
+
full: 0.1
|
46 |
+
align: [left, right, center]
|
47 |
+
stack_spacing: [0.0334, 0.0334]
|
48 |
+
stack_fill: [0.5, 1]
|
49 |
+
stack_full: 0.1
|
50 |
+
textbox:
|
51 |
+
fill: [0.5, 1]
|
52 |
+
textbox_color:
|
53 |
+
prob: 0.2
|
54 |
+
args:
|
55 |
+
gray: [0, 64]
|
56 |
+
colorize: 1
|
57 |
+
content_color:
|
58 |
+
prob: 0.2
|
59 |
+
args:
|
60 |
+
gray: [0, 64]
|
61 |
+
colorize: 1
|
62 |
+
|
63 |
+
effect:
|
64 |
+
args:
|
65 |
+
# elastic distortion
|
66 |
+
- prob: 1
|
67 |
+
args:
|
68 |
+
alpha: [0, 1]
|
69 |
+
sigma: [0, 0.5]
|
70 |
+
# gaussian noise
|
71 |
+
- prob: 1
|
72 |
+
args:
|
73 |
+
scale: [0, 8]
|
74 |
+
per_channel: 0
|
75 |
+
# perspective
|
76 |
+
- prob: 1
|
77 |
+
args:
|
78 |
+
weights: [750, 50, 50, 25, 25, 25, 25, 50]
|
79 |
+
args:
|
80 |
+
- percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
|
81 |
+
- percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
|
82 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
|
83 |
+
- percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
|
84 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
|
85 |
+
- percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
|
86 |
+
- percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
|
87 |
+
- percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
|
88 |
+
|
89 |
+
effect:
|
90 |
+
args:
|
91 |
+
# color
|
92 |
+
- prob: 0.2
|
93 |
+
args:
|
94 |
+
rgb: [[0, 255], [0, 255], [0, 255]]
|
95 |
+
alpha: [0, 0.2]
|
96 |
+
# shadow
|
97 |
+
- prob: 1
|
98 |
+
args:
|
99 |
+
intensity: [0, 160]
|
100 |
+
amount: [0, 1]
|
101 |
+
smoothing: [0.5, 1]
|
102 |
+
bidirectional: 0
|
103 |
+
# contrast
|
104 |
+
- prob: 1
|
105 |
+
args:
|
106 |
+
alpha: [1, 1.5]
|
107 |
+
# brightness
|
108 |
+
- prob: 1
|
109 |
+
args:
|
110 |
+
beta: [-48, 0]
|
111 |
+
# motion blur
|
112 |
+
- prob: 0.5
|
113 |
+
args:
|
114 |
+
k: [3, 5]
|
115 |
+
angle: [0, 360]
|
116 |
+
# gaussian blur
|
117 |
+
- prob: 1
|
118 |
+
args:
|
119 |
+
sigma: [0, 1.5]
|
synthdog/config_ja.yaml
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
quality: [50, 95]
|
2 |
+
landscape: 0.5
|
3 |
+
short_size: [720, 1024]
|
4 |
+
aspect_ratio: [1, 2]
|
5 |
+
|
6 |
+
background:
|
7 |
+
image:
|
8 |
+
paths: [resources/background]
|
9 |
+
weights: [1]
|
10 |
+
|
11 |
+
effect:
|
12 |
+
args:
|
13 |
+
# gaussian blur
|
14 |
+
- prob: 1
|
15 |
+
args:
|
16 |
+
sigma: [0, 10]
|
17 |
+
|
18 |
+
document:
|
19 |
+
fullscreen: 0.5
|
20 |
+
landscape: 0.5
|
21 |
+
short_size: [480, 1024]
|
22 |
+
aspect_ratio: [1, 2]
|
23 |
+
|
24 |
+
paper:
|
25 |
+
image:
|
26 |
+
paths: [resources/paper]
|
27 |
+
weights: [1]
|
28 |
+
alpha: [0, 0.2]
|
29 |
+
grayscale: 1
|
30 |
+
crop: 1
|
31 |
+
|
32 |
+
content:
|
33 |
+
margin: [0, 0.1]
|
34 |
+
text:
|
35 |
+
path: resources/corpus/jawiki.txt
|
36 |
+
font:
|
37 |
+
paths: [resources/font/ja]
|
38 |
+
weights: [1]
|
39 |
+
bold: 0
|
40 |
+
layout:
|
41 |
+
text_scale: [0.0334, 0.1]
|
42 |
+
max_row: 10
|
43 |
+
max_col: 3
|
44 |
+
fill: [0.5, 1]
|
45 |
+
full: 0.1
|
46 |
+
align: [left, right, center]
|
47 |
+
stack_spacing: [0.0334, 0.0334]
|
48 |
+
stack_fill: [0.5, 1]
|
49 |
+
stack_full: 0.1
|
50 |
+
textbox:
|
51 |
+
fill: [0.5, 1]
|
52 |
+
textbox_color:
|
53 |
+
prob: 0.2
|
54 |
+
args:
|
55 |
+
gray: [0, 64]
|
56 |
+
colorize: 1
|
57 |
+
content_color:
|
58 |
+
prob: 0.2
|
59 |
+
args:
|
60 |
+
gray: [0, 64]
|
61 |
+
colorize: 1
|
62 |
+
|
63 |
+
effect:
|
64 |
+
args:
|
65 |
+
# elastic distortion
|
66 |
+
- prob: 1
|
67 |
+
args:
|
68 |
+
alpha: [0, 1]
|
69 |
+
sigma: [0, 0.5]
|
70 |
+
# gaussian noise
|
71 |
+
- prob: 1
|
72 |
+
args:
|
73 |
+
scale: [0, 8]
|
74 |
+
per_channel: 0
|
75 |
+
# perspective
|
76 |
+
- prob: 1
|
77 |
+
args:
|
78 |
+
weights: [750, 50, 50, 25, 25, 25, 25, 50]
|
79 |
+
args:
|
80 |
+
- percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
|
81 |
+
- percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
|
82 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
|
83 |
+
- percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
|
84 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
|
85 |
+
- percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
|
86 |
+
- percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
|
87 |
+
- percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
|
88 |
+
|
89 |
+
effect:
|
90 |
+
args:
|
91 |
+
# color
|
92 |
+
- prob: 0.2
|
93 |
+
args:
|
94 |
+
rgb: [[0, 255], [0, 255], [0, 255]]
|
95 |
+
alpha: [0, 0.2]
|
96 |
+
# shadow
|
97 |
+
- prob: 1
|
98 |
+
args:
|
99 |
+
intensity: [0, 160]
|
100 |
+
amount: [0, 1]
|
101 |
+
smoothing: [0.5, 1]
|
102 |
+
bidirectional: 0
|
103 |
+
# contrast
|
104 |
+
- prob: 1
|
105 |
+
args:
|
106 |
+
alpha: [1, 1.5]
|
107 |
+
# brightness
|
108 |
+
- prob: 1
|
109 |
+
args:
|
110 |
+
beta: [-48, 0]
|
111 |
+
# motion blur
|
112 |
+
- prob: 0.5
|
113 |
+
args:
|
114 |
+
k: [3, 5]
|
115 |
+
angle: [0, 360]
|
116 |
+
# gaussian blur
|
117 |
+
- prob: 1
|
118 |
+
args:
|
119 |
+
sigma: [0, 1.5]
|
synthdog/config_ko.yaml
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
quality: [50, 95]
|
2 |
+
landscape: 0.5
|
3 |
+
short_size: [720, 1024]
|
4 |
+
aspect_ratio: [1, 2]
|
5 |
+
|
6 |
+
background:
|
7 |
+
image:
|
8 |
+
paths: [resources/background]
|
9 |
+
weights: [1]
|
10 |
+
|
11 |
+
effect:
|
12 |
+
args:
|
13 |
+
# gaussian blur
|
14 |
+
- prob: 1
|
15 |
+
args:
|
16 |
+
sigma: [0, 10]
|
17 |
+
|
18 |
+
document:
|
19 |
+
fullscreen: 0.5
|
20 |
+
landscape: 0.5
|
21 |
+
short_size: [480, 1024]
|
22 |
+
aspect_ratio: [1, 2]
|
23 |
+
|
24 |
+
paper:
|
25 |
+
image:
|
26 |
+
paths: [resources/paper]
|
27 |
+
weights: [1]
|
28 |
+
alpha: [0, 0.2]
|
29 |
+
grayscale: 1
|
30 |
+
crop: 1
|
31 |
+
|
32 |
+
content:
|
33 |
+
margin: [0, 0.1]
|
34 |
+
text:
|
35 |
+
path: resources/corpus/kowiki.txt
|
36 |
+
font:
|
37 |
+
paths: [resources/font/ko]
|
38 |
+
weights: [1]
|
39 |
+
bold: 0
|
40 |
+
layout:
|
41 |
+
text_scale: [0.0334, 0.1]
|
42 |
+
max_row: 10
|
43 |
+
max_col: 3
|
44 |
+
fill: [0.5, 1]
|
45 |
+
full: 0.1
|
46 |
+
align: [left, right, center]
|
47 |
+
stack_spacing: [0.0334, 0.0334]
|
48 |
+
stack_fill: [0.5, 1]
|
49 |
+
stack_full: 0.1
|
50 |
+
textbox:
|
51 |
+
fill: [0.5, 1]
|
52 |
+
textbox_color:
|
53 |
+
prob: 0.2
|
54 |
+
args:
|
55 |
+
gray: [0, 64]
|
56 |
+
colorize: 1
|
57 |
+
content_color:
|
58 |
+
prob: 0.2
|
59 |
+
args:
|
60 |
+
gray: [0, 64]
|
61 |
+
colorize: 1
|
62 |
+
|
63 |
+
effect:
|
64 |
+
args:
|
65 |
+
# elastic distortion
|
66 |
+
- prob: 1
|
67 |
+
args:
|
68 |
+
alpha: [0, 1]
|
69 |
+
sigma: [0, 0.5]
|
70 |
+
# gaussian noise
|
71 |
+
- prob: 1
|
72 |
+
args:
|
73 |
+
scale: [0, 8]
|
74 |
+
per_channel: 0
|
75 |
+
# perspective
|
76 |
+
- prob: 1
|
77 |
+
args:
|
78 |
+
weights: [750, 50, 50, 25, 25, 25, 25, 50]
|
79 |
+
args:
|
80 |
+
- percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
|
81 |
+
- percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
|
82 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
|
83 |
+
- percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
|
84 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
|
85 |
+
- percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
|
86 |
+
- percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
|
87 |
+
- percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
|
88 |
+
|
89 |
+
effect:
|
90 |
+
args:
|
91 |
+
# color
|
92 |
+
- prob: 0.2
|
93 |
+
args:
|
94 |
+
rgb: [[0, 255], [0, 255], [0, 255]]
|
95 |
+
alpha: [0, 0.2]
|
96 |
+
# shadow
|
97 |
+
- prob: 1
|
98 |
+
args:
|
99 |
+
intensity: [0, 160]
|
100 |
+
amount: [0, 1]
|
101 |
+
smoothing: [0.5, 1]
|
102 |
+
bidirectional: 0
|
103 |
+
# contrast
|
104 |
+
- prob: 1
|
105 |
+
args:
|
106 |
+
alpha: [1, 1.5]
|
107 |
+
# brightness
|
108 |
+
- prob: 1
|
109 |
+
args:
|
110 |
+
beta: [-48, 0]
|
111 |
+
# motion blur
|
112 |
+
- prob: 0.5
|
113 |
+
args:
|
114 |
+
k: [3, 5]
|
115 |
+
angle: [0, 360]
|
116 |
+
# gaussian blur
|
117 |
+
- prob: 1
|
118 |
+
args:
|
119 |
+
sigma: [0, 1.5]
|
synthdog/config_zh.yaml
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
quality: [50, 95]
|
2 |
+
landscape: 0.5
|
3 |
+
short_size: [720, 1024]
|
4 |
+
aspect_ratio: [1, 2]
|
5 |
+
|
6 |
+
background:
|
7 |
+
image:
|
8 |
+
paths: [resources/background]
|
9 |
+
weights: [1]
|
10 |
+
|
11 |
+
effect:
|
12 |
+
args:
|
13 |
+
# gaussian blur
|
14 |
+
- prob: 1
|
15 |
+
args:
|
16 |
+
sigma: [0, 10]
|
17 |
+
|
18 |
+
document:
|
19 |
+
fullscreen: 0.5
|
20 |
+
landscape: 0.5
|
21 |
+
short_size: [480, 1024]
|
22 |
+
aspect_ratio: [1, 2]
|
23 |
+
|
24 |
+
paper:
|
25 |
+
image:
|
26 |
+
paths: [resources/paper]
|
27 |
+
weights: [1]
|
28 |
+
alpha: [0, 0.2]
|
29 |
+
grayscale: 1
|
30 |
+
crop: 1
|
31 |
+
|
32 |
+
content:
|
33 |
+
margin: [0, 0.1]
|
34 |
+
text:
|
35 |
+
path: resources/corpus/zhwiki.txt
|
36 |
+
font:
|
37 |
+
paths: [resources/font/zh]
|
38 |
+
weights: [1]
|
39 |
+
bold: 0
|
40 |
+
layout:
|
41 |
+
text_scale: [0.0334, 0.1]
|
42 |
+
max_row: 10
|
43 |
+
max_col: 3
|
44 |
+
fill: [0.5, 1]
|
45 |
+
full: 0.1
|
46 |
+
align: [left, right, center]
|
47 |
+
stack_spacing: [0.0334, 0.0334]
|
48 |
+
stack_fill: [0.5, 1]
|
49 |
+
stack_full: 0.1
|
50 |
+
textbox:
|
51 |
+
fill: [0.5, 1]
|
52 |
+
textbox_color:
|
53 |
+
prob: 0.2
|
54 |
+
args:
|
55 |
+
gray: [0, 64]
|
56 |
+
colorize: 1
|
57 |
+
content_color:
|
58 |
+
prob: 0.2
|
59 |
+
args:
|
60 |
+
gray: [0, 64]
|
61 |
+
colorize: 1
|
62 |
+
|
63 |
+
effect:
|
64 |
+
args:
|
65 |
+
# elastic distortion
|
66 |
+
- prob: 1
|
67 |
+
args:
|
68 |
+
alpha: [0, 1]
|
69 |
+
sigma: [0, 0.5]
|
70 |
+
# gaussian noise
|
71 |
+
- prob: 1
|
72 |
+
args:
|
73 |
+
scale: [0, 8]
|
74 |
+
per_channel: 0
|
75 |
+
# perspective
|
76 |
+
- prob: 1
|
77 |
+
args:
|
78 |
+
weights: [750, 50, 50, 25, 25, 25, 25, 50]
|
79 |
+
args:
|
80 |
+
- percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
|
81 |
+
- percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
|
82 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
|
83 |
+
- percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
|
84 |
+
- percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
|
85 |
+
- percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
|
86 |
+
- percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
|
87 |
+
- percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
|
88 |
+
|
89 |
+
effect:
|
90 |
+
args:
|
91 |
+
# color
|
92 |
+
- prob: 0.2
|
93 |
+
args:
|
94 |
+
rgb: [[0, 255], [0, 255], [0, 255]]
|
95 |
+
alpha: [0, 0.2]
|
96 |
+
# shadow
|
97 |
+
- prob: 1
|
98 |
+
args:
|
99 |
+
intensity: [0, 160]
|
100 |
+
amount: [0, 1]
|
101 |
+
smoothing: [0.5, 1]
|
102 |
+
bidirectional: 0
|
103 |
+
# contrast
|
104 |
+
- prob: 1
|
105 |
+
args:
|
106 |
+
alpha: [1, 1.5]
|
107 |
+
# brightness
|
108 |
+
- prob: 1
|
109 |
+
args:
|
110 |
+
beta: [-48, 0]
|
111 |
+
# motion blur
|
112 |
+
- prob: 0.5
|
113 |
+
args:
|
114 |
+
k: [3, 5]
|
115 |
+
angle: [0, 360]
|
116 |
+
# gaussian blur
|
117 |
+
- prob: 1
|
118 |
+
args:
|
119 |
+
sigma: [0, 1.5]
|
synthdog/elements/__init__.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
from elements.background import Background
|
7 |
+
from elements.content import Content
|
8 |
+
from elements.document import Document
|
9 |
+
from elements.paper import Paper
|
10 |
+
from elements.textbox import TextBox
|
11 |
+
|
12 |
+
__all__ = ["Background", "Content", "Document", "Paper", "TextBox"]
|
synthdog/elements/background.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
from synthtiger import components, layers
|
7 |
+
|
8 |
+
|
9 |
+
class Background:
    """Background layer generator: a textured rectangle followed by an optional blur.

    The ``config`` mapping may contain an ``"image"`` section (keyword arguments
    for ``components.BaseTexture``) and an ``"effect"`` section (keyword
    arguments for the ``components.Iterator`` wrapping the blur switch).
    Missing sections default to empty keyword arguments.
    """

    def __init__(self, config):
        image_kwargs = config.get("image", {})
        self.image = components.BaseTexture(**image_kwargs)

        # A single switchable Gaussian blur; its parameters come from config["effect"].
        blur_switch = components.Switch(components.GaussianBlur())
        effect_kwargs = config.get("effect", {})
        self.effect = components.Iterator([blur_switch], **effect_kwargs)

    def generate(self, size):
        """Create a background layer of ``size`` and apply texture and effects to it.

        Args:
            size: Size passed unchanged to ``layers.RectLayer``.

        Returns:
            The ``layers.RectLayer`` after ``self.image`` and ``self.effect``
            have been applied to it.
        """
        # (255, 255, 255, 255) is presumably opaque white in RGBA -- confirm
        # against synthtiger's RectLayer documentation.
        canvas = layers.RectLayer(size, (255, 255, 255, 255))
        for component in (self.image, self.effect):
            component.apply([canvas])
        return canvas
|
synthdog/elements/content.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
from collections import OrderedDict
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
from synthtiger import components
|
10 |
+
|
11 |
+
from elements.textbox import TextBox
|
12 |
+
from layouts import GridStack
|
13 |
+
|
14 |
+
|
15 |
+
class TextReader:
    """Character-level, wrap-around reader over a UTF-8 text file.

    The file is scanned once at construction in blocks of ``block_size``
    characters; the stream position at the start of each block is stored in
    ``offsets`` so any block can be re-read later with ``seek``. Blocks are
    kept in an ``OrderedDict`` used as a least-recently-used cache holding at
    most ``bucket_size`` blocks.

    Args:
        path: Path of the text file, opened with UTF-8 encoding.
        cache_size: Cache budget, in the same unit as ``block_size``
            (characters); only ``cache_size // block_size`` is used.
        block_size: Number of characters read per block.

    Notes:
        * The underlying file stays open for the reader's lifetime; call
          ``close()`` or use the reader as a context manager to release it.
        * For an empty file ``length`` is 0, and ``next()`` / ``prev()`` raise
          ``ZeroDivisionError`` because they wrap with ``% self.length``.
    """

    def __init__(self, path, cache_size=2 ** 28, block_size=2 ** 20):
        self.fp = open(path, "r", encoding="utf-8")
        self.length = 0
        self.offsets = [0]
        self.cache = OrderedDict()
        self.cache_size = cache_size
        self.block_size = block_size
        # Always allow at least one cached block. With cache_size < block_size
        # the plain quotient is 0, and get() would then try to evict from an
        # empty cache and fail with KeyError.
        self.bucket_size = max(cache_size // block_size, 1)
        self.idx = 0

        try:
            # Index pass: count characters and record where each block starts.
            while True:
                text = self.fp.read(self.block_size)
                if not text:
                    break
                self.length += len(text)
                self.offsets.append(self.fp.tell())
        except Exception:
            # Do not leak the handle if the scan fails (e.g. a decoding error).
            self.fp.close()
            raise

    def __len__(self):
        """Return the total number of characters in the file."""
        return self.length

    def __iter__(self):
        return self

    def __next__(self):
        """Return the current character and advance by one (wrapping at the end).

        StopIteration is never raised, so iteration is endless.
        """
        char = self.get()
        self.next()
        return char

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying file and drop all cached blocks."""
        self.cache.clear()
        self.fp.close()

    def move(self, idx):
        """Set the current character index to ``idx`` (no bounds checking)."""
        self.idx = idx

    def next(self):
        """Advance one character, wrapping to 0 after the last one."""
        self.idx = (self.idx + 1) % self.length

    def prev(self):
        """Step back one character, wrapping to the last one before index 0."""
        self.idx = (self.idx - 1) % self.length

    def get(self):
        """Return the character at the current index without moving."""
        key = self.idx // self.block_size

        if key in self.cache:
            text = self.cache[key]
        else:
            # Make room by evicting the least recently used block.
            if len(self.cache) >= self.bucket_size:
                self.cache.popitem(last=False)

            offset = self.offsets[key]
            self.fp.seek(offset, 0)
            text = self.fp.read(self.block_size)
            self.cache[key] = text

        # Mark this block as most recently used.
        self.cache.move_to_end(key)
        return text[self.idx % self.block_size]
|
70 |
+
|
71 |
+
|
72 |
+
class Content:
    """Fills a document area with text layers laid out by a ``GridStack``.

    Configuration keys read from ``config`` (all optional here):
    ``margin`` (default ``[0, 0.1]``), ``text`` (keyword arguments for
    ``TextReader``), ``font`` (keyword arguments for ``components.BaseFont``),
    ``layout``, ``textbox``, ``textbox_color`` and ``content_color``.
    """

    def __init__(self, config):
        def section(name):
            # Sub-configuration for `name`, or an empty mapping when absent.
            return config.get(name, {})

        self.margin = config.get("margin", [0, 0.1])
        self.reader = TextReader(**section("text"))
        self.font = components.BaseFont(**section("font"))
        self.layout = GridStack(section("layout"))
        self.textbox = TextBox(section("textbox"))
        self.textbox_color = components.Switch(components.Gray(), **section("textbox_color"))
        self.content_color = components.Switch(components.Gray(), **section("content_color"))

    def generate(self, size):
        """Generate the text layers for a document of the given size.

        Args:
            size: ``(width, height)`` of the document area.

        Returns:
            A tuple ``(text_layers, texts)``: the created text layers and the
            text returned by ``self.textbox.generate`` for each, in the same order.
        """
        width, height = size
        margin_lo, margin_hi = self.margin[0], self.margin[1]

        # Margins are sampled as fractions of each dimension and applied on
        # both sides, hence the factor 2 below.
        left = width * np.random.uniform(margin_lo, margin_hi)
        top = height * np.random.uniform(margin_lo, margin_hi)
        inner_width = max(width - left * 2, 0)
        inner_height = max(height - top * 2, 0)

        layouts = self.layout.generate([left, top, inner_width, inner_height])
        # Start reading the corpus from a random character position.
        self.reader.move(np.random.randint(len(self.reader)))

        text_layers = []
        texts = []
        for layout in layouts:
            # One font is sampled per layout and shared by all of its boxes.
            font = self.font.sample()

            for (x, y, w, h), align in layout:
                text_layer, text = self.textbox.generate((w, h), self.reader, font)
                # Step back one character after every box; presumably the
                # textbox leaves the reader one past the last character it
                # used -- confirm against TextBox.generate.
                self.reader.prev()

                if text_layer is not None:
                    # Centre in the box first, then snap to an edge if requested.
                    text_layer.center = (x + w / 2, y + h / 2)
                    if align == "left":
                        text_layer.left = x
                    elif align == "right":
                        text_layer.right = x + w

                    self.textbox_color.apply([text_layer])
                    text_layers.append(text_layer)
                    texts.append(text)

        self.content_color.apply(text_layers)
        return text_layers, texts
|
synthdog/elements/document.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
from synthtiger import components
|
8 |
+
|
9 |
+
from elements.content import Content
|
10 |
+
from elements.paper import Paper
|
11 |
+
|
12 |
+
|
13 |
+
class Document:
    """Generates one document: a paper layer plus text layers, with shared effects.

    Configuration keys read from ``config`` (all optional here):
    ``fullscreen`` (default 0.5), ``landscape`` (default 0.5), ``short_size``
    (default ``[480, 1024]``), ``aspect_ratio`` (default ``[1, 2]``),
    ``paper``, ``content`` and ``effect``.
    """

    def __init__(self, config):
        self.fullscreen = config.get("fullscreen", 0.5)
        self.landscape = config.get("landscape", 0.5)
        self.short_size = config.get("short_size", [480, 1024])
        self.aspect_ratio = config.get("aspect_ratio", [1, 2])
        self.paper = Paper(config.get("paper", {}))
        self.content = Content(config.get("content", {}))

        elastic = components.Switch(components.ElasticDistortion())
        noise = components.Switch(components.AdditiveGaussianNoise())
        # Eight separate Perspective instances sit behind one Selector; their
        # individual arguments and weights are supplied through config["effect"].
        perspective = components.Switch(
            components.Selector([components.Perspective() for _ in range(8)])
        )
        self.effect = components.Iterator(
            [elastic, noise, perspective],
            **config.get("effect", {}),
        )

    def generate(self, size):
        """Generate the paper and text layers for a document.

        Args:
            size: ``(width, height)`` of the available area. With probability
                ``self.fullscreen`` the document uses it as is; otherwise a
                size is sampled from ``short_size`` / ``aspect_ratio``, with
                both bounds clipped to the available area.

        Returns:
            A tuple ``(paper_layer, text_layers, texts)``.
        """
        width, height = size
        use_fullscreen = np.random.rand() < self.fullscreen

        if not use_fullscreen:
            is_landscape = np.random.rand() < self.landscape
            # The long side runs along the width in landscape, else the height.
            long_limit = width if is_landscape else height

            short_lo = min(width, height, self.short_size[0])
            short_hi = min(width, height, self.short_size[1]) + 1  # randint's upper bound is exclusive
            short_side = np.random.randint(short_lo, short_hi)

            # Cap both ratio bounds so the long side cannot exceed `long_limit`.
            ratio_cap = long_limit / short_side
            ratio = np.random.uniform(
                min(ratio_cap, self.aspect_ratio[0]),
                min(ratio_cap, self.aspect_ratio[1]),
            )
            long_side = int(short_side * ratio)

            if is_landscape:
                size = (long_side, short_side)
            else:
                size = (short_side, long_side)

        text_layers, texts = self.content.generate(size)
        paper_layer = self.paper.generate(size)
        # Apply the effects to text and paper together in a single call.
        self.effect.apply([*text_layers, paper_layer])

        return paper_layer, text_layers, texts
|
synthdog/elements/paper.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
from synthtiger import components, layers
|
7 |
+
|
8 |
+
|
9 |
+
class Paper:
    """Generates the textured sheet that sits behind the text."""

    def __init__(self, config):
        """Build the texture component from the ``image`` section of config.

        Args:
            config: Mapping of options; the optional ``image`` entry is
                forwarded as keyword arguments to ``components.BaseTexture``.
        """
        texture_options = config.get("image", {})
        self.image = components.BaseTexture(**texture_options)

    def generate(self, size):
        """Return a rectangle layer of ``size`` with the texture applied.

        Args:
            size: Size passed straight to ``layers.RectLayer``.

        Returns:
            The rectangle layer after ``self.image`` has been applied to it.
        """
        # The base colour is (255, 255, 255, 255) before texturing.
        sheet = layers.RectLayer(size, (255, 255, 255, 255))
        self.image.apply([sheet])

        return sheet
|
synthdog/elements/textbox.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
from synthtiger import layers
|
8 |
+
|
9 |
+
|
10 |
+
class TextBox:
    """Renders a single line of characters into a box of fixed size."""

    def __init__(self, config):
        """Store the ``fill`` range (default ``[1, 1]``) from ``config``."""
        self.fill = config.get("fill", [1, 1])

    def generate(self, size, text, font):
        """Render characters from ``text`` until the box width is used up.

        Args:
            size: ``(width, height)`` of the box.
            text: Iterable yielding one character at a time.
            font: Mapping of keyword arguments for ``layers.TextLayer``; its
                ``size`` entry is overridden with the integer box height.

        Returns:
            ``(text_layer, text)`` with the merged layer and the stripped
            string it shows, or ``(None, None)`` when nothing was placed or
            only whitespace was placed.
        """
        box_w, box_h = size

        # Usable width is a random fraction of the box width, bounded below
        # by the box height and above by the box width.
        ratio = np.random.uniform(self.fill[0], self.fill[1])
        limit = np.clip(box_w * ratio, box_h, box_w)
        font_spec = {**font, "size": int(box_h)}

        glyphs, kept = [], []
        cursor = 0

        for char in text:
            # Line-break characters are skipped.
            if char in "\r\n":
                continue

            glyph = layers.TextLayer(char, **font_spec)
            # Rescale the glyph so its height equals the box height, and
            # place it right after the previous one.
            scale = box_h / glyph.height
            glyph.bbox = [cursor, 0, *(glyph.size * scale)]
            if glyph.right > limit:
                break

            glyphs.append(glyph)
            kept.append(char)
            cursor = glyph.right

        line = "".join(kept).strip()
        if not glyphs or not line:
            return None, None

        return layers.Group(glyphs).merge(), line
|
synthdog/layouts/__init__.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
from layouts.grid import Grid
|
7 |
+
from layouts.grid_stack import GridStack
|
8 |
+
|
9 |
+
__all__ = ["Grid", "GridStack"]
|
synthdog/layouts/grid.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
|
9 |
+
class Grid:
    """Random grid layout: a few text columns, each with the same rows."""

    def __init__(self, config):
        """Read the layout options from ``config``.

        Args:
            config: Mapping of options; missing keys use the defaults below.
        """
        # [low, high] range of the row height, as a fraction of the shorter
        # side of the bounding box.
        self.text_scale = config.get("text_scale", [0.05, 0.1])
        # Upper bounds on the number of rows and columns.
        self.max_row = config.get("max_row", 5)
        self.max_col = config.get("max_col", 3)
        # [low, high] range of the fraction of the width given to columns.
        self.fill = config.get("fill", [0, 1])
        # Probability that the fill fraction is forced to 1.
        self.full = config.get("full", 0)
        # Alignments to choose from, one draw per column.
        self.align = config.get("align", ["left", "right", "center"])

    def generate(self, bbox):
        """Generate a grid of text boxes inside ``bbox``.

        Args:
            bbox: ``[left, top, width, height]`` of the available area.

        Returns:
            A list of ``([x, y, w, h], align)`` tuples ordered column by
            column, or ``None`` when no grid fits (including when ``width``
            is not positive).
        """
        left, top, width, height = bbox

        # A zero-width box would pass the fit check below (text_size is 0)
        # and then divide by zero; no column can be placed in it anyway.
        if width <= 0:
            return None

        text_scale = np.random.uniform(self.text_scale[0], self.text_scale[1])
        text_size = min(width, height) * text_scale
        grids = np.random.permutation(self.max_row * self.max_col)

        # Try the (row, col) combinations in random order and keep the first
        # one that fits: col columns plus col - 1 gaps, each at least
        # text_size wide, and row rows of text_size height.
        for grid in grids:
            row = grid // self.max_col + 1
            col = grid % self.max_col + 1
            if text_size * (col * 2 - 1) <= width and text_size * row <= height:
                break
        else:
            return None

        # The fill fraction must leave room for the minimum interior gaps.
        bound = max(1 - text_size / width * (col - 1), 0)
        full = np.random.rand() < self.full
        fill = np.random.uniform(self.fill[0], self.fill[1])
        fill = 1 if full else fill
        fill = np.clip(fill, 0, bound)

        # Two flags: whether the left / right outer padding may be non-zero.
        # With a single column at least one side is padded, which keeps the
        # sum of the even slots below from being zero.
        padding = np.random.randint(4) if col > 1 else np.random.randint(1, 4)
        padding = (bool(padding // 2), bool(padding % 2))

        # Slots alternate gap, column, gap, ..., column, gap (as fractions
        # of the width). Every column and interior gap starts at text_size.
        weights = np.zeros(col * 2 + 1)
        weights[1:-1] = text_size / width
        # Random shares in (0, 1] for distributing the remaining width.
        probs = 1 - np.random.rand(col * 2 + 1)
        probs[0] = 0 if not padding[0] else probs[0]
        probs[-1] = 0 if not padding[-1] else probs[-1]
        # Odd slots (columns) share what is left of `fill`; even slots
        # (gaps and paddings) share what is left of `1 - fill`.
        probs[1::2] *= max(fill - sum(weights[1::2]), 0) / sum(probs[1::2])
        probs[::2] *= max(1 - fill - sum(weights[::2]), 0) / sum(probs[::2])
        weights += probs

        widths = [width * weights[c] for c in range(col * 2 + 1)]
        heights = [text_size for _ in range(row)]

        # Slot boundaries relative to the box origin.
        xs = np.cumsum([0] + widths)
        ys = np.cumsum([0] + heights)

        layout = []

        for c in range(col):
            # One alignment per column, shared by all of its rows.
            align = self.align[np.random.randint(len(self.align))]

            for r in range(row):
                x, y = xs[c * 2 + 1], ys[r]
                w, h = xs[c * 2 + 2] - x, ys[r + 1] - y
                bbox = [left + x, top + y, w, h]
                layout.append((bbox, align))

        return layout
|
synthdog/layouts/grid_stack.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Donut
|
3 |
+
Copyright (c) 2022-present NAVER Corp.
|
4 |
+
MIT License
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
from layouts import Grid
|
9 |
+
|
10 |
+
|
11 |
+
class GridStack:
    """Stacks several ``Grid`` layouts vertically inside one bounding box."""

    def __init__(self, config):
        """Read the stacking options and build the inner ``Grid``.

        Args:
            config: Mapping of options; missing keys use the defaults below.
        """
        # Options with the same meaning as in Grid.
        self.text_scale = config.get("text_scale", [0.05, 0.1])
        self.max_row = config.get("max_row", 5)
        self.max_col = config.get("max_col", 3)
        self.fill = config.get("fill", [0, 1])
        self.full = config.get("full", 0)
        self.align = config.get("align", ["left", "right", "center"])
        # [low, high] range of the gap between grids, as a fraction of the
        # shorter side of the bounding box.
        self.stack_spacing = config.get("stack_spacing", [0, 0.05])
        # [low, high] range of the fraction of the height the stack may use.
        self.stack_fill = config.get("stack_fill", [1, 1])
        # Probability that the stack fill fraction is forced to 1.
        self.stack_full = config.get("stack_full", 0)
        # fill and text_scale of the inner grid are overwritten on every
        # generate() call, so they are not forwarded here.
        self._grid = Grid(
            {
                "text_scale": self.text_scale,
                "max_row": self.max_row,
                "max_col": self.max_col,
                "align": self.align,
            }
        )

    def generate(self, bbox):
        """Generate grids stacked from top to bottom inside ``bbox``.

        Args:
            bbox: ``[left, top, width, height]`` of the available area.

        Returns:
            A list of grid layouts, each a list of ``([x, y, w, h], align)``
            tuples. The list is empty when nothing fits, including when the
            usable area has zero width or height.
        """
        left, top, width, height = bbox

        stack_spacing = np.random.uniform(self.stack_spacing[0], self.stack_spacing[1])
        stack_spacing *= min(width, height)

        stack_full = np.random.rand() < self.stack_full
        stack_fill = np.random.uniform(self.stack_fill[0], self.stack_fill[1])
        stack_fill = 1 if stack_full else stack_fill

        # One fill value is drawn per stack and shared by all of its grids.
        full = np.random.rand() < self.full
        fill = np.random.uniform(self.fill[0], self.fill[1])
        fill = 1 if full else fill
        self._grid.fill = [fill, fill]

        layouts = []
        # Vertical offset (relative to `top`) where the next grid starts.
        line = 0

        while True:
            grid_size = (width, height * stack_fill - line)
            # An extent of exactly zero would make the rescaling below
            # divide by zero; nothing more can be placed in that case.
            if min(grid_size) == 0:
                break

            # The text size is defined relative to the whole box, then
            # re-expressed relative to the remaining area handed to the grid.
            text_scale = np.random.uniform(self.text_scale[0], self.text_scale[1])
            text_size = min(width, height) * text_scale
            text_scale = text_size / min(grid_size)
            self._grid.text_scale = [text_scale, text_scale]

            layout = self._grid.generate([left, top + line, *grid_size])
            if layout is None:
                break

            # Next grid starts below the lowest box of this one, plus a gap.
            line = max(y + h - top for (_, y, _, h), _ in layout) + stack_spacing
            layouts.append(layout)

        # Drop the gap added after the last grid, then spread the unused
        # height randomly above, between and below the grids.
        line = max(line - stack_spacing, 0)
        space = max(height - line, 0)
        spaces = np.random.rand(len(layouts) + 1)
        spaces *= space / sum(spaces) if sum(spaces) > 0 else 0
        spaces = np.cumsum(spaces)

        # Shift each grid down by the cumulative space above it; the boxes
        # are updated in place.
        for layout, offset in zip(layouts, spaces):
            for box, _ in layout:
                x, y, w, h = box
                box[:] = [x, y + offset, w, h]

        return layouts
|
synthdog/resources/background/bedroom_83.jpg
ADDED
synthdog/resources/background/bob+dylan_83.jpg
ADDED
synthdog/resources/background/coffee_122.jpg
ADDED
synthdog/resources/background/coffee_18.jpeg
ADDED
Git LFS Details
|
synthdog/resources/background/crater_141.jpg
ADDED
Git LFS Details
|
synthdog/resources/background/cream_124.jpg
ADDED
Git LFS Details
|
synthdog/resources/background/eagle_110.jpg
ADDED
synthdog/resources/background/farm_25.jpg
ADDED
synthdog/resources/background/hiking_18.jpg
ADDED
synthdog/resources/corpus/enwiki.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
synthdog/resources/corpus/jawiki.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
synthdog/resources/corpus/kowiki.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
synthdog/resources/corpus/zhwiki.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
synthdog/resources/font/en/NotoSans-Regular.ttf
ADDED
Binary file (399 kB). View file
|
|
synthdog/resources/font/en/NotoSerif-Regular.ttf
ADDED
Binary file (375 kB). View file
|
|
synthdog/resources/font/ja/NotoSansJP-Regular.otf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47c29251c03bd7731461efd9aff279d04058025c24fc08ed49552aeec20adc6d
|
3 |
+
size 4548148
|
synthdog/resources/font/ja/NotoSerifJP-Regular.otf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a0f0ab1c97dc9b1bb857f3259f5de23c6caa224c14d12878575b0a84676db8f
|
3 |
+
size 6169384
|
synthdog/resources/font/ko/NotoSansKR-Regular.otf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f62e282b5ff3694c09af182d0dfc29d46ce6b85303c0da74f159c098e75991b
|
3 |
+
size 4744644
|
synthdog/resources/font/ko/NotoSerifKR-Regular.otf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13196f84ee798b18eddd80077051e5d88ff869696c43200dc54c66807884f74e
|
3 |
+
size 7437596
|
synthdog/resources/font/zh/NotoSansSC-Regular.otf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63fdadb47c21197170f3cda6c60e98e481b8a1eb28e5f44102da51bec17d123b
|
3 |
+
size 8481960
|
synthdog/resources/font/zh/NotoSerifSC-Regular.otf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66080541a111a7a31179700496013aac4b64a53cc41f570bd21c2628c75e4628
|
3 |
+
size 11214568
|
synthdog/resources/paper/paper_1.jpg
ADDED
Git LFS Details
|
synthdog/resources/paper/paper_2.jpg
ADDED
Git LFS Details
|
synthdog/resources/paper/paper_3.jpg
ADDED
Git LFS Details
|