uartimcs committed on
Commit
608a96e
1 Parent(s): 001cc92

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.

Files changed (50)
  1. app.py +26 -29
  2. config/train_docvqa.yaml +23 -0
  3. config/train_invoices.yaml +22 -0
  4. config/train_rvlcdip.yaml +23 -0
  5. config/train_zhtrainticket.yaml +22 -0
  6. dataset/.gitkeep +1 -0
  7. misc/overview.png +0 -0
  8. misc/sample_image_cord_test_receipt_00004.png +3 -0
  9. misc/sample_image_donut_document.png +0 -0
  10. misc/sample_synthdog.png +3 -0
  11. misc/screenshot_gradio_demos.png +3 -0
  12. result/.gitkeep +1 -0
  13. synthdog/README.md +63 -0
  14. synthdog/config_en.yaml +119 -0
  15. synthdog/config_ja.yaml +119 -0
  16. synthdog/config_ko.yaml +119 -0
  17. synthdog/config_zh.yaml +119 -0
  18. synthdog/elements/__init__.py +12 -0
  19. synthdog/elements/background.py +24 -0
  20. synthdog/elements/content.py +118 -0
  21. synthdog/elements/document.py +65 -0
  22. synthdog/elements/paper.py +17 -0
  23. synthdog/elements/textbox.py +43 -0
  24. synthdog/layouts/__init__.py +9 -0
  25. synthdog/layouts/grid.py +68 -0
  26. synthdog/layouts/grid_stack.py +74 -0
  27. synthdog/resources/background/bedroom_83.jpg +0 -0
  28. synthdog/resources/background/bob+dylan_83.jpg +0 -0
  29. synthdog/resources/background/coffee_122.jpg +0 -0
  30. synthdog/resources/background/coffee_18.jpeg +3 -0
  31. synthdog/resources/background/crater_141.jpg +3 -0
  32. synthdog/resources/background/cream_124.jpg +3 -0
  33. synthdog/resources/background/eagle_110.jpg +0 -0
  34. synthdog/resources/background/farm_25.jpg +0 -0
  35. synthdog/resources/background/hiking_18.jpg +0 -0
  36. synthdog/resources/corpus/enwiki.txt +0 -0
  37. synthdog/resources/corpus/jawiki.txt +0 -0
  38. synthdog/resources/corpus/kowiki.txt +0 -0
  39. synthdog/resources/corpus/zhwiki.txt +0 -0
  40. synthdog/resources/font/en/NotoSans-Regular.ttf +0 -0
  41. synthdog/resources/font/en/NotoSerif-Regular.ttf +0 -0
  42. synthdog/resources/font/ja/NotoSansJP-Regular.otf +3 -0
  43. synthdog/resources/font/ja/NotoSerifJP-Regular.otf +3 -0
  44. synthdog/resources/font/ko/NotoSansKR-Regular.otf +3 -0
  45. synthdog/resources/font/ko/NotoSerifKR-Regular.otf +3 -0
  46. synthdog/resources/font/zh/NotoSansSC-Regular.otf +3 -0
  47. synthdog/resources/font/zh/NotoSerifSC-Regular.otf +3 -0
  48. synthdog/resources/paper/paper_1.jpg +3 -0
  49. synthdog/resources/paper/paper_2.jpg +3 -0
  50. synthdog/resources/paper/paper_3.jpg +3 -0
app.py CHANGED
@@ -1,29 +1,26 @@
- import gradio as gr
- import argparse
- import torch
- from PIL import Image
- from donut import DonutModel
- def demo_process(input_img):
-     global model, task_prompt, task_name
-     input_img = Image.fromarray(input_img)
-     output = model.inference(image=input_img, prompt=task_prompt)["predictions"][0]
-     return output
- parser = argparse.ArgumentParser()
- parser.add_argument("--task", type=str, default="Booking")
- parser.add_argument("--pretrained_path", type=str, default="uartimcs/donut-booking-extract")
- args, left_argv = parser.parse_known_args()
- task_name = args.task
- task_prompt = f"<s_{task_name}>"
-
- image = Image.open("./sample-booking/CMA_150.jpg")
- image.save("CMA_sample.jpg")
- image = Image.open("./sample-booking/COSCO_150.jpg")
- image.save("COSCO_sample.jpg")
- image = Image.open("./sample-booking/ONEY_150.jpg")
- image.save("ONEY_sample.jpg")
-
-
- model = DonutModel.from_pretrained("uartimcs/donut-booking-extract")
- model.eval()
- demo = gr.Interface(fn=demo_process, inputs="image", outputs="json", title=f"Donut 🍩 demonstration for `{task_name}` task", examples=[["CMA_sample.jpg"], ["COSCO_sample.jpg"], ["ONEY_sample.jpg"]])
- demo.launch()

+ import gradio as gr
+ import argparse
+ import torch
+ from PIL import Image
+ from donut import DonutModel
+ def demo_process(input_img):
+     global model, task_prompt, task_name
+     input_img = Image.fromarray(input_img)
+     output = model.inference(image=input_img, prompt=task_prompt)["predictions"][0]
+     return output
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--task", type=str, default="Booking")
+ parser.add_argument("--pretrained_path", type=str, default="result/train_booking/20241112_150925")
+ args, left_argv = parser.parse_known_args()
+ task_name = args.task
+ task_prompt = f"<s_{task_name}>"
+ model = DonutModel.from_pretrained("./result/train_booking/20241112_150925")
+ if torch.cuda.is_available():
+     model.half()
+     device = torch.device("cuda")
+     model.to(device)
+ else:
+     model.encoder.to(torch.bfloat16)
+ model.eval()
+ demo = gr.Interface(fn=demo_process, inputs="image", outputs="json", title=f"Donut 🍩 demonstration for `{task_name}` task")
+ demo.launch(debug=True)
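
Note on the new app.py: the `--pretrained_path` argument is parsed but the checkpoint directory is hard-coded in `DonutModel.from_pretrained`, and the sample-image gallery was dropped. A minimal sketch of launching the demo locally, assuming the `donut-python`, `torch`, and `gradio` packages are installed and the hard-coded checkpoint directory exists (the commands are illustrative, not part of this commit):

```bash
# Illustrative only: run the Gradio demo from the repository root.
# Assumes donut-python, torch, and gradio are installed and the checkpoint
# at ./result/train_booking/20241112_150925 is present (it is hard-coded in app.py).
pip install donut-python gradio torch
python app.py --task Booking
# Gradio then serves the interface on http://127.0.0.1:7860 by default.
```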
 
 
 
config/train_docvqa.yaml ADDED
@@ -0,0 +1,23 @@
+ resume_from_checkpoint_path: null
+ result_path: "./result"
+ pretrained_model_name_or_path: "naver-clova-ix/donut-base"
+ dataset_name_or_paths: ["./dataset/docvqa"] # should be prepared from https://rrc.cvc.uab.es/?ch=17
+ sort_json_key: True
+ train_batch_sizes: [2]
+ val_batch_sizes: [4]
+ input_size: [2560, 1920]
+ max_length: 128
+ align_long_axis: False
+ # num_nodes: 8 # memo: donut-base-finetuned-docvqa was trained with 8 nodes
+ num_nodes: 1
+ seed: 2022
+ lr: 3e-5
+ warmup_steps: 10000
+ num_training_samples_per_epoch: 39463
+ max_epochs: 300
+ max_steps: -1
+ num_workers: 8
+ val_check_interval: 1.0
+ check_val_every_n_epoch: 1
+ gradient_clip_val: 0.25
+ verbose: True
config/train_invoices.yaml ADDED
@@ -0,0 +1,22 @@
+ resume_from_checkpoint_path: null # only used for the resume_from_checkpoint option in PL
+ result_path: "./result"
+ pretrained_model_name_or_path: "naver-clova-ix/donut-base" # loading a pre-trained model (from the model hub or a local path)
+ dataset_name_or_paths: ["./dataset/SGSInvoice"] # loading datasets (from the hub or a local path)
+ sort_json_key: False # the cord dataset is preprocessed and publicly available at https://huggingface.co/datasets/naver-clova-ix/cord-v2
+ train_batch_sizes: [2]
+ val_batch_sizes: [1]
+ input_size: [1280, 960] # when the input resolution differs from the pre-training setting, some weights will be newly initialized (but the model training would be okay)
+ max_length: 768
+ align_long_axis: False
+ num_nodes: 1
+ seed: 2022
+ lr: 3e-5
+ warmup_steps: 60 # 800/8*30/10, 10%
+ num_training_samples_per_epoch: 800
+ max_epochs: 10
+ max_steps: -1
+ num_workers: 2
+ val_check_interval: 1.0
+ check_val_every_n_epoch: 3
+ gradient_clip_val: 1.0
+ verbose: True
config/train_rvlcdip.yaml ADDED
@@ -0,0 +1,23 @@
+ resume_from_checkpoint_path: null
+ result_path: "./result"
+ pretrained_model_name_or_path: "naver-clova-ix/donut-base"
+ dataset_name_or_paths: ["./dataset/rvlcdip"] # should be prepared from https://www.cs.cmu.edu/~aharley/rvl-cdip/
+ sort_json_key: True
+ train_batch_sizes: [2]
+ val_batch_sizes: [4]
+ input_size: [2560, 1920]
+ max_length: 8
+ align_long_axis: False
+ # num_nodes: 8 # memo: donut-base-finetuned-rvlcdip was trained with 8 nodes
+ num_nodes: 1
+ seed: 2022
+ lr: 2e-5
+ warmup_steps: 10000
+ num_training_samples_per_epoch: 320000
+ max_epochs: 100
+ max_steps: -1
+ num_workers: 8
+ val_check_interval: 1.0
+ check_val_every_n_epoch: 1
+ gradient_clip_val: 1.0
+ verbose: True
config/train_zhtrainticket.yaml ADDED
@@ -0,0 +1,22 @@
+ resume_from_checkpoint_path: null
+ result_path: "./result"
+ pretrained_model_name_or_path: "naver-clova-ix/donut-base"
+ dataset_name_or_paths: ["./dataset/zhtrainticket"] # should be prepared from https://github.com/beacandler/EATEN
+ sort_json_key: True
+ train_batch_sizes: [8]
+ val_batch_sizes: [1]
+ input_size: [960, 1280]
+ max_length: 256
+ align_long_axis: False
+ num_nodes: 1
+ seed: 2022
+ lr: 3e-5
+ warmup_steps: 300
+ num_training_samples_per_epoch: 1368
+ max_epochs: 10
+ max_steps: -1
+ num_workers: 8
+ val_check_interval: 1.0
+ check_val_every_n_epoch: 1
+ gradient_clip_val: 1.0
+ verbose: True
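
The four training configs above follow the upstream Donut fine-tuning recipe. A hedged sketch of how such a config is typically consumed, assuming this repository keeps the upstream `train.py` entry point (not visible in this 50-file view); any YAML key can also be overridden on the command line:

```bash
# Illustrative only: launch fine-tuning with one of the configs above, assuming
# the upstream Donut train.py script is present alongside the config/ folder.
python train.py --config config/train_docvqa.yaml --exp_version "docvqa_test"
# Checkpoints and logs land under result_path ("./result") from the config.
```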
dataset/.gitkeep ADDED
@@ -0,0 +1 @@
+
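
dataset/ is committed only as a placeholder. A sketch of the layout the configs above point at (e.g. `./dataset/docvqa`, `./dataset/SGSInvoice`), assuming the upstream Donut convention of one folder per split with a `metadata.jsonl` of ground truths; the exact names are illustrative:

```bash
# Illustrative only: expected dataset layout for the upstream Donut data loader.
mkdir -p dataset/docvqa/{train,validation,test}
ls dataset/docvqa/train
# metadata.jsonl  image_0.png  image_1.png  ...  (one ground-truth JSON line per image)
```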
misc/overview.png ADDED
misc/sample_image_cord_test_receipt_00004.png ADDED

Git LFS Details

  • SHA256: 8f3eee7068c96e86cdb2e4b5c53085cb5e1439462edd55c373548cb1962801ad
  • Pointer size: 132 Bytes
  • Size of remote file: 1.64 MB
misc/sample_image_donut_document.png ADDED
misc/sample_synthdog.png ADDED

Git LFS Details

  • SHA256: 26ca7665ceb4cb850e19aaf6f4cbc9b37ea5780c5e9d512764dad6a83b7931f1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.44 MB
misc/screenshot_gradio_demos.png ADDED

Git LFS Details

  • SHA256: f0f063308ddc48feb5a493560a18d057c68f8989fdc00eb91c171e0e9b552f3e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.39 MB
result/.gitkeep ADDED
@@ -0,0 +1 @@
+
synthdog/README.md ADDED
@@ -0,0 +1,63 @@
+ # SynthDoG 🐶: Synthetic Document Generator
+
+ SynthDoG is a synthetic document generator for visual document understanding (VDU).
+
+ ![image](../misc/sample_synthdog.png)
+
+ ## Prerequisites
+
+ - python>=3.6
+ - [synthtiger](https://github.com/clovaai/synthtiger) (`pip install synthtiger`)
+
+ ## Usage
+
+ ```bash
+ # Set environment variable (for macOS)
+ $ export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
+
+ synthtiger -o ./outputs/SynthDoG_en -c 50 -w 4 -v template.py SynthDoG config_en.yaml
+
+ {'config': 'config_en.yaml',
+ 'count': 50,
+ 'name': 'SynthDoG',
+ 'output': './outputs/SynthDoG_en',
+ 'script': 'template.py',
+ 'verbose': True,
+ 'worker': 4}
+ {'aspect_ratio': [1, 2],
+ .
+ .
+ 'quality': [50, 95],
+ 'short_size': [720, 1024]}
+ Generated 1 data (task 3)
+ Generated 2 data (task 0)
+ Generated 3 data (task 1)
+ .
+ .
+ Generated 49 data (task 48)
+ Generated 50 data (task 49)
+ 46.32 seconds elapsed
+ ```
+
+ Some important arguments:
+
+ - `-o` : directory path to save data.
+ - `-c` : number of samples to generate.
+ - `-w` : number of workers.
+ - `-s` : random seed.
+ - `-v` : print error messages.
+
+ To generate ECJK samples:
+ ```bash
+ # english
+ synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_en.yaml
+
+ # chinese
+ synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_zh.yaml
+
+ # japanese
+ synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_ja.yaml
+
+ # korean
+ synthtiger -o {dataset_path} -c {num_of_data} -w {num_of_workers} -v template.py SynthDoG config_ko.yaml
+ ```
synthdog/config_en.yaml ADDED
@@ -0,0 +1,119 @@
+ quality: [50, 95]
+ landscape: 0.5
+ short_size: [720, 1024]
+ aspect_ratio: [1, 2]
+
+ background:
+   image:
+     paths: [resources/background]
+     weights: [1]
+
+   effect:
+     args:
+       # gaussian blur
+       - prob: 1
+         args:
+           sigma: [0, 10]
+
+ document:
+   fullscreen: 0.5
+   landscape: 0.5
+   short_size: [480, 1024]
+   aspect_ratio: [1, 2]
+
+   paper:
+     image:
+       paths: [resources/paper]
+       weights: [1]
+       alpha: [0, 0.2]
+       grayscale: 1
+       crop: 1
+
+   content:
+     margin: [0, 0.1]
+     text:
+       path: resources/corpus/enwiki.txt
+     font:
+       paths: [resources/font/en]
+       weights: [1]
+       bold: 0
+     layout:
+       text_scale: [0.0334, 0.1]
+       max_row: 10
+       max_col: 3
+       fill: [0.5, 1]
+       full: 0.1
+       align: [left, right, center]
+       stack_spacing: [0.0334, 0.0334]
+       stack_fill: [0.5, 1]
+       stack_full: 0.1
+     textbox:
+       fill: [0.5, 1]
+     textbox_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+     content_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+
+   effect:
+     args:
+       # elastic distortion
+       - prob: 1
+         args:
+           alpha: [0, 1]
+           sigma: [0, 0.5]
+       # gaussian noise
+       - prob: 1
+         args:
+           scale: [0, 8]
+           per_channel: 0
+       # perspective
+       - prob: 1
+         args:
+           weights: [750, 50, 50, 25, 25, 25, 25, 50]
+           args:
+             - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
+
+ effect:
+   args:
+     # color
+     - prob: 0.2
+       args:
+         rgb: [[0, 255], [0, 255], [0, 255]]
+         alpha: [0, 0.2]
+     # shadow
+     - prob: 1
+       args:
+         intensity: [0, 160]
+         amount: [0, 1]
+         smoothing: [0.5, 1]
+         bidirectional: 0
+     # contrast
+     - prob: 1
+       args:
+         alpha: [1, 1.5]
+     # brightness
+     - prob: 1
+       args:
+         beta: [-48, 0]
+     # motion blur
+     - prob: 0.5
+       args:
+         k: [3, 5]
+         angle: [0, 360]
+     # gaussian blur
+     - prob: 1
+       args:
+         sigma: [0, 1.5]
synthdog/config_ja.yaml ADDED
@@ -0,0 +1,119 @@
+ quality: [50, 95]
+ landscape: 0.5
+ short_size: [720, 1024]
+ aspect_ratio: [1, 2]
+
+ background:
+   image:
+     paths: [resources/background]
+     weights: [1]
+
+   effect:
+     args:
+       # gaussian blur
+       - prob: 1
+         args:
+           sigma: [0, 10]
+
+ document:
+   fullscreen: 0.5
+   landscape: 0.5
+   short_size: [480, 1024]
+   aspect_ratio: [1, 2]
+
+   paper:
+     image:
+       paths: [resources/paper]
+       weights: [1]
+       alpha: [0, 0.2]
+       grayscale: 1
+       crop: 1
+
+   content:
+     margin: [0, 0.1]
+     text:
+       path: resources/corpus/jawiki.txt
+     font:
+       paths: [resources/font/ja]
+       weights: [1]
+       bold: 0
+     layout:
+       text_scale: [0.0334, 0.1]
+       max_row: 10
+       max_col: 3
+       fill: [0.5, 1]
+       full: 0.1
+       align: [left, right, center]
+       stack_spacing: [0.0334, 0.0334]
+       stack_fill: [0.5, 1]
+       stack_full: 0.1
+     textbox:
+       fill: [0.5, 1]
+     textbox_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+     content_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+
+   effect:
+     args:
+       # elastic distortion
+       - prob: 1
+         args:
+           alpha: [0, 1]
+           sigma: [0, 0.5]
+       # gaussian noise
+       - prob: 1
+         args:
+           scale: [0, 8]
+           per_channel: 0
+       # perspective
+       - prob: 1
+         args:
+           weights: [750, 50, 50, 25, 25, 25, 25, 50]
+           args:
+             - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
+
+ effect:
+   args:
+     # color
+     - prob: 0.2
+       args:
+         rgb: [[0, 255], [0, 255], [0, 255]]
+         alpha: [0, 0.2]
+     # shadow
+     - prob: 1
+       args:
+         intensity: [0, 160]
+         amount: [0, 1]
+         smoothing: [0.5, 1]
+         bidirectional: 0
+     # contrast
+     - prob: 1
+       args:
+         alpha: [1, 1.5]
+     # brightness
+     - prob: 1
+       args:
+         beta: [-48, 0]
+     # motion blur
+     - prob: 0.5
+       args:
+         k: [3, 5]
+         angle: [0, 360]
+     # gaussian blur
+     - prob: 1
+       args:
+         sigma: [0, 1.5]
synthdog/config_ko.yaml ADDED
@@ -0,0 +1,119 @@
+ quality: [50, 95]
+ landscape: 0.5
+ short_size: [720, 1024]
+ aspect_ratio: [1, 2]
+
+ background:
+   image:
+     paths: [resources/background]
+     weights: [1]
+
+   effect:
+     args:
+       # gaussian blur
+       - prob: 1
+         args:
+           sigma: [0, 10]
+
+ document:
+   fullscreen: 0.5
+   landscape: 0.5
+   short_size: [480, 1024]
+   aspect_ratio: [1, 2]
+
+   paper:
+     image:
+       paths: [resources/paper]
+       weights: [1]
+       alpha: [0, 0.2]
+       grayscale: 1
+       crop: 1
+
+   content:
+     margin: [0, 0.1]
+     text:
+       path: resources/corpus/kowiki.txt
+     font:
+       paths: [resources/font/ko]
+       weights: [1]
+       bold: 0
+     layout:
+       text_scale: [0.0334, 0.1]
+       max_row: 10
+       max_col: 3
+       fill: [0.5, 1]
+       full: 0.1
+       align: [left, right, center]
+       stack_spacing: [0.0334, 0.0334]
+       stack_fill: [0.5, 1]
+       stack_full: 0.1
+     textbox:
+       fill: [0.5, 1]
+     textbox_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+     content_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+
+   effect:
+     args:
+       # elastic distortion
+       - prob: 1
+         args:
+           alpha: [0, 1]
+           sigma: [0, 0.5]
+       # gaussian noise
+       - prob: 1
+         args:
+           scale: [0, 8]
+           per_channel: 0
+       # perspective
+       - prob: 1
+         args:
+           weights: [750, 50, 50, 25, 25, 25, 25, 50]
+           args:
+             - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
+
+ effect:
+   args:
+     # color
+     - prob: 0.2
+       args:
+         rgb: [[0, 255], [0, 255], [0, 255]]
+         alpha: [0, 0.2]
+     # shadow
+     - prob: 1
+       args:
+         intensity: [0, 160]
+         amount: [0, 1]
+         smoothing: [0.5, 1]
+         bidirectional: 0
+     # contrast
+     - prob: 1
+       args:
+         alpha: [1, 1.5]
+     # brightness
+     - prob: 1
+       args:
+         beta: [-48, 0]
+     # motion blur
+     - prob: 0.5
+       args:
+         k: [3, 5]
+         angle: [0, 360]
+     # gaussian blur
+     - prob: 1
+       args:
+         sigma: [0, 1.5]
synthdog/config_zh.yaml ADDED
@@ -0,0 +1,119 @@
+ quality: [50, 95]
+ landscape: 0.5
+ short_size: [720, 1024]
+ aspect_ratio: [1, 2]
+
+ background:
+   image:
+     paths: [resources/background]
+     weights: [1]
+
+   effect:
+     args:
+       # gaussian blur
+       - prob: 1
+         args:
+           sigma: [0, 10]
+
+ document:
+   fullscreen: 0.5
+   landscape: 0.5
+   short_size: [480, 1024]
+   aspect_ratio: [1, 2]
+
+   paper:
+     image:
+       paths: [resources/paper]
+       weights: [1]
+       alpha: [0, 0.2]
+       grayscale: 1
+       crop: 1
+
+   content:
+     margin: [0, 0.1]
+     text:
+       path: resources/corpus/zhwiki.txt
+     font:
+       paths: [resources/font/zh]
+       weights: [1]
+       bold: 0
+     layout:
+       text_scale: [0.0334, 0.1]
+       max_row: 10
+       max_col: 3
+       fill: [0.5, 1]
+       full: 0.1
+       align: [left, right, center]
+       stack_spacing: [0.0334, 0.0334]
+       stack_fill: [0.5, 1]
+       stack_full: 0.1
+     textbox:
+       fill: [0.5, 1]
+     textbox_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+     content_color:
+       prob: 0.2
+       args:
+         gray: [0, 64]
+         colorize: 1
+
+   effect:
+     args:
+       # elastic distortion
+       - prob: 1
+         args:
+           alpha: [0, 1]
+           sigma: [0, 0.5]
+       # gaussian noise
+       - prob: 1
+         args:
+           scale: [0, 8]
+           per_channel: 0
+       # perspective
+       - prob: 1
+         args:
+           weights: [750, 50, 50, 25, 25, 25, 25, 50]
+           args:
+             - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
+             - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
+             - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]
+
+ effect:
+   args:
+     # color
+     - prob: 0.2
+       args:
+         rgb: [[0, 255], [0, 255], [0, 255]]
+         alpha: [0, 0.2]
+     # shadow
+     - prob: 1
+       args:
+         intensity: [0, 160]
+         amount: [0, 1]
+         smoothing: [0.5, 1]
+         bidirectional: 0
+     # contrast
+     - prob: 1
+       args:
+         alpha: [1, 1.5]
+     # brightness
+     - prob: 1
+       args:
+         beta: [-48, 0]
+     # motion blur
+     - prob: 0.5
+       args:
+         k: [3, 5]
+         angle: [0, 360]
+     # gaussian blur
+     - prob: 1
+       args:
+         sigma: [0, 1.5]
synthdog/elements/__init__.py ADDED
@@ -0,0 +1,12 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ from elements.background import Background
+ from elements.content import Content
+ from elements.document import Document
+ from elements.paper import Paper
+ from elements.textbox import TextBox
+
+ __all__ = ["Background", "Content", "Document", "Paper", "TextBox"]
synthdog/elements/background.py ADDED
@@ -0,0 +1,24 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ from synthtiger import components, layers
+
+
+ class Background:
+     def __init__(self, config):
+         self.image = components.BaseTexture(**config.get("image", {}))
+         self.effect = components.Iterator(
+             [
+                 components.Switch(components.GaussianBlur()),
+             ],
+             **config.get("effect", {})
+         )
+
+     def generate(self, size):
+         bg_layer = layers.RectLayer(size, (255, 255, 255, 255))
+         self.image.apply([bg_layer])
+         self.effect.apply([bg_layer])
+
+         return bg_layer
synthdog/elements/content.py ADDED
@@ -0,0 +1,118 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ from collections import OrderedDict
+
+ import numpy as np
+ from synthtiger import components
+
+ from elements.textbox import TextBox
+ from layouts import GridStack
+
+
+ class TextReader:
+     def __init__(self, path, cache_size=2 ** 28, block_size=2 ** 20):
+         self.fp = open(path, "r", encoding="utf-8")
+         self.length = 0
+         self.offsets = [0]
+         self.cache = OrderedDict()
+         self.cache_size = cache_size
+         self.block_size = block_size
+         self.bucket_size = cache_size // block_size
+         self.idx = 0
+
+         while True:
+             text = self.fp.read(self.block_size)
+             if not text:
+                 break
+             self.length += len(text)
+             self.offsets.append(self.fp.tell())
+
+     def __len__(self):
+         return self.length
+
+     def __iter__(self):
+         return self
+
+     def __next__(self):
+         char = self.get()
+         self.next()
+         return char
+
+     def move(self, idx):
+         self.idx = idx
+
+     def next(self):
+         self.idx = (self.idx + 1) % self.length
+
+     def prev(self):
+         self.idx = (self.idx - 1) % self.length
+
+     def get(self):
+         key = self.idx // self.block_size
+
+         if key in self.cache:
+             text = self.cache[key]
+         else:
+             if len(self.cache) >= self.bucket_size:
+                 self.cache.popitem(last=False)
+
+             offset = self.offsets[key]
+             self.fp.seek(offset, 0)
+             text = self.fp.read(self.block_size)
+             self.cache[key] = text
+
+         self.cache.move_to_end(key)
+         char = text[self.idx % self.block_size]
+         return char
+
+
+ class Content:
+     def __init__(self, config):
+         self.margin = config.get("margin", [0, 0.1])
+         self.reader = TextReader(**config.get("text", {}))
+         self.font = components.BaseFont(**config.get("font", {}))
+         self.layout = GridStack(config.get("layout", {}))
+         self.textbox = TextBox(config.get("textbox", {}))
+         self.textbox_color = components.Switch(components.Gray(), **config.get("textbox_color", {}))
+         self.content_color = components.Switch(components.Gray(), **config.get("content_color", {}))
+
+     def generate(self, size):
+         width, height = size
+
+         layout_left = width * np.random.uniform(self.margin[0], self.margin[1])
+         layout_top = height * np.random.uniform(self.margin[0], self.margin[1])
+         layout_width = max(width - layout_left * 2, 0)
+         layout_height = max(height - layout_top * 2, 0)
+         layout_bbox = [layout_left, layout_top, layout_width, layout_height]
+
+         text_layers, texts = [], []
+         layouts = self.layout.generate(layout_bbox)
+         self.reader.move(np.random.randint(len(self.reader)))
+
+         for layout in layouts:
+             font = self.font.sample()
+
+             for bbox, align in layout:
+                 x, y, w, h = bbox
+                 text_layer, text = self.textbox.generate((w, h), self.reader, font)
+                 self.reader.prev()
+
+                 if text_layer is None:
+                     continue
+
+                 text_layer.center = (x + w / 2, y + h / 2)
+                 if align == "left":
+                     text_layer.left = x
+                 if align == "right":
+                     text_layer.right = x + w
+
+                 self.textbox_color.apply([text_layer])
+                 text_layers.append(text_layer)
+                 texts.append(text)
+
+         self.content_color.apply(text_layers)
+
+         return text_layers, texts
synthdog/elements/document.py ADDED
@@ -0,0 +1,65 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ import numpy as np
+ from synthtiger import components
+
+ from elements.content import Content
+ from elements.paper import Paper
+
+
+ class Document:
+     def __init__(self, config):
+         self.fullscreen = config.get("fullscreen", 0.5)
+         self.landscape = config.get("landscape", 0.5)
+         self.short_size = config.get("short_size", [480, 1024])
+         self.aspect_ratio = config.get("aspect_ratio", [1, 2])
+         self.paper = Paper(config.get("paper", {}))
+         self.content = Content(config.get("content", {}))
+         self.effect = components.Iterator(
+             [
+                 components.Switch(components.ElasticDistortion()),
+                 components.Switch(components.AdditiveGaussianNoise()),
+                 components.Switch(
+                     components.Selector(
+                         [
+                             components.Perspective(),
+                             components.Perspective(),
+                             components.Perspective(),
+                             components.Perspective(),
+                             components.Perspective(),
+                             components.Perspective(),
+                             components.Perspective(),
+                             components.Perspective(),
+                         ]
+                     )
+                 ),
+             ],
+             **config.get("effect", {}),
+         )
+
+     def generate(self, size):
+         width, height = size
+         fullscreen = np.random.rand() < self.fullscreen
+
+         if not fullscreen:
+             landscape = np.random.rand() < self.landscape
+             max_size = width if landscape else height
+             short_size = np.random.randint(
+                 min(width, height, self.short_size[0]),
+                 min(width, height, self.short_size[1]) + 1,
+             )
+             aspect_ratio = np.random.uniform(
+                 min(max_size / short_size, self.aspect_ratio[0]),
+                 min(max_size / short_size, self.aspect_ratio[1]),
+             )
+             long_size = int(short_size * aspect_ratio)
+             size = (long_size, short_size) if landscape else (short_size, long_size)
+
+         text_layers, texts = self.content.generate(size)
+         paper_layer = self.paper.generate(size)
+         self.effect.apply([*text_layers, paper_layer])
+
+         return paper_layer, text_layers, texts
synthdog/elements/paper.py ADDED
@@ -0,0 +1,17 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ from synthtiger import components, layers
+
+
+ class Paper:
+     def __init__(self, config):
+         self.image = components.BaseTexture(**config.get("image", {}))
+
+     def generate(self, size):
+         paper_layer = layers.RectLayer(size, (255, 255, 255, 255))
+         self.image.apply([paper_layer])
+
+         return paper_layer
synthdog/elements/textbox.py ADDED
@@ -0,0 +1,43 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ import numpy as np
+ from synthtiger import layers
+
+
+ class TextBox:
+     def __init__(self, config):
+         self.fill = config.get("fill", [1, 1])
+
+     def generate(self, size, text, font):
+         width, height = size
+
+         char_layers, chars = [], []
+         fill = np.random.uniform(self.fill[0], self.fill[1])
+         width = np.clip(width * fill, height, width)
+         font = {**font, "size": int(height)}
+         left, top = 0, 0
+
+         for char in text:
+             if char in "\r\n":
+                 continue
+
+             char_layer = layers.TextLayer(char, **font)
+             char_scale = height / char_layer.height
+             char_layer.bbox = [left, top, *(char_layer.size * char_scale)]
+             if char_layer.right > width:
+                 break
+
+             char_layers.append(char_layer)
+             chars.append(char)
+             left = char_layer.right
+
+         text = "".join(chars).strip()
+         if len(char_layers) == 0 or len(text) == 0:
+             return None, None
+
+         text_layer = layers.Group(char_layers).merge()
+
+         return text_layer, text
synthdog/layouts/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ from layouts.grid import Grid
+ from layouts.grid_stack import GridStack
+
+ __all__ = ["Grid", "GridStack"]
synthdog/layouts/grid.py ADDED
@@ -0,0 +1,68 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ import numpy as np
+
+
+ class Grid:
+     def __init__(self, config):
+         self.text_scale = config.get("text_scale", [0.05, 0.1])
+         self.max_row = config.get("max_row", 5)
+         self.max_col = config.get("max_col", 3)
+         self.fill = config.get("fill", [0, 1])
+         self.full = config.get("full", 0)
+         self.align = config.get("align", ["left", "right", "center"])
+
+     def generate(self, bbox):
+         left, top, width, height = bbox
+
+         text_scale = np.random.uniform(self.text_scale[0], self.text_scale[1])
+         text_size = min(width, height) * text_scale
+         grids = np.random.permutation(self.max_row * self.max_col)
+
+         for grid in grids:
+             row = grid // self.max_col + 1
+             col = grid % self.max_col + 1
+             if text_size * (col * 2 - 1) <= width and text_size * row <= height:
+                 break
+         else:
+             return None
+
+         bound = max(1 - text_size / width * (col - 1), 0)
+         full = np.random.rand() < self.full
+         fill = np.random.uniform(self.fill[0], self.fill[1])
+         fill = 1 if full else fill
+         fill = np.clip(fill, 0, bound)
+
+         padding = np.random.randint(4) if col > 1 else np.random.randint(1, 4)
+         padding = (bool(padding // 2), bool(padding % 2))
+
+         weights = np.zeros(col * 2 + 1)
+         weights[1:-1] = text_size / width
+         probs = 1 - np.random.rand(col * 2 + 1)
+         probs[0] = 0 if not padding[0] else probs[0]
+         probs[-1] = 0 if not padding[-1] else probs[-1]
+         probs[1::2] *= max(fill - sum(weights[1::2]), 0) / sum(probs[1::2])
+         probs[::2] *= max(1 - fill - sum(weights[::2]), 0) / sum(probs[::2])
+         weights += probs
+
+         widths = [width * weights[c] for c in range(col * 2 + 1)]
+         heights = [text_size for _ in range(row)]
+
+         xs = np.cumsum([0] + widths)
+         ys = np.cumsum([0] + heights)
+
+         layout = []
+
+         for c in range(col):
+             align = self.align[np.random.randint(len(self.align))]
+
+             for r in range(row):
+                 x, y = xs[c * 2 + 1], ys[r]
+                 w, h = xs[c * 2 + 2] - x, ys[r + 1] - y
+                 bbox = [left + x, top + y, w, h]
+                 layout.append((bbox, align))
+
+         return layout
synthdog/layouts/grid_stack.py ADDED
@@ -0,0 +1,74 @@
+ """
+ Donut
+ Copyright (c) 2022-present NAVER Corp.
+ MIT License
+ """
+ import numpy as np
+
+ from layouts import Grid
+
+
+ class GridStack:
+     def __init__(self, config):
+         self.text_scale = config.get("text_scale", [0.05, 0.1])
+         self.max_row = config.get("max_row", 5)
+         self.max_col = config.get("max_col", 3)
+         self.fill = config.get("fill", [0, 1])
+         self.full = config.get("full", 0)
+         self.align = config.get("align", ["left", "right", "center"])
+         self.stack_spacing = config.get("stack_spacing", [0, 0.05])
+         self.stack_fill = config.get("stack_fill", [1, 1])
+         self.stack_full = config.get("stack_full", 0)
+         self._grid = Grid(
+             {
+                 "text_scale": self.text_scale,
+                 "max_row": self.max_row,
+                 "max_col": self.max_col,
+                 "align": self.align,
+             }
+         )
+
+     def generate(self, bbox):
+         left, top, width, height = bbox
+
+         stack_spacing = np.random.uniform(self.stack_spacing[0], self.stack_spacing[1])
+         stack_spacing *= min(width, height)
+
+         stack_full = np.random.rand() < self.stack_full
+         stack_fill = np.random.uniform(self.stack_fill[0], self.stack_fill[1])
+         stack_fill = 1 if stack_full else stack_fill
+
+         full = np.random.rand() < self.full
+         fill = np.random.uniform(self.fill[0], self.fill[1])
+         fill = 1 if full else fill
+         self._grid.fill = [fill, fill]
+
+         layouts = []
+         line = 0
+
+         while True:
+             grid_size = (width, height * stack_fill - line)
+             text_scale = np.random.uniform(self.text_scale[0], self.text_scale[1])
+             text_size = min(width, height) * text_scale
+             text_scale = text_size / min(grid_size)
+             self._grid.text_scale = [text_scale, text_scale]
+
+             layout = self._grid.generate([left, top + line, *grid_size])
+             if layout is None:
+                 break
+
+             line = max(y + h - top for (_, y, _, h), _ in layout) + stack_spacing
+             layouts.append(layout)
+
+         line = max(line - stack_spacing, 0)
+         space = max(height - line, 0)
+         spaces = np.random.rand(len(layouts) + 1)
+         spaces *= space / sum(spaces) if sum(spaces) > 0 else 0
+         spaces = np.cumsum(spaces)
+
+         for layout, space in zip(layouts, spaces):
+             for bbox, _ in layout:
+                 x, y, w, h = bbox
+                 bbox[:] = [x, y + space, w, h]
+
+         return layouts
synthdog/resources/background/bedroom_83.jpg ADDED
synthdog/resources/background/bob+dylan_83.jpg ADDED
synthdog/resources/background/coffee_122.jpg ADDED
synthdog/resources/background/coffee_18.jpeg ADDED

Git LFS Details

  • SHA256: 3be69b618a13243f755bb686b14cc5ded952d328f3fd06ed0932599aa993e27c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.78 MB
synthdog/resources/background/crater_141.jpg ADDED

Git LFS Details

  • SHA256: 8993258d37d02a95c3d4de7a25c81af44c86281086631fdd3edfdf8b94f0844b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.82 MB
synthdog/resources/background/cream_124.jpg ADDED

Git LFS Details

  • SHA256: a12e36c3edbb8eae45ceada56b3e38963398e85618fc582a9910fbdb63156ff9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.24 MB
synthdog/resources/background/eagle_110.jpg ADDED
synthdog/resources/background/farm_25.jpg ADDED
synthdog/resources/background/hiking_18.jpg ADDED
synthdog/resources/corpus/enwiki.txt ADDED
The diff for this file is too large to render. See raw diff
 
synthdog/resources/corpus/jawiki.txt ADDED
The diff for this file is too large to render. See raw diff
 
synthdog/resources/corpus/kowiki.txt ADDED
The diff for this file is too large to render. See raw diff
 
synthdog/resources/corpus/zhwiki.txt ADDED
The diff for this file is too large to render. See raw diff
 
synthdog/resources/font/en/NotoSans-Regular.ttf ADDED
Binary file (399 kB).
 
synthdog/resources/font/en/NotoSerif-Regular.ttf ADDED
Binary file (375 kB).
 
synthdog/resources/font/ja/NotoSansJP-Regular.otf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47c29251c03bd7731461efd9aff279d04058025c24fc08ed49552aeec20adc6d
+ size 4548148
synthdog/resources/font/ja/NotoSerifJP-Regular.otf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a0f0ab1c97dc9b1bb857f3259f5de23c6caa224c14d12878575b0a84676db8f
+ size 6169384
synthdog/resources/font/ko/NotoSansKR-Regular.otf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f62e282b5ff3694c09af182d0dfc29d46ce6b85303c0da74f159c098e75991b
+ size 4744644
synthdog/resources/font/ko/NotoSerifKR-Regular.otf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13196f84ee798b18eddd80077051e5d88ff869696c43200dc54c66807884f74e
+ size 7437596
synthdog/resources/font/zh/NotoSansSC-Regular.otf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63fdadb47c21197170f3cda6c60e98e481b8a1eb28e5f44102da51bec17d123b
+ size 8481960
synthdog/resources/font/zh/NotoSerifSC-Regular.otf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66080541a111a7a31179700496013aac4b64a53cc41f570bd21c2628c75e4628
+ size 11214568
synthdog/resources/paper/paper_1.jpg ADDED

Git LFS Details

  • SHA256: 6be25c214d7772f44cf2406db54de5b75dd085b98c0bbe003f56ae1231a788af
  • Pointer size: 132 Bytes
  • Size of remote file: 2.39 MB
synthdog/resources/paper/paper_2.jpg ADDED

Git LFS Details

  • SHA256: 323283e8e6210a73223798274949ce083d97c6162860bf13b3bbbf6821796e0d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.88 MB
synthdog/resources/paper/paper_3.jpg ADDED

Git LFS Details

  • SHA256: 189bda5655f2ef2b63ed07d28a511f716a2e2e7bcdc8efb0def77e80330cb356
  • Pointer size: 132 Bytes
  • Size of remote file: 2.52 MB