feat: add exclusion rules
Browse files
- configs/font.yml +3 -0
- font_dataset/font.py +12 -3
- font_ds_generate_script.py +10 -3
- font_ds_stat.py +5 -1
configs/font.yml
CHANGED
@@ -11,6 +11,9 @@
|
|
11 |
# - Others
|
12 |
dataset:
|
13 |
path: ./dataset/fonts
|
|
|
|
|
|
|
14 |
specs:
|
15 |
- path:
|
16 |
- ./Adobe/CJK
|
|
|
11 |
# - Others
|
12 |
dataset:
|
13 |
path: ./dataset/fonts
|
14 |
+
exclusion:
|
15 |
+
- ./Founder Type(方正)/韩文/方正朝文中圆.TTF
|
16 |
+
- ./Founder Type(方正)/简繁/ttf/方正宋体S-超大字符集(SIP).TTF
|
17 |
specs:
|
18 |
- path:
|
19 |
- ./Adobe/CJK
|
font_dataset/font.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import yaml
|
2 |
import os
|
3 |
-
from typing import List
|
4 |
|
5 |
|
6 |
from .utils import get_files
|
@@ -15,7 +14,7 @@ class DSFont:
|
|
15 |
self.language = language
|
16 |
|
17 |
|
18 |
-
def load_fonts(config_path="configs/font.yml") -> List[DSFont]:
|
19 |
with open(config_path, "r", encoding="utf-8") as f:
|
20 |
config = yaml.safe_load(f)
|
21 |
|
@@ -41,4 +40,14 @@ def load_fonts(config_path="configs/font.yml") -> List[DSFont]:
|
|
41 |
font_list.append(DSFont(file, spec["language"]))
|
42 |
|
43 |
font_list.sort(key=lambda x: x.path)
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import yaml
|
2 |
import os
|
|
|
3 |
|
4 |
|
5 |
from .utils import get_files
|
|
|
14 |
self.language = language
|
15 |
|
16 |
|
17 |
+
def load_fonts(config_path="configs/font.yml"):
|
18 |
with open(config_path, "r", encoding="utf-8") as f:
|
19 |
config = yaml.safe_load(f)
|
20 |
|
|
|
40 |
font_list.append(DSFont(file, spec["language"]))
|
41 |
|
42 |
font_list.sort(key=lambda x: x.path)
|
43 |
+
|
44 |
+
exclusion_list = ds_config["exclusion"]
|
45 |
+
exclusion_list = [os.path.join(ds_path, path) for path in exclusion_list]
|
46 |
+
|
47 |
+
def exclusion_rule(font: DSFont):
|
48 |
+
for exclusion in exclusion_list:
|
49 |
+
if os.path.samefile(font.path, exclusion):
|
50 |
+
return True
|
51 |
+
return False
|
52 |
+
|
53 |
+
return font_list, exclusion_rule
|
font_ds_generate_script.py
CHANGED
@@ -31,7 +31,7 @@ dataset_path = "./dataset/font_img"
|
|
31 |
os.makedirs(dataset_path, exist_ok=True)
|
32 |
|
33 |
|
34 |
-
fonts = load_fonts()
|
35 |
corpus_manager = CorpusGeneratorManager()
|
36 |
images = background_image_generator()
|
37 |
|
@@ -41,10 +41,17 @@ def generate_dataset(dataset_type: str, cnt: int):
|
|
41 |
os.makedirs(dataset_bath_dir, exist_ok=True)
|
42 |
|
43 |
def _generate_single(args):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
while True:
|
45 |
try:
|
46 |
-
i, j, font = args
|
47 |
-
|
48 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
49 |
label_file_name = f"font_{i}_img_{j}.bin"
|
50 |
|
|
|
31 |
os.makedirs(dataset_path, exist_ok=True)
|
32 |
|
33 |
|
34 |
+
fonts, exclusion_rule = load_fonts()
|
35 |
corpus_manager = CorpusGeneratorManager()
|
36 |
images = background_image_generator()
|
37 |
|
|
|
41 |
os.makedirs(dataset_bath_dir, exist_ok=True)
|
42 |
|
43 |
def _generate_single(args):
|
44 |
+
i, j, font = args
|
45 |
+
print(
|
46 |
+
f"Generating {dataset_type} font: {font.path} {i} / {len(fonts)}, image {j}"
|
47 |
+
)
|
48 |
+
|
49 |
+
if exclusion_rule(font):
|
50 |
+
print(f"Excluded font: {font.path}")
|
51 |
+
return
|
52 |
+
|
53 |
while True:
|
54 |
try:
|
|
|
|
|
55 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
56 |
label_file_name = f"font_{i}_img_{j}.bin"
|
57 |
|
font_ds_stat.py
CHANGED
@@ -23,12 +23,16 @@ test_cnt_cjk = int(test_cnt * cjk_ratio)
|
|
23 |
dataset_path = "./dataset/font_img"
|
24 |
os.makedirs(dataset_path, exist_ok=True)
|
25 |
|
26 |
-
fonts = load_fonts()
|
27 |
|
28 |
|
29 |
cnt = 0
|
30 |
|
31 |
for font in fonts:
|
|
|
|
|
|
|
|
|
32 |
if font.language == "CJK":
|
33 |
cnt += cjk_ratio
|
34 |
else:
|
|
|
23 |
dataset_path = "./dataset/font_img"
|
24 |
os.makedirs(dataset_path, exist_ok=True)
|
25 |
|
26 |
+
fonts, exclusion_rule = load_fonts()
|
27 |
|
28 |
|
29 |
cnt = 0
|
30 |
|
31 |
for font in fonts:
|
32 |
+
if exclusion_rule(font):
|
33 |
+
print(f"Excluded font: {font.path}")
|
34 |
+
continue
|
35 |
+
|
36 |
if font.language == "CJK":
|
37 |
cnt += cjk_ratio
|
38 |
else:
|