File size: 3,219 Bytes
a89d9fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import json
import os


def poly_to_string(poly):
    """Serialize polygon coordinates as one tab-separated string.

    Args:
        poly: Coordinates as a ``numpy.ndarray`` or any array-like (nested
            list/tuple) of any dimensionality. Multi-dimensional input is
            flattened in row-major order.

    Returns:
        str: ``str()`` of every coordinate joined by tab characters; an
        empty string when ``poly`` holds no coordinates.
    """
    # Convert first so plain lists/tuples (which have no ``.shape``) are
    # accepted as well as arrays; flatten() leaves 1-D input unchanged.
    flat = np.asarray(poly).flatten()
    return "\t".join(str(value) for value in flat)


def convert_label(label_dir, mode="gt", save_dir="./save_results/"):
    """Split a label file into one tab-separated text file per image.

    Every non-blank line of ``label_dir`` must hold an image path and a JSON
    list of annotation dicts, separated by a tab or, failing that, by four
    spaces. Each annotation must provide ``transcription`` and ``points``;
    annotations carrying a ``score`` below 0.5 are dropped. Ideographic
    spaces (U+3000) in the transcription are replaced by ASCII spaces.

    For each image a file ``<basename of image path>.txt`` is written into
    ``save_dir`` (created if missing) with one line per kept annotation:

    * ``mode == "gt"``: ``<points>\\t<tag>\\t<transcription>`` where ``tag``
      is 1 when the transcription is ``"###"`` and 0 otherwise;
    * any other ``mode``: ``<points>\\t<transcription>``.

    Images that share a basename, or that are listed more than once, end up
    in the same output file; the last one processed wins.

    Args:
        label_dir: Path of the label file to read.
        mode: ``"gt"`` to include the ignore tag column, anything else to
            omit it.
        save_dir: Directory that receives the per-image text files.

    Raises:
        ValueError: If ``label_dir`` does not exist, if ``label_dir`` and
            ``save_dir`` are the same path string, if a non-blank line
            cannot be split into path and annotations, or if the
            annotations are not valid JSON.
        KeyError: If an annotation lacks ``transcription`` or ``points``.
    """
    if not os.path.exists(label_dir):
        raise ValueError(f"The file {label_dir} does not exist!")

    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if label_dir == save_dir:
        raise ValueError(
            f"save_dir must differ from label_dir, got {save_dir!r} for both")

    with open(label_dir, 'r') as label_file:
        data = label_file.readlines()

    gt_dict = {}

    for line_no, line in enumerate(data, start=1):
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no annotation.
            continue

        # Preferred layout is "<path>\t<json>"; otherwise fall back to a
        # four-space separator. Splitting only once keeps runs of spaces
        # inside the JSON payload intact.
        tmp = line.split('\t')
        if len(tmp) != 2:
            tmp = line.strip().split('    ', 1)
        if len(tmp) != 2:
            raise ValueError(
                f"Line {line_no} of {label_dir} is not "
                f"'<image path><TAB><json annotations>': {line!r}")

        img_path, anno_text = tmp
        anno = json.loads(anno_text)

        gt_lists = []
        for dic in anno:
            txt = dic['transcription']
            if 'score' in dic and float(dic['score']) < 0.5:
                continue
            txt = txt.replace(u'\u3000', u' ')
            poly = np.array(dic['points']).flatten()
            # "###" marks a region to be ignored (tag 1).
            txt_tag = 1 if txt == "###" else 0
            if mode == "gt":
                gt_label = poly_to_string(poly) + "\t" + str(
                    txt_tag) + "\t" + txt + "\n"
            else:
                gt_label = poly_to_string(poly) + "\t" + txt + "\n"
            gt_lists.append(gt_label)

        gt_dict[img_path] = gt_lists

    os.makedirs(save_dir, exist_ok=True)

    for img_name, gt_lists in gt_dict.items():
        save_name = img_name.split("/")[-1]
        save_file = os.path.join(save_dir, save_name + ".txt")
        with open(save_file, "w") as f:
            f.writelines(gt_lists)

    print("The convert label saved in {}".format(save_dir))


def parse_args():
    """Parse the command-line options of this script from ``sys.argv``.

    Returns:
        argparse.Namespace: ``label_path`` and ``save_folder`` (required
        strings) plus ``mode`` (a string, or ``False`` when ``--mode`` is
        not given).
    """
    import argparse

    cli = argparse.ArgumentParser(description="args")
    for required_flag in ("--label_path", "--save_folder"):
        cli.add_argument(required_flag, type=str, required=True)
    # The default is the boolean False, not a string: it never equals "gt",
    # so omitting --mode selects the non-"gt" branch in convert_label.
    cli.add_argument("--mode", type=str, default=False)
    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: read the CLI options and run the conversion.
    cli_args = parse_args()
    convert_label(
        label_dir=cli_args.label_path,
        mode=cli_args.mode,
        save_dir=cli_args.save_folder)