rogerxavier committed on
Commit
1fe6a2c
1 Parent(s): f3b7d3b

Upload 20 files

easyocrlite/__init__.py ADDED
@@ -0,0 +1 @@
+ from easyocrlite.reader import ReaderLite
easyocrlite/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (304 Bytes).
 
easyocrlite/__pycache__/reader.cpython-38.pyc ADDED
Binary file (7 kB).
 
easyocrlite/__pycache__/types.cpython-38.pyc ADDED
Binary file (369 Bytes).
 
easyocrlite/model/__init__.py ADDED
@@ -0,0 +1 @@
+ from .craft import CRAFT
easyocrlite/model/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (292 Bytes).
 
easyocrlite/model/__pycache__/craft.cpython-38.pyc ADDED
Binary file (5.01 kB).
 
easyocrlite/model/craft.py ADDED
@@ -0,0 +1,174 @@
+ """
+ Copyright (c) 2019-present NAVER Corp.
+ MIT License
+ """
+ from __future__ import annotations
+
+ from collections import namedtuple
+ from typing import Iterable, Tuple
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torchvision
+ from packaging import version
+ from torchvision import models
+
+ VGGOutputs = namedtuple(
+     "VggOutputs", ["fc7", "relu5_3", "relu4_3", "relu3_2", "relu2_2"]
+ )
+
+ def init_weights(modules: Iterable[nn.Module]):
+     for m in modules:
+         if isinstance(m, nn.Conv2d):
+             nn.init.xavier_uniform_(m.weight)
+             if m.bias is not None:
+                 nn.init.zeros_(m.bias)
+         elif isinstance(m, nn.BatchNorm2d):
+             nn.init.constant_(m.weight, 1.0)
+             nn.init.zeros_(m.bias)
+         elif isinstance(m, nn.Linear):
+             nn.init.normal_(m.weight, 0, 0.01)
+             nn.init.zeros_(m.bias)
+
+
+ class VGG16_BN(nn.Module):
+     def __init__(self, pretrained: bool=True, freeze: bool=True):
+         super().__init__()
+         if version.parse(torchvision.__version__) >= version.parse("0.13"):
+             vgg_pretrained_features = models.vgg16_bn(
+                 weights=models.VGG16_BN_Weights.DEFAULT if pretrained else None
+             ).features
+         else:  # torchvision.__version__ < 0.13
+             models.vgg.model_urls["vgg16_bn"] = models.vgg.model_urls[
+                 "vgg16_bn"
+             ].replace("https://", "http://")
+             vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
+
+         self.slice1 = torch.nn.Sequential()
+         self.slice2 = torch.nn.Sequential()
+         self.slice3 = torch.nn.Sequential()
+         self.slice4 = torch.nn.Sequential()
+         self.slice5 = torch.nn.Sequential()
+         for x in range(12):  # conv2_2
+             self.slice1.add_module(str(x), vgg_pretrained_features[x])
+         for x in range(12, 19):  # conv3_3
+             self.slice2.add_module(str(x), vgg_pretrained_features[x])
+         for x in range(19, 29):  # conv4_3
+             self.slice3.add_module(str(x), vgg_pretrained_features[x])
+         for x in range(29, 39):  # conv5_3
+             self.slice4.add_module(str(x), vgg_pretrained_features[x])
+
+         # fc6, fc7 without atrous conv
+         self.slice5 = torch.nn.Sequential(
+             nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
+             nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
+             nn.Conv2d(1024, 1024, kernel_size=1),
+         )
+
+         if not pretrained:
+             init_weights(self.slice1.modules())
+             init_weights(self.slice2.modules())
+             init_weights(self.slice3.modules())
+             init_weights(self.slice4.modules())
+
+         init_weights(self.slice5.modules())  # no pretrained model for fc6 and fc7
+
+         if freeze:
+             for param in self.slice1.parameters():  # only first conv
+                 param.requires_grad = False
+
+     def forward(self, x: torch.Tensor) -> VGGOutputs:
+         h = self.slice1(x)
+         h_relu2_2 = h
+         h = self.slice2(h)
+         h_relu3_2 = h
+         h = self.slice3(h)
+         h_relu4_3 = h
+         h = self.slice4(h)
+         h_relu5_3 = h
+         h = self.slice5(h)
+         h_fc7 = h
+
+         out = VGGOutputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2)
+         return out
+
+
+ class DoubleConv(nn.Module):
+     def __init__(self, in_ch: int, mid_ch: int, out_ch: int):
+         super().__init__()
+         self.conv = nn.Sequential(
+             nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
+             nn.BatchNorm2d(mid_ch),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
+             nn.BatchNorm2d(out_ch),
+             nn.ReLU(inplace=True),
+         )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.conv(x)
+         return x
+
+
+ class CRAFT(nn.Module):
+     def __init__(self, pretrained: bool=False, freeze: bool=False):
+         super(CRAFT, self).__init__()
+
+         """ Base network """
+         self.basenet = VGG16_BN(pretrained, freeze)
+
+         """ U network """
+         self.upconv1 = DoubleConv(1024, 512, 256)
+         self.upconv2 = DoubleConv(512, 256, 128)
+         self.upconv3 = DoubleConv(256, 128, 64)
+         self.upconv4 = DoubleConv(128, 64, 32)
+
+         num_class = 2
+         self.conv_cls = nn.Sequential(
+             nn.Conv2d(32, 32, kernel_size=3, padding=1),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(32, 32, kernel_size=3, padding=1),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(32, 16, kernel_size=3, padding=1),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(16, 16, kernel_size=1),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(16, num_class, kernel_size=1),
+         )
+
+         init_weights(self.upconv1.modules())
+         init_weights(self.upconv2.modules())
+         init_weights(self.upconv3.modules())
+         init_weights(self.upconv4.modules())
+         init_weights(self.conv_cls.modules())
+
+     def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+         """Base network"""
+         sources = self.basenet(x)
+
+         """ U network """
+         y = torch.cat([sources[0], sources[1]], dim=1)
+         y = self.upconv1(y)
+
+         y = F.interpolate(
+             y, size=sources[2].size()[2:], mode="bilinear", align_corners=False
+         )
+         y = torch.cat([y, sources[2]], dim=1)
+         y = self.upconv2(y)
+
+         y = F.interpolate(
+             y, size=sources[3].size()[2:], mode="bilinear", align_corners=False
+         )
+         y = torch.cat([y, sources[3]], dim=1)
+         y = self.upconv3(y)
+
+         y = F.interpolate(
+             y, size=sources[4].size()[2:], mode="bilinear", align_corners=False
+         )
+         y = torch.cat([y, sources[4]], dim=1)
+         feature = self.upconv4(y)
+
+         y = self.conv_cls(feature)
+
+         return y.permute(0, 2, 3, 1), feature
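
Note (illustrative, not part of the uploaded files): as a quick sanity check of the CRAFT architecture above, a minimal sketch that builds an untrained detector and pushes a dummy batch through it. The score map comes back at half the input resolution with two channels (text score and link score), alongside the 32-channel shared feature map.

import torch
from easyocrlite.model import CRAFT

# Illustrative only: random weights, no pretrained VGG backbone is downloaded.
net = CRAFT(pretrained=False, freeze=False).eval()
dummy = torch.randn(1, 3, 256, 256)  # NCHW RGB input
with torch.no_grad():
    scores, feature = net(dummy)
print(scores.shape)   # torch.Size([1, 128, 128, 2]) -- NHWC text/link score maps
print(feature.shape)  # torch.Size([1, 32, 128, 128]) -- shared feature map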
easyocrlite/reader.py ADDED
@@ -0,0 +1,275 @@
+ from __future__ import annotations
+
+ import logging
+ from typing import Union
+ import os
+ from pathlib import Path
+ from typing import Tuple
+
+ import PIL.Image
+ import cv2
+ import numpy as np
+ import torch
+ from PIL import Image, ImageEnhance
+
+ from easyocrlite.model import CRAFT
+
+ from easyocrlite.utils.download_utils import prepare_model
+ from easyocrlite.utils.image_utils import (
+     adjust_result_coordinates,
+     boxed_transform,
+     normalize_mean_variance,
+     resize_aspect_ratio,
+ )
+ from easyocrlite.utils.detect_utils import (
+     extract_boxes,
+     extract_regions_from_boxes,
+     box_expand,
+     greedy_merge,
+ )
+ from easyocrlite.types import BoxTuple, RegionTuple
+ import easyocrlite.utils.utils as utils
+
+ logger = logging.getLogger(__name__)
+
+ MODULE_PATH = (
+     os.environ.get("EASYOCR_MODULE_PATH")
+     or os.environ.get("MODULE_PATH")
+     or os.path.expanduser("~/.EasyOCR/")
+ )
+
+
+ class ReaderLite(object):
+     def __init__(
+         self,
+         gpu=True,
+         model_storage_directory=None,
+         download_enabled=True,
+         verbose=True,
+         quantize=True,
+         cudnn_benchmark=False,
+     ):
+
+         self.verbose = verbose
+
+         model_storage_directory = Path(
+             model_storage_directory
+             if model_storage_directory
+             else MODULE_PATH + "/model"
+         )
+         self.detector_path = prepare_model(
+             model_storage_directory, download_enabled, verbose
+         )
+
+         self.quantize = quantize
+         self.cudnn_benchmark = cudnn_benchmark
+         if gpu is False:
+             self.device = "cpu"
+             if verbose:
+                 logger.warning(
+                     "Using CPU. Note: This module is much faster with a GPU."
+                 )
+         elif not torch.cuda.is_available():
+             self.device = "cpu"
+             if verbose:
+                 logger.warning(
+                     "CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU."
+                 )
+         elif gpu is True:
+             self.device = "cuda"
+         else:
+             self.device = gpu
+
+         self.detector = CRAFT()
+
+         state_dict = torch.load(self.detector_path, map_location=self.device)
+         if list(state_dict.keys())[0].startswith("module"):
+             state_dict = {k[7:]: v for k, v in state_dict.items()}
+
+         self.detector.load_state_dict(state_dict)
+
+         if self.device == "cpu":
+             if self.quantize:
+                 try:
+                     torch.quantization.quantize_dynamic(
+                         self.detector, dtype=torch.qint8, inplace=True
+                     )
+                 except:
+                     pass
+         else:
+             self.detector = torch.nn.DataParallel(self.detector).to(self.device)
+             import torch.backends.cudnn as cudnn
+
+             cudnn.benchmark = self.cudnn_benchmark
+
+         self.detector.eval()
+
+     def process(
+         self,
+         image_path: Union[str, PIL.Image.Image],
+         max_size: int = 960,
+         expand_ratio: float = 1.0,
+         sharp: float = 1.0,
+         contrast: float = 1.0,
+         text_confidence: float = 0.7,
+         text_threshold: float = 0.4,
+         link_threshold: float = 0.4,
+         slope_ths: float = 0.1,
+         ratio_ths: float = 0.5,
+         center_ths: float = 0.5,
+         dim_ths: float = 0.5,
+         space_ths: float = 1.0,
+         add_margin: float = 0.1,
+         min_size: float = 0.01,
+     ) -> Tuple[BoxTuple, list[np.ndarray]]:
+         if isinstance(image_path, str):
+             image = Image.open(image_path).convert('RGB')
+         elif isinstance(image_path, PIL.Image.Image):
+             image = image_path.convert('RGB')
+         tensor, inverse_ratio = self.preprocess(
+             image, max_size, expand_ratio, sharp, contrast
+         )
+
+         scores = self.forward_net(tensor)
+
+         boxes = self.detect(scores, text_confidence, text_threshold, link_threshold)
+
+         image = np.array(image)
+         region_list, box_list = self.postprocess(
+             image,
+             boxes,
+             inverse_ratio,
+             slope_ths,
+             ratio_ths,
+             center_ths,
+             dim_ths,
+             space_ths,
+             add_margin,
+             min_size,
+         )
+
+         # get cropped image
+         image_list = []
+         for region in region_list:
+             x_min, x_max, y_min, y_max = region
+             crop_img = image[y_min:y_max, x_min:x_max, :]
+             image_list.append(
+                 (
+                     ((x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max)),
+                     crop_img,
+                 )
+             )
+
+         for box in box_list:
+             transformed_img = boxed_transform(image, np.array(box, dtype="float32"))
+             image_list.append((box, transformed_img))
+
+         # sort by top left point
+         image_list = sorted(image_list, key=lambda x: (x[0][0][1], x[0][0][0]))
+
+         return image_list
+
+     def preprocess(
+         self,
+         image: Image.Image,
+         max_size: int,
+         expand_ratio: float = 1.0,
+         sharp: float = 1.0,
+         contrast: float = 1.0,
+     ) -> torch.Tensor:
+         if sharp != 1:
+             enhancer = ImageEnhance.Sharpness(image)
+             image = enhancer.enhance(sharp)
+         if contrast != 1:
+             enhancer = ImageEnhance.Contrast(image)
+             image = enhancer.enhance(contrast)
+
+         image = np.array(image)
+
+         image, target_ratio = resize_aspect_ratio(
+             image, max_size, interpolation=cv2.INTER_LINEAR, expand_ratio=expand_ratio
+         )
+         inverse_ratio = 1 / target_ratio
+
+         x = np.transpose(normalize_mean_variance(image), (2, 0, 1))
+
+         x = torch.tensor(np.array([x]), device=self.device)
+
+         return x, inverse_ratio
+
+     @torch.no_grad()
+     def forward_net(self, tensor: torch.Tensor) -> torch.Tensor:
+         scores, feature = self.detector(tensor)
+         return scores[0]
+
+     def detect(
+         self,
+         scores: torch.Tensor,
+         text_confidence: float = 0.7,
+         text_threshold: float = 0.4,
+         link_threshold: float = 0.4,
+     ) -> list[BoxTuple]:
+         # make score and link map
+         score_text = scores[:, :, 0].cpu().data.numpy()
+         score_link = scores[:, :, 1].cpu().data.numpy()
+         # extract box
+         boxes, _ = extract_boxes(
+             score_text, score_link, text_confidence, text_threshold, link_threshold
+         )
+         return boxes
+
+     def postprocess(
+         self,
+         image: np.ndarray,
+         boxes: list[BoxTuple],
+         inverse_ratio: float,
+         slope_ths: float = 0.1,
+         ratio_ths: float = 0.5,
+         center_ths: float = 0.5,
+         dim_ths: float = 0.5,
+         space_ths: float = 1.0,
+         add_margin: float = 0.1,
+         min_size: int = 0,
+     ) -> Tuple[list[RegionTuple], list[BoxTuple]]:
+
+         # coordinate adjustment
+         boxes = adjust_result_coordinates(boxes, inverse_ratio)
+
+         max_y, max_x, _ = image.shape
+
+         # extract region and merge
+         region_list, box_list = extract_regions_from_boxes(boxes, slope_ths)
+
+         region_list = greedy_merge(
+             region_list,
+             ratio_ths=ratio_ths,
+             center_ths=center_ths,
+             dim_ths=dim_ths,
+             space_ths=space_ths,
+             verbose=0
+         )
+
+         # add margin
+         region_list = [
+             region.expand(add_margin, (max_x, max_y)).as_tuple()
+             for region in region_list
+         ]
+
+         box_list = [box_expand(box, add_margin, (max_x, max_y)) for box in box_list]
+
+         # filter by size
+         if min_size:
+             if min_size < 1:
+                 min_size = int(min(max_y, max_x) * min_size)
+
+             region_list = [
+                 i for i in region_list if max(i[1] - i[0], i[3] - i[2]) > min_size
+             ]
+             box_list = [
+                 i
+                 for i in box_list
+                 if max(utils.diff([c[0] for c in i]), utils.diff([c[1] for c in i]))
+                 > min_size
+             ]
+
+         return region_list, box_list
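
Note (illustrative, not part of this commit): a minimal usage sketch of the reader defined above. The image path is a placeholder, and the CRAFT weights are fetched into ~/.EasyOCR/model on first use.

from easyocrlite import ReaderLite

reader = ReaderLite(gpu=False)           # CPU is enough for a smoke test
results = reader.process("example.jpg")  # hypothetical input image
for box, crop in results:
    # box: four (x, y) corner points starting at the top left; crop: np.ndarray patch
    print(box, crop.shape)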
easyocrlite/types.py ADDED
@@ -0,0 +1,5 @@
+ from typing import Tuple
+
+ Point = Tuple[int, int]
+ BoxTuple = Tuple[Point, Point, Point, Point]
+ RegionTuple = Tuple[int, int, int, int]
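
For reference, a box in these aliases is four (x, y) corner points in clockwise order starting at the top left, while a region is axis-aligned; a small illustrative sketch:

from easyocrlite.types import BoxTuple, RegionTuple

box: BoxTuple = ((10, 20), (110, 20), (110, 60), (10, 60))  # tl, tr, br, bl
region: RegionTuple = (10, 110, 20, 60)                      # x_min, x_max, y_min, y_max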
easyocrlite/utils/__init__.py ADDED
File without changes
easyocrlite/utils/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (257 Bytes).
 
easyocrlite/utils/__pycache__/detect_utils.cpython-38.pyc ADDED
Binary file (7.51 kB).
 
easyocrlite/utils/__pycache__/download_utils.cpython-38.pyc ADDED
Binary file (3.04 kB).
 
easyocrlite/utils/__pycache__/image_utils.cpython-38.pyc ADDED
Binary file (2.65 kB).
 
easyocrlite/utils/__pycache__/utils.cpython-38.pyc ADDED
Binary file (1.36 kB).
 
easyocrlite/utils/detect_utils.py ADDED
@@ -0,0 +1,327 @@
+ from __future__ import annotations
+
+ import itertools
+ import logging
+ import math
+ import operator
+ from collections import namedtuple
+ from functools import cached_property
+ from typing import Iterable, Optional, Tuple
+
+ import cv2
+ import numpy as np
+ from easyocrlite.types import BoxTuple, RegionTuple
+ from easyocrlite.utils.utils import grouped_by
+
+ logger = logging.getLogger(__name__)
+
+ class Region(namedtuple("Region", ["x_min", "x_max", "y_min", "y_max"])):
+     @cached_property
+     def ycenter(self):
+         return 0.5 * (self.y_min + self.y_max)
+
+     @cached_property
+     def xcenter(self):
+         return 0.5 * (self.x_min + self.x_max)
+
+     @cached_property
+     def height(self):
+         return self.y_max - self.y_min
+
+     @cached_property
+     def width(self):
+         return self.x_max - self.x_min
+
+     @classmethod
+     def from_box(cls, box: BoxTuple) -> Region:
+         (xtl, ytl), (xtr, ytr), (xbr, ybr), (xbl, ybl) = box
+
+         x_max = max(xtl, xtr, xbr, xbl)
+         x_min = min(xtl, xtr, xbr, xbl)
+         y_max = max(ytl, ytr, ybr, ybl)
+         y_min = min(ytl, ytr, ybr, ybl)
+
+         return cls(x_min, x_max, y_min, y_max)
+
+     def as_tuple(self) -> RegionTuple:
+         return self.x_min, self.x_max, self.y_min, self.y_max
+
+     def expand(
+         self, add_margin: float, size: Optional[Tuple[int, int] | int] = None
+     ) -> Region:
+
+         margin = int(add_margin * min(self.width, self.height))
+         if isinstance(size, Iterable):
+             max_x, max_y = size
+         elif size is None:
+             max_x = self.width * 2
+             max_y = self.height * 2
+         else:
+             max_x = max_y = size
+
+         return Region(
+             max(0, self.x_min - margin),
+             min(max_x, self.x_max + margin),
+             max(0, self.y_min - margin),
+             min(max_y, self.y_max + margin),
+         )
+
+     def __add__(self, region: Region) -> Region:
+         return Region(
+             min(self.x_min, region.x_min),
+             max(self.x_max, region.x_max),
+             min(self.y_min, region.y_min),
+             max(self.y_max, region.y_max),
+         )
+
+ def extract_boxes(
+     textmap: np.ndarray,
+     linkmap: np.ndarray,
+     text_threshold: float,
+     link_threshold: float,
+     low_text: float,
+ ) -> Tuple[list[BoxTuple], list[int]]:
+     # prepare data
+     linkmap = linkmap.copy()
+     textmap = textmap.copy()
+     img_h, img_w = textmap.shape
+
+     """ labeling method """
+     ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
+     ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)
+
+     text_score_comb = np.clip(text_score + link_score, 0, 1)
+     nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(
+         text_score_comb.astype(np.uint8), connectivity=4
+     )
+
+     boxes = []
+     mapper = []
+     for k in range(1, nLabels):
+         # size filtering
+         size = stats[k, cv2.CC_STAT_AREA]
+         if size < 10:
+             continue
+
+         # thresholding
+         if np.max(textmap[labels == k]) < text_threshold:
+             continue
+
+         # make segmentation map
+         segmap = np.zeros(textmap.shape, dtype=np.uint8)
+         segmap[labels == k] = 255
+
+         mapper.append(k)
+         segmap[np.logical_and(link_score == 1, text_score == 0)] = 0  # remove link area
+         x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
+         w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
+         niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
+         sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
+         # boundary check
+         if sx < 0:
+             sx = 0
+         if sy < 0:
+             sy = 0
+         if ex >= img_w:
+             ex = img_w
+         if ey >= img_h:
+             ey = img_h
+         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
+         segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)
+
+         # make box
+         np_contours = (
+             np.roll(np.array(np.where(segmap != 0)), 1, axis=0)
+             .transpose()
+             .reshape(-1, 2)
+         )
+         rectangle = cv2.minAreaRect(np_contours)
+         box = cv2.boxPoints(rectangle)
+
+         # align diamond-shape
+         w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
+         box_ratio = max(w, h) / (min(w, h) + 1e-5)
+         if abs(1 - box_ratio) <= 0.1:
+             l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
+             t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
+             box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)
+
+         # make clock-wise order
+         startidx = box.sum(axis=1).argmin()
+         box = np.roll(box, 4 - startidx, 0)
+         box = np.array(box)
+         boxes.append(box)
+
+     return boxes, mapper
+
+
+ def extract_regions_from_boxes(
+     boxes: list[BoxTuple], slope_ths: float
+ ) -> Tuple[list[Region], list[BoxTuple]]:
+
+     region_list: list[Region] = []
+     box_list = []
+
+     for box in boxes:
+         box = np.array(box).astype(np.int32)
+         (xtl, ytl), (xtr, ytr), (xbr, ybr), (xbl, ybl) = box
+
+         # get the tan of top and bottom edge
+         # why 10?
+         slope_top = (ytr - ytl) / max(10, xtr - xtl)
+         slope_bottom = (ybr - ybl) / max(10, xbr - xbl)
+         if max(abs(slope_top), abs(slope_bottom)) < slope_ths:
+             # not very tilted, rectangle box
+             region_list.append(Region.from_box(box))
+         else:
+             # tilted
+             box_list.append(box)
+     return region_list, box_list
+
+
+ def box_expand(
+     box: BoxTuple, add_margin: float, size: Optional[Tuple[int, int] | int] = None
+ ) -> BoxTuple:
+
+     (xtl, ytl), (xtr, ytr), (xbr, ybr), (xbl, ybl) = box
+     height = np.linalg.norm([xbl - xtl, ybl - ytl])  # from top left to bottom left
+     width = np.linalg.norm([xtr - xtl, ytr - ytl])  # from top left to top right
+
+     # margin is added based on the diagonal
+     margin = int(1.44 * add_margin * min(width, height))
+
+     theta13 = abs(np.arctan((ytl - ybr) / max(10, (xtl - xbr))))
+     theta24 = abs(np.arctan((ytr - ybl) / max(10, (xtr - xbl))))
+
+     if isinstance(size, Iterable):
+         max_x, max_y = size
+     elif size is None:
+         max_x = width * 2
+         max_y = height * 2
+     else:
+         max_x = max_y = size
+
+     new_box = (
+         (
+             max(0, int(xtl - np.cos(theta13) * margin)),
+             max(0, int(ytl - np.sin(theta13) * margin)),
+         ),
+         (
+             min(max_x, math.ceil(xtr + np.cos(theta24) * margin)),
+             max(0, int(ytr - np.sin(theta24) * margin)),
+         ),
+         (
+             min(max_x, math.ceil(xbr + np.cos(theta13) * margin)),
+             min(max_y, math.ceil(ybr + np.sin(theta13) * margin)),
+         ),
+         (
+             max(0, int(xbl - np.cos(theta24) * margin)),
+             min(max_y, math.ceil(ybl + np.sin(theta24) * margin)),
+         ),
+     )
+     return new_box
+
+
+ def greedy_merge(
+     regions: list[Region],
+     ratio_ths: float = 0.5,
+     center_ths: float = 0.5,
+     dim_ths: float = 0.5,
+     space_ths: float = 1.0,
+     verbose: int = 4,
+ ) -> list[Region]:
+
+     regions = sorted(regions, key=operator.attrgetter("ycenter"))
+
+     # grouped by ycenter
+     groups = grouped_by(
+         regions,
+         operator.attrgetter("ycenter"),
+         center_ths,
+         operator.attrgetter("height"),
+     )
+     for group in groups:
+         group.sort(key=operator.attrgetter("x_min"))
+         idx = 0
+         while idx < len(group) - 1:
+             region1, region2 = group[idx], group[idx + 1]
+             # both are horizontal regions
+             cond = (region1.width / region1.height) > ratio_ths and (
+                 region2.width / region2.height
+             ) > ratio_ths
+             # similar heights
+             cond = cond and abs(region1.height - region2.height) < dim_ths * np.mean(
+                 [region1.height, region2.height]
+             )
+             # similar ycenters
+             # cond = cond and abs(region1.ycenter - region2.ycenter) < center_ths * np.mean(
+             #     [region1.height, region2.height]
+             # )
+             # horizontal space is small
+             cond = cond and (region2.x_min - region1.x_max) < space_ths * np.mean(
+                 [region1.height, region2.height]
+             )
+             if cond:
+                 # merge regions
+                 region = region1 + region2
+
+                 if verbose > 2:
+                     logger.debug(f"horizontal merging {region1} {region2}")
+                 group.pop(idx)
+                 group.pop(idx)
+                 group.insert(idx, region)
+
+             else:
+                 if verbose > 0:
+                     logger.debug(f"not horizontal merging {region1} {region2}")
+                 idx += 1
+
+     # flatten groups
+     regions = list(itertools.chain.from_iterable(groups))
+
+     # grouped by xcenter
+     groups = grouped_by(
+         regions,
+         operator.attrgetter("xcenter"),
+         center_ths,
+         operator.attrgetter("width"),
+     )
+
+     for group in groups:
+         group.sort(key=operator.attrgetter("y_min"))
+         idx = 0
+         while idx < len(group) - 1:
+             region1, region2 = group[idx], group[idx + 1]
+             # both are vertical regions
+             cond = (region1.height / region1.width) > ratio_ths and (
+                 region2.height / region2.width
+             ) > ratio_ths
+             # similar widths
+             cond = cond and abs(region1.width - region2.width) < dim_ths * np.mean(
+                 [region1.width, region2.width]
+             )
+             # # similar xcenters
+             # cond = cond and abs(region1.xcenter - region2.xcenter) < center_ths * np.mean(
+             #     [region1.width, region2.width]
+             # )
+             # vertical space is small
+             cond = cond and (region2.y_min - region1.y_max) < space_ths * np.mean(
+                 [region1.width, region2.width]
+             )
+             if cond:
+                 # merge region
+                 region = region1 + region2
+                 if verbose > 2:
+                     logger.debug(f"vertical merging {region1} {region2}")
+                 group.pop(idx)
+                 group.pop(idx)
+                 group.insert(idx, region)
+             else:
+                 if verbose > 1:
+                     logger.debug(f"not vertical merging {region1} {region2}")
+                 idx += 1
+
+     # flatten groups
+     regions = list(itertools.chain.from_iterable(groups))
+
+     return regions
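
To illustrate the merging heuristics above, a small sketch (not part of the upload) with hand-made regions: two boxes on the same text line plus one line far below. With the default thresholds the first two merge horizontally and the third stays separate.

from easyocrlite.utils.detect_utils import Region, greedy_merge

regions = [
    Region(x_min=0, x_max=50, y_min=0, y_max=20),    # first word of a line
    Region(x_min=55, x_max=120, y_min=1, y_max=21),  # second word, same line
    Region(x_min=0, x_max=60, y_min=90, y_max=110),  # a separate line far below
]
merged = greedy_merge(regions, verbose=0)
# expect two regions: the first pair merged into (0, 120, 0, 21), the third untouched
print(merged)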
easyocrlite/utils/download_utils.py ADDED
@@ -0,0 +1,92 @@
+ import hashlib
+ import logging
+ from pathlib import Path
+ from typing import Callable, Optional
+ from urllib.request import urlretrieve
+ from zipfile import ZipFile
+
+ from tqdm.auto import tqdm
+
+ FILENAME = "craft_mlt_25k.pth"
+ URL = (
+     "https://xc-models.oss-cn-zhangjiakou.aliyuncs.com/modelscope/studio/easyocr/craft_mlt_25k.zip"
+ )
+ MD5SUM = "2f8227d2def4037cdb3b34389dcf9ec1"
+ MD5MSG = "MD5 hash mismatch, possible file corruption"
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def calculate_md5(path: Path) -> str:
+     hash_md5 = hashlib.md5()
+     with open(path, "rb") as f:
+         for chunk in iter(lambda: f.read(4096), b""):
+             hash_md5.update(chunk)
+     return hash_md5.hexdigest()
+
+
+ def print_progress_bar(t: tqdm) -> Callable[[int, int, Optional[int]], None]:
+     last = 0
+
+     def update_to(
+         count: int = 1, block_size: int = 1, total_size: Optional[int] = None
+     ):
+         nonlocal last
+         if total_size is not None:
+             t.total = total_size
+         t.update((count - last) * block_size)
+         last = count
+
+     return update_to
+
+
+ def download_and_unzip(
+     url: str, filename: str, model_storage_directory: Path, verbose: bool = True
+ ):
+     zip_path = model_storage_directory / "temp.zip"
+     with tqdm(
+         unit="B", unit_scale=True, unit_divisor=1024, miniters=1, disable=not verbose
+     ) as t:
+         reporthook = print_progress_bar(t)
+         urlretrieve(url, str(zip_path), reporthook=reporthook)
+     with ZipFile(zip_path, "r") as zipObj:
+         zipObj.extract(filename, str(model_storage_directory))
+     zip_path.unlink()
+
+
+ def prepare_model(model_storage_directory: Path, download=True, verbose: bool = True) -> Path:
+     model_storage_directory.mkdir(parents=True, exist_ok=True)
+
+     detector_path = model_storage_directory / FILENAME
+
+     # try get model path
+     model_available = False
+     if not detector_path.is_file():
+         if not download:
+             raise FileNotFoundError(f"Missing {detector_path} and downloads disabled")
+         logger.info(
+             "Downloading detection model, please wait. "
+             "This may take several minutes depending upon your network connection."
+         )
+     elif calculate_md5(detector_path) != MD5SUM:
+         logger.warning(MD5MSG)
+         if not download:
+             raise FileNotFoundError(
+                 f"MD5 mismatch for {detector_path} and downloads disabled"
+             )
+         detector_path.unlink()
+         logger.info(
+             "Re-downloading the detection model, please wait. "
+             "This may take several minutes depending upon your network connection."
+         )
+     else:
+         model_available = True
+
+     if not model_available:
+         download_and_unzip(URL, FILENAME, model_storage_directory, verbose)
+         if calculate_md5(detector_path) != MD5SUM:
+             raise ValueError(MD5MSG)
+         logger.info("Download complete")
+
+     return detector_path
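
The download helper can also be exercised on its own; a minimal sketch (the target directory here is arbitrary). It fetches and unzips craft_mlt_25k.pth when the file is missing or fails the MD5 check, then returns the path to the weights file.

from pathlib import Path
from easyocrlite.utils.download_utils import prepare_model

weights = prepare_model(Path("./models"), download=True, verbose=True)
print(weights)  # e.g. models/craft_mlt_25k.pth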
easyocrlite/utils/image_utils.py ADDED
@@ -0,0 +1,93 @@
+ from __future__ import annotations
+
+ from typing import Tuple
+
+ import cv2
+ import numpy as np
+ from easyocrlite.types import BoxTuple
+
+
+ def resize_aspect_ratio(
+     img: np.ndarray, max_size: int, interpolation: int, expand_ratio: float = 1.0
+ ) -> Tuple[np.ndarray, float]:
+     height, width, channel = img.shape
+
+     # magnify image size
+     target_size = expand_ratio * max(height, width)
+
+     # set original image size
+     if max_size and max_size > 0 and target_size > max_size:
+         target_size = max_size
+
+     ratio = target_size / max(height, width)
+
+     target_h, target_w = int(height * ratio), int(width * ratio)
+
+     if target_h != height or target_w != width:
+         proc = cv2.resize(img, (target_w, target_h), interpolation=interpolation)
+         # make canvas and paste image
+         target_h32, target_w32 = target_h, target_w
+         if target_h % 32 != 0:
+             target_h32 = target_h + (32 - target_h % 32)
+         if target_w % 32 != 0:
+             target_w32 = target_w + (32 - target_w % 32)
+         resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
+         resized[0:target_h, 0:target_w, :] = proc
+         target_h, target_w = target_h32, target_w32
+     else:
+         resized = img
+     return resized, ratio
+
+
+ def adjust_result_coordinates(
+     box: BoxTuple, inverse_ratio: int = 1, ratio_net: int = 2
+ ) -> np.ndarray:
+     if len(box) > 0:
+         box = np.array(box)
+         for k in range(len(box)):
+             if box[k] is not None:
+                 box[k] *= (inverse_ratio * ratio_net, inverse_ratio * ratio_net)
+     return box
+
+
+ def normalize_mean_variance(
+     in_img: np.ndarray,
+     mean: Tuple[float, float, float] = (0.485, 0.456, 0.406),
+     variance: Tuple[float, float, float] = (0.229, 0.224, 0.225),
+ ) -> np.ndarray:
+     # should be RGB order
+     img = in_img.copy().astype(np.float32)
+
+     img -= np.array(
+         [mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32
+     )
+     img /= np.array(
+         [variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0],
+         dtype=np.float32,
+     )
+     return img
+
+ def boxed_transform(image: np.ndarray, box: BoxTuple) -> np.ndarray:
+     (tl, tr, br, bl) = box
+
+     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+     maxWidth = max(int(widthA), int(widthB))
+
+     # compute the height of the new image, which will be the
+     # maximum distance between the top-right and bottom-right
+     # y-coordinates or the top-left and bottom-left y-coordinates
+     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+     maxHeight = max(int(heightA), int(heightB))
+
+     dst = np.array(
+         [[0, 0], [maxWidth - 1, 0], [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]],
+         dtype="float32",
+     )
+
+     # compute the perspective transform matrix and then apply it
+     M = cv2.getPerspectiveTransform(box, dst)
+     warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
+
+     return warped
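
A short sketch (illustrative values, not part of the upload) of the preprocessing helpers above on a synthetic image: the resize keeps the aspect ratio, pads each side up to a multiple of 32 when needed, and returns the scale factor; normalization removes the ImageNet channel mean and variance.

import cv2
import numpy as np
from easyocrlite.utils.image_utils import normalize_mean_variance, resize_aspect_ratio

img = np.random.randint(0, 255, (300, 500, 3), dtype=np.uint8)  # dummy H x W x C image
resized, ratio = resize_aspect_ratio(
    img, max_size=960, interpolation=cv2.INTER_LINEAR, expand_ratio=2.0
)
print(resized.shape, ratio)  # both sides become multiples of 32, e.g. (576, 960, 3)
x = np.transpose(normalize_mean_variance(resized), (2, 0, 1))  # CHW float32 network input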
easyocrlite/utils/utils.py ADDED
@@ -0,0 +1,43 @@
+ from __future__ import annotations
+
+ import numpy as np
+
+ from typing import Iterable, TypeVar, Callable
+
+ T = TypeVar("T")
+ V = TypeVar("V")
+
+
+ def diff(input_list: Iterable[T]) -> T:
+     return max(input_list) - min(input_list)
+
+
+ def grouped_by(
+     items: list[T],
+     group_key: Callable[[T], V],
+     eps: float,
+     eps_key: Callable[[T], float],
+ ) -> list[list[T]]:
+     items = sorted(items, key=group_key)
+
+     groups = []
+     group = []
+
+     for item in items:
+         if not group:
+             group.append(item)
+             continue
+
+         if group:
+             cond = abs(
+                 group_key(item) - np.mean([group_key(item) for item in group])
+             ) < eps * np.mean([eps_key(item) for item in group])
+             if cond:
+                 group.append(item)
+             else:
+                 groups.append(group)
+                 group = [item]
+     else:
+         if group:
+             groups.append(group)
+     return groups
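
A quick sketch (not part of this commit) of how grouped_by clusters items whose key stays within eps of the running group mean, scaled by eps_key; plain numbers stand in for regions here.

from easyocrlite.utils.utils import diff, grouped_by

values = [10, 11, 12, 50, 52, 90]
groups = grouped_by(values, group_key=lambda v: v, eps=0.5, eps_key=lambda v: 20)
print(groups)        # [[10, 11, 12], [50, 52], [90]]
print(diff(values))  # 80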