Cyril666 committed
Commit 1a827c6 · 1 Parent(s): e5414eb

First model version

configs/pretrain_language_model.yaml DELETED
@@ -1,45 +0,0 @@
-global:
-  name: pretrain-language-model
-  phase: train
-  stage: pretrain-language
-  workdir: workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/WikiText-103.csv'],
-    batch_size: 4096
-  }
-  test: {
-    roots: ['data/WikiText-103_eval_d1.csv'],
-    batch_size: 4096
-  }
-
-training:
-  epochs: 80
-  show_iters: 50
-  eval_iters: 6000
-  save_iters: 3000
-
-optimizer:
-  type: Adam
-  true_wd: False
-  wd: 0.0
-  bn_wd: False
-  clip_grad: 20
-  lr: 0.0001
-  args: {
-    betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  scheduler: {
-    periods: [70, 10],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_language.BCNLanguage'
-  language: {
-    num_layers: 4,
-    loss_weight: 1.,
-    use_self_attn: False
-  }
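
All of the deleted configs share this shape, so a few lines of PyYAML are enough to inspect one. A minimal sketch, assuming PyYAML >= 5.1; the repository's own config loader is not part of this commit, and note that the !!python/tuple tag requires an unsafe loader:

    import yaml

    # unsafe_load (not safe_load) because the configs use the !!python/tuple tag
    with open('configs/pretrain_language_model.yaml') as f:
        cfg = yaml.unsafe_load(f)

    print(cfg['optimizer']['args']['betas'])  # (0.9, 0.999)
    print(cfg['model']['name'])               # modules.model_language.BCNLanguage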
 
configs/pretrain_vision_model.yaml DELETED
@@ -1,58 +0,0 @@
-global:
-  name: pretrain-vision-model
-  phase: train
-  stage: pretrain-vision
-  workdir: workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/training/MJ/MJ_train/',
-            'data/training/MJ/MJ_test/',
-            'data/training/MJ/MJ_valid/',
-            'data/training/ST'],
-    batch_size: 384
-  }
-  test: {
-    roots: ['data/evaluation/IIIT5k_3000',
-            'data/evaluation/SVT',
-            'data/evaluation/SVTP',
-            'data/evaluation/IC13_857',
-            'data/evaluation/IC15_1811',
-            'data/evaluation/CUTE80'],
-    batch_size: 384
-  }
-  data_aug: True
-  multiscales: False
-  num_workers: 14
-
-training:
-  epochs: 8
-  show_iters: 50
-  eval_iters: 3000
-  save_iters: 3000
-
-optimizer:
-  type: Adam
-  true_wd: False
-  wd: 0.0
-  bn_wd: False
-  clip_grad: 20
-  lr: 0.0001
-  args: {
-    betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  scheduler: {
-    periods: [6, 2],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_vision.BaseVision'
-  checkpoint: ~
-  vision: {
-    loss_weight: 1.,
-    attention: 'position',
-    backbone: 'transformer',
-    backbone_ln: 3,
-  }
 
configs/pretrain_vision_model_sv.yaml DELETED
@@ -1,58 +0,0 @@
-global:
-  name: pretrain-vision-model-sv
-  phase: train
-  stage: pretrain-vision
-  workdir: workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/training/MJ/MJ_train/',
-            'data/training/MJ/MJ_test/',
-            'data/training/MJ/MJ_valid/',
-            'data/training/ST'],
-    batch_size: 384
-  }
-  test: {
-    roots: ['data/evaluation/IIIT5k_3000',
-            'data/evaluation/SVT',
-            'data/evaluation/SVTP',
-            'data/evaluation/IC13_857',
-            'data/evaluation/IC15_1811',
-            'data/evaluation/CUTE80'],
-    batch_size: 384
-  }
-  data_aug: True
-  multiscales: False
-  num_workers: 14
-
-training:
-  epochs: 8
-  show_iters: 50
-  eval_iters: 3000
-  save_iters: 3000
-
-optimizer:
-  type: Adam
-  true_wd: False
-  wd: 0.0
-  bn_wd: False
-  clip_grad: 20
-  lr: 0.0001
-  args: {
-    betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  scheduler: {
-    periods: [6, 2],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_vision.BaseVision'
-  checkpoint: ~
-  vision: {
-    loss_weight: 1.,
-    attention: 'attention',
-    backbone: 'transformer',
-    backbone_ln: 2,
-  }
 
configs/template.yaml DELETED
@@ -1,67 +0,0 @@
-global:
-  name: exp
-  phase: train
-  stage: pretrain-vision
-  workdir: /tmp/workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/training/MJ/MJ_train/',
-            'data/training/MJ/MJ_test/',
-            'data/training/MJ/MJ_valid/',
-            'data/training/ST'],
-    batch_size: 128
-  }
-  test: {
-    roots: ['data/evaluation/IIIT5k_3000',
-            'data/evaluation/SVT',
-            'data/evaluation/SVTP',
-            'data/evaluation/IC13_857',
-            'data/evaluation/IC15_1811',
-            'data/evaluation/CUTE80'],
-    batch_size: 128
-  }
-  charset_path: data/charset_36.txt
-  num_workers: 4
-  max_length: 25 # 30
-  image_height: 32
-  image_width: 128
-  case_sensitive: False
-  eval_case_sensitive: False
-  data_aug: True
-  multiscales: False
-  pin_memory: True
-  smooth_label: False
-  smooth_factor: 0.1
-  one_hot_y: True
-  use_sm: False
-
-training:
-  epochs: 6
-  show_iters: 50
-  eval_iters: 3000
-  save_iters: 20000
-  start_iters: 0
-  stats_iters: 100000
-
-optimizer:
-  type: Adadelta # Adadelta, Adam
-  true_wd: False
-  wd: 0. # 0.001
-  bn_wd: False
-  args: {
-    # betas: !!python/tuple [0.9, 0.99], # betas=(0.9,0.99) for AdamW
-    # betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  clip_grad: 20
-  lr: [1.0, 1.0, 1.0] # lr: [0.005, 0.005, 0.005]
-  scheduler: {
-    periods: [3, 2, 1],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_abinet.ABINetModel'
-  checkpoint: ~
-  strict: True
 
configs/train_abinet.yaml DELETED
@@ -1,71 +0,0 @@
-global:
-  name: train-abinet
-  phase: train
-  stage: train-super
-  workdir: workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/training/MJ/MJ_train/',
-            'data/training/MJ/MJ_test/',
-            'data/training/MJ/MJ_valid/',
-            'data/training/ST'],
-    batch_size: 384
-  }
-  test: {
-    roots: ['data/evaluation/IIIT5k_3000',
-            'data/evaluation/SVT',
-            'data/evaluation/SVTP',
-            'data/evaluation/IC13_857',
-            'data/evaluation/IC15_1811',
-            'data/evaluation/CUTE80'],
-    batch_size: 384
-  }
-  data_aug: True
-  multiscales: False
-  num_workers: 14
-
-training:
-  epochs: 10
-  show_iters: 50
-  eval_iters: 3000
-  save_iters: 3000
-
-optimizer:
-  type: Adam
-  true_wd: False
-  wd: 0.0
-  bn_wd: False
-  clip_grad: 20
-  lr: 0.0001
-  args: {
-    betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  scheduler: {
-    periods: [6, 4],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_abinet_iter.ABINetIterModel'
-  iter_size: 3
-  ensemble: ''
-  use_vision: False
-  vision: {
-    checkpoint: workdir/pretrain-vision-model/best-pretrain-vision-model.pth,
-    loss_weight: 1.,
-    attention: 'position',
-    backbone: 'transformer',
-    backbone_ln: 3,
-  }
-  language: {
-    checkpoint: workdir/pretrain-language-model/pretrain-language-model.pth,
-    num_layers: 4,
-    loss_weight: 1.,
-    detach: True,
-    use_self_attn: False
-  }
-  alignment: {
-    loss_weight: 1.,
-  }
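
One reading of the scheduler block: the periods appear to sum to the epoch budget, so [6, 4] with gamma 0.1 would run the learning rate at 1e-4 for the first 6 epochs and 1e-5 for the last 4. A minimal sketch of the equivalent milestone schedule in plain PyTorch; this is an interpretation, since the repository's scheduler code is not in this diff:

    import torch

    net = torch.nn.Linear(8, 8)  # stand-in model
    opt = torch.optim.Adam(net.parameters(), lr=1e-4, betas=(0.9, 0.999))
    # periods [6, 4] read as a single decay boundary after epoch 6
    sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[6], gamma=0.1)
    for epoch in range(10):
        # ... train one epoch ...
        sched.step()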
 
configs/train_abinet_sv.yaml DELETED
@@ -1,71 +0,0 @@
-global:
-  name: train-abinet-sv
-  phase: train
-  stage: train-super
-  workdir: workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/training/MJ/MJ_train/',
-            'data/training/MJ/MJ_test/',
-            'data/training/MJ/MJ_valid/',
-            'data/training/ST'],
-    batch_size: 384
-  }
-  test: {
-    roots: ['data/evaluation/IIIT5k_3000',
-            'data/evaluation/SVT',
-            'data/evaluation/SVTP',
-            'data/evaluation/IC13_857',
-            'data/evaluation/IC15_1811',
-            'data/evaluation/CUTE80'],
-    batch_size: 384
-  }
-  data_aug: True
-  multiscales: False
-  num_workers: 14
-
-training:
-  epochs: 10
-  show_iters: 50
-  eval_iters: 3000
-  save_iters: 3000
-
-optimizer:
-  type: Adam
-  true_wd: False
-  wd: 0.0
-  bn_wd: False
-  clip_grad: 20
-  lr: 0.0001
-  args: {
-    betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  scheduler: {
-    periods: [6, 4],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_abinet_iter.ABINetIterModel'
-  iter_size: 3
-  ensemble: ''
-  use_vision: False
-  vision: {
-    checkpoint: workdir/pretrain-vision-model-sv/best-pretrain-vision-model-sv.pth,
-    loss_weight: 1.,
-    attention: 'attention',
-    backbone: 'transformer',
-    backbone_ln: 2,
-  }
-  language: {
-    checkpoint: workdir/pretrain-language-model/pretrain-language-model.pth,
-    num_layers: 4,
-    loss_weight: 1.,
-    detach: True,
-    use_self_attn: False
-  }
-  alignment: {
-    loss_weight: 1.,
-  }
 
configs/train_abinet_wo_iter.yaml DELETED
@@ -1,68 +0,0 @@
-global:
-  name: train-abinet-wo-iter
-  phase: train
-  stage: train-super
-  workdir: workdir
-  seed: ~
-
-dataset:
-  train: {
-    roots: ['data/training/MJ/MJ_train/',
-            'data/training/MJ/MJ_test/',
-            'data/training/MJ/MJ_valid/',
-            'data/training/ST'],
-    batch_size: 384
-  }
-  test: {
-    roots: ['data/evaluation/IIIT5k_3000',
-            'data/evaluation/SVT',
-            'data/evaluation/SVTP',
-            'data/evaluation/IC13_857',
-            'data/evaluation/IC15_1811',
-            'data/evaluation/CUTE80'],
-    batch_size: 384
-  }
-  data_aug: True
-  multiscales: False
-  num_workers: 14
-
-training:
-  epochs: 10
-  show_iters: 50
-  eval_iters: 3000
-  save_iters: 3000
-
-optimizer:
-  type: Adam
-  true_wd: False
-  wd: 0.0
-  bn_wd: False
-  clip_grad: 20
-  lr: 0.0001
-  args: {
-    betas: !!python/tuple [0.9, 0.999], # for default Adam
-  }
-  scheduler: {
-    periods: [6, 4],
-    gamma: 0.1,
-  }
-
-model:
-  name: 'modules.model_abinet.ABINetModel'
-  vision: {
-    checkpoint: workdir/pretrain-vision-model/best-pretrain-vision-model.pth,
-    loss_weight: 1.,
-    attention: 'position',
-    backbone: 'transformer',
-    backbone_ln: 3,
-  }
-  language: {
-    checkpoint: workdir/pretrain-language-model/pretrain-language-model.pth,
-    num_layers: 4,
-    loss_weight: 1.,
-    detach: True,
-    use_self_attn: False
-  }
-  alignment: {
-    loss_weight: 1.,
-  }
 
data/charset_36.txt DELETED
@@ -1,36 +0,0 @@
-0 a
-1 b
-2 c
-3 d
-4 e
-5 f
-6 g
-7 h
-8 i
-9 j
-10 k
-11 l
-12 m
-13 n
-14 o
-15 p
-16 q
-17 r
-18 s
-19 t
-20 u
-21 v
-22 w
-23 x
-24 y
-25 z
-26 1
-27 2
-28 3
-29 4
-30 5
-31 6
-32 7
-33 8
-34 9
-35 0
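
The charset files map a class index to a character, one pair per line (lowercase letters first, then digits, in the 36-class file). A minimal sketch of a reader for this format, assuming the single-space separator rendered above; the repository's CharsetMapper class (used in the notebooks below) is not part of this diff:

    def read_charset(path):
        """Parse 'index char' lines into an index -> character dict."""
        id_to_char = {}
        with open(path) as f:
            for line in f:
                idx, char = line.rstrip('\n').split(' ')
                id_to_char[int(idx)] = char
        return id_to_char

    charset = read_charset('data/charset_36.txt')
    assert charset[0] == 'a' and charset[25] == 'z' and charset[35] == '0'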
 
data/charset_62.txt DELETED
@@ -1,62 +0,0 @@
-0 0
-1 1
-2 2
-3 3
-4 4
-5 5
-6 6
-7 7
-8 8
-9 9
-10 A
-11 B
-12 C
-13 D
-14 E
-15 F
-16 G
-17 H
-18 I
-19 J
-20 K
-21 L
-22 M
-23 N
-24 O
-25 P
-26 Q
-27 R
-28 S
-29 T
-30 U
-31 V
-32 W
-33 X
-34 Y
-35 Z
-36 a
-37 b
-38 c
-39 d
-40 e
-41 f
-42 g
-43 h
-44 i
-45 j
-46 k
-47 l
-48 m
-49 n
-50 o
-51 p
-52 q
-53 r
-54 s
-55 t
-56 u
-57 v
-58 w
-59 x
-60 y
-61 z
 
docker/Dockerfile DELETED
@@ -1,25 +0,0 @@
-FROM anibali/pytorch:cuda-9.0
-MAINTAINER fangshancheng <fangsc@ustc.edu.cn>
-RUN sudo rm -rf /etc/apt/sources.list.d && \
-    sudo apt update && \
-    sudo apt install -y build-essential vim && \
-    conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/free/ && \
-    conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/main/ && \
-    conda config --set show_channel_urls yes && \
-    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
-    pip install torch==1.1.0 torchvision==0.3.0 && \
-    pip install fastai==1.0.60 && \
-    pip install ipdb jupyter ipython lmdb editdistance tensorboardX natsort nltk && \
-    conda uninstall -y --force pillow pil jpeg libtiff libjpeg-turbo && \
-    pip uninstall -y pillow pil jpeg libtiff libjpeg-turbo && \
-    conda install -yc conda-forge libjpeg-turbo && \
-    CFLAGS="${CFLAGS} -mavx2" pip install --no-cache-dir --force-reinstall --no-binary :all: --compile pillow-simd==6.2.2.post1 && \
-    conda install -y jpeg libtiff opencv && \
-    sudo rm -rf /var/lib/apt/lists/* && \
-    sudo rm -rf /tmp/* && \
-    sudo rm -rf ~/.cache && \
-    sudo apt clean all && \
-    conda clean -y -a
-EXPOSE 8888
-ENV LANG C.UTF-8
-ENV LC_ALL C.UTF-8
 
notebooks/dataset-text.ipynb DELETED
@@ -1,159 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.chdir('..')\n",
-    "from dataset import *\n",
-    "torch.set_printoptions(sci_mode=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Construct dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = TextDataset('data/Vocabulary_train_v2.csv', is_training=False, smooth_label=True, smooth_factor=0.1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = DataBunch.create(train_ds=data, valid_ds=None, bs=6)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x, y = data.one_batch(); x, y"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x[0].shape, x[1].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y[0].shape, y[1].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x[0].argmax(-1) - y[0].argmax(-1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x[0].argmax(-1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y[0].argmax(-1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x[0][0,0]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# test SpellingMutation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "probs = {'pn0': 0., 'pn1': 0., 'pn2': 0., 'pt0': 1.0, 'pt1': 1.0}\n",
-    "charset = CharsetMapper('data/charset_36.txt')\n",
-    "sm = SpellingMutation(charset=charset, **probs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sm('*a-aa')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
 
notebooks/dataset.ipynb DELETED
@@ -1,298 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.chdir('..')\n",
-    "from dataset import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "import logging\n",
-    "from torchvision.transforms import ToPILImage\n",
-    "from torchvision.utils import make_grid\n",
-    "from IPython.display import display\n",
-    "from torch.utils.data import ConcatDataset\n",
-    "charset = CharsetMapper('data/charset_36.txt')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def show_all(dl, iter_size=None):\n",
-    "    if iter_size is None: iter_size = len(dl)\n",
-    "    for i, item in enumerate(dl):\n",
-    "        if i >= iter_size:\n",
-    "            break\n",
-    "        image = item[0]\n",
-    "        label = item[1][0]\n",
-    "        length = item[1][1]\n",
-    "        print(f'iter {i}:', [charset.get_text(label[j][0: length[j]].argmax(-1), padding=False) for j in range(bs)])\n",
-    "        display(ToPILImage()(make_grid(item[0].cpu())))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Construct dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data1 = ImageDataset('data/training/ST', is_training=True);data1 # is_training"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "bs=64\n",
-    "data2 = ImageDataBunch.create(train_ds=data1, valid_ds=None, bs=bs, num_workers=1);data2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#data3 = data2.normalize(imagenet_stats);data3\n",
-    "data3 = data2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_all(data3.train_dl, 4)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Add dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "kwargs = {'data_aug': False, 'is_training': False}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data1 = ImageDataset('data/evaluation/IIIT5k_3000', **kwargs);data1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data2 = ImageDataset('data/evaluation/SVT', **kwargs);data2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data3 = ConcatDataset([data1, data2])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bs=64\n",
-    "data4 = ImageDataBunch.create(train_ds=data1, valid_ds=data3, bs=bs, num_workers=1);data4"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(data4.train_dl), len(data4.valid_dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_all(data4.train_dl, 4)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# TEST"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(data4.valid_dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import time\n",
-    "niter = 1000\n",
-    "start = time.time()\n",
-    "for i, item in enumerate(progress_bar(data4.valid_dl)):\n",
-    "    if i % niter == 0 and i > 0:\n",
-    "        print(i, (time.time() - start) / niter)\n",
-    "        start = time.time()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "num = 20\n",
-    "index = 6\n",
-    "plt.figure(figsize=(20, 10))\n",
-    "for i in range(num):\n",
-    "    plt.subplot(num // 4, 4, i+1)\n",
-    "    plt.imshow(data4.train_ds[i][0].data.numpy().transpose(1,2,0))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def show(path, image_key):\n",
-    "    with lmdb.open(str(path), readonly=True, lock=False, readahead=False, meminit=False).begin(write=False) as txn:\n",
-    "        imgbuf = txn.get(image_key.encode()) # image\n",
-    "        buf = six.BytesIO()\n",
-    "        buf.write(imgbuf)\n",
-    "        buf.seek(0)\n",
-    "        with warnings.catch_warnings():\n",
-    "            warnings.simplefilter(\"ignore\", UserWarning) # EXIF warning from TiffPlugin\n",
-    "            x = PIL.Image.open(buf).convert('RGB')\n",
-    "        print(x.size)\n",
-    "        plt.imshow(x)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "image_key = 'image-003118258'\n",
-    "image_key = 'image-002780217'\n",
-    "image_key = 'image-002780218'\n",
-    "path = 'data/CVPR2016'\n",
-    "show(path, image_key)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "image_key = 'image-004668347'\n",
-    "image_key = 'image-006128516'\n",
-    "path = 'data/NIPS2014'\n",
-    "show(path, image_key)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "image_key = 'image-004668347'\n",
-    "image_key = 'image-000002420'\n",
-    "path = 'data/IIIT5K_3000'\n",
-    "show(path, image_key)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
 
notebooks/prepare_wikitext103.ipynb DELETED
@@ -1,468 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 82841986 is_char and is_digit"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 82075350 regrex non-ascii and none-digit"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 86460763 left"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import random\n",
-    "import re\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "max_length = 25\n",
-    "min_length = 1\n",
-    "root = '../data'\n",
-    "charset = 'abcdefghijklmnopqrstuvwxyz'\n",
-    "digits = '0123456789'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def is_char(text, ratio=0.5):\n",
-    "    text = text.lower()\n",
-    "    length = max(len(text), 1)\n",
-    "    char_num = sum([t in charset for t in text])\n",
-    "    if char_num < min_length: return False\n",
-    "    if char_num / length < ratio: return False\n",
-    "    return True\n",
-    "\n",
-    "def is_digit(text, ratio=0.5):\n",
-    "    length = max(len(text), 1)\n",
-    "    digit_num = sum([t in digits for t in text])\n",
-    "    if digit_num / length < ratio: return False\n",
-    "    return True"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# generate training dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open('/tmp/wikitext-103/wiki.train.tokens', 'r') as file:\n",
-    "    lines = file.readlines()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "inp, gt = [], []\n",
-    "for line in lines:\n",
-    "    token = line.lower().split()\n",
-    "    for text in token:\n",
-    "        text = re.sub('[^0-9a-zA-Z]+', '', text)\n",
-    "        if len(text) < min_length:\n",
-    "            # print('short-text', text)\n",
-    "            continue\n",
-    "        if len(text) > max_length:\n",
-    "            # print('long-text', text)\n",
-    "            continue\n",
-    "        inp.append(text)\n",
-    "        gt.append(text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_voc = os.path.join(root, 'WikiText-103.csv')\n",
-    "pd.DataFrame({'inp':inp, 'gt':gt}).to_csv(train_voc, index=None, sep='\\t')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "86460763"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(inp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['valkyria',\n",
-       " 'chronicles',\n",
-       " 'iii',\n",
-       " 'senj',\n",
-       " 'no',\n",
-       " 'valkyria',\n",
-       " '3',\n",
-       " 'unk',\n",
-       " 'chronicles',\n",
-       " 'japanese',\n",
-       " '3',\n",
-       " 'lit',\n",
-       " 'valkyria',\n",
-       " 'of',\n",
-       " 'the',\n",
-       " 'battlefield',\n",
-       " '3',\n",
-       " 'commonly',\n",
-       " 'referred',\n",
-       " 'to',\n",
-       " 'as',\n",
-       " 'valkyria',\n",
-       " 'chronicles',\n",
-       " 'iii',\n",
-       " 'outside',\n",
-       " 'japan',\n",
-       " 'is',\n",
-       " 'a',\n",
-       " 'tactical',\n",
-       " 'role',\n",
-       " 'playing',\n",
-       " 'video',\n",
-       " 'game',\n",
-       " 'developed',\n",
-       " 'by',\n",
-       " 'sega',\n",
-       " 'and',\n",
-       " 'mediavision',\n",
-       " 'for',\n",
-       " 'the',\n",
-       " 'playstation',\n",
-       " 'portable',\n",
-       " 'released',\n",
-       " 'in',\n",
-       " 'january',\n",
-       " '2011',\n",
-       " 'in',\n",
-       " 'japan',\n",
-       " 'it',\n",
-       " 'is',\n",
-       " 'the',\n",
-       " 'third',\n",
-       " 'game',\n",
-       " 'in',\n",
-       " 'the',\n",
-       " 'valkyria',\n",
-       " 'series',\n",
-       " 'employing',\n",
-       " 'the',\n",
-       " 'same',\n",
-       " 'fusion',\n",
-       " 'of',\n",
-       " 'tactical',\n",
-       " 'and',\n",
-       " 'real',\n",
-       " 'time',\n",
-       " 'gameplay',\n",
-       " 'as',\n",
-       " 'its',\n",
-       " 'predecessors',\n",
-       " 'the',\n",
-       " 'story',\n",
-       " 'runs',\n",
-       " 'parallel',\n",
-       " 'to',\n",
-       " 'the',\n",
-       " 'first',\n",
-       " 'game',\n",
-       " 'and',\n",
-       " 'follows',\n",
-       " 'the',\n",
-       " 'nameless',\n",
-       " 'a',\n",
-       " 'penal',\n",
-       " 'military',\n",
-       " 'unit',\n",
-       " 'serving',\n",
-       " 'the',\n",
-       " 'nation',\n",
-       " 'of',\n",
-       " 'gallia',\n",
-       " 'during',\n",
-       " 'the',\n",
-       " 'second',\n",
-       " 'europan',\n",
-       " 'war',\n",
-       " 'who',\n",
-       " 'perform',\n",
-       " 'secret',\n",
-       " 'black']"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "inp[:100]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# generate evaluation dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def disturb(word, degree, p=0.3):\n",
-    "    if len(word) // 2 < degree: return word\n",
-    "    if is_digit(word): return word\n",
-    "    if random.random() < p: return word\n",
-    "    else:\n",
-    "        index = list(range(len(word)))\n",
-    "        random.shuffle(index)\n",
-    "        index = index[:degree]\n",
-    "        new_word = []\n",
-    "        for i in range(len(word)):\n",
-    "            if i not in index: \n",
-    "                new_word.append(word[i])\n",
-    "                continue\n",
-    "            if (word[i] not in charset) and (word[i] not in digits):\n",
-    "                # special token\n",
-    "                new_word.append(word[i])\n",
-    "                continue\n",
-    "            op = random.random()\n",
-    "            if op < 0.1: # add\n",
-    "                new_word.append(random.choice(charset))\n",
-    "                new_word.append(word[i])\n",
-    "            elif op < 0.2: continue # remove\n",
-    "            else: new_word.append(random.choice(charset)) # replace\n",
-    "        return ''.join(new_word)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "lines = inp\n",
-    "degree = 1\n",
-    "keep_num = 50000\n",
-    "\n",
-    "random.shuffle(lines)\n",
-    "part_lines = lines[:keep_num]\n",
-    "inp, gt = [], []\n",
-    "\n",
-    "for w in part_lines:\n",
-    "    w = w.strip().lower()\n",
-    "    new_w = disturb(w, degree)\n",
-    "    inp.append(new_w)\n",
-    "    gt.append(w)\n",
-    "    \n",
-    "eval_voc = os.path.join(root, f'WikiText-103_eval_d{degree}.csv')\n",
-    "pd.DataFrame({'inp':inp, 'gt':gt}).to_csv(eval_voc, index=None, sep='\\t')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[('high', 'high'),\n",
-       " ('vctoria', 'victoria'),\n",
-       " ('mains', 'mains'),\n",
-       " ('bi', 'by'),\n",
-       " ('13', '13'),\n",
-       " ('ticnet', 'ticket'),\n",
-       " ('basil', 'basic'),\n",
-       " ('cut', 'cut'),\n",
-       " ('aqarky', 'anarky'),\n",
-       " ('the', 'the'),\n",
-       " ('tqe', 'the'),\n",
-       " ('oc', 'of'),\n",
-       " ('diwpersal', 'dispersal'),\n",
-       " ('traffic', 'traffic'),\n",
-       " ('in', 'in'),\n",
-       " ('the', 'the'),\n",
-       " ('ti', 'to'),\n",
-       " ('professionalms', 'professionals'),\n",
-       " ('747', '747'),\n",
-       " ('in', 'in'),\n",
-       " ('and', 'and'),\n",
-       " ('exezutive', 'executive'),\n",
-       " ('n400', 'n400'),\n",
-       " ('yusic', 'music'),\n",
-       " ('s', 's'),\n",
-       " ('henri', 'henry'),\n",
-       " ('heard', 'heard'),\n",
-       " ('thousand', 'thousand'),\n",
-       " ('to', 'to'),\n",
-       " ('arhy', 'army'),\n",
-       " ('td', 'to'),\n",
-       " ('a', 'a'),\n",
-       " ('oall', 'hall'),\n",
-       " ('qind', 'kind'),\n",
-       " ('od', 'on'),\n",
-       " ('samfria', 'samaria'),\n",
-       " ('driveway', 'driveway'),\n",
-       " ('which', 'which'),\n",
-       " ('wotk', 'work'),\n",
-       " ('ak', 'as'),\n",
-       " ('persona', 'persona'),\n",
-       " ('s', 's'),\n",
-       " ('melbourne', 'melbourne'),\n",
-       " ('apong', 'along'),\n",
-       " ('fas', 'was'),\n",
-       " ('thea', 'then'),\n",
-       " ('permcy', 'percy'),\n",
-       " ('nnd', 'and'),\n",
-       " ('alan', 'alan'),\n",
-       " ('13', '13'),\n",
-       " ('matteos', 'matters'),\n",
-       " ('against', 'against'),\n",
-       " ('nefion', 'nexion'),\n",
-       " ('held', 'held'),\n",
-       " ('negative', 'negative'),\n",
-       " ('gogd', 'good'),\n",
-       " ('the', 'the'),\n",
-       " ('thd', 'the'),\n",
-       " ('groening', 'groening'),\n",
-       " ('tqe', 'the'),\n",
-       " ('cwould', 'would'),\n",
-       " ('fb', 'ft'),\n",
-       " ('uniten', 'united'),\n",
-       " ('kone', 'one'),\n",
-       " ('thiy', 'this'),\n",
-       " ('lanren', 'lauren'),\n",
-       " ('s', 's'),\n",
-       " ('thhe', 'the'),\n",
-       " ('is', 'is'),\n",
-       " ('modep', 'model'),\n",
-       " ('weird', 'weird'),\n",
-       " ('angwer', 'answer'),\n",
-       " ('imprisxnment', 'imprisonment'),\n",
-       " ('marpery', 'margery'),\n",
-       " ('eventuanly', 'eventually'),\n",
-       " ('in', 'in'),\n",
-       " ('donnoa', 'donna'),\n",
-       " ('ik', 'it'),\n",
-       " ('reached', 'reached'),\n",
-       " ('at', 'at'),\n",
-       " ('excxted', 'excited'),\n",
-       " ('ws', 'was'),\n",
-       " ('raes', 'rates'),\n",
-       " ('the', 'the'),\n",
-       " ('firsq', 'first'),\n",
-       " ('concluyed', 'concluded'),\n",
-       " ('recdorded', 'recorded'),\n",
-       " ('fhe', 'the'),\n",
-       " ('uegiment', 'regiment'),\n",
-       " ('a', 'a'),\n",
-       " ('glanes', 'planes'),\n",
-       " ('conyrol', 'control'),\n",
-       " ('thr', 'the'),\n",
-       " ('arrext', 'arrest'),\n",
-       " ('bth', 'both'),\n",
-       " ('forward', 'forward'),\n",
-       " ('allowdd', 'allowed'),\n",
-       " ('revealed', 'revealed'),\n",
-       " ('mayagement', 'management'),\n",
-       " ('normal', 'normal')]"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "list(zip(inp, gt))[:100]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
 
notebooks/transforms.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
tools/create_lmdb_dataset.py DELETED
@@ -1,87 +0,0 @@
-""" a modified version of CRNN torch repository https://github.com/bgshih/crnn/blob/master/tool/create_dataset.py """
-
-import fire
-import os
-import lmdb
-import cv2
-
-import numpy as np
-
-
-def checkImageIsValid(imageBin):
-    if imageBin is None:
-        return False
-    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
-    img = cv2.imdecode(imageBuf, cv2.IMREAD_GRAYSCALE)
-    imgH, imgW = img.shape[0], img.shape[1]
-    if imgH * imgW == 0:
-        return False
-    return True
-
-
-def writeCache(env, cache):
-    with env.begin(write=True) as txn:
-        for k, v in cache.items():
-            txn.put(k, v)
-
-
-def createDataset(inputPath, gtFile, outputPath, checkValid=True):
-    """
-    Create LMDB dataset for training and evaluation.
-    ARGS:
-        inputPath  : input folder path where starts imagePath
-        outputPath : LMDB output path
-        gtFile     : list of image path and label
-        checkValid : if true, check the validity of every image
-    """
-    os.makedirs(outputPath, exist_ok=True)
-    env = lmdb.open(outputPath, map_size=1099511627776)
-    cache = {}
-    cnt = 1
-
-    with open(gtFile, 'r', encoding='utf-8') as data:
-        datalist = data.readlines()
-
-    nSamples = len(datalist)
-    for i in range(nSamples):
-        imagePath, label = datalist[i].strip('\n').split('\t')
-        imagePath = os.path.join(inputPath, imagePath)
-
-        # # only use alphanumeric data
-        # if re.search('[^a-zA-Z0-9]', label):
-        #     continue
-
-        if not os.path.exists(imagePath):
-            print('%s does not exist' % imagePath)
-            continue
-        with open(imagePath, 'rb') as f:
-            imageBin = f.read()
-        if checkValid:
-            try:
-                if not checkImageIsValid(imageBin):
-                    print('%s is not a valid image' % imagePath)
-                    continue
-            except:
-                print('error occured', i)
-                with open(outputPath + '/error_image_log.txt', 'a') as log:
-                    log.write('%s-th image data occured error\n' % str(i))
-                continue
-
-        imageKey = 'image-%09d'.encode() % cnt
-        labelKey = 'label-%09d'.encode() % cnt
-        cache[imageKey] = imageBin
-        cache[labelKey] = label.encode()
-
-        if cnt % 1000 == 0:
-            writeCache(env, cache)
-            cache = {}
-            print('Written %d / %d' % (cnt, nSamples))
-        cnt += 1
-    nSamples = cnt-1
-    cache['num-samples'.encode()] = str(nSamples).encode()
-    writeCache(env, cache)
-    print('Created dataset with %d samples' % nSamples)
-
-
-if __name__ == '__main__':
-    fire.Fire(createDataset)
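
Since the script hands createDataset to fire.Fire, each function argument doubles as a command-line flag, with gtFile expected to hold one tab-separated imagePath/label pair per line. A hedged usage sketch with placeholder paths (the actual data layout is not part of this commit):

    # Equivalent to the CLI call:
    #   python tools/create_lmdb_dataset.py --inputPath data/images \
    #       --gtFile data/gt.txt --outputPath data/train_lmdb
    from create_lmdb_dataset import createDataset  # import path is illustrative

    createDataset(inputPath='data/images', gtFile='data/gt.txt', outputPath='data/train_lmdb')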
 
tools/crop_by_word_bb_syn90k.py DELETED
@@ -1,153 +0,0 @@
-# Crop by word bounding box
-# Locate script with gt.mat
-# $ python crop_by_word_bb.py
-
-import os
-import re
-import cv2
-import scipy.io as sio
-from itertools import chain
-import numpy as np
-import math
-
-mat_contents = sio.loadmat('gt.mat')
-
-image_names = mat_contents['imnames'][0]
-cropped_indx = 0
-start_img_indx = 0
-gt_file = open('gt_oabc.txt', 'a')
-err_file = open('err_oabc.txt', 'a')
-
-for img_indx in range(start_img_indx, len(image_names)):
-
-
-    # Get image name
-    image_name_new = image_names[img_indx][0]
-    # print(image_name_new)
-    image_name = '/home/yxwang/pytorch/dataset/SynthText/img/'+ image_name_new
-    # print('IMAGE : {}.{}'.format(img_indx, image_name))
-    print('evaluating {} image'.format(img_indx), end='\r')
-    # Get text in image
-    txt = mat_contents['txt'][0][img_indx]
-    txt = [re.split(' \n|\n |\n| ', t.strip()) for t in txt]
-    txt = list(chain(*txt))
-    txt = [t for t in txt if len(t) > 0 ]
-    # print(txt) # ['Lines:', 'I', 'lost', 'Kevin', 'will', 'line', 'and', 'and', 'the', '(and', 'the', 'out', 'you', "don't", 'pkg']
-    # assert 1<0
-
-    # Open image
-    #img = Image.open(image_name)
-    img = cv2.imread(image_name, cv2.IMREAD_COLOR)
-    img_height, img_width, _ = img.shape
-
-    # Validation
-    if len(np.shape(mat_contents['wordBB'][0][img_indx])) == 2:
-        wordBBlen = 1
-    else:
-        wordBBlen = mat_contents['wordBB'][0][img_indx].shape[-1]
-
-    if wordBBlen == len(txt):
-        # Crop image and save
-        for word_indx in range(len(txt)):
-            # print('txt--',txt)
-            txt_temp = txt[word_indx]
-            len_now = len(txt_temp)
-            # txt_temp = re.sub('[^0-9a-zA-Z]+', '', txt_temp)
-            # print('txt_temp-1-',txt_temp)
-            txt_temp = re.sub('[^a-zA-Z]+', '', txt_temp)
-            # print('txt_temp-2-',txt_temp)
-            if len_now - len(txt_temp) != 0:
-                print('txt_temp-2-', txt_temp)
-
-            if len(np.shape(mat_contents['wordBB'][0][img_indx])) == 2: # only one word (2,4)
-                wordBB = mat_contents['wordBB'][0][img_indx]
-            else: # many words (2,4,num_words)
-                wordBB = mat_contents['wordBB'][0][img_indx][:, :, word_indx]
-
-            if np.shape(wordBB) != (2, 4):
-                err_log = 'malformed box index: {}\t{}\t{}\n'.format(image_name, txt[word_indx], wordBB)
-                err_file.write(err_log)
-                # print(err_log)
-                continue
-
-            pts1 = np.float32([[wordBB[0][0], wordBB[1][0]],
-                               [wordBB[0][3], wordBB[1][3]],
-                               [wordBB[0][1], wordBB[1][1]],
-                               [wordBB[0][2], wordBB[1][2]]])
-            height = math.sqrt((wordBB[0][0] - wordBB[0][3])**2 + (wordBB[1][0] - wordBB[1][3])**2)
-            width = math.sqrt((wordBB[0][0] - wordBB[0][1])**2 + (wordBB[1][0] - wordBB[1][1])**2)
-
-            # Coord validation check
-            if (height * width) <= 0:
-                err_log = 'empty file : {}\t{}\t{}\n'.format(image_name, txt[word_indx], wordBB)
-                err_file.write(err_log)
-                # print(err_log)
-                continue
-            elif (height * width) > (img_height * img_width):
-                err_log = 'too big box : {}\t{}\t{}\n'.format(image_name, txt[word_indx], wordBB)
-                err_file.write(err_log)
-                # print(err_log)
-                continue
-            else:
-                valid = True
-                for i in range(2):
-                    for j in range(4):
-                        if wordBB[i][j] < 0 or wordBB[i][j] > img.shape[1 - i]:
-                            valid = False
-                            break
-                    if not valid:
-                        break
-                if not valid:
-                    err_log = 'invalid coord : {}\t{}\t{}\t{}\t{}\n'.format(
-                        image_name, txt[word_indx], wordBB, (width, height), (img_width, img_height))
-                    err_file.write(err_log)
-                    # print(err_log)
-                    continue
-
-            pts2 = np.float32([[0, 0],
-                               [0, height],
-                               [width, 0],
-                               [width, height]])
-
-            x_min = np.int(round(min(wordBB[0][0], wordBB[0][1], wordBB[0][2], wordBB[0][3])))
-            x_max = np.int(round(max(wordBB[0][0], wordBB[0][1], wordBB[0][2], wordBB[0][3])))
-            y_min = np.int(round(min(wordBB[1][0], wordBB[1][1], wordBB[1][2], wordBB[1][3])))
-            y_max = np.int(round(max(wordBB[1][0], wordBB[1][1], wordBB[1][2], wordBB[1][3])))
-            # print(x_min, x_max, y_min, y_max)
-            # print(img.shape)
-            # assert 1<0
-            if len(img.shape) == 3:
-                img_cropped = img[y_min:y_max:1, x_min:x_max:1, :]
-            else:
-                img_cropped = img[y_min:y_max:1, x_min:x_max:1]
-            dir_name = '/home/yxwang/pytorch/dataset/SynthText/cropped-oabc/{}'.format(image_name_new.split('/')[0])
-            # print('dir_name--',dir_name)
-            if not os.path.exists(dir_name):
-                os.mkdir(dir_name)
-            cropped_file_name = "{}/{}_{}_{}.jpg".format(dir_name, cropped_indx,
-                                                         image_name.split('/')[-1][:-len('.jpg')], word_indx)
-            # print('cropped_file_name--',cropped_file_name)
-            # print('img_cropped--',img_cropped.shape)
-            if img_cropped.shape[0] == 0 or img_cropped.shape[1] == 0:
-                err_log = 'word_box_mismatch : {}\t{}\t{}\n'.format(image_name, mat_contents['txt'][0][
-                    img_indx], mat_contents['wordBB'][0][img_indx])
-                err_file.write(err_log)
-                # print(err_log)
-                continue
-            # print('img_cropped--',img_cropped)
-
-            # img_cropped.save(cropped_file_name)
-            cv2.imwrite(cropped_file_name, img_cropped)
-            cropped_indx += 1
-            gt_file.write('%s\t%s\n' % (cropped_file_name, txt[word_indx]))
-
-        # if cropped_indx>10:
-        #     assert 1<0
-        # assert 1 < 0
-    else:
-        err_log = 'word_box_mismatch : {}\t{}\t{}\n'.format(image_name, mat_contents['txt'][0][
-            img_indx], mat_contents['wordBB'][0][img_indx])
-        err_file.write(err_log)
-        # print(err_log)
-gt_file.close()
-err_file.close()