# Pix2Text V1.0 Demo

# coding: utf-8
# Copyright (C) 2023, [Breezedeus](https://github.com/breezedeus).
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Ref: https://huggingface.co/spaces/hysts/Manga-OCR/blob/main/app.py

import os
import json
import functools
import random
import string
import time

import yaml

import gradio as gr
import numpy as np

# from cnstd.utils import pil_to_numpy, imsave

from pix2text import Pix2Text
from pix2text.utils import set_logger, merge_line_texts

logger = set_logger()

# Mapping read from the `languages` key of languages.yaml (resolved against the
# current working directory). Its keys are offered as dropdown choices in the UI
# and its values are what gets passed to Pix2Text as `languages`.
# The file is opened in a `with` block so the handle is closed right after parsing.
with open('languages.yaml', 'r', encoding='utf-8') as _languages_file:
    LANGUAGES = yaml.safe_load(_languages_file)['languages']


def get_p2t_model(lan_list: list):
    """Create a Pix2Text instance configured for the given languages.

    Args:
        lan_list: Language values forwarded to ``Pix2Text`` as ``languages``.

    Returns:
        A newly constructed ``Pix2Text`` object; a fresh one is built on
        every call.
    """
    return Pix2Text(languages=lan_list)


def latex_render(latex_str):
    """Wrap a LaTeX string in ``$$`` display-math delimiters.

    Args:
        latex_str: The LaTeX source to wrap.

    Returns:
        The input placed on its own line between an opening and a closing
        ``$$`` line.
    """
    return "$$\n{}\n$$".format(latex_str)


def recognize(lang_list, rec_type, resized_shape, image_file):
    """Run Pix2Text on an uploaded image and return the outputs for the UI.

    Args:
        lang_list: Language names selected in the UI; each one is looked up
            in ``LANGUAGES`` to obtain the value handed to Pix2Text.
        rec_type: One of ``'Formula & Text'``, ``'Only Formula'`` or
            ``'Only Text'``.
        resized_shape: Forwarded as ``resized_shape`` to the mixed
            formula-and-text recognition call; unused by the other two modes.
        image_file: The image to recognize (the UI supplies a PIL image).

    Returns:
        A ``(text, detection_image_path)`` pair. The second element is the
        path passed as ``save_analysis_res`` in ``'Formula & Text'`` mode and
        ``None`` in the other two modes.

    Raises:
        ValueError: If ``rec_type`` is not one of the supported values.
        gr.Error: If no image was provided.
        KeyError: If a name in ``lang_list`` is not present in ``LANGUAGES``.
    """
    # Validate cheap preconditions before constructing the model. Previously an
    # unknown rec_type fell through and implicitly returned None, although the
    # click handler is wired to two output components.
    if rec_type not in ('Formula & Text', 'Only Formula', 'Only Text'):
        raise ValueError(f'Unsupported rec_type: {rec_type!r}')
    if image_file is None:
        # Submit was pressed without an image; gr.Error shows this message in the UI.
        raise gr.Error('Please upload an image before submitting.')

    lang_codes = [LANGUAGES[name] for name in lang_list]
    p2t = get_p2t_model(lang_codes)

    if rec_type == 'Only Formula':
        return latex_render(p2t.recognize_formula(image_file)), None
    if rec_type == 'Only Text':
        return p2t.recognize_text(image_file), None

    # 'Formula & Text': timestamp plus six distinct random letters keeps the
    # output file names of different requests from colliding.
    suffix = ''.join(random.sample(string.ascii_letters, 6))
    out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
    outs = p2t(image_file, resized_shape=resized_shape, save_analysis_res=out_det_fp)
    # Collapse the per-element results into a single text string.
    only_text = merge_line_texts(outs, auto_line_break=True)
    return only_text, out_det_fp


def main():
    """Build the Pix2Text Gradio demo page and launch it.

    Layout: a top row with a settings panel (languages, image type, resize
    slider), an image-upload column with the submit button, and a project
    links column; below it, a row showing the detection image next to the
    recognized text. Clicking submit calls ``recognize``.
    """
    title = 'Demo'

    # Dropdown choices: the keys of LANGUAGES, ordered case-insensitively.
    language_names = sorted(LANGUAGES.keys(), key=lambda name: name.lower())

    header_html = (
        '<h1 style="text-align: center; margin-bottom: 1rem;">'
        '<a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a>'
        f' {title}</h1>'
    )

    # Markdown/HTML shown in the right-hand column: logo, badges, project links.
    table_desc = """
<div align="center">
<img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fc41e0b1d-4869-4e39-93db-631569e6a38d%2FUntitled.png?table=block&id=3d0819ca-2e1a-46a7-b6f3-b4cf89cd045c" width="120px"/>

[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)

[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/H9FmDSMA)

|                                 |                                         |
| ------------------------------- | --------------------------------------- |
| 🏄 **Free Web Service**             | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
| 📀 **Code**              | [Github](https://github.com/breezedeus/pix2text) |
| 💬 **Discord**              | [P2T @ Discord](https://discord.gg/H9FmDSMA) |
| 👨🏻‍💻 **Author**            | [Breezedeus](https://www.breezedeus.com) |

If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
</div>
    """

    with gr.Blocks() as app:
        gr.HTML(header_html)

        # Top row: settings | image upload | project links.
        with gr.Row(equal_height=False):
            with gr.Column(min_width=200, variant='panel', scale=3):
                gr.Markdown('### Settings')
                lang_list = gr.Dropdown(
                    choices=language_names,
                    value=['English', 'Chinese Simplified'],
                    multiselect=True,
                    label='Text Languages',
                    info='Which languages to be recognized as Texts.',
                )
                rec_type = gr.Radio(
                    choices=['Formula & Text', 'Only Formula', 'Only Text'],
                    value='Formula & Text',
                    label='Image Type',
                    info='Which type of image to be recognized.',
                )
                resized_shape = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    step=32,
                    value=608,
                    label='resized_shape',
                )

            with gr.Column(scale=6, variant='compact'):
                gr.Markdown('### Upload Image to be Recognized')
                image_file = gr.Image(
                    label='Image', type='pil', image_mode='RGB', show_label=False
                )
                submit_button = gr.Button('Submit', variant='primary')

            with gr.Column(scale=2, variant='compact'):
                gr.Markdown(table_desc)

        # Bottom row: detection image on the left, recognized text on the right.
        with gr.Row(equal_height=False):
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Detection Result**')
                det_result = gr.Image(
                    label='Detection Result', scale=1, show_label=False
                )
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Recognition Result**')
                rec_result = gr.Textbox(
                    label='Recognition Result',
                    lines=5,
                    value='',
                    scale=1,
                    show_label=False,
                    show_copy_button=True,
                )

        # recognize() returns (text, detection image path), matching the
        # order of the two output components.
        submit_button.click(
            recognize,
            inputs=[lang_list, rec_type, resized_shape, image_file],
            outputs=[rec_result, det_result],
        )

    # Queue requests, holding at most 10 pending ones, then start the server.
    app.queue(max_size=10)
    app.launch()


# Start the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()