# File: antiOCR / app.py (Hugging Face Space by breezedeus)
# Last commit: "Update app.py" (6df8db3); file size 5.56 kB
# coding: utf-8
# Copyright (C) 2022, [Breezedeus](https://github.com/breezedeus).
import os
from PIL import Image, ImageFilter
import streamlit as st
from cnocr import CnOcr
from cnocr.utils import set_logger, download
from antiocr.anti_ocr import AntiOcr
from antiocr.consts import RESOURCE_PATH
logger = set_logger()
st.set_page_config(layout="wide")

# Path of the font file handed to AntiOcr as `font_fp` in main().
FONT_FP = 'resources/simfang.ttf'

# Fetch the font once when it is not already on disk, creating its
# directory first so the download has somewhere to write.
if not os.path.exists(FONT_FP):
    url = 'https://huggingface.co/datasets/breezedeus/cnocr-wx-qr-code/resolve/main/fonts/simfang.ttf'
    _font_dir = os.path.dirname(FONT_FP)
    os.makedirs(_font_dir, exist_ok=True)
    download(url, path=FONT_FP, overwrite=True)
@st.cache(allow_output_mutation=True)
def get_ocr_model():
    """Return a CnOcr recognizer built with its default arguments.

    The function is wrapped in ``st.cache(allow_output_mutation=True)``, so
    Streamlit is asked to reuse the returned object instead of calling
    ``CnOcr()`` again on each invocation.
    """
    model = CnOcr()
    return model
def save_image(img):
    """Render a Streamlit download button that serves *img* as a JPEG.

    The image is encoded into an in-memory buffer and the resulting bytes
    are attached to a download button with file name ``antiOCR.jpeg``.

    Args:
        img: Image object exposing ``save(fp, format=...)`` and
            ``convert(mode)``; main() passes the AntiOcr output here
            (presumably a ``PIL.Image.Image`` -- confirm against AntiOcr).

    Raises:
        OSError: If the image cannot be encoded as JPEG even after being
            converted to RGB.
    """
    from io import BytesIO

    buf = BytesIO()
    try:
        img.save(buf, format="JPEG")
    except OSError:
        # Pillow raises OSError for modes the JPEG encoder cannot store
        # (e.g. RGBA or P). Retry on a fresh buffer after converting to RGB
        # so the download still works; partial bytes from the failed attempt
        # are discarded with the old buffer.
        buf = BytesIO()
        img.convert("RGB").save(buf, format="JPEG")

    byte_im = buf.getvalue()
    st.download_button(
        label="下载图片",
        data=byte_im,
        file_name="antiOCR.jpeg",
        mime="image/jpeg",
    )
def _sidebar_text_settings():
    """Render the foreground-text sidebar widgets and return their values.

    Returns:
        dict: keyword arguments for the AntiOcr call -- ``char_reverse_ratio``,
        ``char_to_pinyin_ratio``, ``text_color``, ``min_font_size`` and
        ``max_font_size``.
    """
    st.sidebar.header('输出设置')
    char_reverse_ratio = st.sidebar.slider(
        '文字倒转概率', min_value=0.0, max_value=1.0, value=0.1
    )
    char_to_pinyin_ratio = st.sidebar.slider(
        '文字转拼音概率', min_value=0.0, max_value=1.0, value=0.1
    )

    cols = st.sidebar.columns(2)
    min_font_size = int(cols[0].number_input('最小文字大小', 2, 80, value=15))
    # The lower bound and default of the "max" input follow the chosen
    # minimum so that max_font_size is always strictly larger.
    max_font_size = int(
        cols[1].number_input(
            '最大文字大小', min_font_size + 1, 120, value=max(40, min_font_size + 1)
        )
    )
    text_color = st.sidebar.color_picker('文字颜色', value='#5087DC')

    return dict(
        char_reverse_ratio=char_reverse_ratio,
        char_to_pinyin_ratio=char_to_pinyin_ratio,
        text_color=text_color,
        min_font_size=min_font_size,
        max_font_size=max_font_size,
    )


def _sidebar_background_settings():
    """Render the background sidebar widgets and resolve the background source.

    Returns:
        tuple: ``(bg_image, bg_gen_config)``. When the random-background
        checkbox is ticked, ``bg_image`` is None and ``bg_gen_config`` holds
        the generation settings; otherwise ``bg_gen_config`` is None and
        ``bg_image`` is the bundled ``resources/bg.jpeg`` after a 3x3 max
        filter.
    """
    st.sidebar.markdown('----')
    use_random_bg = st.sidebar.checkbox('随机生成背景图片')

    if not use_random_bg:
        bg_image = Image.open('resources/bg.jpeg')
        bg_image = bg_image.filter(ImageFilter.MaxFilter(3))
        return bg_image, None

    bg_text_density = st.sidebar.slider(
        '背景图片文字密度', min_value=0.0, max_value=3.0, value=1.0
    )
    cols = st.sidebar.columns(2)
    bg_min_size = int(
        cols[0].number_input('背景图片最小文字', 2, 80, key='bg_min', value=15)
    )
    bg_max_size = int(
        cols[1].number_input(
            '背景图片最大文字',
            bg_min_size + 1,
            120,
            key='bg_max',
            value=max(70, bg_min_size + 1),
        )
    )
    bg_text_color = st.sidebar.color_picker('背景图片文字颜色', value='#07BCE0')
    bg_gen_config = dict(
        text_density=bg_text_density,
        text_color=bg_text_color,
        min_font_size=bg_min_size,
        max_font_size=bg_max_size,
    )
    return None, bg_gen_config


def _render_page_header():
    """Write the centred title, the author/contact line and the description."""
    title = '让文字自由传播:<a href="https://github.com/breezedeus/antiOCR">antiOCR</a>'
    st.markdown(f"<h1 style='text-align: center;'>{title}</h1>", unsafe_allow_html=True)

    subtitle = (
        '作者:<a href="https://github.com/breezedeus">breezedeus</a>; '
        '欢迎加入 <a href="https://cnocr.readthedocs.io/zh/latest/contact/">交流群</a>'
    )
    st.markdown(f"<div style='text-align: center;'>{subtitle}</div>", unsafe_allow_html=True)
    # Empty markdown elements are used as vertical spacing.
    st.markdown('')
    st.markdown('')

    desc = '<strong>antiOCR</strong> 对指定的文字(来自输入或者图片)进行处理,输出图片,此图片无法通过OCR技术识别出有意义的文字。'
    st.markdown(f"<div style='text-align: left;'>{desc}</div>", unsafe_allow_html=True)
    st.markdown('')


def _ocr_uploaded_image(ocr, content_file):
    """Return the text recognised in the uploaded image, one result per line.

    Args:
        ocr: recognizer whose ``ocr(img)`` yields items with a ``'text'`` key.
        content_file: the file-uploader value, or None when nothing is uploaded.

    Returns:
        The newline-joined text, or None when no file was uploaded or when
        reading/recognising it failed (the error is shown in the page).
    """
    if content_file is None:
        return None
    try:
        img = Image.open(content_file).convert('RGB')
        ocr_out = ocr.ocr(img)
        return '\n'.join([out['text'] for out in ocr_out])
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure on a
        # user-supplied file is reported in the page instead of aborting the
        # script, and the caller falls back to the default text.
        st.error(e)
        return None


def _render_result(ocr, out_img):
    """Show the generated image, its download button and an OCR pass over it."""
    st.subheader('输出图片')
    st.image(out_img)
    save_image(out_img)

    st.markdown('**对输出图片进行OCR,结果如下(如果依旧出现敏感词,尝试重新生成图片):**')
    ocr_out = ocr.ocr(out_img)
    new_texts = [out['text'] for out in ocr_out]
    st.text('\n'.join(new_texts))


def main():
    """Build the antiOCR page: settings sidebar, text input, generated image.

    Widgets are created in a fixed order (text settings, background settings,
    page header, input widgets, generate button) because that order determines
    the page layout. When the generate button is pressed and the text box is
    non-empty, the text is passed to AntiOcr with the chosen settings and the
    result is displayed via ``_render_result``.
    """
    text_settings = _sidebar_text_settings()
    bg_image, bg_gen_config = _sidebar_background_settings()
    _render_page_header()

    st.subheader('选择待转换文字图片,或者直接输入待转换文字')
    default_texts = '真的猛士,敢于直面惨淡的人生,敢于正视淋漓的鲜血。这是怎样的哀痛者和幸福者?然而造化又常常为庸人设计,以时间的流逝,来洗涤旧迹,仅是留下淡红的血色和微漠的悲哀。在这淡红的血色和微漠的悲哀中,又给人暂得偷生,维持着这似人非人的世界。 ——鲁迅'
    content_file = st.file_uploader('待转换文字图片', type=["png", "jpg", "jpeg", "webp"])

    ocr = get_ocr_model()
    anti = AntiOcr()

    # Text recognised from an uploaded image pre-fills the text box;
    # otherwise the default sample text is used.
    texts = _ocr_uploaded_image(ocr, content_file)
    texts = st.text_area('待转换文字图片', value=texts or default_texts, height=120)

    if not st.button("生成图片"):
        return
    if not texts:
        return

    with st.spinner('图片生成中…'):
        out_img = anti(
            texts,
            bg_image=bg_image,
            bg_gen_config=bg_gen_config,
            font_fp=FONT_FP,
            **text_settings,
        )
    _render_result(ocr, out_img)
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()