# coding: utf-8 # Copyright (C) 2022, [Breezedeus](https://github.com/breezedeus). import os from PIL import Image, ImageFilter import streamlit as st from cnocr import CnOcr from cnocr.utils import set_logger, download from antiocr.anti_ocr import AntiOcr from antiocr.consts import RESOURCE_PATH logger = set_logger() st.set_page_config(layout="wide") FONT_FP = 'resources/simfang.ttf' if not os.path.exists(FONT_FP): url = 'https://huggingface.co/datasets/breezedeus/cnocr-wx-qr-code/resolve/main/fonts/simfang.ttf' os.makedirs(os.path.dirname(FONT_FP), exist_ok=True) download(url, path=FONT_FP, overwrite=True) @st.cache(allow_output_mutation=True) def get_ocr_model(): return CnOcr() def save_image(img): from io import BytesIO buf = BytesIO() img.save(buf, format="JPEG") byte_im = buf.getvalue() st.download_button( label="下载图片", data=byte_im, file_name="antiOCR.jpeg", mime="image/jpeg", ) def main(): st.sidebar.header('输出设置') char_reverse_ratio = st.sidebar.slider( '文字倒转概率', min_value=0.0, max_value=1.0, value=0.1 ) char_to_pinyin_ratio = st.sidebar.slider( '文字转拼音概率', min_value=0.0, max_value=1.0, value=0.1 ) cols = st.sidebar.columns(2) min_font_size = int(cols[0].number_input('最小文字大小', 2, 80, value=15)) max_font_size = int( cols[1].number_input( '最大文字大小', min_font_size + 1, 120, value=max(40, min_font_size + 1) ) ) text_color = st.sidebar.color_picker('文字颜色', value='#5087DC') st.sidebar.markdown('----') use_random_bg = st.sidebar.checkbox('随机生成背景图片') if use_random_bg: bg_text_density = st.sidebar.slider( '背景图片文字密度', min_value=0.0, max_value=3.0, value=1.0 ) cols = st.sidebar.columns(2) bg_min_size = int( cols[0].number_input('背景图片最小文字', 2, 80, key='bg_min', value=15) ) bg_max_size = int( cols[1].number_input( '背景图片最大文字', bg_min_size + 1, 120, key='bg_max', value=max(70, bg_min_size + 1), ) ) bg_text_color = st.sidebar.color_picker('背景图片文字颜色', value='#07BCE0') bg_gen_config = dict( text_density=bg_text_density, text_color=bg_text_color, min_font_size=bg_min_size, max_font_size=bg_max_size, ) bg_image = None else: bg_gen_config = None bg_image = Image.open('resources/bg.jpeg') bg_image = bg_image.filter(ImageFilter.MaxFilter(3)) title = '让文字自由传播:antiOCR' st.markdown(f"

{title}

", unsafe_allow_html=True) subtitle = '作者:breezedeus; ' \ '欢迎加入 交流群' st.markdown(f"
{subtitle}
", unsafe_allow_html=True) st.markdown('') st.markdown('') desc = 'antiOCR 对指定的文字(来自输入或者图片)进行处理,输出图片,此图片无法通过OCR技术识别出有意义的文字。' st.markdown(f"
{desc}
", unsafe_allow_html=True) st.markdown('') st.subheader('选择待转换文字图片,或者直接输入待转换文字') default_texts = '真的猛士,敢于直面惨淡的人生,敢于正视淋漓的鲜血。这是怎样的哀痛者和幸福者?然而造化又常常为庸人设计,以时间的流逝,来洗涤旧迹,仅是留下淡红的血色和微漠的悲哀。在这淡红的血色和微漠的悲哀中,又给人暂得偷生,维持着这似人非人的世界。 ——鲁迅' content_file = st.file_uploader('输入待转换的文字图片:', type=["png", "jpg", "jpeg", "webp"]) ocr = get_ocr_model() anti = AntiOcr() texts = None if content_file is not None: try: img = Image.open(content_file).convert('RGB') ocr_out = ocr.ocr(img) texts = '\n'.join([out['text'] for out in ocr_out]) except Exception as e: st.error(e) texts = st.text_area('或者,直接输入待转换的文字:', value=texts or default_texts, height=120) if st.button("生成图片"): if texts: with st.spinner('图片生成中…'): out_img = anti( texts, char_reverse_ratio=char_reverse_ratio, char_to_pinyin_ratio=char_to_pinyin_ratio, text_color=text_color, min_font_size=min_font_size, max_font_size=max_font_size, bg_image=bg_image, bg_gen_config=bg_gen_config, font_fp=FONT_FP, ) st.subheader('输出图片') st.image(out_img) save_image(out_img) st.markdown('**对输出图片进行OCR,结果如下(如果依旧出现敏感词,尝试重新生成图片):**') ocr_out = ocr.ocr(out_img) new_texts = [out['text'] for out in ocr_out] st.text('\n'.join(new_texts)) if __name__ == '__main__': main()