Spaces:
Running
Running
File size: 5,596 Bytes
877c27b 88181ef 877c27b 6177ab7 877c27b 26e39ff 6177ab7 877c27b 6df8db3 877c27b 26e39ff 877c27b 17e7ade 877c27b 17e7ade 877c27b 2f5b9f5 877c27b 6df8db3 877c27b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# coding: utf-8
# Copyright (C) 2022, [Breezedeus](https://github.com/breezedeus).
import os
from PIL import Image, ImageFilter
import streamlit as st
from cnocr import CnOcr
from cnocr.utils import set_logger, download
from antiocr.anti_ocr import AntiOcr
from antiocr.consts import RESOURCE_PATH
# Module-level setup: logger, page layout, and the font file used when
# rendering text. The page config call is kept ahead of every other
# Streamlit call in this script.
logger = set_logger()
st.set_page_config(layout="wide")

FONT_FP = 'resources/simfang.ttf'

# Only fetch the font when no local copy exists yet.
if not os.path.exists(FONT_FP):
    # Make sure the target directory is there before the download writes to it.
    os.makedirs(os.path.dirname(FONT_FP), exist_ok=True)
    url = 'https://huggingface.co/datasets/breezedeus/cnocr-wx-qr-code/resolve/main/fonts/simfang.ttf'
    download(url, path=FONT_FP, overwrite=True)
def _ocr_model_cache():
    """Return the Streamlit caching decorator to use for the OCR model.

    ``st.cache`` is deprecated in favour of ``st.cache_resource``. The newer
    decorator is used when this Streamlit version provides it; otherwise the
    legacy ``st.cache(allow_output_mutation=True)`` is used, so the app keeps
    working on older installations too.
    """
    cache_resource = getattr(st, 'cache_resource', None)
    if cache_resource is not None:
        return cache_resource
    return st.cache(allow_output_mutation=True)


@_ocr_model_cache()
def get_ocr_model():
    """Return a ``CnOcr`` instance; the cache decorator avoids rebuilding it on every rerun."""
    return CnOcr()
def save_image(img):
    """Show a download button that serves ``img`` as a JPEG file.

    The image is encoded into an in-memory buffer and passed to
    ``st.download_button``; nothing is written to disk here.

    Args:
        img: image object with a PIL-style ``save(fp, format=...)`` method.
    """
    from io import BytesIO

    # JPEG cannot store an alpha channel or a palette, and saving such an
    # image raises instead of converting. Flatten those modes to RGB first;
    # every other mode is passed through exactly as before.
    if getattr(img, 'mode', None) in ('RGBA', 'LA', 'P', 'PA'):
        img = img.convert('RGB')

    buf = BytesIO()
    img.save(buf, format="JPEG")
    st.download_button(
        label="下载图片",
        data=buf.getvalue(),
        file_name="antiOCR.jpeg",
        mime="image/jpeg",
    )
def _foreground_settings():
    """Render the sidebar widgets for the main text and return their values.

    Returns:
        dict of keyword arguments (reverse/pinyin ratios, font size range,
        text colour) passed on to the ``AntiOcr`` call in ``main``.
    """
    st.sidebar.header('输出设置')
    char_reverse_ratio = st.sidebar.slider(
        '文字倒转概率', min_value=0.0, max_value=1.0, value=0.1
    )
    char_to_pinyin_ratio = st.sidebar.slider(
        '文字转拼音概率', min_value=0.0, max_value=1.0, value=0.1
    )
    cols = st.sidebar.columns(2)
    min_font_size = int(cols[0].number_input('最小文字大小', 2, 80, value=15))
    # The lower bound of the max-size input tracks the chosen minimum, so the
    # maximum is always strictly larger than the minimum.
    max_font_size = int(
        cols[1].number_input(
            '最大文字大小', min_font_size + 1, 120, value=max(40, min_font_size + 1)
        )
    )
    text_color = st.sidebar.color_picker('文字颜色', value='#5087DC')
    return dict(
        char_reverse_ratio=char_reverse_ratio,
        char_to_pinyin_ratio=char_to_pinyin_ratio,
        text_color=text_color,
        min_font_size=min_font_size,
        max_font_size=max_font_size,
    )


def _background_settings():
    """Render the sidebar background widgets.

    Returns:
        ``(bg_image, bg_gen_config)``. With the "random background" box
        ticked, ``bg_image`` is None and ``bg_gen_config`` holds the chosen
        generation settings; otherwise ``bg_gen_config`` is None and
        ``bg_image`` is the bundled background picture after a max filter.
    """
    st.sidebar.markdown('----')
    use_random_bg = st.sidebar.checkbox('随机生成背景图片')
    if not use_random_bg:
        bg_image = Image.open('resources/bg.jpeg')
        return bg_image.filter(ImageFilter.MaxFilter(3)), None

    bg_text_density = st.sidebar.slider(
        '背景图片文字密度', min_value=0.0, max_value=3.0, value=1.0
    )
    cols = st.sidebar.columns(2)
    bg_min_size = int(
        cols[0].number_input('背景图片最小文字', 2, 80, key='bg_min', value=15)
    )
    bg_max_size = int(
        cols[1].number_input(
            '背景图片最大文字',
            bg_min_size + 1,
            120,
            key='bg_max',
            value=max(70, bg_min_size + 1),
        )
    )
    bg_text_color = st.sidebar.color_picker('背景图片文字颜色', value='#07BCE0')
    bg_gen_config = dict(
        text_density=bg_text_density,
        text_color=bg_text_color,
        min_font_size=bg_min_size,
        max_font_size=bg_max_size,
    )
    return None, bg_gen_config


def _render_header():
    """Write the page title, author line, description and input subheader."""
    title = '让文字自由传播:<a href="https://github.com/breezedeus/antiOCR">antiOCR</a>'
    st.markdown(f"<h1 style='text-align: center;'>{title}</h1>", unsafe_allow_html=True)
    subtitle = (
        '作者:<a href="https://github.com/breezedeus">breezedeus</a>; '
        '欢迎加入 <a href="https://cnocr.readthedocs.io/zh/latest/contact/">交流群</a>'
    )
    st.markdown(f"<div style='text-align: center;'>{subtitle}</div>", unsafe_allow_html=True)
    # Empty markdown calls are used as vertical spacing.
    st.markdown('')
    st.markdown('')
    desc = '<strong>antiOCR</strong> 对指定的文字(来自输入或者图片)进行处理,输出图片,此图片无法通过OCR技术识别出有意义的文字。'
    st.markdown(f"<div style='text-align: left;'>{desc}</div>", unsafe_allow_html=True)
    st.markdown('')
    st.subheader('选择待转换文字图片,或者直接输入待转换文字')


def _ocr_text(ocr, image):
    """Run ``ocr`` on ``image`` and join the ``'text'`` of each result with newlines."""
    return '\n'.join([out['text'] for out in ocr.ocr(image)])


def _text_from_upload(ocr, content_file):
    """Return the OCR'd text of an uploaded image, or None if there is none.

    Any failure while opening or recognising the image is shown with
    ``st.error`` and reported as None, so the page keeps rendering.
    """
    if content_file is None:
        return None
    try:
        img = Image.open(content_file).convert('RGB')
        return _ocr_text(ocr, img)
    except Exception as e:  # UI boundary: show the problem instead of crashing the page
        st.error(e)
        return None


def main():
    """Build the antiOCR page: settings sidebar, text input, and generated image.

    The text to convert comes from an uploaded image (via OCR) or from the
    text area. When the generate button is pressed the text is rendered with
    ``AntiOcr``, shown, offered for download, and run through OCR again so
    the user can see what a recogniser still reads from it.
    """
    text_style = _foreground_settings()
    bg_image, bg_gen_config = _background_settings()
    _render_header()

    default_texts = '真的猛士,敢于直面惨淡的人生,敢于正视淋漓的鲜血。这是怎样的哀痛者和幸福者?然而造化又常常为庸人设计,以时间的流逝,来洗涤旧迹,仅是留下淡红的血色和微漠的悲哀。在这淡红的血色和微漠的悲哀中,又给人暂得偷生,维持着这似人非人的世界。 ——鲁迅'
    content_file = st.file_uploader('输入待转换的文字图片:', type=["png", "jpg", "jpeg", "webp"])
    ocr = get_ocr_model()
    anti = AntiOcr()

    # Text recognised from the upload pre-fills the text area; without an
    # upload (or when OCR fails) the sample passage is used instead.
    texts = _text_from_upload(ocr, content_file)
    texts = st.text_area('或者,直接输入待转换的文字:', value=texts or default_texts, height=120)

    if not st.button("生成图片"):
        return
    if not texts:
        # Previously an empty input made the button do nothing at all.
        st.warning('请先输入待转换的文字。')
        return

    with st.spinner('图片生成中…'):
        out_img = anti(
            texts,
            bg_image=bg_image,
            bg_gen_config=bg_gen_config,
            font_fp=FONT_FP,
            **text_style,
        )
    st.subheader('输出图片')
    st.image(out_img)
    save_image(out_img)

    st.markdown('**对输出图片进行OCR,结果如下(如果依旧出现敏感词,尝试重新生成图片):**')
    st.text(_ocr_text(ocr, out_img))
# Script entry point: build the page only when this file is run as the main
# program, not when it is imported.
if __name__ == '__main__':
    main()
|