File size: 1,809 Bytes
1233b47
 
2dea056
1233b47
 
705192b
 
2dea056
1233b47
 
 
 
 
 
 
 
 
 
 
 
 
2dea056
705192b
 
 
 
 
 
 
 
 
 
 
 
8f8226b
705192b
 
 
 
 
 
 
 
 
 
 
 
 
 
2dea056
 
1233b47
705192b
1233b47
2dea056
4394dea
2dea056
 
1233b47
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import re
import jaconv
import gradio as gr
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
from PIL import Image
import torch, time
import spaces

# Single checkpoint on the Hugging Face Hub that provides all three pieces
# loaded below (tokenizer, model weights, image preprocessor).
MODEL_ID = "kha-white/manga-ocr-base"

# Decodes generated token ids back into text (see manga_ocr).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Vision encoder-decoder network; moved to the CUDA device once at import
# time so every request reuses the same placement.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
model.to("cuda")

# Turns a PIL image into the `pixel_values` tensor the model consumes.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)

def post_process(text):
  """Normalise raw decoder output into the final OCR string.

  Steps, in order:
    1. Drop every whitespace character.
    2. Expand the single-character ellipsis into three ASCII dots.
    3. Rewrite any run of two or more '・' / '.' characters as the same
       number of ASCII dots.
    4. Pass the result through ``jaconv.h2z`` with ASCII and digit
       conversion enabled.

  Args:
    text: Decoded model output.

  Returns:
    The normalised string.
  """
  # str.split() with no argument splits on any whitespace, so re-joining the
  # pieces removes spaces, tabs and newlines alike.
  compact = ''.join(text.split()).replace('…', '...')

  # Each matched run is replaced by as many '.' as it had characters, so the
  # overall length of the string is preserved.
  dotted = re.sub('[・.]{2,}', lambda run: '.' * len(run.group()), compact)

  return jaconv.h2z(dotted, ascii=True, digit=True)

# @spaces.GPU
# def manga_ocr(img):
#   img = img.convert('L').convert('RGB')
#   pixel_values = feature_extractor(img, return_tensors="pt").pixel_values.to("cuda")
#   start_time = time.time()
#   output = model.generate(pixel_values)[0]
#   print("Time taken for OCR:", time.time() - start_time)
#   text = tokenizer.decode(output, skip_special_tokens=True)
#   text = post_process(text)
#   return text


# NOTE(review): `duration=8` is the GPU time requested per call from the
# `spaces` decorator; confirm it is enough when many files are uploaded at once.
@spaces.GPU(duration=8)
def manga_ocr(imgs):
  """Run OCR over a collection of uploaded image files.

  Args:
    imgs: Iterable of image sources that ``PIL.Image.open`` accepts (the
      Gradio file input supplies these). ``None`` or an empty collection
      means there is nothing to process.

  Returns:
    The post-processed text of every image, in input order, joined with
    ``"|||"``. An empty string when no images were supplied.
  """
  # The file input hands over None when the form is submitted without an
  # upload; iterating it would raise TypeError. Returning "" matches what an
  # empty list already produced via "|||".join([]).
  if not imgs:
    return ""

  texts = []
  for img in imgs:
    # Release the file handle as soon as the pixels are converted; the
    # converted image is an independent in-memory copy.
    with Image.open(img) as source:
      # Round-trip through greyscale: discards colour while keeping the
      # three-channel layout.
      image = source.convert('L').convert('RGB')

    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to("cuda")

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    start_time = time.perf_counter()
    output = model.generate(pixel_values)[0]
    print("Time taken for OCR:", time.perf_counter() - start_time)

    text = tokenizer.decode(output, skip_special_tokens=True)
    texts.append(post_process(text))
  return "|||".join(texts)


# Web UI: a multi-file upload restricted to images, wired to manga_ocr, with
# the joined OCR result shown as plain text.
iface = gr.Interface(
    fn=manga_ocr,
    inputs=gr.File(file_types=["image"], file_count="multiple"),
    outputs="text",
    title="Manga OCR",
    description="Extract Manga in lightning speed ⚡",
)

# Start serving the interface as soon as the script runs.
iface.launch()