Annas Dev committed
Commit 8cb5b3c
1 Parent(s): d0dae0b

add basic files
.gitignore CHANGED
@@ -1,2 +1,3 @@
 .env
-venv
+venv
+tmp
app.py CHANGED
@@ -1,15 +1,45 @@
 import gradio as gr
+from dotenv import load_dotenv
+import os
+import torch
+import warnings
+from PIL import Image
+from util import file_helper
+from inference.ocr import prepare_batch_for_inference
+from inference.inference_handler import handle
 
+os.system('sudo apt install -y -q tesseract-ocr')
+os.system('sudo apt install -y -q libtesseract-dev')
+
+load_dotenv()
 
 def get_model():
-    return ''
+    model_dir = "tmp"
+    model_filename = 'receipt.pth'
+    full_path = os.path.join(model_dir, model_filename)
+    if os.path.isfile(full_path):
+        return full_path
+
+    return file_helper.download_gdrive(os.getenv('MODEL_ID'), model_dir, model_filename)
+
+def run_inference(model_path, images_path):
+    try:
+        inference_batch = prepare_batch_for_inference(images_path)
+        context = {"model_dir": model_path}
+        print('handle....')
+        handle(inference_batch, context)
+    except Exception as err:
+        print('err...', err)
 
+def run(img_path):
+    print('img path: ', img_path)
+    model_path = get_model()
+    run_inference(model_path, [img_path])
 
-def run(img):
-    print('running...')
-    return img
+    return Image.open(img_path)
 
 
 gr.Markdown('Upload Foto Wajah Kamu (Pastikan hanya terdapat SATU wajah pada)')
-iface = gr.Interface(fn=run, inputs=["image"], outputs="image")
+iface = gr.Interface(fn=run, inputs=gr.Image(type="filepath"), outputs="image")
 iface.launch()
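Note that the gr.Markdown call above is created outside any Blocks context, so it never renders (its Indonesian text translates to "Upload a photo of your face (make sure there is only ONE face in it)"); the Interface with a filepath-typed image input is what serves run(). A hedged smoke test of the new flow, assuming run() is in scope, MODEL_ID is set in .env, tesseract-ocr is installed, and a hypothetical sample image exists:

# Exercising run() directly (hypothetical path): get_model() caches the
# checkpoint as tmp/receipt.pth, run_inference() OCRs the image and invokes
# the handler, and the original PIL image is returned unchanged.
out = run("samples/receipt.jpg")
out.save("receipt_echo.jpg")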
inference/__pycache__/annotate_image.cpython-38.pyc ADDED
Binary file (1.85 kB)

inference/__pycache__/inference_handler.cpython-38.pyc ADDED
Binary file (6.55 kB)

inference/__pycache__/ocr.cpython-38.pyc ADDED
Binary file (2.65 kB)

inference/__pycache__/utils.cpython-38.pyc ADDED
Binary file (2.4 kB)
inference/annotate_image.py ADDED
@@ -0,0 +1,50 @@
+import os
+from PIL import Image, ImageDraw, ImageFont
+from .utils import image_label_2_color
+
+
+def get_flattened_output(docs):
+    flattened_output = []
+    annotation_key = 'output'
+    for doc in docs:
+        flattened_output_item = {annotation_key: []}
+        doc_annotation = doc[annotation_key]
+        for i, span in enumerate(doc_annotation):
+            if len(span['words']) > 1:
+                for span_chunk in span['words']:
+                    flattened_output_item[annotation_key].append(
+                        {
+                            'label': span['label'],
+                            'text': span_chunk['text'],
+                            'words': [span_chunk]
+                        }
+                    )
+            else:
+                flattened_output_item[annotation_key].append(span)
+        flattened_output.append(flattened_output_item)
+    return flattened_output
+
+
+def annotate_image(image_path, annotation_object):
+    img = None
+    image = Image.open(image_path).convert('RGBA')
+    tmp = image.copy()
+    label2color = image_label_2_color(annotation_object)
+    overlay = Image.new('RGBA', tmp.size, (0, 0, 0) + (0,))
+    draw = ImageDraw.Draw(overlay)
+    font = ImageFont.load_default()
+
+    predictions = [span['label'] for span in annotation_object['output']]
+    boxes = [span['words'][0]['box'] for span in annotation_object['output']]
+    for prediction, box in zip(predictions, boxes):
+        draw.rectangle(box, outline=label2color[prediction],
+                       width=3, fill=label2color[prediction] + (int(255 * 0.33),))
+        draw.text((box[0] + 10, box[1] - 10), text=prediction,
+                  fill=label2color[prediction], font=font)
+
+    img = Image.alpha_composite(tmp, overlay)
+    img = img.convert("RGB")
+
+    image_name = os.path.basename(image_path)
+    image_name = image_name[:image_name.find('.')]
+    img.save(f'/content/{image_name}_inference.jpg')
inference/inference_handler.py ADDED
@@ -0,0 +1,196 @@
+from .utils import load_model, load_processor, normalize_box, compare_boxes, adjacent
+from .annotate_image import get_flattened_output, annotate_image
+from PIL import Image, ImageDraw, ImageFont
+import logging
+import torch
+import json
+
+
+
+logger = logging.getLogger(__name__)
+
+class ModelHandler(object):
+    """
+    A base Model handler implementation.
+    """
+
+    def __init__(self):
+        self.model = None
+        self.model_dir = None
+        self.device = 'cpu'
+        self.error = None
+        # self._context = None
+        # self._batch_size = 0
+        self.initialized = False
+        self._raw_input_data = None
+        self._processed_data = None
+        self._images_size = None
+
+    def initialize(self, context):
+        """
+        Initialize model. This will be called during model loading time.
+        :param context: Initial context contains model server system properties.
+        :return:
+        """
+        logger.info("Loading transformer model")
+
+        self._context = context
+        properties = self._context
+        # self._batch_size = properties["batch_size"] or 1
+        self.model_dir = properties.get("model_dir")
+        self.model = self.load(self.model_dir)
+        self.initialized = True
+
+    def preprocess(self, batch):
+        """
+        Transform raw input into model input data.
+        :param batch: list of raw requests, should match batch size
+        :return: list of preprocessed model input data
+        """
+        # Take the input data and pre-process it to make it inference ready
+        # assert self._batch_size == len(batch), "Invalid input batch size: {}".format(len(batch))
+        inference_dict = batch
+        self._raw_input_data = inference_dict
+        processor = load_processor()
+        images = [Image.open(path).convert("RGB")
+                  for path in inference_dict['image_path']]
+        self._images_size = [img.size for img in images]
+        words = inference_dict['words']
+        boxes = [[normalize_box(box, images[i].size[0], images[i].size[1])
+                  for box in doc] for i, doc in enumerate(inference_dict['bboxes'])]
+        encoded_inputs = processor(
+            images, words, boxes=boxes, return_tensors="pt", padding="max_length", truncation=True)
+        self._processed_data = encoded_inputs
+        return encoded_inputs
+
+    def load(self, model_dir):
+        """The load handler is responsible for loading the Hugging Face transformer model.
+        Returns:
+            hf_pipeline (Pipeline): A Hugging Face Transformers pipeline.
+        """
+        # TODO model dir should be microsoft/layoutlmv2-base-uncased
+        model = load_model(model_dir)
+        return model
+
+    def inference(self, model_input):
+        """
+        Internal inference methods.
+        :param model_input: transformed model input data
+        :return: list of inference output in NDArray
+        """
+        # TODO load the model state_dict before running the inference
+        # Do some inference call to engine here and return output
+        with torch.no_grad():
+            inference_outputs = self.model(**model_input)
+            predictions = inference_outputs.logits.argmax(-1).tolist()
+        results = []
+        for i in range(len(predictions)):
+            tmp = dict()
+            tmp[f'output_{i}'] = predictions[i]
+            results.append(tmp)
+
+        return [results]
+
+    def postprocess(self, inference_output):
+        docs = []
+        k = 0
+        for page, doc_words in enumerate(self._raw_input_data['words']):
+            doc_list = []
+            width, height = self._images_size[page]
+            for i, doc_word in enumerate(doc_words, start=0):
+                word_tagging = None
+                word_labels = []
+                word = dict()
+                word['id'] = k
+                k += 1
+                word['text'] = doc_word
+                word['pageNum'] = page + 1
+                word['box'] = self._raw_input_data['bboxes'][page][i]
+                _normalized_box = normalize_box(
+                    self._raw_input_data['bboxes'][page][i], width, height)
+                for j, box in enumerate(self._processed_data['bbox'].tolist()[page]):
+                    if compare_boxes(box, _normalized_box):
+                        if self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]] != 'O':
+                            word_labels.append(
+                                self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]][2:])
+                        else:
+                            word_labels.append('other')
+                if word_labels != []:
+                    word_tagging = word_labels[0] if word_labels[0] != 'other' else word_labels[-1]
+                else:
+                    word_tagging = 'other'
+                word['label'] = word_tagging
+                word['pageSize'] = {'width': width, 'height': height}
+                if word['label'] != 'other':
+                    doc_list.append(word)
+            spans = []
+            def adjacents(entity): return [
+                adj for adj in doc_list if adjacent(entity, adj)]
+            output_test_tmp = doc_list[:]
+            for entity in doc_list:
+                if adjacents(entity) == []:
+                    spans.append([entity])
+                    output_test_tmp.remove(entity)
+
+            while output_test_tmp != []:
+                span = [output_test_tmp[0]]
+                output_test_tmp = output_test_tmp[1:]
+                while output_test_tmp != [] and adjacent(span[-1], output_test_tmp[0]):
+                    span.append(output_test_tmp[0])
+                    output_test_tmp.remove(output_test_tmp[0])
+                spans.append(span)
+
+            output_spans = []
+            for span in spans:
+                if len(span) == 1:
+                    output_span = {"text": span[0]['text'],
+                                   "label": span[0]['label'],
+                                   "words": [{
+                                       'id': span[0]['id'],
+                                       'box': span[0]['box'],
+                                       'text': span[0]['text']
+                                   }],
+                                   }
+                else:
+                    output_span = {"text": ' '.join([entity['text'] for entity in span]),
+                                   "label": span[0]['label'],
+                                   "words": [{
+                                       'id': entity['id'],
+                                       'box': entity['box'],
+                                       'text': entity['text']
+                                   } for entity in span]
+
+                                   }
+                output_spans.append(output_span)
+            docs.append({f'output': output_spans})
+        return [json.dumps(docs, ensure_ascii=False)]
+
+    def handle(self, data, context):
+        """
+        Call preprocess, inference and post-process functions.
+        :param data: input data
+        :param context: mms context
+        """
+        model_input = self.preprocess(data)
+        model_out = self.inference(model_input)
+        inference_out = self.postprocess(model_out)[0]
+        with open('LayoutlMV3InferenceOutput.json', 'w') as inf_out:
+            inf_out.write(inference_out)
+        inference_out_list = json.loads(inference_out)
+        flattened_output_list = get_flattened_output(inference_out_list)
+        for i, flattened_output in enumerate(flattened_output_list):
+            annotate_image(data['image_path'][i], flattened_output)
+
+
+
+_service = ModelHandler()
+
+
+def handle(data, context):
+    if not _service.initialized:
+        _service.initialize(context)
+
+    if data is None:
+        return None
+
+    return _service.handle(data, context)
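The module-level handle() pairs the batch dict built by inference/ocr.py with a context whose only required key is "model_dir" (read in ModelHandler.initialize via properties.get). A hedged sketch with hypothetical paths:

from inference.ocr import prepare_batch_for_inference
from inference.inference_handler import handle

# Hypothetical image; the batch carries "image_path", "words" and "bboxes".
batch = prepare_batch_for_inference(["samples/receipt.jpg"])
handle(batch, {"model_dir": "tmp/receipt.pth"})
# Side effects: writes LayoutlMV3InferenceOutput.json plus one annotated
# /content/<name>_inference.jpg per input image.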
inference/ocr.py ADDED
@@ -0,0 +1,60 @@
+import os
+import pandas as pd
+
+def is_image(img_path):
+    ext = os.path.splitext(img_path)[1]
+    result = ext == ".jpg" or ext == ".png"
+    if not result: print('NOT IMAGE: ', img_path)
+    return result
+
+def run_tesseract_on_image(image_path):  # -> tsv output path
+    print('--- run tesseract on ', image_path)
+    image_name = os.path.basename(image_path)
+    image_name = image_name[:image_name.find('.')]
+    error_code = os.system(f'''
+    tesseract "{image_path}" "/content/{image_name}" -l eng tsv
+    ''')
+    if not error_code:
+        return f"/content/{image_name}.tsv"
+    else:
+        raise ValueError('Tesseract OCR Error please verify image format PNG,JPG,JPEG')
+
+
+def clean_tesseract_output(tsv_output_path):
+    print('clean tesseract output for: ', tsv_output_path)
+    ocr_df = pd.read_csv(tsv_output_path, sep='\t')
+    ocr_df = ocr_df.dropna()
+    ocr_df = ocr_df.drop(ocr_df[ocr_df.text.str.strip() == ''].index)
+    text_output = ' '.join(ocr_df.text.tolist())
+    words = []
+    for index, row in ocr_df.iterrows():
+        word = {}
+        origin_box = [row['left'], row['top'], row['left'] +
+                      row['width'], row['top'] + row['height']]
+        word['word_text'] = row['text']
+        word['word_box'] = origin_box
+        words.append(word)
+    return words
+
+
+def prepare_batch_for_inference(image_paths):
+    # tesseract_outputs is a list of paths
+    inference_batch = dict()
+    tesseract_outputs = [run_tesseract_on_image(
+        image_path) for image_path in image_paths if (is_image(image_path))]
+
+    print('tesseract has run on all images...')
+    # clean_outputs is a list of lists
+    clean_outputs = [clean_tesseract_output(
+        tsv_path) for tsv_path in tesseract_outputs]
+    word_lists = [[word['word_text'] for word in clean_output]
+                  for clean_output in clean_outputs]
+    boxes_lists = [[word['word_box'] for word in clean_output]
+                   for clean_output in clean_outputs]
+    inference_batch = {
+        "image_path": image_paths,
+        "bboxes": boxes_lists,
+        "words": word_lists
+    }
+    print('inference_batch:', inference_batch)
+    return inference_batch
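For reference, clean_tesseract_output turns each TSV row into a dict pairing the OCR text with its pixel box; a sketch of one entry with hypothetical values:

# Boxes are [left, top, left + width, top + height] in image pixels.
word = {
    "word_text": "TOTAL",
    "word_box": [34, 510, 98, 532],
}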
inference/utils.py ADDED
@@ -0,0 +1,50 @@
+import numpy as np
+from transformers import AutoModelForTokenClassification, AutoProcessor
+
+def normalize_box(bbox, width, height):
+    return [
+        int(bbox[0] * (1000 / width)),
+        int(bbox[1] * (1000 / height)),
+        int(bbox[2] * (1000 / width)),
+        int(bbox[3] * (1000 / height)),
+    ]
+
+def compare_boxes(b1, b2):
+    b1 = np.array([c for c in b1])
+    b2 = np.array([c for c in b2])
+    equal = np.array_equal(b1, b2)
+    return equal
+
+def unnormalize_box(bbox, width, height):
+    return [
+        width * (bbox[0] / 1000),
+        height * (bbox[1] / 1000),
+        width * (bbox[2] / 1000),
+        height * (bbox[3] / 1000),
+    ]
+
+def adjacent(w1, w2):
+    if w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1:
+        return True
+    return False
+
+def random_color():
+    return np.random.randint(0, 255, 3)
+
+def image_label_2_color(annotation):
+    if 'output' in annotation.keys():
+        image_labels = set([span['label'] for span in annotation['output']])
+        label2color = {f'{label}': (random_color()[0], random_color()[
+            1], random_color()[2]) for label in image_labels}
+        return label2color
+    else:
+        raise ValueError('please use "output" as annotation key')
+
+def load_model(model_path):
+    model = AutoModelForTokenClassification.from_pretrained(model_path)
+    return model
+
+def load_processor():
+    processor = AutoProcessor.from_pretrained(
+        "microsoft/layoutlmv3-base", apply_ocr=False)
+    return processor
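A worked example of the 0-1000 coordinate scaling these helpers implement (LayoutLM-style processors expect boxes normalized to a 1000x1000 grid):

# On a 500x1000-pixel page, x coordinates scale by 1000/500 = 2 and y by 1.
assert normalize_box([50, 100, 200, 300], 500, 1000) == [100, 100, 400, 300]
# unnormalize_box maps back to pixels (as floats):
assert unnormalize_box([100, 100, 400, 300], 500, 1000) == [50.0, 100.0, 200.0, 300.0]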
requirements.txt CHANGED
@@ -3,39 +3,76 @@ aiosignal==1.3.1
 altair==4.2.0
 anyio==3.6.2
 async-timeout==4.0.2
+attrs==22.1.0
+beautifulsoup4==4.11.1
+certifi==2022.12.7
+charset-normalizer==2.1.1
 click==8.1.3
 contourpy==1.0.6
 cycler==0.11.0
+entrypoints==0.4
 fastapi==0.88.0
 ffmpy==0.3.0
+filelock==3.8.2
 fonttools==4.38.0
 frozenlist==1.3.3
 fsspec==2022.11.0
+gdown==4.6.0
 gradio==3.14.0
 h11==0.14.0
 httpcore==0.16.2
 httpx==0.23.1
+huggingface-hub==0.11.1
+idna==3.4
+importlib-resources==5.10.1
+Jinja2==3.1.2
+jsonschema==4.17.3
 kiwisolver==1.4.4
 linkify-it-py==1.0.3
 markdown-it-py==2.1.0
+MarkupSafe==2.1.1
 matplotlib==3.6.2
 mdit-py-plugins==0.3.3
 mdurl==0.1.2
 multidict==6.0.3
 numpy==1.23.5
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
 orjson==3.8.3
+packaging==22.0
 pandas==1.5.2
 Pillow==9.3.0
+pkgutil_resolve_name==1.3.10
 pycryptodome==3.16.0
 pydantic==1.10.2
 pydub==0.25.1
+pyparsing==3.0.9
+pyrsistent==0.19.2
+PySocks==1.7.1
+pytesseract==0.3.10
+python-dateutil==2.8.2
+python-dotenv==0.21.0
 python-multipart==0.0.5
 pytz==2022.7
+PyYAML==6.0
+regex==2022.10.31
+requests==2.28.1
 rfc3986==1.5.0
+six==1.16.0
 sniffio==1.3.0
+soupsieve==2.3.2.post1
 starlette==0.22.0
+tokenizers==0.13.2
 toolz==0.12.0
+torch==1.13.1
+tqdm==4.64.1
+transformers @ git+https://github.com/huggingface/transformers.git@7032e0203262ebb2ebf55da8d2e01f873973e835
+typing_extensions==4.4.0
 uc-micro-py==1.0.1
+urllib3==1.26.13
 uvicorn==0.20.0
 websockets==10.4
 yarl==1.8.2
+zipp==3.11.0
util/__pycache__/file_helper.cpython-38.pyc ADDED
Binary file (521 Bytes)
util/file_helper.py ADDED
@@ -0,0 +1,15 @@
+import gdown
+import os
+
+
+def download_gdrive(id, dir=".", filename=None):
+    print('download...')
+    tmp_filename = gdown.download(id=id, quiet=True)
+    if filename is None:
+        filename = tmp_filename
+
+    file_path = f'{dir}/{filename}'
+
+    if os.path.isdir(dir) == False: os.mkdir(dir)
+    os.replace(tmp_filename, file_path)
+    return file_path
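A minimal usage sketch for download_gdrive, with a hypothetical Drive file id: gdown downloads into the working directory, then the file is moved under dir/filename (mirroring the get_model() call in app.py):

from util import file_helper

# Hypothetical id; app.py passes os.getenv('MODEL_ID') here.
path = file_helper.download_gdrive("1AbCdEfGhIjK", dir="tmp", filename="receipt.pth")
print(path)  # tmp/receipt.pth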