Tonic committed on
Commit
b332619
1 Parent(s): 852d108

Delete ocr_image.py

Browse files
Files changed (1) hide show
  1. ocr_image.py +0 -67
ocr_image.py DELETED
@@ -1,67 +0,0 @@
1
- import argparse
2
- import os.path
3
-
4
- from texify.inference import batch_inference
5
- from texify.model.model import load_model
6
- from texify.model.processor import load_processor
7
- from PIL import Image
8
- from texify.settings import settings
9
- from texify.util import is_valid_image
10
- import json
11
-
12
-
13
- def inference_single_image(image_path, json_path, model, processor):
14
- image = Image.open(image_path)
15
- text = batch_inference([image], model, processor)
16
- write_data = [{"image_path": image_path, "text": text[0]}]
17
- with open(json_path, "w+") as f:
18
- json_repr = json.dumps(write_data, indent=4)
19
- f.write(json_repr)
20
-
21
-
22
- def inference_image_dir(image_dir, json_path, model, processor, max=None):
23
- image_paths = [os.path.join(image_dir, image_name) for image_name in os.listdir(image_dir)]
24
- image_paths = [ip for ip in image_paths if is_valid_image(ip)]
25
- if max:
26
- image_paths = image_paths[:max]
27
-
28
- write_data = []
29
- for i in range(0, len(image_paths), settings.BATCH_SIZE):
30
- batch = image_paths[i:i+settings.BATCH_SIZE]
31
- images = [Image.open(image_path) for image_path in batch]
32
- text = batch_inference(images, model, processor)
33
- for image_path, t in zip(batch, text):
34
- write_data.append({"image_path": image_path, "text": t})
35
-
36
- with open(json_path, "w+") as f:
37
- json_repr = json.dumps(write_data, indent=4)
38
- f.write(json_repr)
39
-
40
-
41
- def main():
42
- parser = argparse.ArgumentParser(description="OCR an image of a LaTeX equation.")
43
- parser.add_argument("image", type=str, help="Path to image or folder of images to OCR.")
44
- parser.add_argument("--max", type=int, help="Maximum number of images to OCR if a folder is passes.", default=None)
45
- parser.add_argument("--json_path", type=str, help="Path to JSON file to save results to.", default=os.path.join(settings.DATA_DIR, "results.json"))
46
- args = parser.parse_args()
47
-
48
- image_path = args.image
49
- model = load_model()
50
- processor = load_processor()
51
-
52
- json_path = os.path.abspath(args.json_path)
53
- os.makedirs(os.path.dirname(json_path), exist_ok=True)
54
-
55
- if os.path.isfile(image_path):
56
- inference_single_image(image_path, json_path, model, processor)
57
- else:
58
- inference_image_dir(image_path, json_path, model, processor, args.max)
59
-
60
- print(f"Wrote results to {json_path}")
61
-
62
-
63
- if __name__ == "__main__":
64
- main()
65
-
66
-
67
-