ucaslcl committed on
Commit
9062b8b
·
verified ·
1 Parent(s): e7ea699

Delete run_ocr.py

Browse files
Files changed (1) hide show
  1. run_ocr.py +0 -74
run_ocr.py DELETED
@@ -1,74 +0,0 @@
1
- import argparse
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
- import torch
4
- import os
5
- from .conversation import conv_templates, SeparatorStyle
6
- from transformers import CLIPVisionModel, CLIPImageProcessor, StoppingCriteria
7
- from .utils import KeywordsStoppingCriteria, disable_torch_init
8
-
9
- from PIL import Image
10
-
11
- import os
12
- import requests
13
- from PIL import Image
14
- from io import BytesIO
15
- from .blip_process import BlipImageEvalProcessor
16
-
17
-
18
- from transformers import TextStreamer
19
- import re
20
- import string
21
-
22
-
23
- import string
24
-
25
# Maps CJK punctuation marks to their ASCII counterparts; the mapping is
# passed to ``str.maketrans`` further down in this module.
# Keys are written as escapes so the full-width characters cannot be
# silently normalised to their ASCII look-alikes (an ASCII "," -> ","
# entry would be a no-op in the translation table).
punctuation_dict = {
    "\uff0c": ",",  # full-width comma -> ASCII comma
    "\u3002": ".",  # ideographic full stop -> ASCII period
}
29
-
30
-
31
def svg_to_html(svg_content, output_filename):
    """Wrap SVG markup in a minimal HTML page and write it to a file.

    Args:
        svg_content: SVG markup. It is interpolated verbatim (no escaping)
            inside a fixed-size ``<svg width="2100" height="15000">`` element.
        output_filename: Path of the HTML file to create or overwrite.

    Returns:
        None. The only effect is the file written to ``output_filename``.
    """
    html_content = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SVG Embedded in HTML</title>
</head>
<body>
<svg width="2100" height="15000" xmlns="http://www.w3.org/2000/svg">
{svg_content}
</svg>
</body>
</html>
"""

    # The page declares charset UTF-8, so encode the file as UTF-8 explicitly
    # instead of relying on the platform default, which may be unable to
    # encode non-ASCII content or may disagree with the <meta> tag.
    with open(output_filename, "w", encoding="utf-8") as file:
        file.write(html_content)
51
-
52
-
53
-
54
# Special token strings kept as module-level constants.
# NOTE(review): their consumers are not visible in this part of the file;
# presumably they are image placeholder markers used in prompts — confirm
# against the callers.
DEFAULT_IMAGE_TOKEN = "<image>"
DEFAULT_IMAGE_PATCH_TOKEN = "<imgpad>"
DEFAULT_IM_START_TOKEN = "<img>"
DEFAULT_IM_END_TOKEN = "</img>"

# Table for ``str.translate`` built from the ``punctuation_dict`` mapping.
translation_table = str.maketrans(punctuation_dict)
63
-
64
-
65
def load_image(image_file, *, timeout=30):
    """Load an image from a URL or a local path and return it in RGB mode.

    Args:
        image_file: Either an ``http://`` / ``https://`` URL or a filesystem
            path accepted by ``Image.open``.
        timeout: Seconds to wait for the HTTP server before giving up. Only
            used when ``image_file`` is a URL.

    Returns:
        The opened image after ``.convert('RGB')``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Require the scheme separator so that local paths which merely begin
    # with "http" (e.g. "http_cache/a.png") are still read from disk.
    if image_file.startswith(("http://", "https://")):
        # Without a timeout, requests can block indefinitely on a stalled host.
        response = requests.get(image_file, timeout=timeout)
        # Fail on 4xx/5xx here rather than handing an error page to PIL.
        response.raise_for_status()
        source = BytesIO(response.content)
    else:
        source = image_file
    return Image.open(source).convert('RGB')
72
-
73
-
74
-