sundeveloper committed on
Commit
e8a1990
·
verified ·
1 Parent(s): cf33acb

Upload app2.py

Browse files
Files changed (1) hide show
  1. app2.py +27 -0
app2.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ from PIL import Image
4
+ import pytesseract
5
+ from docx import Document
6
+
7
+ # Make sure the Tesseract path is correct
8
+ pytesseract.pytesseract.tesseract_cmd = r"Tesseract-OCR/tesseract.exe"
9
+
10
class OCRProcessor:
    """Run Tesseract OCR on images and export the recognized text to .docx.

    Attributes:
        languages: Tesseract language codes this processor lists. The list
            is informational only; ``extract_text`` does not validate its
            ``lang`` argument against it.
    """

    def __init__(self):
        self.languages = ["eng", "hin", "guj", "san", "tam", "tel"]

    def extract_text(self, image_path, lang: str = "eng") -> str:
        """Return the text recognized in the image at *image_path*.

        Args:
            image_path: Passed straight to ``Image.open``.
            lang: Language code forwarded to ``pytesseract.image_to_string``.

        Returns:
            The string produced by ``pytesseract.image_to_string``.

        Errors raised while opening the image or running OCR are not caught
        here and propagate to the caller.
        """
        # Use a context manager so the image's file handle is released as
        # soon as OCR finishes rather than whenever the object is collected.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image, lang=lang)

    def save_as_docx(self, ocr_text, file_path="OCR_Result.docx"):
        """Write *ocr_text* as a single paragraph to a .docx file.

        Args:
            ocr_text: Text to store in the document.
            file_path: Destination path handed to ``Document.save``.

        Returns:
            ``file_path`` when the document was saved; ``None`` when
            *ocr_text* is missing, not a string, or only whitespace (no file
            is written); otherwise the message of the exception raised while
            building or saving the document. Note that the failure message
            is a ``str`` just like a successful path, so callers must compare
            the result against the path they passed in to tell them apart.
        """
        # Guard first: there is no point creating a Document when there is
        # nothing to put in it, and None/non-text input counts as "nothing".
        if not isinstance(ocr_text, str) or not ocr_text.strip():
            return None

        try:
            doc = Document()
            doc.add_paragraph(ocr_text)
            doc.save(file_path)
        except Exception as e:
            # Best-effort contract kept from the original: report the
            # failure as text instead of raising.
            return str(e)
        return file_path