Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from transformers import pipeline
|
|
3 |
from PyPDF2 import PdfReader
|
4 |
import easyocr
|
5 |
from PIL import Image
|
|
|
6 |
from difflib import SequenceMatcher
|
7 |
|
8 |
# Initialize the T5-small pipeline for lightweight generative capabilities
|
@@ -18,9 +19,10 @@ def analyze_pdf(file):
|
|
18 |
|
19 |
def analyze_image(file):
|
20 |
"""Extract text from an image file using EasyOCR."""
|
21 |
-
image = Image.open(file)
|
22 |
-
|
23 |
-
|
|
|
24 |
return " ".join(result) # Combine detected text into a single string
|
25 |
|
26 |
def classify_and_respond(extracted_text):
|
|
|
3 |
from PyPDF2 import PdfReader
|
4 |
import easyocr
|
5 |
from PIL import Image
|
6 |
+
import numpy as np
|
7 |
from difflib import SequenceMatcher
|
8 |
|
9 |
# Initialize the T5-small pipeline for lightweight generative capabilities
|
|
|
19 |
|
20 |
def analyze_image(file):
|
21 |
"""Extract text from an image file using EasyOCR."""
|
22 |
+
image = Image.open(file) # Open the uploaded image
|
23 |
+
image_np = np.array(image) # Convert the PIL image to a NumPy array
|
24 |
+
reader = easyocr.Reader(['en'], gpu=False) # Initialize EasyOCR reader
|
25 |
+
result = reader.readtext(image_np, detail=0) # Extract text without bounding box details
|
26 |
return " ".join(result) # Combine detected text into a single string
|
27 |
|
28 |
def classify_and_respond(extracted_text):
|