Update app.py
app.py
CHANGED
@@ -178,18 +178,20 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def ocr(image):
     tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
     model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).to(device)
+    # Check if the input is a numpy array and convert to PIL Image
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Ensure the input is a PIL Image
+    elif not isinstance(image, Image.Image):
+        raise ValueError("Input must be a numpy.ndarray or a PIL.Image.")
 
-    # Save the image to a temporary file
-    image.save(temp_image_path)
+    # Save the image to a BytesIO stream
+    image_bytes = io.BytesIO()
+    image.save(image_bytes, format='JPEG')
+    image_bytes.seek(0)  # Move the cursor to the start of the stream
 
-    # Perform OCR on the image using the temporary file
-    res = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
-    # Clean up the temporary file
-    os.remove(temp_image_path)
+    # Perform OCR on the image using the BytesIO stream
+    res = model.chat(tokenizer, image_bytes, ocr_type='ocr')  # Check if the model supports BytesIO input
 
     # Return the extracted text
     return res
@@ -217,7 +219,7 @@ iface_out = gr.Interface(
 iface_ocr = gr.Interface(
     fn=ocr,
     inputs=gr.Image(type="numpy", label="Upload Image"),
-    outputs="
+    outputs=gr.Textbox(label="Extracted Text"),
     api_name="ocr",  # This explicitly sets the api_name
     title="OCR Image Text Extraction",
     description="Upload an image and extract text using the OCR model."
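Because api_name="ocr" is set explicitly, the endpoint can be exercised programmatically once the Space is running. A rough client-side sketch, assuming a recent gradio_client release; the Space id and image path below are placeholders:

from gradio_client import Client, handle_file

# Placeholder Space id; replace with the actual Space, e.g. "username/space-name"
client = Client("username/space-name")

# Calls the interface registered with api_name="ocr"; the image is sent
# as a file and the extracted text comes back as a string.
result = client.predict(
    handle_file("sample.jpg"),  # placeholder local image path
    api_name="/ocr",
)
print(result)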
|