Spaces:

atlury
/

document-layout-comparison

Sleeping

File size: 2,013 Bytes

b764ffe
 
 
 
73cd058
b764ffe
db520f8
682c5ed
 
 
 
 
 
 
 
73cd058
4dee5e9
b764ffe
 
 
 
 
 
 
 
 
682c5ed
 
 
 
 
b764ffe
 
4dee5e9
 
 
682c5ed
 
 
 
 
 
4dee5e9
 
 
682c5ed
4dee5e9
 
 
 
682c5ed
4dee5e9
 
 
b764ffe
 
 
682c5ed

import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
import os
import requests
import torch
import huggingface_hub

# Decide whether inference should be routed through a Hugging Face Accelerate
# device.
# NOTE(review): despite its name, this flag is set from Colab/notebook
# detection helpers, not from a ZeroGPU probe -- confirm that this is the
# intended condition.
in_colab = huggingface_hub.utils.is_google_colab()
zero_gpu_is_available = in_colab or huggingface_hub.utils.is_notebook()

if zero_gpu_is_available:
    # Imported lazily so that `accelerate` is only required when the flag is set.
    from accelerate import Accelerator

    # Module-level handle; `accelerator.device` is read by the code below.
    accelerator = Accelerator()


# Local file name of the YOLO checkpoint used for document layout detection.
model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
if not os.path.exists(model_path):
    # Fetch the checkpoint once; later runs reuse the file on disk.
    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
    # Write to a temporary name and rename only after a complete, successful
    # transfer. Otherwise an HTTP error page or a truncated download would be
    # saved under model_path and silently reused on every later start.
    partial_path = model_path + ".part"
    with requests.get(model_url, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of saving the error body as a model.
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            # Stream in 1 MiB chunks so the whole checkpoint is never held
            # in memory at once.
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_path, model_path)

# Load the document segmentation model from the local checkpoint.
docseg_model = YOLO(model_path)

if zero_gpu_is_available:
    # Put the model on the accelerator's device.
    docseg_model.to(accelerator.device)


def process_image(image):
    """Run layout detection on one image and summarise what was found.

    Args:
        image: Input picture in RGB channel order, in any form that
            ``np.array`` can convert to an H x W x 3 array (presumably the
            PIL image handed over by the Gradio interface -- TODO confirm
            against the interface code).

    Returns:
        A tuple ``(annotated_img, detected_areas_labels)``:

        * ``annotated_img`` -- RGB array with the detections drawn on it.
        * ``detected_areas_labels`` -- one ``"LABEL: confidence"`` line per
          detected box, joined with newlines (empty string if nothing was
          detected).

        If anything fails, ``(None, "Error during processing: <reason>")``
        is returned instead of raising, so the caller can show the message.
    """
    try:
        # The model is given a BGR array (OpenCV channel order), so convert
        # from the RGB input first.
        bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Pass the ndarray straight to predict(). Wrapping this HWC uint8
        # array in a torch tensor is wrong: Ultralytics documents tensor
        # input as already preprocessed (BCHW, RGB, float in 0..1).
        results = docseg_model.predict(bgr_image)
        result = results[0]  # One input image -> one result.

        # result.plot() draws on a BGR image; convert back to RGB for display.
        annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)

        # Boxes expose class ids (`cls`) and scores (`conf`); the readable
        # class name comes from the result's id -> name mapping.
        class_ids = result.boxes.cls.tolist()
        confidences = result.boxes.conf.tolist()
        detected_areas_labels = "\n".join(
            f"{result.names[int(class_id)].upper()}: {confidence:.2f}"
            for class_id, confidence in zip(class_ids, confidences)
        )
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        return None, f"Error during processing: {e}"

    return annotated_img, detected_areas_labels

# The rest of the code remains the same (Gradio interface)