"""Streamlit app: upload an image and display its detected document layout.

The uploaded image is run through a pre-trained DocLayout-YOLO (YOLOv10)
model and the annotated result is rendered in the page.
"""

import tempfile
from pathlib import Path

import cv2
import numpy as np
import streamlit as st
import torch
from doclayout_yolo import YOLOv10
from PIL import Image

# Weights file for the pre-trained model, resolved relative to the working dir.
MODEL_WEIGHTS = "doclayout_yolo_docstructbench_imgsz1024.pt"


@st.cache_resource
def load_model() -> YOLOv10:
    """Load the pre-trained model once per server process.

    Streamlit re-executes this script on every interaction; caching the model
    as a resource avoids re-reading the weights on each rerun.
    """
    return YOLOv10(MODEL_WEIGHTS)


def annotate_layout(image: Image.Image) -> np.ndarray:
    """Run layout detection on *image* and return the annotated picture.

    The image is written to a private temporary directory for inference, so
    concurrent sessions never overwrite each other's input, and the file is
    removed as soon as prediction and plotting are done.

    Args:
        image: The RGB image to analyse.

    Returns:
        The annotated image as a NumPy array, ready for ``st.image``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = Path(tmp_dir) / "temp_input.jpg"
        image.save(image_path)

        det_res = model.predict(
            str(image_path),
            imgsz=1024,
            conf=0.2,
            device=device,
        )

        # Annotate while the temporary input still exists.
        annotated_frame = det_res[0].plot(pil=True, line_width=5, font_size=20)

    # Convert the annotated PIL image to a displayable array.
    return np.array(annotated_frame)


# Load the pre-trained model (cached across reruns).
model = load_model()

# Automatically select device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Streamlit UI
st.title("Document Layout Detection")
st.subheader("Upload an image to detect and annotate document layout")

uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Normalise to RGB so the image can always be saved as JPEG.
    image = Image.open(uploaded_file).convert("RGB")

    with st.spinner("Processing..."):
        annotated_image = annotate_layout(image)

    st.image(annotated_image, caption="Annotated Image", use_container_width=True)
    st.success("Detection completed!")

st.markdown("**Application Created By Shubham Mhaske**")
st.write("Do have a look on Papers 📄 : - https://arxiv.org/pdf/2410.12628")
st.write("Thanks to https://github.com/opendatalab")