omniparser-fast / app.py
gauthambalraj07@gmail.com
sc
d017f4c
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
import io
from omniparser import Omniparser
import os
import base64
# FastAPI application object; the route decorators in this module register on it.
app = FastAPI()

# Options stored under the 'draw_bbox_config' key of the parser settings.
_draw_bbox_options = {
    'text_scale': 0.8,
    'text_thickness': 2,
    'text_padding': 3,
    'thickness': 3,
}

# Settings handed to the Omniparser constructor.
# NOTE(review): the key is spelled 'BOX_TRESHOLD'; presumably Omniparser looks
# it up under this exact name - confirm before renaming it.
config = {
    'som_model_path': 'weights/icon_detect/best.pt',
    'device': 'cuda',  # switch to 'cpu' when CUDA is unavailable
    'caption_model_path': 'weights/icon_caption_florence',
    'draw_bbox_config': _draw_bbox_options,
    'BOX_TRESHOLD': 0.05,
}

# Module-level parser instance, created once at import time and reused by
# the request handler.
parser = Omniparser(config)
@app.post("/process/")
async def process_image(file: UploadFile = File(...)):
    """
    Endpoint to process an image and return parsed content.

    The upload is decoded with PIL, written under ``uploaded_images/`` and the
    saved path is passed to ``parser.parse``.

    Args:
        file: Image uploaded as multipart form data.

    Returns:
        JSONResponse with the keys ``processed_image``, ``parsed_content`` and
        ``label_coordinates``, filled with the three values returned by
        ``parser.parse``.

    Raises:
        HTTPException: 400 when the upload has no usable filename; 500 when
            decoding, saving or parsing the image fails.
    """
    # The filename is chosen by the client, so keep only its final path
    # component; otherwise a name such as "../../app.py" would be written
    # outside the upload directory. Backslashes are normalised first so that
    # Windows-style paths are reduced as well.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(client_name)
    # Checked outside the try block so the 400 is not rewrapped as a 500 by
    # the broad handler below.
    if safe_name in ("", ".", ".."):
        raise HTTPException(
            status_code=400,
            detail="Uploaded file must have a valid filename",
        )

    try:
        # Decode the uploaded bytes into a PIL image.
        content = await file.read()
        uploaded_image = Image.open(io.BytesIO(content))

        # Write the image to disk because the parser is given a file path.
        os.makedirs("uploaded_images", exist_ok=True)
        image_save_path = f"uploaded_images/{safe_name}"
        uploaded_image.save(image_save_path)

        # Process the image using Omniparser.
        processed_image, parsed_content_list, label_coordinates = parser.parse(
            image_save_path
        )

        # NOTE(review): the values are forwarded exactly as parser.parse
        # returns them. JSONResponse needs JSON-serialisable content, so if
        # processed_image is a PIL image rather than an already encoded
        # string it must be base64-encoded first - confirm against
        # Omniparser.parse.
        response = {
            "processed_image": processed_image,
            "parsed_content": parsed_content_list,
            "label_coordinates": label_coordinates,
        }
        return JSONResponse(content=response)
    except Exception as e:
        # Boundary handler: report any failure as a 500 while keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(
            status_code=500, detail=f"Error processing image: {str(e)}"
        ) from e
@app.get("/")
async def root():
    """Return a fixed status message showing that the service is reachable."""
    status_payload = {"message": "OmniParser FastAPI is running"}
    return status_payload