Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
import cv2 | |
import numpy as np | |
import json | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
import pytesseract | |
# Object detector: DETR (ResNet-50 backbone) served through the transformers
# pipeline API. device=-1 keeps this pipeline on the CPU.
obj_detect = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
    device=-1,
)

# Code-generation model: Qwen2.5-Coder. Half precision is used only when a
# CUDA device is available; otherwise everything stays in float32 on the CPU.
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B"
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=dtype,
)
def _build_ui_metadata(detections, ocr_text):
    """Assemble the UI metadata dict from detector output and OCR text."""
    components = [
        {
            # Component ids are "<label>-<1-based position in the list>".
            "id": f"{det['label']}-{index}",
            "name": det["label"].capitalize(),
            "confidence": round(det["score"], 2),
        }
        for index, det in enumerate(detections, start=1)
    ]
    return {
        "id": "generated-ui",
        "name": "Generated UI",
        "components": components,
        "ocr_text": ocr_text.strip(),
    }


def process_image(img):
    """Turn a UI screenshot into JSON metadata and generated React code.

    The screenshot is run through the object-detection pipeline and through
    Tesseract OCR; both results are serialised to JSON, and that JSON is
    embedded in a prompt for the code-generation model.

    Args:
        img: PIL image delivered by the Gradio image input, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(metadata_str, code_response)`` tuple: the pretty-printed JSON
        metadata and the text generated by the model (without the prompt).

    Raises:
        gr.Error: If ``img`` is ``None``.
    """
    if img is None:
        # Surface a readable message in the UI instead of an opaque
        # conversion failure further down.
        raise gr.Error("Please upload a screenshot first.")

    # Run Object Detection
    detections = obj_detect(img)

    # Run OCR. The PIL image is passed as-is: pytesseract assumes RGB channel
    # order, so converting to OpenCV's BGR layout first would swap channels.
    text_data = pytesseract.image_to_string(img)

    metadata_str = json.dumps(_build_ui_metadata(detections, text_data), indent=2)

    # Generate React Code
    prompt = f"Generate a React component from this metadata:\n{metadata_str}"
    # Follow the model's actual placement rather than a separately computed
    # device string, since the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens budgets only the generated tokens; max_length would
        # also count the prompt and leave no room once the metadata is long.
        output = model.generate(**inputs, max_new_tokens=1024)

    # generate() returns prompt + continuation; keep only the continuation so
    # the prompt is not echoed back as "code".
    prompt_len = inputs["input_ids"].shape[1]
    code_response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return metadata_str, code_response
# Gradio UI: one PIL image in, two text boxes out (JSON metadata, React code).
# NOTE(review): the "β" in the title looks like a mis-encoded arrow — confirm
# against the intended title before changing it.
interface = gr.Interface(
    process_image,
    gr.Image(type="pil"),
    ["text", "text"],
    description="Upload a UI screenshot and get structured metadata + React code.",
    title="Screenshot β Metadata & React Code",
)

# Bind to all interfaces on port 7860 so the app is reachable from outside
# the Docker container.
interface.launch(server_port=7860, server_name="0.0.0.0")