Spaces:
Runtime error
Runtime error
File size: 2,170 Bytes
3ac452f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import gradio as gr
import torch
import cv2
import numpy as np
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pytesseract
# Object detector used to locate components in the screenshot.
# device=-1 keeps this pipeline on the CPU.
obj_detect = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
    device=-1,
)

# Code-generation language model (Qwen coder checkpoint).
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B"

# Half precision on a GPU, full precision on the CPU.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=dtype,
)
def process_image(img):
    """Turn a UI screenshot into JSON metadata and generated React code.

    Runs the module-level object detector (``obj_detect``) and Tesseract OCR
    on the image, packs the results into a JSON document, and prompts the
    module-level language model (``model`` / ``tokenizer``) with that
    document.

    Args:
        img: The uploaded screenshot as a PIL image (the Gradio input is
            declared with ``type="pil"``), or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(metadata_json, react_code)`` tuple of strings. Both strings are
        empty when ``img`` is ``None``.
    """
    # Submitting the form without an image hands us None; there is nothing
    # to analyse, so return empty outputs instead of crashing.
    if img is None:
        return "", ""

    # Normalise to 3-channel RGB so palette/alpha/grayscale uploads behave
    # the same way in both the detector and the OCR step.
    rgb_image = img.convert("RGB")

    # Run Object Detection
    detections = obj_detect(rgb_image)

    # Run OCR. pytesseract accepts PIL images directly and treats NumPy
    # arrays as RGB, so no OpenCV BGR conversion is wanted here.
    text_data = pytesseract.image_to_string(rgb_image)

    ui_json = {
        "id": "generated-ui",
        "name": "Generated UI",
        "components": [
            {
                "id": f"{det['label']}-{index}",
                "name": det["label"].capitalize(),
                "confidence": round(det["score"], 2),
            }
            for index, det in enumerate(detections, start=1)
        ],
        "ocr_text": text_data.strip(),
    }
    metadata_str = json.dumps(ui_json, indent=2)

    # Generate React Code
    prompt = f"Generate a React component from this metadata:\n{metadata_str}"
    # The model is placed by device_map="auto", so follow the model's own
    # device rather than assuming a fixed one.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens budgets only the generated part; max_length would
        # also count the (potentially long) prompt and could leave no room.
        output = model.generate(**inputs, max_new_tokens=1024)

    # generate() returns prompt + continuation; keep only the continuation
    # so the code output does not repeat the instruction and metadata.
    prompt_token_count = inputs["input_ids"].shape[-1]
    code_response = tokenizer.decode(
        output[0][prompt_token_count:], skip_special_tokens=True
    )
    return metadata_str, code_response
# Gradio UI: one image upload in, two text panes out (metadata and code).
screenshot_input = gr.Image(type="pil")
text_outputs = ["text", "text"]

interface = gr.Interface(
    process_image,
    inputs=screenshot_input,
    outputs=text_outputs,
    title="Screenshot → Metadata & React Code",
    description="Upload a UI screenshot and get structured metadata + React code.",
)

# Bind to all interfaces (0.0.0.0) so the app is reachable from outside the
# Docker container.
interface.launch(
    server_name="0.0.0.0",
    server_port=7860,
)
|