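"""Screenshot → Metadata & React Code.

Gradio app that runs DETR object detection and Tesseract OCR on an uploaded
UI screenshot, then prompts Qwen2.5-Coder to generate a React component from
the resulting metadata.
"""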
import gradio as gr
import torch
import cv2
import numpy as np
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pytesseract
# Load Object Detection Pipeline
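# device=-1 keeps the detection pipeline on CPU, regardless of GPU availability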
obj_detect = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)
# Load Qwen for Code Generation
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B"
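# Prefer float16 on GPU to halve memory use; fall back to float32 on CPU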
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=dtype, device_map="auto"
)
# Process an uploaded screenshot: detect UI components, run OCR, and generate React code
def process_image(img):
    opencv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    img_height, img_width, _ = opencv_image.shape
    # Run Object Detection
    detections = obj_detect(img)
    # Run OCR
    text_data = pytesseract.image_to_string(opencv_image)
    ui_json = {
        "id": "generated-ui",
        "name": "Generated UI",
        "components": [],
        "ocr_text": text_data.strip()
    }
    for det in detections:
        ui_json["components"].append({
            "id": f"{det['label']}-{len(ui_json['components']) + 1}",
            "name": det["label"].capitalize(),
            "confidence": round(det["score"], 2),
        })
    metadata_str = json.dumps(ui_json, indent=2)
    # Generate React Code
    prompt = f"Generate a React component from this metadata:\n{metadata_str}"
    # device_map="auto" may place the model off-CPU, so send inputs to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens counts only generated tokens (max_length would also count the prompt)
        output = model.generate(**inputs, max_new_tokens=1024)
    code_response = tokenizer.decode(output[0], skip_special_tokens=True)
    return metadata_str, code_response
# Gradio Interface
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=["text", "text"],
    title="Screenshot → Metadata & React Code",
    description="Upload a UI screenshot and get structured metadata + React code.",
)
# Bind to 0.0.0.0 so the app is reachable from outside the Docker container
interface.launch(server_name="0.0.0.0", server_port=7860)