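"""Screenshot → Metadata & React Code.

Gradio app that runs DETR object detection and Tesseract OCR on an uploaded
UI screenshot, then prompts Qwen2.5-Coder to generate a React component from
the resulting metadata.
"""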
import gradio as gr
import torch
import cv2
import numpy as np
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pytesseract
# Load Object Detection Pipeline
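# device=-1 keeps the detection pipeline on CPU, regardless of GPU availability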
obj_detect = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)
# Load Qwen for Code Generation
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B"
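# Prefer float16 on GPU to halve memory use; fall back to float32 on CPU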
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=dtype, device_map="auto"
)
# Process an uploaded screenshot: detect UI components, run OCR, and generate React code
def process_image(img):
    opencv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    img_height, img_width, _ = opencv_image.shape
    # Run Object Detection
    detections = obj_detect(img)
    # Run OCR
    text_data = pytesseract.image_to_string(opencv_image)
    ui_json = {
        "id": "generated-ui",
        "name": "Generated UI",
        "components": [],
        "ocr_text": text_data.strip()
    }
    for det in detections:
        ui_json["components"].append({
            "id": f"{det['label']}-{len(ui_json['components']) + 1}",
            "name": det["label"].capitalize(),
            "confidence": round(det["score"], 2),
        })
    metadata_str = json.dumps(ui_json, indent=2)
    # Generate React Code
    prompt = f"Generate a React component from this metadata:\n{metadata_str}"
    # device_map="auto" may place the model off-CPU, so send inputs to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens counts only generated tokens (max_length would also count the prompt)
        output = model.generate(**inputs, max_new_tokens=1024)
    code_response = tokenizer.decode(output[0], skip_special_tokens=True)
    return metadata_str, code_response
# Gradio Interface
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=["text", "text"],
    title="Screenshot → Metadata & React Code",
    description="Upload a UI screenshot and get structured metadata + React code.",
)
# Bind to 0.0.0.0 so the app is reachable from outside the Docker container
interface.launch(server_name="0.0.0.0", server_port=7860)