chriswang09 committed on
Commit
3ac452f
·
1 Parent(s): 84db4ad

first commit

Browse files
Files changed (3) hide show
  1. Dockerfile +30 -0
  2. app.py +68 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python base image
FROM python:3.10-slim

# Avoid interactive prompts during package installation
ENV DEBIAN_FRONTEND=noninteractive

# Install system dependencies for OCR (Tesseract) and the libGL runtime.
# `libgl1` is used instead of `libgl1-mesa-glx`: the latter is only a
# transitional package and is absent from newer Debian releases, which makes
# `apt-get install` fail when the base image tracks a recent Debian.
RUN apt-get update && apt-get install -y \
    tesseract-ocr \
    libtesseract-dev \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy the requirements file first so the dependency layer is cached
# independently of application-code changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application files
COPY . .

# Expose the port the app listens on (app.py launches Gradio on 7860)
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import cv2
4
+ import numpy as np
5
+ import json
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
7
+ import pytesseract
8
+
9
# Object-detection pipeline used to find components in the screenshot
obj_detect = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)

# Code-generation model: half precision on CUDA, full precision on CPU
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B"
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=dtype,
    device_map="auto",
)
21
+
22
+ # Define the process_image function (same as your original logic)
23
def process_image(img):
    """Turn a UI screenshot into JSON metadata and generated React code.

    Runs object detection and OCR on the screenshot, collects the results
    into a JSON document, and asks the code model to write a React component
    from that document.

    Args:
        img: The screenshot handed over by the Gradio image input, or
            ``None`` when the form is submitted without an upload.

    Returns:
        A ``(metadata_str, code_response)`` tuple: the pretty-printed JSON
        metadata and the text the model generated for it (without the
        prompt echoed back).

    Raises:
        gr.Error: If no image was supplied.
    """
    if img is None:
        # Surface a readable message in the UI instead of an OpenCV failure.
        raise gr.Error("Please upload a screenshot first.")

    opencv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    # Run Object Detection
    detections = obj_detect(img)

    # Run OCR
    text_data = pytesseract.image_to_string(opencv_image)

    # Component ids are "<label>-<1-based position in the detection list>".
    components = [
        {
            "id": f"{det['label']}-{position}",
            "name": det["label"].capitalize(),
            "confidence": round(det["score"], 2),
        }
        for position, det in enumerate(detections, start=1)
    ]
    ui_json = {
        "id": "generated-ui",
        "name": "Generated UI",
        "components": components,
        "ocr_text": text_data.strip(),
    }
    metadata_str = json.dumps(ui_json, indent=2)

    # Generate React Code
    prompt = f"Generate a React component from this metadata:\n{metadata_str}"
    # The model is placed by device_map="auto", so follow the model's own
    # device rather than a separately computed device string.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens budgets the generated text only; max_length would
        # also count the prompt, which can be long for metadata-heavy inputs.
        output = model.generate(**inputs, max_new_tokens=1024)

    # Decode only the tokens produced after the prompt so the result is the
    # generated code, not the prompt followed by the code.
    prompt_token_count = inputs["input_ids"].shape[1]
    code_response = tokenizer.decode(
        output[0][prompt_token_count:], skip_special_tokens=True
    )

    return metadata_str, code_response
57
+
58
# Gradio UI: one image input, two text outputs (metadata JSON, React code)
interface = gr.Interface(
    process_image,
    gr.Image(type="pil"),
    ["text", "text"],
    title="Screenshot → Metadata & React Code",
    description="Upload a UI screenshot and get structured metadata + React code.",
)

# Bind to all interfaces so the app is reachable from outside the container
interface.launch(server_port=7860, server_name="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ opencv-python-headless
4
+ numpy
5
+ transformers
6
+ pytesseract
7
+ timm
8
+ accelerate>=0.26.0