darknoon committed on
Commit
7d5b9d2
0 Parent(s):
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app.py +214 -0
  3. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .venv
2
+ .env
3
+ __pycache__
app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from playwright.async_api import async_playwright, Browser, Page
3
+ import asyncio
4
+ from PIL import Image
5
+ from io import BytesIO
6
+ from anthropic import Anthropic, TextEvent
7
+ from dotenv import load_dotenv
8
+ import os
9
+ from typing import Literal
10
+ import time
11
+ from base64 import b64encode
12
+ import json
13
+
14
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast at import time: the rest of the module needs an API key.
if "ANTHROPIC_API_KEY" not in os.environ:
    raise ValueError(
        "ANTHROPIC_API_KEY is not set, set it in .env or export it in your environment"
    )

# Module-wide client (constructed with no explicit arguments) and the model
# name passed to every request below.
anthropic = Anthropic()
model = "claude-3-5-sonnet-20240620"
23
+
24
+
25
def apply_tailwind(content):
    """Wrap an HTML fragment in a complete page that loads Tailwind from its CDN.

    ``content`` is placed verbatim inside ``<body>``; no escaping is applied.
    Returns the full document as a single string.
    """
    page_lines = [
        "",
        "<!DOCTYPE html>",
        '<html lang="en">',
        "<head>",
        '<meta charset="UTF-8">',
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
        "<title>Document</title>",
        '<script src="https://cdn.tailwindcss.com"></script>',
        "</head>",
        "<body>",
        f"{content}",
        "</body>",
        "</html>",
        "",
    ]
    return "\n".join(page_lines)
40
+
41
+
42
# System prompt sent with every request. It ends with the page template the
# generated fragment will be inserted into, so the model can see its context.
system_prompt = (
    "\n"
    "You are a helpful assistant that generates HTML and CSS.\n"
    "\n"
    "You directly output HTML with tailwind css classes, and nothing else (no markdown, no other text, etc).\n"
    "\n"
    "You are not able to insert images from the internet, but you can still generate an <img> tag with an appropriate alt tag (leave out the src, we will provide that).\n"
    "\n"
    "Assume that the content is being inserted into a template like this:\n"
    + apply_tailwind("your html here")
    + "\n"
)

# Follow-up instruction attached after each screenshot in the feedback loop.
improve_prompt = (
    "\n"
    "Given the current draft of the webpage you generated for me as HTML and the screenshot of it rendered, improve the HTML to look nicer.\n"
)
56
+
57
+
58
def stream_initial(prompt):
    """Yield the model's first draft for ``prompt`` as streamed text chunks.

    Sends a single user message with the module-level system prompt and
    forwards the ``text`` of every ``TextEvent`` the stream produces.
    """
    request = {
        "model": model,
        "max_tokens": 2000,
        "system": system_prompt,
        "messages": [{"role": "user", "content": prompt}],
    }
    with anthropic.messages.stream(**request) as events:
        # Events that are not TextEvent instances are skipped.
        yield from (event.text for event in events if isinstance(event, TextEvent))
70
+
71
+
72
def format_image(image: bytes, media_type: Literal["image/png", "image/jpeg"]):
    """Build an image content block carrying ``image`` as base64 text.

    Returns a dict of the form
    ``{"type": "image", "source": {"type": "base64", "media_type": ..., "data": ...}}``.
    """
    encoded = b64encode(image).decode("utf-8")
    source = {"type": "base64", "media_type": media_type, "data": encoded}
    return {"type": "image", "source": source}
82
+
83
+
84
def stream_with_visual_feedback(prompt, history: list[tuple[str, bytes]]):
    """
    Stream a revised draft given earlier drafts and screenshots of how they rendered.

    history is a list of tuples of (content, image) corresponding to iterations of generation and rendering
    """
    print(f"History has {len(history)} images")

    # The conversation opens with the original request. Each past iteration
    # then adds the assistant's draft followed by a user turn holding the
    # screenshot (sent as PNG) and the request to improve the page.
    conversation = [{"role": "user", "content": prompt}]
    for draft_html, screenshot_png in history:
        conversation.append({"role": "assistant", "content": draft_html})
        conversation.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Here is a screenshot of the above HTML code rendered in a browser:",
                    },
                    format_image(screenshot_png, "image/png"),
                    {"type": "text", "text": improve_prompt},
                ],
            }
        )

    with anthropic.messages.stream(
        model=model,
        max_tokens=2000,
        system=system_prompt,
        messages=conversation,
    ) as events:
        for event in events:
            # Only text events carry output to forward.
            if isinstance(event, TextEvent):
                yield event.text
127
+
128
+
129
async def render_html(page: Page, content: str):
    """Load ``content`` into ``page`` and capture a full-page PNG screenshot.

    Returns ``(png_bytes, elapsed)`` where ``elapsed`` is the time, as
    measured by ``t()``, spent setting the content and taking the screenshot.
    """
    started = t()
    await page.set_content(content)
    # weird, can we set scale to 2.0 directly instead of "device", ie whatever server this is running on?
    png = await page.screenshot(full_page=True, scale="device", type="png")
    elapsed = t() - started
    return png, elapsed
135
+
136
+
137
def t():
    """Return the current ``time.perf_counter()`` reading, used for timing."""
    now = time.perf_counter()
    return now
139
+
140
+
141
def apply_template(content, template):
    """Wrap ``content`` in the named page template.

    Only ``"tailwind"`` is recognised; any other value returns ``content``
    unchanged.
    """
    return apply_tailwind(content) if template == "tailwind" else content
145
+
146
+
147
def to_pil(image_bytes: bytes):
    """Open encoded image bytes as a PIL image via an in-memory buffer."""
    buffer = BytesIO(image_bytes)
    return Image.open(buffer)
149
+
150
+
151
async def generate_with_visual_feedback(
    prompt,
    template,
    resolution: str = "512",
    num_iterations: int = 1,
):
    """Generate HTML for ``prompt`` and re-render it while the text streams in.

    The first iteration streams a fresh draft. Every later iteration sends
    the previous drafts and their screenshots back to the model and streams
    an improved draft.

    Args:
        prompt: User request describing the page to generate.
        template: Template name passed to ``apply_template``.
        resolution: ``"512"`` or ``"1024"``; selects a square viewport.
        num_iterations: Number of generate/render rounds to run.

    Yields:
        ``(image, content, render_time)``: the rendered page as a PIL image,
        the HTML accumulated so far in the current iteration, and the render
        duration in seconds.

    Raises:
        KeyError: If ``resolution`` is not one of the supported values.
    """
    render_every = 0.25  # minimum seconds between renders of a partial draft
    width, height = {"512": (512, 512), "1024": (1024, 1024)}[resolution]
    # A UI slider may deliver a float; range() requires an int.
    iterations = int(num_iterations)
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page(
                viewport={"width": width, "height": height}
            )
            last_yield = t()
            history = []
            for i in range(iterations):
                stream = (
                    stream_initial(prompt)
                    if i == 0
                    else stream_with_visual_feedback(prompt, history)
                )
                content = ""
                # NOTE(review): both stream helpers are synchronous generators,
                # so waiting for the next chunk blocks the event loop.
                for chunk in stream:
                    content = content + chunk
                    current_time = t()
                    if current_time - last_yield >= render_every:
                        image_bytes, render_time = await render_html(
                            page, apply_template(content, template)
                        )
                        yield to_pil(image_bytes), content, render_time
                        last_yield = t()
                # always render the final image of each iteration
                image_bytes, render_time = await render_html(
                    page, apply_template(content, template)
                )
                history.append((content, image_bytes))
                yield to_pil(image_bytes), content, render_time
        finally:
            # Close the browser even when rendering or streaming raises, or
            # when the consumer stops iterating this generator early.
            await browser.close()
190
+
191
+
192
# Gradio UI: prompt, template, viewport size and iteration count in;
# rendered screenshot, generated code and render time out.
demo = gr.Interface(
    fn=generate_with_visual_feedback,
    inputs=[
        gr.Textbox(
            value="Generate a beautiful webpage for a cat cafe",
            placeholder="Prompt to generate HTML",
            label="Prompt",
            lines=5,
        ),
        gr.Dropdown(label="Template", choices=["tailwind"], value="tailwind"),
        gr.Dropdown(label="Page Width", choices=["512", "1024"], value="512"),
        gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Iterations"),
    ],
    outputs=[
        gr.Image(label="Rendered HTML", type="pil", image_mode="RGB", format="png"),
        gr.Textbox(label="Code", lines=5),
        gr.Number(label="Render Time", precision=2),
    ],
)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ playwright
3
+ anthropic
4
+ python-dotenv
5
+