chuanli11 committed
Commit 3ecedd3
1 parent: 00cb073

Add demo.py

Files changed (2)
  1. demo.py +353 -0
  2. requirements.txt +8 -1
demo.py ADDED
@@ -0,0 +1,353 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import argparse
+
+ import torch
+ import gradio as gr
+
+ from vtoonify_model import Model
+
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--device", type=str, default="cpu")
+     parser.add_argument("--theme", type=str)
+     parser.add_argument("--share", action="store_true")
+     parser.add_argument("--port", type=int)
+     parser.add_argument("--disable-queue", dest="enable_queue", action="store_false")
+     return parser.parse_args()
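+
+ # Example invocations (illustrative only; main() below overrides --device,
+ # and the active launch() call pins its own port):
+ #   python demo.py
+ #   python demo.py --share --port 7860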
+
+
+ DESCRIPTION = """
+ <div align=center>
+ <h1 style="font-weight: 900; margin-bottom: 7px;">
+ Portrait Style Transfer with <a href="https://github.com/williamyang1991/VToonify">VToonify</a>
+ </h1>
+ <p>For faster inference without waiting in the queue, you may duplicate this Space and use a GPU setting.
+ <br/>
+ <a href="https://huggingface.co/spaces/PKUWilliamYang/VToonify?duplicate=true">
+ <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+ </p>
+ <video id="video" width=50% controls="" preload="none" poster="https://repository-images.githubusercontent.com/534480768/53715b0f-a2df-4daa-969c-0e74c102d339">
+ <source id="mp4" src="https://user-images.githubusercontent.com/18130694/189483939-0fc4a358-fb34-43cc-811a-b22adb820d57.mp4" type="video/mp4">
+ </video>
+ </div>
+ """
+ FOOTER = '<div align=center><img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.laobi.icu/badge?page_id=williamyang1991/VToonify" /></div>'
+
+ ARTICLE = r"""
+ If VToonify is helpful, please help to ⭐ the <a href='https://github.com/williamyang1991/VToonify' target='_blank'>GitHub repo</a>. Thanks!
+ [![GitHub Stars](https://img.shields.io/github/stars/williamyang1991/VToonify?style=social)](https://github.com/williamyang1991/VToonify)
+ ---
+ 📝 **Citation**
+ If our work is useful for your research, please consider citing:
+ ```bibtex
+ @article{yang2022Vtoonify,
+   title={VToonify: Controllable High-Resolution Portrait Video Style Transfer},
+   author={Yang, Shuai and Jiang, Liming and Liu, Ziwei and Loy, Chen Change},
+   journal={ACM Transactions on Graphics (TOG)},
+   volume={41},
+   number={6},
+   articleno={203},
+   pages={1--15},
+   year={2022},
+   publisher={ACM New York, NY, USA},
+   doi={10.1145/3550454.3555437},
+ }
+ ```
+
+ 📋 **License**
+ This project is licensed under the <a rel="license" href="https://github.com/williamyang1991/VToonify/blob/main/LICENSE.md">S-Lab License 1.0</a>.
+ Redistribution and use for non-commercial purposes should follow this license.
+
+ 📧 **Contact**
+ If you have any questions, please feel free to reach out to me at <b>williamyang@pku.edu.cn</b>.
+ """
+
+
+ def update_slider(choice: str) -> dict:
+     # Styles ending in "-d" support degree adjustment; for all other styles
+     # the slider is pinned to the single supported value of 0.5.
+     if isinstance(choice, str) and choice.endswith("-d"):
+         return gr.Slider.update(maximum=1, minimum=0, value=0.5)
+     else:
+         return gr.Slider.update(maximum=0.5, minimum=0.5, value=0.5)
+
+
+ def set_example_image(example: list) -> dict:
+     return gr.Image.update(value=example[0])
+
+
+ def set_example_video(example: list) -> dict:
+     return gr.Video.update(value=example[0])
+
+
+ sample_video = [
+     "./vtoonify/data/529_2.mp4",
+     "./vtoonify/data/7154235.mp4",
+     "./vtoonify/data/651.mp4",
+     "./vtoonify/data/908.mp4",
+ ]
+ sample_vid = gr.Video(label="Video file")  # for displaying the example
+ example_videos = gr.components.Dataset(
+     components=[sample_vid],
+     samples=[[path] for path in sample_video],
+     type="values",
+     label="Video Examples",
+ )
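+ # NOTE: example_videos is defined at module scope and only placed into the
+ # page later via example_videos.render() inside the Blocks context, so one
+ # definition can be positioned explicitly in the layout.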
+
+
+ def main():
+     args = parse_args()
+     # Override the --device flag: use CUDA whenever it is available.
+     args.device = "cuda" if torch.cuda.is_available() else "cpu"
+     print(f"*** Now using {args.device}.")
+     model = Model(device=args.device)
+
+     with gr.Blocks(theme=args.theme, css="style.css") as demo:
+         gr.Markdown(DESCRIPTION)
+
+         with gr.Box():
+             gr.Markdown(
+                 """## Step 1 (Select Style)
+ - Select a **Style Type**.
+   - A type ending in `-d` supports style degree adjustment.
+   - A type without `-d` usually has better toonification quality.
+ """
+             )
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""Select Style Type""")
+                     with gr.Row():
+                         style_type = gr.Radio(
+                             label="Style Type",
+                             choices=[
+                                 "cartoon1",
+                                 "cartoon1-d",
+                                 "cartoon2-d",
+                                 "cartoon3-d",
+                                 "cartoon4",
+                                 "cartoon4-d",
+                                 "cartoon5-d",
+                                 "comic1-d",
+                                 "comic2-d",
+                                 "arcane1",
+                                 "arcane1-d",
+                                 "arcane2",
+                                 "arcane2-d",
+                                 "caricature1",
+                                 "caricature2",
+                                 "pixar",
+                                 "pixar-d",
+                                 "illustration1-d",
+                                 "illustration2-d",
+                                 "illustration3-d",
+                                 "illustration4-d",
+                                 "illustration5-d",
+                             ],
+                         )
+                     exstyle = gr.Variable()
+                     with gr.Row():
+                         loadmodel_button = gr.Button("Load Model")
+                     with gr.Row():
+                         load_info = gr.Textbox(
+                             label="Process Information",
+                             interactive=False,
+                             value="No model loaded.",
+                         )
+                 with gr.Column():
+                     gr.Markdown(
+                         """Reference Styles
+ ![example](https://raw.githubusercontent.com/williamyang1991/tmpfile/master/vtoonify/style.jpg)"""
+                     )
+
+         with gr.Box():
+             gr.Markdown(
+                 """## Step 2 (Preprocess Input Image / Video)
+ - Drop an image/video containing a near-frontal face into **Input Image**/**Input Video**.
+ - Hit the **Rescale Image**/**Rescale First Frame** button.
+   - This rescales the input to best fit the model.
+   - The final image result will be based on this **Rescaled Face**; use the padding parameters to adjust the background space.
+   - **<font color=red>Solution to [Error: no face detected!]</font>**: VToonify uses dlib.get_frontal_face_detector, which sometimes fails to detect a face. Try several times, or use other images until a face is detected, then switch back to the original image.
+ - For video input, further hit the **Rescale Video** button.
+   - The final video result will be based on this **Rescaled Video**. To avoid overload, the video is cut to at most **100/300** frames on CPU/GPU, respectively.
+ """
+             )
+             with gr.Row():
+                 with gr.Box():
+                     with gr.Column():
+                         gr.Markdown(
+                             """Choose the padding parameters.
+ ![example](https://raw.githubusercontent.com/williamyang1991/tmpfile/master/vtoonify/rescale.jpg)"""
+                         )
+                         with gr.Row():
+                             top = gr.Slider(128, 256, value=200, step=8, label="top")
+                         with gr.Row():
+                             bottom = gr.Slider(
+                                 128, 256, value=200, step=8, label="bottom"
+                             )
+                         with gr.Row():
+                             left = gr.Slider(128, 256, value=200, step=8, label="left")
+                         with gr.Row():
+                             right = gr.Slider(
+                                 128, 256, value=200, step=8, label="right"
+                             )
+                 with gr.Box():
+                     with gr.Column():
+                         gr.Markdown("""Input""")
+                         with gr.Row():
+                             input_image = gr.Image(label="Input Image", type="filepath")
+                         with gr.Row():
+                             preprocess_image_button = gr.Button("Rescale Image")
+                         with gr.Row():
+                             input_video = gr.Video(
+                                 label="Input Video",
+                                 mirror_webcam=False,
+                                 type="filepath",
+                             )
+                         with gr.Row():
+                             preprocess_video0_button = gr.Button("Rescale First Frame")
+                             preprocess_video1_button = gr.Button("Rescale Video")
+
+                 with gr.Box():
+                     with gr.Column():
+                         gr.Markdown("""View""")
+                         with gr.Row():
+                             input_info = gr.Textbox(
+                                 label="Process Information",
+                                 interactive=False,
+                                 value="n.a.",
+                             )
+                         with gr.Row():
+                             aligned_face = gr.Image(
+                                 label="Rescaled Face", type="numpy", interactive=False
+                             )
+                         instyle = gr.Variable()
+                         with gr.Row():
+                             aligned_video = gr.Video(
+                                 label="Rescaled Video", type="mp4", interactive=False
+                             )
+             with gr.Row():
+                 with gr.Column():
+                     paths = [
+                         "./vtoonify/data/pexels-andrea-piacquadio-733872.jpg",
+                         "./vtoonify/data/i5R8hbZFDdc.jpg",
+                         "./vtoonify/data/yRpe13BHdKw.jpg",
+                         "./vtoonify/data/ILip77SbmOE.jpg",
+                         "./vtoonify/data/077436.jpg",
+                         "./vtoonify/data/081680.jpg",
+                     ]
+                     example_images = gr.Dataset(
+                         components=[input_image],
+                         samples=[[path] for path in paths],
+                         label="Image Examples",
+                     )
+                 with gr.Column():
+                     # example_videos = gr.Dataset(components=[input_video], samples=[['./vtoonify/data/529.mp4']], type='values')
+                     # to render the video example on mouse hover/click
+                     example_videos.render()
+
+                     # to load the clicked sample video into input_video
+                     def load_examples(video):
+                         return video[0]
+
+                     example_videos.click(load_examples, example_videos, input_video)
+
+         with gr.Box():
+             gr.Markdown("""## Step 3 (Generate Style Transferred Image/Video)""")
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown(
+                         """
+ - Adjust **Style Degree**.
+ - Hit **Toonify!** to toonify a single frame. Hit **VToonify!** to toonify the full video.
+ - Estimated time for a 1600x1440 video of 300 frames: about 1 hour (CPU) or 2 minutes (GPU).
+ """
+                     )
+                     style_degree = gr.Slider(
+                         0, 1, value=0.5, step=0.05, label="Style Degree"
+                     )
+                 with gr.Column():
+                     gr.Markdown(
+                         """![example](https://raw.githubusercontent.com/williamyang1991/tmpfile/master/vtoonify/degree.jpg)
+ """
+                     )
+             with gr.Row():
+                 output_info = gr.Textbox(
+                     label="Process Information", interactive=False, value="n.a."
+                 )
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         result_face = gr.Image(
+                             label="Result Image", type="numpy", interactive=False
+                         )
+                     with gr.Row():
+                         toonify_button = gr.Button("Toonify!")
+                 with gr.Column():
+                     with gr.Row():
+                         result_video = gr.Video(
+                             label="Result Video", type="mp4", interactive=False
+                         )
+                     with gr.Row():
+                         vtoonify_button = gr.Button("VToonify!")
+
+         gr.Markdown(ARTICLE)
+         gr.Markdown(FOOTER)
+
+         loadmodel_button.click(
+             fn=model.load_model, inputs=[style_type], outputs=[exstyle, load_info]
+         )
+
+         style_type.change(fn=update_slider, inputs=style_type, outputs=style_degree)
+
+         # Preprocessing wiring: the image path aligns a single face, the
+         # first-frame path previews the video alignment, and the full-video
+         # path produces the Rescaled Video consumed by VToonify below.
+         preprocess_image_button.click(
+             fn=model.detect_and_align_image,
+             inputs=[input_image, top, bottom, left, right],
+             outputs=[aligned_face, instyle, input_info],
+         )
+         preprocess_video0_button.click(
+             fn=model.detect_and_align_video,
+             inputs=[input_video, top, bottom, left, right],
+             outputs=[aligned_face, instyle, input_info],
+         )
+         preprocess_video1_button.click(
+             fn=model.detect_and_align_full_video,
+             inputs=[input_video, top, bottom, left, right],
+             outputs=[aligned_video, instyle, input_info],
+         )
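+
+         # Generation wiring: Toonify! styles the single Rescaled Face;
+         # VToonify! styles every frame of the Rescaled Video.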
+         toonify_button.click(
+             fn=model.image_toonify,
+             inputs=[aligned_face, instyle, exstyle, style_degree, style_type],
+             outputs=[result_face, output_info],
+         )
+         vtoonify_button.click(
+             fn=model.video_tooniy,  # (sic) spelling matches the method name in vtoonify_model.Model
+             inputs=[aligned_video, instyle, exstyle, style_degree, style_type],
+             outputs=[result_video, output_info],
+         )
+
+         example_images.click(
+             fn=set_example_image,
+             inputs=example_images,
+             outputs=example_images.components,
+         )
+
+     # demo.launch(
+     #     enable_queue=args.enable_queue,
+     #     server_port=args.port,
+     #     share=args.share,
+     # )
+
+     demo.queue(concurrency_count=1, max_size=4)
+     demo.launch(server_port=8266)
+
+
+ if __name__ == "__main__":
+     main()
requirements.txt CHANGED
@@ -4,4 +4,11 @@ opencv-python-headless
Pillow
scipy
torch
- torchvision
+ torchvision
+
+ gradio==3.5
+ ray[serve]==2.2.0
+ fastapi==0.88.0
+ httpx==0.23.1
+ starlette==0.22.0
+ protobuf==3.20.*
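
The newly pinned serving stack (ray[serve], fastapi, starlette, httpx) suggests the demo is meant to run behind an ASGI application rather than only Gradio's built-in server. A minimal sketch of that pattern, with serve_demo.py, the placeholder Blocks app, and uvicorn as the ASGI server all assumed rather than part of this commit:

# serve_demo.py -- hypothetical; illustrates mounting a gr.Blocks app on FastAPI
from fastapi import FastAPI
import gradio as gr

with gr.Blocks() as demo:  # stand-in for the Blocks UI built in demo.py's main()
    gr.Markdown("VToonify demo placeholder")

app = FastAPI()
# mount_gradio_app attaches the Gradio app to the FastAPI instance at `path`
app = gr.mount_gradio_app(app, demo, path="/")

# run with: uvicorn serve_demo:app --host 0.0.0.0 --port 8266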