neverix committed dee645c (0 parents)

Initial commit

Files changed:
- .gitignore +3 -0
- app.py +35 -0
- pulsar_clip.py +222 -0
- requirements.txt +6 -0
- utils.py +71 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+.idea/
+**/__pycache__/
+flagged/
app.py
ADDED
@@ -0,0 +1,35 @@
+from pulsar_clip import PulsarCLIP, CONFIG_SPEC
+from datetime import datetime
+import gradio as gr
+
+
+def generate(*args):
+    # Cast each widget value back to the type declared in CONFIG_SPEC;
+    # dropdown specs (tuple/list) pass their value through unchanged.
+    pc = PulsarCLIP(dict([(k, t(v) if not isinstance(t, (tuple, list)) else v)
+                          for v, (k, v0, t) in zip(args, CONFIG_SPEC)]))
+    frames = []
+    for image in pc.generate():
+        frames.append(image)
+    from tqdm.auto import tqdm
+    from subprocess import Popen, PIPE
+    fps = 30
+    # Timestamped output filename (datetime.strftime needs a format string).
+    video_path = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.mp4"
+    if frames:
+        # Pipe the PNG frames through ffmpeg to encode an H.264 video.
+        p = Popen((f"ffmpeg -y -f image2pipe -vcodec png -r {fps} -i - -vcodec libx264 -r {fps} "
+                   f"-pix_fmt yuv420p -crf 17 -preset fast ").split() + [str(video_path)], stdin=PIPE)
+        for im in tqdm(frames):
+            im.save(p.stdin, "PNG")
+        p.stdin.close()
+        p.wait()
+    return video_path
+
+
+def main():
+    gr.Interface(inputs=[
+        (gr.inputs.Number(label=k, default=v0) if t in (float, int) else
+         gr.inputs.Checkbox(label=k, default=v0) if t == bool else
+         gr.inputs.Textbox(label=k, default=v0) if t == str else
+         gr.inputs.Dropdown(label=k, default=v0, choices=t) if isinstance(t, (tuple, list)) else
+         1/0)  # fail fast on an unhandled spec type
+        for k, v0, t in CONFIG_SPEC], outputs=gr.outputs.Video(), fn=generate).launch()
+
+
+if __name__ == '__main__':
+    main()
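For reference, `generate` relies on positional order: Gradio passes widget values in the same order as `CONFIG_SPEC`, and each value is cast back with the spec's type callable. A minimal sketch of that round trip, using a hypothetical two-entry spec (the values below are illustrative, not part of the app):

```python
# Mini-spec in the (name, default, type-or-choices) format of CONFIG_SPEC.
spec = [("iterations", 5000, int), ("loss_type", "spherical", ("spherical", "cosine"))]
ui_values = (250.0, "cosine")  # gr.inputs.Number returns floats; Dropdown returns the chosen value

config = dict([(k, t(v) if not isinstance(t, (tuple, list)) else v)
               for v, (k, v0, t) in zip(ui_values, spec)])
assert config == {"iterations": 250, "loss_type": "cosine"}
```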
pulsar_clip.py
ADDED
@@ -0,0 +1,222 @@
+from transformers import set_seed
+from tqdm.auto import trange
+from PIL import Image
+import numpy as np
+import random
+import utils
+import torch
+
+
+CONFIG_SPEC = [
+    ("text", "A cloud at dawn", str),
+    ("iterations", 5000, int),
+    ("turns", 4, int),
+    ("showoff", 5000, int),
+    ("seed", 12, int),
+    ("focal_length", 0.1, float),
+    ("plane_width", 0.1, float),
+    ("shade_strength", 0.25, float),
+    ("gamma", 0.5, float),
+    ("max_depth", 7, float),
+    ("lr", 0.5, float),
+    ("offset", 5, float),
+    ("offset_random", 0.75, float),
+    ("xyz_random", 0.25, float),
+    ("altitude_range", 0.3, float),
+    ("augments", 4, int),
+    ("show_every", 50, int),
+    ("epochs", 1, int),
+    ("w", 224, int),
+    ("h", 224, int),
+    ("num_objects", 256, int),
+    #@markdown CLIP loss type, might improve the results
+    ("loss_type", "spherical", ("spherical", "cosine")),
+    #@markdown CLIP loss weight
+    ("clip_weight", 1.0, float),  #@param {type: "number"}
+    #@markdown Number of dimensions. 0 is for point clouds (default), 1 will make
+    #@markdown strokes, 2 will make planes, 3 produces little cubes
+    ("ndim", 0, (0, 1, 2, 3)),  #@param {type: "integer"}
+
+    #@markdown Opacity scale:
+    ("min_opacity", 1e-4, float),  #@param {type: "number"}
+    ("max_opacity", 1.0, float),  #@param {type: "number"}
+    ("log_opacity", False, bool),  #@param {type: "boolean"}
+
+    ("min_radius", 0.030, float),
+    ("max_radius", 0.070, float),
+    ("log_radius", False, bool),
+
+    # TODO dynamically decide bezier_res
+    #@markdown Bezier resolution: how many points a line/plane/cube will have. Not applicable to points
+    ("bezier_res", 8, int),  #@param {type: "integer"}
+    #@markdown Maximum scale of parameters: position, velocity, acceleration
+    ("pos_scale", 0.4, float),  #@param {type: "number"}
+    ("vel_scale", 0.15, float),  #@param {type: "number"}
+    ("acc_scale", 0.15, float),  #@param {type: "number"}
+
+    #@markdown Scale of each individual 3D object. Master control for velocity and acceleration scale.
+    ("scale", 1, float),  #@param {type: "number"}
+]
+
+
+# TODO: one day separate the config into multiple parts and split this megaobject into multiple objects
+class PulsarCLIP(object):
+    def __init__(self, args):
+        args = DotDict(**args)
+        set_seed(args.seed)
+        self.args = args
+        self.device = args.get("device", "cuda" if torch.cuda.is_available() else "cpu")
+        # Defer the import so that we can import `pulsar_clip` and then install `pytorch3d`
+        import pytorch3d.renderer.points.pulsar as ps
+        self.ndim = int(self.args.ndim)
+        # One pulsar sphere per Bezier sample point of every object.
+        self.renderer = ps.Renderer(self.args.w, self.args.h,
+                                    self.args.num_objects * (self.args.bezier_res ** self.ndim)).to(self.device)
+        self.bezier_pos = torch.nn.Parameter(torch.randn((args.num_objects, 4)).to(self.device))
+        self.bezier_vel = torch.nn.Parameter(torch.randn((args.num_objects, 3 * self.ndim)).to(self.device))
+        self.bezier_acc = torch.nn.Parameter(torch.randn((args.num_objects, 3 * self.ndim)).to(self.device))
+        self.bezier_col = torch.nn.Parameter(torch.randn((args.num_objects, 4 * (1 + self.ndim))).to(self.device))
+        self.optimizer = torch.optim.Adam([dict(params=[self.bezier_col], lr=5e-1 * args.lr),
+                                           dict(params=[self.bezier_pos], lr=1e-1 * args.lr),
+                                           dict(params=[self.bezier_vel, self.bezier_acc], lr=5e-2 * args.lr),
+                                           ])
+        self.model_clip, self.preprocess_clip = utils.load_clip()
+        self.model_clip.visual.requires_grad_(False)
+        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer,
+                                                                              int(self.args.iterations
+                                                                                  / self.args.augments
+                                                                                  / self.args.epochs))
+        import clip
+        self.txt_emb = self.model_clip.encode_text(clip.tokenize([self.args.text]).to(self.device))[0].detach()
+        self.txt_emb = torch.nn.functional.normalize(self.txt_emb, dim=-1)
+
+    def get_points(self):
+        if self.ndim > 0:
+            # Bezier parameter grid: one t-coordinate per curve/surface/volume dimension.
+            bezier_ts = torch.stack(torch.meshgrid(
+                (torch.linspace(0, 1, self.args.bezier_res, device=self.device),) * self.ndim), dim=0
+            ).unsqueeze(1).repeat((1, self.args.num_objects) + (1,) * self.ndim).unsqueeze(-1)
+
+        def interpolate_3D(pos, vel=0.0, acc=0.0, pos_scale=None, vel_scale=None, acc_scale=None, scale=None):
+            pos_scale = self.args.pos_scale if pos_scale is None else pos_scale
+            vel_scale = self.args.vel_scale if vel_scale is None else vel_scale
+            acc_scale = self.args.acc_scale if acc_scale is None else acc_scale
+            scale = self.args.scale if scale is None else scale
+            if self.ndim == 0:
+                return pos * pos_scale
+            result = 0.0
+            s = pos.shape[-1]
+            assert s * self.ndim == vel.shape[-1] == acc.shape[-1]
+            # O(dim) sequential lol
+            for d, bezier_t in zip(range(self.ndim), bezier_ts):  # TODO replace with fused dimension operation
+                result = (result
+                          + torch.tanh(vel[..., d * s:(d + 1) * s]).view(
+                              (-1,) + (1,) * self.ndim + (s,)) * vel_scale * bezier_t
+                          + torch.tanh(acc[..., d * s:(d + 1) * s]).view(
+                              (-1,) + (1,) * self.ndim + (s,)) * acc_scale * bezier_t.pow(2))
+            result = (result * scale
+                      + torch.tanh(pos[..., :s]).view((-1,) + (1,) * self.ndim + (s,)) * pos_scale).view(-1, s)
+            return result
+
+        vert_pos = interpolate_3D(self.bezier_pos[..., :3], self.bezier_vel, self.bezier_acc)
+        vert_col = interpolate_3D(self.bezier_col[..., :4],
+                                  self.bezier_col[..., 4:4 + 4 * self.ndim],
+                                  self.bezier_col[..., -4 * self.ndim:])
+
+        to_bezier = lambda x: x.view((-1,) + (1,) * self.ndim + (x.shape[-1],)).repeat(
+            (1,) + (self.args.bezier_res,) * self.ndim + (1,)).reshape(-1, x.shape[-1])
+        rescale = lambda x, a, b, is_log=False: (torch.exp(x
+                                                           * np.log(b / a)
+                                                           + np.log(a))) if is_log else x * (b - a) + a
+        return (
+            vert_pos,
+            torch.sigmoid(vert_col[..., :3]),
+            rescale(
+                torch.sigmoid(to_bezier(self.bezier_pos[..., -1:])[..., 0]),
+                self.args.min_radius, self.args.max_radius, is_log=self.args.log_radius
+            ),
+            rescale(torch.sigmoid(vert_col[..., -1]),
+                    self.args.min_opacity, self.args.max_opacity, is_log=self.args.log_opacity))
+
+    def camera(self, angle, altitude=0.0, offset=None, use_random=True, offset_random=None,
+               xyz_random=None, focal_length=None, plane_width=None):
+        if offset is None:
+            offset = self.args.offset
+        if xyz_random is None:
+            xyz_random = self.args.xyz_random
+        if focal_length is None:
+            focal_length = self.args.focal_length
+        if plane_width is None:
+            plane_width = self.args.plane_width
+        if offset_random is None:
+            offset_random = self.args.offset_random
+        device = self.device
+        # Orbit the camera around the origin, with optional jitter for augmentation.
+        offset = offset + np.random.normal() * offset_random * int(use_random)
+        position = torch.tensor([0, 0, -offset], dtype=torch.float)
+        position = utils.rotate_axis(position, altitude, 0)
+        position = utils.rotate_axis(position, angle, 1)
+        position = position + torch.randn(3) * xyz_random * int(use_random)
+        return torch.tensor([position[0], position[1], position[2],
+                             altitude, angle, 0,
+                             focal_length, plane_width], dtype=torch.float, device=device)
+
+    def render(self, cam_params=None):
+        if cam_params is None:
+            cam_params = self.camera(0, 0)
+        vert_pos, vert_col, radius, opacity = self.get_points()
+
+        rgb = self.renderer(vert_pos, vert_col, radius, cam_params,
+                            self.args.gamma, self.args.max_depth, opacity=opacity)
+        # A second pass with zeroed colors yields a map of how much background
+        # shows through each pixel, used as an alpha matte for compositing.
+        opacity = self.renderer(vert_pos, vert_col * 0, radius, cam_params,
+                                self.args.gamma, self.args.max_depth, opacity=opacity)
+        return rgb, opacity
+
+    def random_view_render(self):
+        angle = random.uniform(0, np.pi * 2)
+        altitude = random.uniform(-self.args.altitude_range / 2, self.args.altitude_range / 2)
+        cam_params = self.camera(angle, altitude)
+        result, alpha = self.render(cam_params)
+        # Composite over random Perlin-noise backgrounds so CLIP cannot latch onto a flat backdrop.
+        back = torch.zeros_like(result)
+        s = back.shape
+        for j in range(s[-1]):
+            n = random.choice([7, 14, 28])
+            back[..., j] = utils.rand_perlin_2d_octaves(s[:-1], (n, n)).clip(-0.5, 0.5) + 0.5
+        result = result * (1 - alpha) + back * alpha
+        return result
+
+    def generate(self):
+        self.optimizer.zero_grad()
+        try:
+            for i in trange(self.args.iterations + self.args.showoff):
+                if i < self.args.iterations:
+                    result = self.random_view_render()
+                    img_emb = self.model_clip.encode_image(
+                        self.preprocess_clip(result.permute(2, 0, 1)).unsqueeze(0).clamp(0., 1.))
+                    img_emb = torch.nn.functional.normalize(img_emb, dim=-1)
+                    if self.args.loss_type == "spherical":
+                        clip_loss = (img_emb - self.txt_emb).norm(dim=-1).div(2).arcsin().pow(2).mul(2).mean()
+                    elif self.args.loss_type == "cosine":
+                        clip_loss = (1 - img_emb @ self.txt_emb.T).mean()
+                    else:
+                        raise NotImplementedError(f"CLIP loss type not supported: {self.args.loss_type}")
+                    loss = clip_loss * self.args.clip_weight  # TODO add more loss types
+                    loss.backward()
+                    # Accumulate gradients over `augments` random views before each step.
+                    if i % self.args.augments == self.args.augments - 1:
+                        self.optimizer.step()
+                        self.optimizer.zero_grad()
+                        try:
+                            self.scheduler.step()
+                        except AttributeError:
+                            pass
+                if i % self.args.show_every == 0:
+                    # Deterministic turntable view for the preview/output frames.
+                    cam_params = self.camera(i / self.args.iterations * np.pi * 2 * self.args.turns, use_random=False)
+                    img_show, _ = self.render(cam_params)
+                    img = Image.fromarray((img_show.cpu().detach().numpy() * 255).astype(np.uint8))
+                    yield img
+        except KeyboardInterrupt:
+            pass
+
+
+class DotDict(dict):
+    def __getattr__(self, item):
+        return self.__getitem__(item)
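The two `loss_type` options are closely related: both embeddings are unit-normalized, so the chord length and the cosine similarity determine each other (‖u − v‖² = 2 − 2 u·v), and the spherical loss equals θ²/2 where θ is the great-circle angle between the embeddings. A quick sanity check of that identity, with random stand-in vectors (u, v, and the 512-dim size are illustrative, not the model's actual embeddings):

```python
import torch

u = torch.nn.functional.normalize(torch.randn(512), dim=-1)
v = torch.nn.functional.normalize(torch.randn(512), dim=-1)

cosine_loss = 1 - u @ v                                        # as in loss_type="cosine"
spherical_loss = (u - v).norm().div(2).arcsin().pow(2).mul(2)  # as in loss_type="spherical"

theta = torch.arccos((u @ v).clamp(-1, 1))                     # great-circle angle
assert torch.allclose(spherical_loss, theta ** 2 / 2, atol=1e-4)
```

Because the spherical loss is a monotone transform of the cosine loss, switching between them changes the gradient weighting across views rather than the optimum itself.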
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+pytorch3d==0.6.2
+transformers==4.10.3
+torch==1.11.0+cu113
+torchvision==0.12.0+cu113
+clip
+gradio
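Installation note (an assumption about the intended environment, not part of the commit): the `+cu113` builds are not on PyPI, so they are normally installed from the PyTorch wheel index, e.g. `pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113`. Likewise, the `clip` the code imports (`clip.load`, `clip.tokenize`) is OpenAI's CLIP, which is usually installed as `pip install git+https://github.com/openai/CLIP.git`; the bare `clip` name on PyPI is an unrelated package.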
utils.py
ADDED
@@ -0,0 +1,71 @@
+import random
+import torch
+import math
+
+
+def rotate_axis(x, add_angle=0, axis=1):  # TODO Replace with a rotation matrix  # But this is more fun
+    axes = list(range(3))
+    axes.remove(axis)
+    ax1, ax2 = axes
+    angle = torch.atan2(x[..., ax1], x[..., ax2])
+    if isinstance(add_angle, torch.Tensor):
+        while add_angle.ndim < angle.ndim:
+            add_angle = add_angle.unsqueeze(-1)
+    angle = angle + add_angle
+    # Caveat: full-vector norm, not the in-plane radius, so inputs should lie
+    # (mostly) in the plane perpendicular to `axis`, as the camera positions do.
+    dist = x.norm(dim=-1)
+    # Sort by axis index to put the rotated components back in x/y/z order.
+    _, t = zip(*sorted([
+        (axis, x[..., axis]),
+        (ax1, torch.sin(angle) * dist),
+        (ax2, torch.cos(angle) * dist),
+    ]))
+    return torch.stack(t, dim=-1)
+
+
+noise_level = 0.5
+
+
+# stolen from https://gist.github.com/ac1b097753f217c5c11bc2ff396e0a57
+# ported from https://github.com/pvigier/perlin-numpy/blob/master/perlin2d.py
+def rand_perlin_2d(shape, res, fade=lambda t: 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3):
+    delta = (res[0] / shape[0], res[1] / shape[1])
+    d = (shape[0] // res[0], shape[1] // res[1])
+
+    grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1])), dim=-1) % 1
+    angles = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
+    gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim=-1)
+
+    # Tile each cell's gradient across the pixels it covers.
+    tile_grads = lambda slice1, slice2: (gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]]
+                                         .repeat_interleave(d[0], 0).repeat_interleave(d[1], 1))
+    dot = lambda grad, shift: (
+        torch.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]),
+                    dim=-1) * grad[:shape[0], :shape[1]]).sum(dim=-1)
+
+    n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
+    n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
+    n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
+    n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
+    t = fade(grid[:shape[0], :shape[1]])
+    return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
+
+
+def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
+    noise = torch.zeros(shape)
+    frequency = 1
+    amplitude = 1
+    for _ in range(octaves):
+        noise += amplitude * rand_perlin_2d(shape, (frequency * res[0], frequency * res[1]))
+        frequency *= 2
+        amplitude *= persistence
+    # Random global scale and offset so backgrounds vary in contrast and brightness.
+    noise *= random.random() - noise_level  # haha
+    noise += random.random() - noise_level  # haha x2
+    return noise
+
+
+def load_clip(model_name="ViT-B/16", device="cuda:0" if torch.cuda.is_available() else "cpu"):
+    import clip
+    model, preprocess = clip.load(model_name, device=device, jit=False)
+    # Drop the PIL-based resize/crop steps, keeping only the final Normalize:
+    # rendered tensors are already the right size and in [0, 1].
+    if len(preprocess.transforms) > 4:
+        preprocess.transforms = preprocess.transforms[-1:]
+    return model, preprocess
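A small sanity check of `rotate_axis`, which `PulsarCLIP.camera` uses to orbit the camera around the scene (the numbers are illustrative): a quarter turn about the Y axis (`axis=1`) should take the starting position `[0, 0, -offset]` onto the negative X axis.

```python
import math
import torch
from utils import rotate_axis

p = torch.tensor([0.0, 0.0, -5.0])                 # camera start, as in PulsarCLIP.camera
q = rotate_axis(p, add_angle=math.pi / 2, axis=1)  # quarter turn around Y
print(q)                                           # ~tensor([-5., 0., 0.])
assert torch.allclose(q, torch.tensor([-5.0, 0.0, 0.0]), atol=1e-5)
```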