rynmurdock committed • Commit 9435075
Parent(s): 9239164
lfs and sync with blue-tigers github

- .gitattributes +20 -0
- app.py +78 -108
- eigth.gemb_.pt +3 -0
- eigth.im_.pt +3 -0
- fifth.gemb_.pt +3 -0
- fifth.im_.pt +3 -0
- first.gemb_.pt +3 -0
- first.im_.pt +3 -0
- fourth.gemb_.pt +3 -0
- fourth.im_.pt +3 -0
- lightning_app.py +0 -452
- ninth.gemb_.pt +3 -0
- ninth.im_.pt +3 -0
- requirements.txt +1 -3
- second.gemb_.pt +3 -0
- second.im_.pt +3 -0
- seventh.gemb_.pt +3 -0
- seventh.im_.pt +3 -0
- sixth.gemb_.pt +3 -0
- sixth.im_.pt +3 -0
- tenth.gemb_.pt +3 -0
- tenth.im_.pt +3 -0
- third.gemb_.pt +3 -0
- third.im_.pt +3 -0
- twitter_prompts.csv +0 -72
.gitattributes
CHANGED
@@ -1 +1,21 @@
 nsfweffnetv2-b02-3epochs.h5 filter=lfs diff=lfs merge=lfs -text
+fifth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+ninth.im_.pt filter=lfs diff=lfs merge=lfs -text
+tenth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+third.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+eigth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+first.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+fourth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+ninth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+sixth.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+tenth.im_.pt filter=lfs diff=lfs merge=lfs -text
+eigth.im_.pt filter=lfs diff=lfs merge=lfs -text
+seventh.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+sixth.im_.pt filter=lfs diff=lfs merge=lfs -text
+third.im_.pt filter=lfs diff=lfs merge=lfs -text
+fifth.im_.pt filter=lfs diff=lfs merge=lfs -text
+first.im_.pt filter=lfs diff=lfs merge=lfs -text
+fourth.im_.pt filter=lfs diff=lfs merge=lfs -text
+second.gemb_.pt filter=lfs diff=lfs merge=lfs -text
+second.im_.pt filter=lfs diff=lfs merge=lfs -text
+seventh.im_.pt filter=lfs diff=lfs merge=lfs -text
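These new patterns route the commit's precomputed calibration tensors (*.im_.pt image embeddings, *.gemb_.pt PaliGemma vision states) through Git LFS instead of plain git objects. As a rough sketch (not part of the commit), the tensors could be regenerated from the calibration videos along the lines below; it assumes app.py's encode_space() helper is in scope and mirrors the per-frame preprocessing that this commit removes from start():

import imageio
import torch
import torchvision

for name in ['first', 'second', 'third', 'fourth', 'fifth',
             'sixth', 'seventh', 'eigth', 'ninth', 'tenth']:
    frames = list(imageio.imiter(f'./{name}.mp4'))
    frame = frames[len(frames) // 2]                       # middle frame, as in app.py
    im = torchvision.transforms.ToTensor()(frame).unsqueeze(0)
    im = torch.nn.functional.interpolate(im, (224, 224))
    im = (im - .5) * 2                                     # scale to [-1, 1]
    im_emb, gemb = encode_space(frame, im)                 # assumed helper from app.py
    torch.save(im_emb.to('cpu'), f'{name}.im_.pt')
    torch.save(gemb.to('cpu'), f'{name}.gemb_.pt')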
app.py
CHANGED
@@ -10,12 +10,9 @@ STEPS = 6
 output_hidden_state = False
 device = "cuda"
 dtype = torch.bfloat16
+N_IMG_EMBS = 3

-import matplotlib.pyplot as plt
-import matplotlib
 import logging
-
-
 import os
 import imageio
 import gradio as gr
@@ -24,8 +21,6 @@ from sklearn.svm import SVC
 from sklearn import preprocessing
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
-import sched
-import threading

 import random
 import time
@@ -104,7 +99,7 @@ pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", mot
                                            unet=unet, text_encoder=text_encoder)
 pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
 pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
-pipe.set_adapters(["lcm-lora"], [.
+pipe.set_adapters(["lcm-lora"], [.95])
 pipe.fuse_lora()


@@ -121,6 +116,7 @@ pipe.unet.fuse_qkv_projections()
 #pipe.enable_free_init(method="gaussian", use_fast_sampling=True)

 pipe.to(device=DEVICE)
+
 #pipe.unet = torch.compile(pipe.unet)
 #pipe.vae = torch.compile(pipe.vae)

@@ -130,9 +126,10 @@ pipe.to(device=DEVICE)
 from transformers import AutoProcessor, PaliGemmaForConditionalGeneration, BitsAndBytesConfig

 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-pali = PaliGemmaForConditionalGeneration.from_pretrained('google/paligemma-3b-pt-224', torch_dtype=dtype,
+pali = PaliGemmaForConditionalGeneration.from_pretrained('google/paligemma-3b-pt-224', torch_dtype=dtype, quantization_config=quantization_config).eval()
 processor = AutoProcessor.from_pretrained('google/paligemma-3b-pt-224')

+#pali = torch.compile(pali)

 @spaces.GPU()
 def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None):
@@ -148,19 +145,34 @@ def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None
     return inputs_embeds


+# TODO cache descriptions?
 @spaces.GPU()
-def generate_pali(
-
-
-
-
-
-
-
-
+def generate_pali(n_embs):
+    prompt = 'caption en'
+    model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
+    # we need to get im_embs taken in here.
+
+    descs = ''
+    for n, emb in enumerate(n_embs):
+        if n < len(n_embs)-1:
+            input_len = model_inputs["input_ids"].shape[-1]
+            input_embeds = to_wanted_embs(emb,
+                                          model_inputs["input_ids"].to(device),
+                                          model_inputs["attention_mask"].to(device))
+            generation = pali.generate(max_new_tokens=20, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
+            decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+            descs += f'Description: {decoded}\n'
+        else:
+            prompt = f'en {descs} Describe a new image that is similar.'
+            print(prompt)
+            model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
+            input_len = model_inputs["input_ids"].shape[-1]
+            input_embeds = to_wanted_embs(emb,
+                                          model_inputs["input_ids"].to(device),
+                                          model_inputs["attention_mask"].to(device))
+            generation = pali.generate(max_new_tokens=20, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
+            decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

-    generation = pali.generate(max_new_tokens=100, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
-    decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
     return decoded


@@ -182,7 +194,7 @@ def generate_gpu(in_im_embs, prompt='the scene'):
     im = torchvision.transforms.ToTensor()(output.frames[0][len(output.frames[0])//2]).unsqueeze(0)
     im = torch.nn.functional.interpolate(im, (224, 224))
     im = (im - .5) * 2
-    gemb = pali.vision_tower(im.to(device).to(dtype)).last_hidden_state.detach().to('cpu').to(torch.float32)
+    gemb = pali.vision_tower(im.to(device).to(dtype)).last_hidden_state.detach().to('cpu').to(torch.float32)
     return output, im_emb, gemb


@@ -210,10 +222,10 @@ def generate(in_im_embs, prompt='the scene'):
 def get_user_emb(embs, ys):
     # handle case where every instance of calibration videos is 'Neither' or 'Like' or 'Dislike'

-    if len(list(ys)) <=
-    aways = [
+    if len(list(ys)) <= 10:
+        aways = [torch.zeros_like(embs[0]) for i in range(10)]
         embs += aways
-    awal = [0 for i in range(
+        awal = [0 for i in range(5)] + [1 for i in range(5)]
         ys += awal

     indices = list(range(len(embs)))
@@ -241,9 +253,10 @@ def get_user_emb(embs, ys):
     feature_embs = feature_embs / feature_embs.norm()

     #lin_class = Ridge(fit_intercept=False).fit(feature_embs, chosen_y)
-
+    #class_weight='balanced'
+    lin_class = SVC(max_iter=500, kernel='linear', C=.1, ).fit(feature_embs.squeeze(), chosen_y)
     coef_ = torch.tensor(lin_class.coef_, dtype=torch.float32).detach().to('cpu')
-    coef_ = coef_ / coef_.abs().max()
+    coef_ = coef_ / coef_.abs().max()

     w = 1# if len(embs) % 2 == 0 else 0
     im_emb = w * coef_.to(dtype=dtype)
@@ -273,7 +286,7 @@ def background_next_image():
     # only let it get N (maybe 3) ahead of the user
     #not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
     rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
-    while len(rated_rows) <
+    while len(rated_rows) < 5:
        # not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
        rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
        time.sleep(.01)
@@ -290,25 +303,21 @@
         rated_from_user = rated_rows[[i[1]['from_user_id'] == uid for i in rated_rows.iterrows()]]

         # we pop previous ratings if there are > n
-        if len(rated_from_user) >=
+        if len(rated_from_user) >= 25:
             oldest = rated_from_user.iloc[0]['paths']
             prevs_df = prevs_df[prevs_df['paths'] != oldest]
         # we don't compute more after n are in the queue for them
-        if len(unrated_from_user) >=
-            continue
-
-        if len(rated_rows) < 5:
+        if len(unrated_from_user) >= 20:
             continue

         embs, ys, gembs = pluck_embs_ys(uid)
-
-
-
-
-
-            text = generate_pali(user_gem)
+        user_emb = get_user_emb(embs, ys) * 3
+        pos_gembs = [g for g, y in zip(gembs, ys) if y == 1]
+        if len(pos_gembs) > 4:
+            hist_gem = random.sample(pos_gembs, N_IMG_EMBS) # rng n embeddings
+            text = generate_pali(hist_gem)
         else:
-            text =
+            text = 'the scene'
         img, embs, new_gem = generate(user_emb, text)

         if img:
@@ -351,60 +360,16 @@ def next_image(calibrate_prompts, user_id):
     if len(calibrate_prompts) > 0:
         cal_video = calibrate_prompts.pop(0)
         image = prevs_df[prevs_df['paths'] == cal_video]['paths'].to_list()[0]
-
         return image, calibrate_prompts, ''
     else:
         embs, ys, gembs = pluck_embs_ys(user_id)
-        user_emb = get_user_emb(embs, ys)
+        user_emb = get_user_emb(embs, ys) * 3
         image, text = pluck_img(user_id, user_emb)
         return image, calibrate_prompts, text



-
-
-
-done_init = False
-
 def start(_, calibrate_prompts, user_id, request: gr.Request):
-    global done_init
-    global prevs_df
-
-    if not done_init:
-        # prep our calibration videos
-        for im in [
-            './first.mp4',
-            # './second.mp4',
-            # './third.mp4',
-            # './fourth.mp4',
-            # './fifth.mp4',
-            # './sixth.mp4',
-            # './seventh.mp4',
-            # './eigth.mp4',
-            # './ninth.mp4',
-            # './tenth.mp4',
-        ]:
-            tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'text', 'gemb'])
-            tmp_df['paths'] = [im]
-            image = list(imageio.imiter(im))
-            image = image[len(image)//2]
-
-            im = torchvision.transforms.ToTensor()(image).unsqueeze(0)
-            im = torch.nn.functional.interpolate(im, (224, 224))
-            im = (im - .5) * 2
-
-            im_emb, gemb = encode_space(image, im)
-            im_emb = im_emb.to('cpu')
-            gemb = gemb.to('cpu')
-
-            tmp_df['embeddings'] = [im_emb]
-            tmp_df['gemb'] = [gemb]
-            tmp_df['user:rating'] = [{' ': ' '}]
-            prevs_df = pd.concat((prevs_df, tmp_df))
-        done_init = True
-
-
-
     user_id = int(str(time.time())[-7:].replace('.', ''))
     image, calibrate_prompts, text = next_image(calibrate_prompts, user_id)
     return [
@@ -436,6 +401,7 @@ def choose(img, choice, calibrate_prompts, user_id, request: gr.Request):
         print('NSFW -- choice is disliked')
         choice = 0

+    print(prevs_df['paths'].to_list(), img)
     row_mask = [p.split('/')[-1] in img for p in prevs_df['paths'].to_list()]
     # if it's still in the dataframe, add the choice
     if len(prevs_df.loc[row_mask, 'user:rating']) > 0:
@@ -506,11 +472,11 @@ Explore the latent space without text prompts based on your preferences. Learn m
     # calibration videos -- this is a misnomer now :D
     calibrate_prompts = gr.State([
         './first.mp4',
-
-
-
-
-
+        './second.mp4',
+        './third.mp4',
+        './fourth.mp4',
+        './fifth.mp4',
+        './sixth.mp4',
     ])
     def l():
         return None
@@ -569,26 +535,30 @@ scheduler = BackgroundScheduler()
 scheduler.add_job(func=background_next_image, trigger="interval", seconds=.5)
 scheduler.start()

-#thread = threading.Thread(target=background_next_image,)
-#thread.start()

-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# prep our calibration videos
+for im in [
+    './first.mp4',
+    './second.mp4',
+    './third.mp4',
+    './fourth.mp4',
+    './fifth.mp4',
+    './sixth.mp4',
+    './seventh.mp4',
+    './eigth.mp4',
+    './ninth.mp4',
+    './tenth.mp4',
+    ]:
+    tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'text', 'gemb'])
+    tmp_df['paths'] = [im]
+    image = list(imageio.imiter(im))
+    image = image[len(image)//2]
+    tmp_df['embeddings'] = [torch.load(im.replace('mp4', 'im_.pt'))]
+    tmp_df['gemb'] = [torch.load(im.replace('mp4', 'gemb_.pt'))]
+    tmp_df['user:rating'] = [{' ': ' '}]
+    prevs_df = pd.concat((prevs_df, tmp_df))
+
+
+demo.launch(share=True, server_port=8443)


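The rewritten generate_pali() above chains PaliGemma across several liked-video embeddings: each of the first n-1 vision-tower states is captioned with the 'caption en' prompt (20 new tokens apiece), the captions accumulate into a 'Description: ...' list, and the final embedding is prompted with that list plus 'Describe a new image that is similar.' to yield the next text prompt. A standalone sketch of a call, assuming the pali, processor, and device globals from app.py are loaded; the tensor shape is only illustrative of a 224x224 vision-tower output:

import torch

# three hypothetical vision-tower hidden states, shaped like
# pali.vision_tower(im).last_hidden_state for a 224x224 input
hist_gem = [torch.randn(1, 256, 1152) for _ in range(3)]

text = generate_pali(hist_gem)  # captions the first two, then asks for a similar scene
print(text)                     # used as the prompt for the next AnimateDiff generation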
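get_user_emb() now pads sparse ratings with ten zero embeddings and fixed half-zero, half-one labels, fits a lighter linear SVM (max_iter=500, C=.1), and max-normalizes the coefficient vector, which both callers then scale by 3. A minimal self-contained sketch of that preference-direction idea, with random stand-ins for the app's image embeddings:

import numpy as np
import torch
from sklearn.svm import SVC

embs = [torch.randn(1024) for _ in range(12)]   # stand-ins for rated image embeddings
ys = [0, 1] * 6                                 # 1 = Like, 0 = Dislike

feature_embs = torch.stack(embs)
feature_embs = (feature_embs / feature_embs.norm()).numpy()   # global norm, as in app.py
lin_class = SVC(max_iter=500, kernel='linear', C=.1).fit(feature_embs, np.array(ys))

coef_ = torch.tensor(lin_class.coef_, dtype=torch.float32)
coef_ = coef_ / coef_.abs().max()   # normalize as in the diff
user_emb = 3 * coef_                # the callers scale the direction by 3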
eigth.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:313d2e918194715ad1da5e0dbbd567ef086bb5365920b1d0ec8f727187611be2
+size 1180848

eigth.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6804672b5692563a7a6886a6e4010ab983dc6c1699cb6e41375776842fe4f2c7
+size 6310

fifth.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b55ac2c8c3b7109e2673d7ee6c631597832c0f78331ab116d6130e77c2323587
+size 1180848

fifth.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6165a30db650d03f17925a6590f4e0313d9c9c3ba2e4e4ce51fe00012d0efdff
+size 6310

first.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caca2ad20ebeefa19efbda52c60610521552759214a47fc36bf85c3ce2c7237d
+size 1180848

first.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1728d8e50da01013a81c681200e9a1568663b6b48bc824b1ad0f3894a7e06aa0
+size 6310

fourth.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc346bd7447c7f84119cc9275c74b5f41509b357358f2dff4aa5b63a246442ce
+size 1180853

fourth.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6b60957c398b1a3ccfe29c9fde226570a69ed2a127c9db82136fdc872e10a26
+size 6315
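Until `git lfs pull` has materialized them, each *.pt entry above exists in a checkout only as the three-line pointer stub shown (version, oid, size). A small defensive loader, sketched here as a suggestion rather than part of the repo, can distinguish the two states before torch.load() fails confusingly:

import torch

def load_lfs_tensor(path):
    # LFS pointer stubs begin with 'version https://git-lfs.github.com/spec/v1'
    with open(path, 'rb') as f:
        head = f.read(7)
    if head == b'version':
        raise RuntimeError(f'{path} is still an LFS pointer; run `git lfs pull` first')
    return torch.load(path)

emb = load_lfs_tensor('first.im_.pt')     # ~6 KB image embedding, per the sizes above
gemb = load_lfs_tensor('first.gemb_.pt')  # ~1.2 MB PaliGemma vision state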
lightning_app.py
DELETED
@@ -1,452 +0,0 @@
-
-import torch
-
-# lol
-sidel = 512
-DEVICE = 'cuda'
-STEPS = 4
-output_hidden_state = False
-device = "cuda"
-dtype = torch.float16
-
-import matplotlib.pyplot as plt
-import matplotlib
-matplotlib.use('TkAgg')
-
-from sklearn.linear_model import LinearRegression
-from sfast.compilers.diffusion_pipeline_compiler import (compile, compile_unet,
-                                                         CompilationConfig)
-config = CompilationConfig.Default()
-
-try:
-    import triton
-    config.enable_triton = True
-except ImportError:
-    print('Triton not installed, skip')
-config.enable_cuda_graph = True
-
-config.enable_jit = True
-config.enable_jit_freeze = True
-
-config.enable_cnn_optimization = True
-config.preserve_parameters = False
-config.prefer_lowp_gemm = True
-
-import imageio
-import gradio as gr
-import numpy as np
-from sklearn.svm import SVC
-from sklearn.inspection import permutation_importance
-from sklearn import preprocessing
-import pandas as pd
-
-import random
-import time
-from PIL import Image
-from safety_checker_improved import maybe_nsfw
-
-
-torch.set_grad_enabled(False)
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
-
-# TODO put back?
-# import spaces
-
-prompt_list = [p for p in list(set(
-    pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
-
-start_time = time.time()
-
-####################### Setup Model
-from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, LCMScheduler, ConsistencyDecoderVAE, AutoencoderTiny
-from hyper_tile import split_attention, flush
-from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
-from PIL import Image
-from transformers import CLIPVisionModelWithProjection
-import uuid
-import av
-
-def write_video(file_name, images, fps=10):
-    print('Saving')
-    container = av.open(file_name, mode="w")
-
-    stream = container.add_stream("h264", rate=fps)
-    stream.width = sidel
-    stream.height = sidel
-    stream.pix_fmt = "yuv420p"
-
-    for img in images:
-        img = np.array(img)
-        img = np.round(img).astype(np.uint8)
-        frame = av.VideoFrame.from_ndarray(img, format="rgb24")
-        for packet in stream.encode(frame):
-            container.mux(packet)
-    # Flush stream
-    for packet in stream.encode():
-        container.mux(packet)
-    # Close the file
-    container.close()
-    print('Saved')
-
-bases = {
-    #"basem": "emilianJR/epiCRealism"
-    #SG161222/Realistic_Vision_V6.0_B1_noVAE
-    #runwayml/stable-diffusion-v1-5
-    #frankjoshua/realisticVisionV51_v51VAE
-    #Lykon/dreamshaper-7
-}
-
-image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=dtype).to(DEVICE)
-vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=dtype)
-
-# vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=dtype)
-# vae = compile_unet(vae, config=config)
-
-#adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
-#pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, image_encoder=image_encoder, torch_dtype=dtype)
-#pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
-#pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
-#pipe.set_adapters(["lcm-lora"], [1])
-#pipe.fuse_lora()
-
-pipe = AnimateDiffPipeline.from_pretrained('emilianJR/epiCRealism', torch_dtype=dtype, image_encoder=image_encoder, vae=vae)
-pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
-repo = "ByteDance/AnimateDiff-Lightning"
-ckpt = f"animatediff_lightning_4step_diffusers.safetensors"
-pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device='cpu'), strict=False)
-
-
-pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin", map_location='cpu')
-pipe.set_ip_adapter_scale(.8)
-# pipe.unet.fuse_qkv_projections()
-#pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
-
-pipe = compile(pipe, config=config)
-pipe.to(device=DEVICE)
-
-
-# THIS WOULD NEED PATCHING TODO
-with split_attention(pipe.vae, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
-    # ! Change the tile_size and disable to see their effects
-    with split_attention(pipe.unet, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
-        im_embs = torch.zeros(1, 1, 1, 1024, device=DEVICE, dtype=dtype)
-        output = pipe(prompt='a person', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[im_embs], num_inference_steps=STEPS)
-        leave_im_emb, _ = pipe.encode_image(
-            output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
-        )
-        assert len(output.frames[0]) == 16
-        leave_im_emb.to('cpu')
-
-
-# TODO put back
-# @spaces.GPU()
-def generate(prompt, in_im_embs=None, base='basem'):
-
-    if in_im_embs == None:
-        in_im_embs = torch.zeros(1, 1, 1, 1024, device=DEVICE, dtype=dtype)
-        #in_im_embs = in_im_embs / torch.norm(in_im_embs)
-    else:
-        in_im_embs = in_im_embs.to('cuda').unsqueeze(0).unsqueeze(0)
-        #im_embs = torch.cat((torch.zeros(1, 1024, device=DEVICE, dtype=dtype), in_im_embs), 0)
-
-    with split_attention(pipe.unet, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
-        # ! Change the tile_size and disable to see their effects
-        with split_attention(pipe.vae, tile_size=128, disable=False, aspect_ratio=1):
-            output = pipe(prompt=prompt, guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
-
-    im_emb, _ = pipe.encode_image(
-        output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
-    )
-
-    nsfw = maybe_nsfw(output.frames[0][len(output.frames[0])//2])
-
-    name = str(uuid.uuid4()).replace("-", "")
-    path = f"/tmp/{name}.mp4"
-
-    if nsfw:
-        gr.Warning("NSFW content detected.")
-        # TODO could return an automatic dislike of auto dislike on the backend for neither as well; just would need refactoring.
-        return None, im_emb
-
-    plt.close('all')
-    plt.hist(np.array(im_emb.to('cpu')).flatten(), bins=5)
-    plt.savefig('real_im_emb_plot.jpg')
-
-    write_video(path, output.frames[0])
-    return path, im_emb.to('cpu')
-
-
-#######################
-
-# TODO add to state instead of shared across all
-glob_idx = 0
-
-def next_image(embs, ys, calibrate_prompts):
-    global glob_idx
-    glob_idx = glob_idx + 1
-
-    with torch.no_grad():
-        if len(calibrate_prompts) > 0:
-            print('######### Calibrating with sample prompts #########')
-            prompt = calibrate_prompts.pop(0)
-            print(prompt)
-            image, img_embs = generate(prompt)
-            embs += img_embs
-            print(len(embs))
-            return image, embs, ys, calibrate_prompts
-        else:
-            print('######### Roaming #########')
-
-            # sample a .8 of rated embeddings for some stochasticity, or at least two embeddings.
-            # could take a sample < len(embs)
-            #n_to_choose = max(int((len(embs))), 2)
-            #indices = random.sample(range(len(embs)), n_to_choose)
-
-            # sample only as many negatives as there are positives
-            #pos_indices = [i for i in indices if ys[i] == 1]
-            #neg_indices = [i for i in indices if ys[i] == 0]
-            #lower = min(len(pos_indices), len(neg_indices))
-            #neg_indices = random.sample(neg_indices, lower)
-            #pos_indices = random.sample(pos_indices, lower)
-            #indices = neg_indices + pos_indices
-
-            pos_indices = [i for i in range(len(embs)) if ys[i] == 1]
-            neg_indices = [i for i in range(len(embs)) if ys[i] == 0]
-
-            # the embs & ys stay tied by index but we shuffle to drop randomly
-            random.shuffle(pos_indices)
-            random.shuffle(neg_indices)
-
-            #if len(pos_indices) - len(neg_indices) > 48 and len(pos_indices) > 80:
-            #    pos_indices = pos_indices[32:]
-            if len(neg_indices) - len(pos_indices) > 48/16 and len(pos_indices) > 120/16:
-                pos_indices = pos_indices[1:]
-            if len(neg_indices) - len(pos_indices) > 48/16 and len(neg_indices) > 200/16:
-                neg_indices = neg_indices[2:]
-
-
-            print(len(pos_indices), len(neg_indices))
-            indices = pos_indices + neg_indices
-
-            embs = [embs[i] for i in indices]
-            ys = [ys[i] for i in indices]
-            indices = list(range(len(embs)))
-
-
-            # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
-            if len(list(set(ys))) <= 1:
-                embs.append(.01*torch.randn(1024))
-                embs.append(.01*torch.randn(1024))
-                ys.append(0)
-                ys.append(1)
-
-
-            # also add the latest 0 and the latest 1
-            has_0 = False
-            has_1 = False
-            for i in reversed(range(len(ys))):
-                if ys[i] == 0 and has_0 == False:
-                    indices.append(i)
-                    has_0 = True
-                elif ys[i] == 1 and has_1 == False:
-                    indices.append(i)
-                    has_1 = True
-                if has_0 and has_1:
-                    break
-
-            # we may have just encountered a rare multi-threading diffusers issue (https://github.com/huggingface/diffusers/issues/5749);
-            # this ends up adding a rating but losing an embedding, it seems.
-            # let's take off a rating if so to continue without indexing errors.
-            if len(ys) > len(embs):
-                print('ys are longer than embs; popping latest rating')
-                ys.pop(-1)
-
-            feature_embs = np.array(torch.stack([embs[i].to('cpu') for i in indices] + [leave_im_emb[0].to('cpu')]).to('cpu'))
-            scaler = preprocessing.StandardScaler().fit(feature_embs)
-            feature_embs = scaler.transform(feature_embs)
-            chosen_y = np.array([ys[i] for i in indices] + [0])
-
-            print('Gathering coefficients')
-            #lin_class = LinearRegression(fit_intercept=False).fit(feature_embs, chosen_y)
-            lin_class = SVC(max_iter=50000, kernel='linear', class_weight='balanced', C=1).fit(feature_embs, chosen_y)
-            coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
-            coef_ = coef_ / coef_.abs().max() * 3
-            print(coef_.shape, 'COEF')
-
-            plt.close('all')
-            plt.hist(np.array(coef_).flatten(), bins=5)
-            plt.savefig('plot.jpg')
-            print(coef_)
-            print('Gathered')
-
-            rng_prompt = random.choice(prompt_list)
-            w = 1# if len(embs) % 2 == 0 else 0
-            im_emb = w * coef_.to(dtype=dtype)
-
-            prompt= 'the scene' if glob_idx % 2 == 0 else rng_prompt
-            print(prompt)
-            image, im_emb = generate(prompt, im_emb)
-            embs += im_emb
-
-            if len(embs) > 700/16:
-                embs = embs[1:]
-                ys = ys[1:]
-
-            return image, embs, ys, calibrate_prompts
-
-
-
-
-
-
-
-
-def start(_, embs, ys, calibrate_prompts):
-    image, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
-    return [
-        gr.Button(value='Like (L)', interactive=True),
-        gr.Button(value='Neither (Space)', interactive=True),
-        gr.Button(value='Dislike (A)', interactive=True),
-        gr.Button(value='Start', interactive=False),
-        image,
-        embs,
-        ys,
-        calibrate_prompts
-    ]
-
-
-def choose(img, choice, embs, ys, calibrate_prompts):
-    if choice == 'Like (L)':
-        choice = 1
-    elif choice == 'Neither (Space)':
-        embs = embs[:-1]
-        img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
-        return img, embs, ys, calibrate_prompts
-    else:
-        choice = 0
-
-    # if we detected NSFW, leave that area of latent space regardless of how they rated chosen.
-    # TODO skip allowing rating
-    if img == None:
-        print('NSFW -- choice is disliked')
-        choice = 0
-
-    ys += [choice]*1
-    img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
-    return img, embs, ys, calibrate_prompts
-
-css = '''.gradio-container{max-width: 700px !important}
-#description{text-align: center}
-#description h1, #description h3{display: block}
-#description p{margin-top: 0}
-.fade-in-out {animation: fadeInOut 3s forwards}
-@keyframes fadeInOut {
-    0% {
-        background: var(--bg-color);
-    }
-    100% {
-        background: var(--button-secondary-background-fill);
-    }
-}
-'''
-js_head = '''
-<script>
-document.addEventListener('keydown', function(event) {
-    if (event.key === 'a' || event.key === 'A') {
-        // Trigger click on 'dislike' if 'A' is pressed
-        document.getElementById('dislike').click();
-    } else if (event.key === ' ' || event.keyCode === 32) {
-        // Trigger click on 'neither' if Spacebar is pressed
-        document.getElementById('neither').click();
-    } else if (event.key === 'l' || event.key === 'L') {
-        // Trigger click on 'like' if 'L' is pressed
-        document.getElementById('like').click();
-    }
-});
-function fadeInOut(button, color) {
-    button.style.setProperty('--bg-color', color);
-    button.classList.remove('fade-in-out');
-    void button.offsetWidth; // This line forces a repaint by accessing a DOM property
-
-    button.classList.add('fade-in-out');
-    button.addEventListener('animationend', () => {
-        button.classList.remove('fade-in-out'); // Reset the animation state
-    }, {once: true});
-}
-document.body.addEventListener('click', function(event) {
-    const target = event.target;
-    if (target.id === 'dislike') {
-        fadeInOut(target, '#ff1717');
-    } else if (target.id === 'like') {
-        fadeInOut(target, '#006500');
-    } else if (target.id === 'neither') {
-        fadeInOut(target, '#cccccc');
-    }
-});
-
-</script>
-'''
-
-with gr.Blocks(css=css, head=js_head) as demo:
-    gr.Markdown('''### Blue Tigers: Generative Recommenders for Exporation of Video
-Explore the latent space without text prompts based on your preferences. Learn more on [the write-up](https://rynmurdock.github.io/posts/2024/3/generative_recomenders/).
-''', elem_id="description")
-    embs = gr.State([])
-    ys = gr.State([])
-    calibrate_prompts = gr.State([
-        'the moon is melting into my glass of tea',
-        'a sea slug -- pair of claws scuttling -- jelly fish glowing',
-        'an adorable creature. It may be a goblin or a pig or a slug.',
-        'an animation about a gorgeous nebula',
-        'an octopus writhes',
-    ])
-    def l():
-        return None
-
-    with gr.Row(elem_id='output-image'):
-        img = gr.Video(
-            label='Lightning',
-            autoplay=True,
-            interactive=False,
-            height=sidel,
-            width=sidel,
-            include_audio=False,
-            elem_id="video_output"
-        )
-        img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
-    with gr.Row(equal_height=True):
-        b3 = gr.Button(value='Dislike (A)', interactive=False, elem_id="dislike")
-        b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither")
-        b1 = gr.Button(value='Like (L)', interactive=False, elem_id="like")
-        b1.click(
-            choose,
-            [img, b1, embs, ys, calibrate_prompts],
-            [img, embs, ys, calibrate_prompts]
-        )
-        b2.click(
-            choose,
-            [img, b2, embs, ys, calibrate_prompts],
-            [img, embs, ys, calibrate_prompts]
-        )
-        b3.click(
-            choose,
-            [img, b3, embs, ys, calibrate_prompts],
-            [img, embs, ys, calibrate_prompts]
-        )
-    with gr.Row():
-        b4 = gr.Button(value='Start')
-        b4.click(start,
-                 [b4, embs, ys, calibrate_prompts],
-                 [b1, b2, b3, b4, img, embs, ys, calibrate_prompts])
-    with gr.Row():
-        html = gr.HTML('''<div style='text-align:center; font-size:20px'>You will calibrate for several prompts and then roam. </ div><br><br><br>
-<div style='text-align:center; font-size:14px'>Note that while the AnimateDiff-Lightning model with NSFW filtering is unlikely to produce NSFW images, this may still occur, and users should avoid NSFW content when rating.
-</ div>
-<br><br>
-<div style='text-align:center; font-size:14px'>Thanks to @multimodalart for their contributions to the demo, esp. the interface and @maxbittker for feedback.
-</ div>''')
-
-demo.launch(share=True)
ninth.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:519ba2479c0605772adbb8405f267b0543316da7520d26988417104b2ffc176b
+size 1180848

ninth.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7583e812651917c82ad7f8e41a031d1b2568e1369dd5cc63959a6bc5fd32959
+size 6310
requirements.txt
CHANGED
@@ -15,6 +15,4 @@ tensorflow==2.14.0
 imageio
 apscheduler
 pandas
-av
-torchvision
-bitsandbytes
+av
second.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3fd4d35ade16f272d9df5ceb3faf859c20245553a32e41d1f0a7573e247ffde
+size 1180853

second.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:389cce5de6ae401dbef57ec7ef8561f4a871a8d84a9107403d511cf259ff1840
+size 6315

seventh.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8525e6bf8db787722604b13b5d00bb63b0ed20849ecd4c48cb1b64bafb9ba8fa
+size 1180858

seventh.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d36e188adfaaca095979cfb8b899e013151a4604cf85c3f194300632890a64d5
+size 6320

sixth.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23d6a9c82b0684aec1b5d643bc9613e46d312de82cfae2416d316286bca4d11a
+size 1180848

sixth.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8bb985011ba6fadd956681423b824783a7177f7bf3987527db92c657dbbda0b
+size 6310

tenth.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba865e86007c31b12074cbd7939fb19491abf80bc6d5f7c16f004f14c70cb2de
+size 1180848

tenth.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ded797adeade52c0b2c1ea28e65963336a2b3572c1b5bf3a3f3f0bdfdf7457b6
+size 6310

third.gemb_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b467e423b27fbc9963c581b3c24b6ed00cc2092d7ee207547f399904007bf67
+size 1180848

third.im_.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc7db62b595535a68c484d582484d9af1fb1e94673d3e3a9aa7d181c75fe1fec
+size 6310
twitter_prompts.csv
DELETED
@@ -1,72 +0,0 @@
-,0
-0,a sunset
-1,a still life in blue
-2,last day on earth
-3,the conch shell
-4,the winds of change
-5,a surrealist eye
-6,a surrealist polaroid photo of an apple
-7,metaphysics
-8,the sun is setting into my glass of tea
-9,the moon at 3am
-10,a memento mori
-11,quaking aspen tree
-12,violets and daffodils
-13,espresso
-14,sisyphus
-15,high windows of stained glass
-16,a green dog
-17,an adorable companion; it is a pig
-18,bird of paradise
-19,a complex intricate machine
-20,a white clock
-21,a film featuring the landscape Salt Lake City Utah
-22,a creature
-23,a house set aflame.
-24,a gorgeous landscape by Cy Twombly
-25,smoke rises from the caterpillar's hookah
-26,corvid in red
-27,Monet's pond
-28,Genesis
-29,Death is a black camel that kneels down so we can ride
-30,a cherry tree made of fractals
-29,the end of the sidewalk
-30,a polaroid photo of a bustling city of lights and sky scrapers
-31,The Fig Tree metaphor
-32,God killed Van Gogh.
-33,a cosmic entity alien with four eyes.
-34,a horse with 128 eyes.
-35,a being with an infinite set of eyes (it is omniscient)
-36,A sticky-note magnum opus featuring birds
-37,Moka Pot
-38,the moon is a sickle cell
-39,The Penultimate Supper
-40,Art
-41,surrealism
-42,a god made of wires & dust
-43,a dandelion blown into the universe
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-