patrickvonplaten committed on
Commit 27dfa17
1 Parent(s): 096afd9
Files changed (4)
  1. benchmark_llama.py +53 -0
  2. clear_mem.py +8 -6
  3. run_xl.py +4 -2
  4. save_lora.py +62 -0
benchmark_llama.py ADDED
@@ -0,0 +1,53 @@
+ #!/usr/bin/env python3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import time
+ import torch
+
+ DEVICE = "cuda:1"
+
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
+ model.to(DEVICE)
+
+
+ # forward
+ print("Forward benchmarks")
+ print(50 * "=")
+
+ for batch_size in (1, 4, 16):
+     for input_seq in (4, 16, 256):
+         input_ids = torch.ones((batch_size, input_seq), dtype=torch.long, device=DEVICE)
+         attention_mask = torch.ones_like(input_ids)
+         attention_mask[0, 3] = 0
+
+         times = []
+         for _ in range(3):
+             start_time = time.time()
+             with torch.no_grad():
+                 logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
+             times.append(time.time() - start_time)
+
+         result = min(times)
+
+         print(f"Forward bsz={batch_size}, input_seq={input_seq}: {result}")
+
+
+ # generate
+ print("Generate benchmarks")
+ print(50 * "=")
+
+ for batch_size in (1, 16):
+     for input_seq in (4, 256):
+         input_ids = torch.ones((batch_size, input_seq), dtype=torch.long, device=DEVICE)
+         attention_mask = torch.ones_like(input_ids)
+         attention_mask[0, 3] = 0
+
+         times = []
+         for _ in range(3):
+             start_time = time.time()
+             out = model.generate(input_ids=input_ids, max_new_tokens=256)
+             times.append(time.time() - start_time)
+
+         result = min(times)
+
+         print(f"Generate bsz={batch_size}, input_seq={input_seq}: {result}")
clear_mem.py CHANGED
@@ -1,10 +1,11 @@
  #!/usr/bin/env python3
  import torch
  import gc
+ from diffusers import DiffusionPipeline

- shape = (10,000)
+ shape = (30_000, 30_000)

- input = torch.ones((shape, shape), device="cuda")
+ input = torch.randn(shape, device="cuda")


  def clear_memory(model):
@@ -14,8 +15,9 @@ def clear_memory(model):
      torch.cuda.ipc_collect()
      torch.clear_autocast_cache()

- for _ in range(6):
-     linear = torch.nn.Linear(shape, shape).to("cuda")
-     output = linear(input)
+ for _ids in ["runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5"]:
+     pipe = DiffusionPipeline.from_pretrained(_ids, use_safetensors=True).to("cuda")
+     pipe("hey", num_inference_steps=1)
+     print("finished...")

-     clear_memory(linear)
+     clear_memory(pipe)
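
The hunks above only show the tail of clear_memory; a minimal sketch of what such a helper typically contains, assuming the two calls visible in the hunk plus the usual del/gc/empty_cache steps (the real body in clear_mem.py may differ):

    import gc
    import torch

    def clear_memory(model):
        # Sketch only: plausible body inferred from the visible hunk, not the committed code.
        del model                       # drop the local reference to the model/pipeline
        gc.collect()                    # run Python garbage collection
        torch.cuda.empty_cache()        # return cached CUDA blocks to the driver
        torch.cuda.ipc_collect()        # visible in the hunk above
        torch.clear_autocast_cache()    # visible in the hunk above
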
run_xl.py CHANGED
@@ -14,8 +14,10 @@ pipe2 = DiffusionPipeline.from_pretrained(
      variant="fp16",
      torch_dtype=torch.float16
  )
- pipe.enable_model_cpu_offload()
- pipe2.enable_model_cpu_offload()
+ pipe.to("cuda")
+ pipe2.to("cuda")
+ # pipe.enable_model_cpu_offload()
+ # pipe2.enable_model_cpu_offload()

  compel = Compel(
      tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,
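
For context on the swap above: enable_model_cpu_offload() keeps submodules on the CPU and moves each one to the GPU only while it runs (lower peak VRAM, slower), whereas .to("cuda") places the whole pipeline on the GPU up front. A small sketch of the two options, assuming the SDXL base checkpoint (the checkpoint actually loaded in run_xl.py is not visible in this hunk):

    # Sketch only: the two placement strategies toggled in run_xl.py.
    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",  # assumed checkpoint
        variant="fp16",
        torch_dtype=torch.float16,
    )

    pipe.to("cuda")                     # option used after this commit: whole pipeline on the GPU
    # pipe.enable_model_cpu_offload()   # previous option: offload modules to CPU between uses
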
save_lora.py ADDED
@@ -0,0 +1,62 @@
+ #!/usr/bin/env python3
+ import torch
+ from warnings import warn
+ from diffusers import (
+     AutoencoderKL,
+     DiffusionPipeline,
+ )
+ import hashlib
+
+ base = "stabilityai/stable-diffusion-xl-base-1.0"
+ adapter1 = 'nerijs/pixel-art-xl'
+ weightname1 = 'pixel-art-xl.safetensors'
+
+ adapter2 = 'Alexzyx/lora-trained-xl-colab'
+ weightname2 = None
+
+ inputs = "elephant"
+ kwargs = {}
+
+ if torch.cuda.is_available():
+     kwargs["torch_dtype"] = torch.float16
+
+ #vae = AutoencoderKL.from_pretrained(
+ #    "madebyollin/sdxl-vae-fp16-fix",
+ #    torch_dtype=torch.float16,  # load fp16 fix VAE
+ #)
+ #kwargs["vae"] = vae
+ #kwargs["variant"] = "fp16"
+ #
+
+ model = DiffusionPipeline.from_pretrained(
+     base, **kwargs
+ )
+
+ if torch.cuda.is_available():
+     model.to("cuda")
+
+
+ def inference(adapter, weightname):
+     model.load_lora_weights(adapter, weight_name=weightname)
+     try:
+         model.fuse_lora(safe_fusing=True)
+     except ValueError:
+         warn(f"{adapter} and {weightname} is broken. LoRA is not fused.")
+         model.unload_lora_weights()
+
+     data = model(inputs, num_inference_steps=1).images[0]
+     model.unfuse_lora()
+     model.unload_lora_weights()
+     filename = '/tmp/hello.jpg'
+     data.save(filename, format='jpeg')
+     with open(filename, 'rb') as f:
+         md5 = hashlib.md5(f.read()).hexdigest()
+     print("Adapter %s, md5sum %s" % (adapter, md5))
+     if md5 == '40c78c9fd4daeff01c988c3532fdd51b':
+         print("BLACK SCREEN IMAGE for adapter %s" % adapter)
+
+
+ inference(adapter1, weightname1)
+ inference(adapter2, weightname2)
+ inference(adapter1, weightname1)
+ inference(adapter1, weightname1)
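
The md5 comparison in inference() ties black-image detection to one specific JPEG byte stream; a more direct check, sketched below under the assumption that numpy is available, inspects the pixel values of the returned PIL image instead (hypothetical helper, not part of the commit):

    import numpy as np

    def is_black_image(image, threshold=1e-3):
        # Sketch only: flag an (almost) all-black output without relying on a fixed md5.
        arr = np.asarray(image, dtype=np.float32) / 255.0
        return float(arr.mean()) < threshold  # mean pixel intensity near zero => black frame

Such a helper could replace the hard-coded hash comparison, e.g. is_black_image(data) right after the pipeline call.
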