import os

import torch
from transformers import CLIPProcessor, CLIPModel

# Get the directory of the script
script_directory = os.path.dirname(os.path.realpath(__file__))
# Specify the directory where the cache will be stored (same folder as the script)
cache_directory = os.path.join(script_directory, "cache")
# Create the cache directory if it doesn't exist
os.makedirs(cache_directory, exist_ok=True)

# Load the CLIP processor and model
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=cache_directory)
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=cache_directory)
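
# Illustrative sketch (not part of the original script): CLIP's intended use is scoring how well
# an image matches candidate captions, not generating images. "example.jpg" is a hypothetical
# local file used only for demonstration; the block is skipped if no such file exists.
if os.path.exists("example.jpg"):
    from PIL import Image

    example_image = Image.open("example.jpg")
    candidate_texts = ["a cat sitting on a table", "a dog running in a park"]
    similarity_inputs = clip_processor(
        text=candidate_texts, images=example_image, return_tensors="pt", padding=True
    )
    with torch.no_grad():
        similarity_outputs = clip_model(**similarity_inputs)
    # logits_per_image has one row per image and one column per caption; softmax turns the
    # similarity scores into match probabilities
    match_probabilities = similarity_outputs.logits_per_image.softmax(dim=-1)
    print("Caption match probabilities:", match_probabilities.tolist())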

# Text prompt to encode. Note: CLIP is a contrastive image-text model; it maps text and
# images into a shared embedding space, but it cannot generate images from text on its own.
text = "a cat sitting on a table"

# Tokenize the text
inputs = clip_processor(text=text, return_tensors="pt", padding=True)

# Encode the text into a CLIP embedding (inference only, so gradients are not needed)
with torch.no_grad():
    text_features = clip_model.get_text_features(**inputs)

print("Text embedding shape:", tuple(text_features.shape))