EmaadKhwaja committed • Commit 64212e0 • Parent(s): 86d2765

update app.py

Browse files:
- .gitignore +2 -0
- app.py +114 -4
- celle/celle.py +1061 -0
- celle/utils.py +230 -0
- dataloader.py +308 -0
- requirements.txt +13 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+__pycache__
+env
app.py CHANGED
@@ -1,7 +1,117 @@
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
-
-
+from huggingface_hub import hf_hub_download
+from prediction import run_image_prediction
+import torch
+import torchvision.transforms as T
+from celle.utils import process_image
+from PIL import Image
+from matplotlib import pyplot as plt
+
+
+def gradio_demo(model_name, sequence_input, nucleus_image, protein_image):
+    model = hf_hub_download(repo_id=f"HuangLab/{model_name}", filename="model.ckpt")
+    config = hf_hub_download(repo_id=f"HuangLab/{model_name}", filename="config.yaml")
+    hf_hub_download(repo_id=f"HuangLab/{model_name}", filename="nucleus_vqgan.yaml")
+    hf_hub_download(repo_id=f"HuangLab/{model_name}", filename="threshold_vqgan.yaml")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    if 'Finetuned' in model_name:
+        dataset = 'OpenCell'
+    else:
+        dataset = 'HPA'
+
+    nucleus_image = process_image(nucleus_image, dataset, 'nucleus')
+    if protein_image:
+        protein_image = process_image(protein_image, dataset, 'protein')
+        protein_image = protein_image > torch.median(protein_image)
+        protein_image = protein_image[0, 0]
+        protein_image = protein_image * 1.0
+    else:
+        protein_image = torch.ones((256, 256))
+
+    threshold, heatmap = run_image_prediction(sequence_input=sequence_input,
+                                              nucleus_image=nucleus_image,
+                                              model_ckpt_path=model,
+                                              model_config_path=config,
+                                              device=device)
+
+    # Plot the heatmap
+    plt.imshow(heatmap.cpu(), cmap='rainbow', interpolation='bicubic')
+    plt.axis('off')
+
+    # Save the plot to a temporary file
+    plt.savefig('temp.png', bbox_inches='tight', dpi=256)
+
+    # Open the temporary file as a PIL image
+    heatmap = Image.open('temp.png')
+
+    return T.ToPILImage()(nucleus_image[0, 0]), T.ToPILImage()(protein_image), T.ToPILImage()(threshold), heatmap
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("Select the prediction model.")
+    gr.Markdown("CELL-E_2_HPA_480 is a good general purpose model for various cell types using ICC-IF.")
+    gr.Markdown("CELL-E_2_HPA_Finetuned_480 is finetuned on OpenCell and is better for live-cell predictions on HEK cells.")
+    with gr.Row():
+        model_name = gr.Dropdown(['CELL-E_2_HPA_480', 'CELL-E_2_HPA_Finetuned_480'],
+                                 value='CELL-E_2_HPA_480', label='Model Name')
+    with gr.Row():
+        gr.Markdown("Input the desired amino acid sequence. GFP is shown below by default.")
+
+    with gr.Row():
+        sequence_input = gr.Textbox(value='MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK',
+                                    label='Sequence')
+    with gr.Row():
+        gr.Markdown("Uploading a nucleus image is necessary. A random crop of 256 x 256 will be applied if larger.")
+        gr.Markdown("The protein image is optional and is just used for display.")
+
+    with gr.Row().style(equal_height=True):
+        nucleus_image = gr.Image(value='images/Armadillo repeat-containing X-linked protein 5 nucleus.jpg',
+                                 type='pil',
+                                 label='Nucleus Image')
+
+        protein_image = gr.Image(type='pil', label='Protein Image (Optional)')
+
+    with gr.Row():
+        gr.Markdown("Image predictions are shown below.")
+
+    with gr.Row().style(equal_height=True):
+        nucleus_image_crop = gr.Image(type='pil',
+                                      label='Nucleus Image')
+
+        protein_threshold_image = gr.Image(type='pil',
+                                           label='Protein Threshold Image')
+
+        predicted_threshold_image = gr.Image(type='pil',
+                                             label='Predicted Threshold Image')
+
+        predicted_heatmap = gr.Image(type='pil',
+                                     label='Predicted Heatmap')
+    with gr.Row():
+        button = gr.Button("Run Model")
+
+    inputs = [model_name,
+              sequence_input,
+              nucleus_image,
+              protein_image]
+
+    outputs = [nucleus_image_crop,
+               protein_threshold_image,
+               predicted_threshold_image,
+               predicted_heatmap]
+
+    button.click(gradio_demo, inputs, outputs)
+
+    examples = [['CELL-E_2_HPA_Finetuned_480',
+                 'MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK',
+                 'images/Proteasome activator complex subunit 3 nucleus.png',
+                 'images/Proteasome activator complex subunit 3 protein.png'],
+                ['CELL-E_2_HPA_480',
+                 'MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK',
+                 'images/Armadillo repeat-containing X-linked protein 5 nucleus.jpg',
+                 'images/Armadillo repeat-containing X-linked protein 5 protein.jpg']]
+
+# demo = gr.Interface(gradio_demo, inputs, outputs, examples, cache_examples=True, layout = layout)
+demo.launch(share=True)
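For context, a minimal sketch (not part of this commit) of how the new gradio_demo function could be smoke-tested outside the Blocks UI. It assumes the HuangLab checkpoints are reachable on the Hub and that the bundled example image in images/ exists locally; the function and file names come from the diff above.

# Hypothetical local check of gradio_demo; not part of the committed app.py.
from PIL import Image

nucleus = Image.open("images/Armadillo repeat-containing X-linked protein 5 nucleus.jpg")
gfp = "MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK"

# Returns (cropped nucleus, protein threshold, predicted threshold, predicted heatmap) as PIL images.
nucleus_crop, protein_thresh, pred_thresh, pred_heatmap = gradio_demo(
    "CELL-E_2_HPA_480", gfp, nucleus, None
)
pred_heatmap.save("predicted_heatmap.png")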
celle/celle.py ADDED
@@ -0,0 +1,1061 @@
+# Import necessary packages and modules
+from math import floor, ceil
+import torch
+from torch import nn
+import torch.nn.functional as F
+from axial_positional_embedding import AxialPositionalEmbedding
+from einops import rearrange
+from celle.utils import (
+    exists,
+    always,
+    eval_decorator,
+    gumbel_sample,
+    top_k,
+    gamma_func,
+    DivideMax,
+)
+from tqdm import tqdm
+
+# Import additional modules from within the codebase
+from celle.transformer import Transformer
+
+
+def generate_mask(gamma_func, batch_size, length, device):
+    # Get the number of `True` values in the mask for each batch element
+    num_true_values = floor(gamma_func(torch.rand(1)) * length)
+
+    # Generate a random sample of indices to set to `True` in the mask
+    # The number of indices in the sample is determined by `num_true_values`
+    indices = (
+        torch.rand((batch_size, length), device=device)
+        .topk(num_true_values, dim=1)
+        .indices
+    )
+
+    # Create a binary mask tensor with `True` values at the sampled indices
+    mask = torch.zeros((batch_size, length), dtype=torch.bool, device=device)
+    mask.scatter_(dim=1, index=indices, value=True)
+
+    return mask
+
+
+def match_batch_size(text, condition, image, batch_size):
+    """
+    This function ensures all inputs to the sample function have the same batch size.
+    """
+    if text.shape[0] != batch_size:
+        text = text.repeat(batch_size, 1)
+
+    if condition.shape[0] != batch_size:
+        condition = condition.repeat(batch_size, 1)
+
+    if image.shape[0] != batch_size:
+        image = image.repeat(batch_size, 1)
+
+    return text, condition, image
+
+
+def calc_unmask_probs(timestep, timesteps, gamma_func):
+    if timestep == 1 or timesteps == 1:
+        unmask_prob = 1
+    else:
+        unmask_prob = 1 - gamma_func(timestep)
+    return unmask_prob
+
+
+def calculate_logits(
+    input_tokens, input_mask, logits_function, filter_thres, temperature
+):
+    logits, _, _ = logits_function(input_tokens, input_mask, return_encoding=False)
+    filtered_logits = top_k(logits, thres=filter_thres)
+    sample = gumbel_sample(filtered_logits, temperature=temperature, dim=-1)
+
+    return logits, sample
+
+
+def unmask_tokens(
+    input_tokens,
+    input_mask,
+    num_masked_tokens,
+    logits,
+    sample,
+    timestep,
+    timesteps,
+    gamma,
+    filter_func=None,
+    pad_token=None,
+    mask_token=None,
+    force_aas=True,
+):
+    sample = sample.masked_fill(~input_mask.unsqueeze(-1), -torch.inf)
+    if filter_func:
+        sample = filter_func(
+            input_tokens, sample, force_aas, pad_token=pad_token, mask_token=mask_token
+        )
+    selected_token_probs, selected_tokens = torch.max(sample, dim=-1)
+
+    unmask_prob = calc_unmask_probs(timestep, timesteps, gamma)
+    num_tokens_to_unmask = max(1, ceil(unmask_prob * num_masked_tokens))
+
+    _, top_k_indices = torch.topk(selected_token_probs, num_tokens_to_unmask, dim=-1)
+
+    sample_mask = torch.zeros(
+        input_tokens.shape, dtype=torch.bool, device=input_tokens.device
+    )
+    sample_mask.scatter_(dim=1, index=top_k_indices, value=True)
+
+    unmasked_tokens = torch.where(sample_mask, selected_tokens, input_tokens)
+    full_logits = torch.where(
+        sample_mask.unsqueeze(-1), logits, torch.zeros_like(logits)
+    )
+    return unmasked_tokens, full_logits
+
+
+def suppress_invalid_text_tokens(
+    text,
+    logits,
+    start_token=None,
+    end_token=None,
+    pad_token=None,
+    mask_token=None,
+    force_aas=False,
+):
+    # Find the indices of start_token and end_token in tensor text along axis=1
+    idx_start = (text == start_token).nonzero(as_tuple=True)[1]
+    idx_end = (text == end_token).nonzero(as_tuple=True)[1]
+
+    # For every position other than the index corresponding to the start index, set the values on the start index of dimension=2 to -torch.inf
+    if idx_start.nelement() != start_token:
+        try:
+            mask = idx_start.unsqueeze(1) != torch.arange(
+                logits.size(1), device=text.device
+            )
+            indices = torch.where(mask)
+            logits[indices[0], indices[1], start_token] = -torch.inf
+        except:
+            pass
+
+    # else:
+    #     idx_start = torch.zeros(text.size(0), dtype=torch.long)
+
+    # Similarly, for every position other than the index corresponding to the end index, set the values on the end index of dimension=2 to -torch.inf
+    if idx_end.nelement() != 0:
+        try:
+            mask = idx_end.unsqueeze(1) != torch.arange(
+                logits.size(1), device=text.device
+            )
+            indices = torch.where(mask)
+            logits[indices[0], indices[1], end_token] = -torch.inf
+        except:
+            pass
+
+    # else:
+    #     idx_end = torch.full((text.size(0),), text.size(1) - 1, dtype=torch.long)
+
+    if pad_token:
+        if idx_start.nelement() != 0 and idx_end.nelement() != 0:
+            try:
+                # For every position between the indices of start_token and end_token, set the values for 1st index of dimension=2 equal to -torch.inf. Any value outside of that range should be set to torch.inf.
+                mask = (
+                    torch.arange(logits.size(1), device=text.device)
+                    >= idx_start.unsqueeze(1)
+                ) & (
+                    torch.arange(logits.size(1), device=text.device)
+                    <= idx_end.unsqueeze(1)
+                )
+
+                indices = torch.where(mask)
+                logits[indices[0], indices[1], pad_token] = -torch.inf
+
+                indices = torch.where(~mask)
+                logits[indices[0], indices[1], pad_token] = torch.inf
+
+            except:
+                pass
+
+        elif idx_start.nelement() != 0:
+            try:
+                mask = torch.arange(
+                    logits.size(1), device=text.device
+                ) < idx_start.unsqueeze(1)
+                logits[indices[0], indices[1], pad_token] = torch.inf
+            except:
+                pass
+
+        elif idx_end.nelement() != 0:
+            try:
+                mask = torch.arange(
+                    logits.size(1), device=text.device
+                ) > idx_end.unsqueeze(1)
+                logits[indices[0], indices[1], pad_token] = torch.inf
+            except:
+                pass
+
+    if force_aas:
+        if pad_token:
+            logits[:, :, pad_token] = -torch.inf
+        logits[:, :, 3] = -torch.inf
+        logits[:, :, 29:] = -torch.inf
+
+    if mask_token:
+        logits[:, :, mask_token] = -torch.inf
+
+    return logits
+
+
+def detokenize_text(text_embedding, sequence):
+    if text_embedding == "esm1b" or text_embedding == "esm2":
+        from esm import Alphabet
+
+        alphabet = (
+            Alphabet.from_architecture("ESM-1b").get_batch_converter().alphabet.all_toks
+        )
+    else:
+        assert NameError("Detokenization only available for ESM models")
+
+    output_seqs = []
+
+    for batch in sequence:
+        converted_seq = [alphabet[idx] for idx in batch]
+        converted_seq = "".join(converted_seq)
+        output_seqs.append(converted_seq)
+
+    return output_seqs
+
+class ImageEmbedding(nn.Module):
+    def __init__(self, num_tokens, dim):
+        super(ImageEmbedding, self).__init__()
+        self.image_embedding = nn.Embedding(num_tokens, dim)
+
+    def forward(self, image):
+        return self.image_embedding(image)
+
+
+class ModelExtender(nn.Module):
+    def __init__(self, vocab, out_features, fixed_embedding=False):
+        super(ModelExtender, self).__init__()
+
+        # Initialize the model according to the given vocabulary
+        self.vocab = vocab
+
+        if vocab == "esm1b":
+            from esm import pretrained
+
+            self.model, _ = pretrained.esm1b_t33_650M_UR50S()
+            self.in_features = 1280
+        elif vocab == "esm2":
+            from esm import pretrained
+
+            if out_features == 320:
+                self.model, _ = pretrained.esm2_t6_8M_UR50D()
+            elif out_features == 480:
+                self.model, _ = pretrained.esm2_t12_35M_UR50D()
+            elif out_features == 640:
+                self.model, _ = pretrained.esm2_t30_150M_UR50D()
+            elif out_features == 1280:
+                self.model, _ = pretrained.esm2_t33_650M_UR50D()
+            elif out_features == 2560:
+                self.model, _ = pretrained.esm2_t36_3B_UR50D()
+            else:
+                self.model, _ = pretrained.esm2_t33_650M_UR50D()
+            self.in_features = self.model.embed_dim
+
+        # Set the number of output features and initialize the scaling layer
+        self.out_features = out_features
+        self.scale_layer = nn.Linear(self.in_features, self.out_features)
+
+        # Determine whether to freeze the model's parameters
+        self.fixed_embedding = fixed_embedding
+        if self.fixed_embedding:
+            self.model = self.model.eval()
+
+    def forward(self, x, **kwargs):
+        # If the model's parameters are fixed, use torch.no_grad()
+        if self.fixed_embedding:
+            with torch.no_grad():
+                if self.vocab == "esm1b" or self.vocab == "esm2":
+                    # Reduce sequence length dimension, get top layer representation tensor
+                    x = self.model(x.squeeze(1), repr_layers=[self.model.num_layers])[
+                        "representations"
+                    ][self.model.num_layers]
+                    # Tensor shape: (batch_size, hidden_size)
+                else:
+                    # Get top layer representation tensor
+                    x = self.model(x, **kwargs)[0]
+                    # Tensor shape: (batch_size, sequence_length, hidden_size)
+        else:
+            if self.vocab == "esm1b" or self.vocab == "esm2":
+                # Reduce sequence length dimension, get top layer representation tensor
+                x = self.model(x.squeeze(1), repr_layers=[self.model.num_layers])[
+                    "representations"
+                ][self.model.num_layers]
+                # Tensor shape: (batch_size, hidden_size)
+            else:
+                # Get top layer representation tensor
+                x = self.model(x, **kwargs)[0]
+                # Tensor shape: (batch_size, sequence_length, hidden_size)
+
+        # Scale the representation tensor if necessary
+        if self.out_features != self.in_features:
+            x = self.scale_layer(x)
+            # Tensor shape: (batch_size, out_features)
+
+        return x
+
+class CELLE(nn.Module):
+    def __init__(
+        self,
+        *,
+        dim,
+        vae,  # The VAE model used to encode/decode images
+        condition_vae=None,  # An optional VAE model used to condition the image generation
+        num_images=2,  # Number of images to generate
+        num_text_tokens=30,  # Number of tokens in the text vocabulary
+        text_seq_len=1000,  # Maximum length of input text sequence
+        depth=16,  # Number of layers in the transformer model
+        heads=16,  # Number of attention heads
+        dim_head=64,  # Dimensionality of each attention head
+        attn_dropout=0.1,  # Dropout rate for attention weights
+        ff_dropout=0.1,  # Dropout rate for feedforward layers
+        attn_types=None,  # Types of attention to use in the transformer
+        causal=False,  # Whether to use causal attention
+        loss_cond_weight=1,  # Weight of conditioning loss
+        loss_img_weight=1,  # Weight of image generation loss
+        stable=False,  # Whether to use divide-by-max normalization in the transformer
+        rotary_emb=True,  # Whether to use rotary positional embeddings
+        text_embedding="esm2",  # Text embedding to use (esm1b, esm2)
+        fixed_embedding=True,  # Whether to fix the text embedding or learn it
+        sampling_mode="cosine",  # Sampling mode for the VAE
+        linear_project=False,  # Whether to project embeddings linearly
+        **kwargs,
+    ):
+        super().__init__()
+
+        # Set the stable flag
+        self.stable = stable
+
+        # If the stable flag is set, initialize the DivideMax layer for normalization
+        if stable:
+            self.norm_by_max = DivideMax(dim=-1)
+
+        ### Initializing text parameters ###
+
+        # Initialize the text and fixed embeddings
+        self.text_embedding = text_embedding
+        self.fixed_embedding = fixed_embedding
+
+        # Offset logits index and calculate cross entropy loss
+        self.num_text_tokens = num_text_tokens
+        self.linear_project = linear_project
+
+        # Add <BOS> and <EOS> tokens to the beginning and end of text sequences
+        if text_embedding.lower() in ("esm1b", "esm2"):
+            self.text_seq_len = text_seq_len + 2
+        else:
+            self.text_seq_len = text_seq_len
+
+        # Initialize embeddings for <SEP> token
+        self.sep_emb = nn.Embedding(1, dim)
+
+        # Initialize positional embeddings for text sequences and <SEP> token
+        self.text_pos_emb = (
+            nn.Embedding(self.text_seq_len + 1, dim) if not rotary_emb else always(0)
+        )  # +1 for <SEP>
+
+        ### ###
+
+        self.num_images = num_images
+
+        ### Initializing condition parameters ###
+
+        # Initialize the number of condition tokens, condition sequence length, and condition embedding
+        if exists(condition_vae):
+            condition_size = condition_vae.image_size
+            num_condition_tokens = condition_vae.num_tokens
+            self.num_condition_tokens = num_condition_tokens
+            condition_fmap_size = condition_vae.image_size // (
+                2**condition_vae.num_layers
+            )
+            condition_seq_len = condition_fmap_size**2
+
+            # Initialize ImageEmbedding for condition embedding
+            self.condition_emb = ImageEmbedding(num_condition_tokens + 1, dim)
+
+            # Initialize positional embeddings for condition embedding
+            self.condition_pos_emb = (
+                AxialPositionalEmbedding(
+                    dim, axial_shape=(condition_fmap_size, condition_fmap_size)
+                )
+                if not rotary_emb
+                else always(0)
+            )
+
+        else:
+            condition_fmap_size = 0
+            condition_seq_len = 0
+            num_condition_tokens = 0
+
+        ### ####
+
+        ### Initializing image parameters ###
+
+        # Initialize the image size, image token size, and sequence length
+        self.image_size = vae.image_size
+        num_image_tokens = vae.num_tokens
+        image_fmap_size = vae.image_size // (2**vae.num_layers)
+        image_seq_len = image_fmap_size**2
+        self.image_seq_len = image_seq_len
+        self.num_image_tokens = num_image_tokens
+
+        # Initialize ImageEmbedding and positional embeddings for image embedding
+        self.image_emb = ImageEmbedding(num_image_tokens + 1, dim)  # +1 for <IM_MASK>
+
+        self.image_pos_emb = (
+            AxialPositionalEmbedding(
+                dim, axial_shape=(image_fmap_size, image_fmap_size)
+            )
+            if not rotary_emb
+            else always(0)
+        )
+
+        # Set total sequence length and total tokens
+        self.num_condition_tokens = num_condition_tokens
+        self.condition_seq_len = condition_seq_len
+        # Text Length + <SEP> + Condition Tokens + Image Tokens
+        seq_len = self.text_seq_len + 1 + self.condition_seq_len + self.image_seq_len
+        total_tokens = (
+            num_text_tokens + 1 + num_condition_tokens + 1 + num_image_tokens + 1
+        )
+        self.total_tokens = total_tokens
+        self.total_seq_len = seq_len
+
+        # Set the VAE and condition VAE for the model
+        self.vae = vae.eval()
+        self.condition_vae = condition_vae.eval()
+
+        ### ###
+
+        ### Setting discrete ids ###
+        # Initialize text embedding based on the given text_embedding parameter
+        if text_embedding == "esm1b" or text_embedding == "esm2":
+            self.text_mask_token = 32
+            self.pad_token = 1
+            self.text_emb = ModelExtender(text_embedding, dim, fixed_embedding)
+        else:
+            raise ValueError("Only ESM models are supported.")
+
+        # Set token indices for text, condition, and image sequences
+        self.sep_token = num_text_tokens
+        self.cond_mask_token = num_condition_tokens
+        self.image_mask_token = num_image_tokens
+
+        # Create indices for sequence and logits dimensions
+        self.seq_range = torch.arange(seq_len)
+        self.logits_range = torch.arange(total_tokens)
+
+        # Reshape sequence and logits indices
+        self.seq_range = rearrange(self.seq_range, "n -> () n ()")
+        self.logits_range = rearrange(self.logits_range, "d -> () () d")
+
+        # Create a mask to exclude invalid token positions from the model output
+        # e.g. no image tokens where sequence tokens should be
+        logits_mask = (
+            # Mask text tokens beyond text_seq_len and invalid logits_range
+            (
+                (self.seq_range < self.text_seq_len)
+                & (self.logits_range < num_text_tokens)
+                & (self.logits_range != self.text_mask_token)
+            )
+            |
+            # Mask [SEP] token after text
+            (
+                (self.seq_range == self.text_seq_len)
+                & (self.logits_range == num_text_tokens)
+            )
+            |
+            # Mask condition tokens beyond text_seq_len+1 ([SEP]) and invalid logits_range
+            (
+                (self.seq_range >= self.text_seq_len + 1)
+                & (self.seq_range < self.text_seq_len + 1 + condition_seq_len)
+                & (self.logits_range >= num_text_tokens + 1)
+                & (self.logits_range < num_text_tokens + 1 + num_condition_tokens)
+            )
+            |
+            # Mask image tokens beyond num_text_tokens+num_condition_tokens+1
+            (
+                (self.seq_range >= self.text_seq_len + 1 + condition_seq_len)
+                & (self.logits_range >= num_text_tokens + 1 + num_condition_tokens + 1)
+                & (
+                    self.logits_range
+                    < num_text_tokens + 1 + num_condition_tokens + 1 + num_image_tokens
+                )
+            )
+        )
+
+        # Invert the mask
+        logits_mask = ~logits_mask
+
+        # Register the buffer with the logits_mask
+        self.register_buffer("logits_mask", logits_mask, persistent=False)
+
+        ### ###
+
+        # Initialize the Transformer model with given parameters
+        self.transformer = Transformer(
+            dim=dim,
+            causal=causal,
+            seq_len=seq_len,
+            depth=depth,
+            heads=heads,
+            dim_head=dim_head,
+            attn_dropout=attn_dropout,
+            ff_dropout=ff_dropout,
+            image_fmap_size=image_fmap_size + condition_fmap_size,
+            num_images=num_images,
+            stable=stable,
+            rotary_emb=rotary_emb,
+        )
+
+        # Initialize the linear layers for converting transformer output to logits
+        self.to_logits = nn.Sequential(
+            nn.LayerNorm(dim),
+            nn.Linear(dim, self.total_tokens),
+        )
+
+        # Set instance variables for weights and critic
+        self.loss_img_weight = loss_img_weight
+        self.loss_cond_weight = loss_cond_weight
+        self.gamma = gamma_func(sampling_mode)
+
+    def embed_and_transform(self, inputs, masks, return_encoding=False):
+        text, condition, image = inputs
+        device = text.device
+        text_mask, _, image_mask = masks
+
+        text_labels = text.clone()
+        text = torch.where(
+            text_mask, self.text_mask_token * torch.ones_like(text, device=device), text
+        )
+
+        tokens = self.text_emb(text)
+
+        # Add SEP token
+
+        sep_token_emb = self.sep_emb(
+            torch.zeros((tokens.shape[0], 1), dtype=torch.long, device=device)
+        )
+        tokens = torch.cat((tokens, sep_token_emb), dim=1)
+        tokens += self.text_pos_emb(torch.arange(text.shape[1] + 1, device=device))
+
+        with torch.no_grad():
+            if self.linear_project:
+                b = condition.shape[0]
+                condition, _, [_, _, condition_labels] = self.condition_vae.encode(
+                    condition
+                )
+                condition_labels = rearrange(condition_labels, "(b n) -> b n", b=b)
+
+            else:
+                condition_labels = condition
+                if condition.dtype == torch.float:
+                    condition_labels = self.condition_vae.get_codebook_indices(
+                        condition
+                    )
+                condition = condition_labels.clone()
+
+        condition_emb = self.condition_emb(condition)
+        condition_emb += self.condition_pos_emb(condition_emb)
+        tokens = torch.cat((tokens, condition_emb), dim=1)
+
+        with torch.no_grad():
+            if self.linear_project:
+                b = image.shape[0]
+                image, _, [_, _, image_labels] = self.vae.encode(image)
+                image_labels = rearrange(image_labels, "(b n) -> b n", b=b)
+
+            else:
+                image_labels = image
+                if image.dtype == torch.float:
+                    image_labels = self.vae.get_codebook_indices(image)
+                image = torch.where(
+                    image_mask,
+                    self.image_mask_token
+                    * torch.ones_like(image_labels, device=device),
+                    image_labels,
+                )
+
+        image_emb = self.image_emb(image)
+
+        image_emb += self.image_pos_emb(image_emb)
+        tokens = torch.cat((tokens, image_emb), dim=1)
+
+        if self.stable:
+            alpha = 0.1
+            tokens = tokens * alpha + tokens.detach() * (1 - alpha)
+
+        out = self.transformer(tokens)
+
+        if self.stable:
+            out = self.norm_by_max(out)
+
+        logits = self.to_logits(out)
+
+        max_neg_value = -torch.finfo(logits.dtype).max
+        logits.masked_fill_(self.logits_mask, max_neg_value)
+
+        if return_encoding:
+            return logits, out, [text_labels, condition_labels, image_labels]
+        else:
+            return logits, None, [text_labels, condition_labels, image_labels]
+
+    def forward(
+        self,
+        text,
+        condition=None,
+        image=None,
+        return_loss=False,
+        return_encoding=False,
+    ):
+        batch_size, device = text.shape[0], text.device
+
+        # Check that image is supplied when training
+        assert exists(image), "when training, image must be supplied"
+
+        # Check that image dimensions match the expected dimensions
+        assert tuple(image.shape[1:]) == (
+            self.vae.channels,
+            self.image_size,
+            self.image_size,
+        ), f"invalid image of dimensions {image.shape} passed in during training"
+
+        # Generate masks for text, condition, and image
+
+        # text_mask = generate_mask(self.gamma, batch_size, self.text_seq_len, device)
+
+        text_mask = generate_mask(
+            gamma_func("scaled-cosine"), batch_size, self.text_seq_len, device
+        )
+
+        image_mask = generate_mask(self.gamma, batch_size, self.image_seq_len, device)
+
+        # Embed and transform inputs
+        logits, _, labels = self.embed_and_transform(
+            [text, condition, image],
+            [text_mask, None, image_mask],
+            return_encoding,
+            device,
+        )
+
+        # If not returning loss, return the logits
+        if not return_loss:
+            return logits
+
+        # Separate labels
+        text, condition, image = labels
+
+        # Add SEP token to end of text label
+        sep_token = torch.tensor(self.sep_token, device=device).repeat(
+            labels.shape[0], 1
+        )
+        labels = torch.cat([labels, sep_token], dim=1)
+
+        # If condition exists and condition vae is defined, add the condition to the labels
+        if exists(condition) and exists(self.condition_vae):
+            offsetted_condition = condition + self.num_text_tokens + 1
+            labels = torch.cat((labels, offsetted_condition), dim=1)
+
+        # Add image to the labels
+        offsetted_image = (
+            image + self.num_text_tokens + 1 + self.num_condition_tokens + 1
+        )
+        labels = torch.cat((labels, offsetted_image), dim=1)
+
+        # Rearrange logits for cross-entropy loss calculation
+        # Logits size: (batch_size, vocab_size, total_seq_len)
+        # Labels size: (batch_size, total_seq_len)
+        logits = rearrange(logits, "b n c -> b c n")
+
+        # Calculate cross-entropy loss for text and image
+        loss_text = F.cross_entropy(
+            logits[:, :, : self.text_seq_len],
+            labels[:, : self.text_seq_len],
+            reduction="none",
+        )[text_mask].mean()
+
+        loss_img = F.cross_entropy(
+            logits[:, :, self.text_seq_len + 1 + self.condition_seq_len :],
+            labels[:, self.text_seq_len + 1 + self.condition_seq_len :],
+            reduction="none",
+        )[image_mask].mean()
+
+        # Calculate total loss
+        loss = (loss_text + self.loss_img_weight * loss_img) / (
+            self.loss_img_weight + 1
+        )
+
+        loss_dict = {
+            "loss_text": loss_text,
+            # "loss_cond": loss_cond,
+            "loss_img": loss_img,
+            "loss": torch.nan_to_num(loss, 0.0, 0.0, 0.0),
+        }
+
+        return loss, loss_dict, None
+
+    def create_tensors(self, text, condition, image):
+        """
+        This function creates tensors for text, condition, and image when they are not provided as inputs to the sample function.
+        """
+        device = next(
+            filter(lambda x: isinstance(x, torch.Tensor), [text, condition, image]),
+            None,
+        ).device
+
+        if not isinstance(text, torch.Tensor):
+            text = (
+                torch.ones(1, self.text_seq_len, device=device, dtype=torch.long)
+                * self.text_mask_token
+            )
+
+        if not isinstance(condition, torch.Tensor):
+            condition = (
+                torch.ones(1, self.condition_seq_len, device=device, dtype=torch.long)
+                * self.cond_mask_token
+            )
+        else:
+            with torch.no_grad():
+                condition = self.condition_vae.get_codebook_indices(condition)
+
+        if not isinstance(image, torch.Tensor):
+            image = (
+                torch.ones(1, self.image_seq_len, device=device, dtype=torch.long)
+                * self.image_mask_token
+            )
+        else:
+            with torch.no_grad():
+                image = self.vae.get_codebook_indices(image)
+
+        return text, condition, image
+
+    @torch.no_grad()
+    @eval_decorator
+    def sample(
+        self,
+        text=None,
+        condition=None,
+        image=None,
+        temperature=1.0,
+        filter_thres=0.9,
+        progress=False,
+        timesteps=1,
+        force_aas=True,
+    ):
+        # ensure timesteps is a positive integer
+        assert int(timesteps) > 0
+        # set model and VAEs to evaluation mode
+        self.eval()
+        vae = self.vae.eval()
+        if progress == True:
+            progress = tqdm
+        else:
+            progress = lambda x: x
+
+        # ensure that at least one of text, condition, or image is supplied
+        assert (
+            isinstance(text, torch.Tensor)
+            or isinstance(condition, torch.Tensor)
+            or isinstance(image, torch.Tensor)
+        ), "some data must be supplied"
+
+        # convert text, condition, and image to tensors if they aren't already
+        text, condition, image = self.create_tensors(text, condition, image)
+
+        # determine the maximum batch size of the input tensors
+        batch_size = max(text.shape[0], condition.shape[0], image.shape[0])
+
+        # match the batch sizes of text, condition, and image
+        text, condition, image = match_batch_size(text, condition, image, batch_size)
+
+        # determine the device of the tensors
+        device = next(
+            filter(lambda x: isinstance(x, torch.Tensor), [text, condition, image]),
+            None,
+        ).device
+
+        assert text.shape[0] == condition.shape[0] == image.shape[0]
+
+        # Create a tensor of zeros of size (batch_size, image_seq_len, num_image_tokens + 1) and set it to device
+
+        # full_text_logits = torch.zeros(batch_size, self.text_seq_len, self.num_text_tokens+3).to(device)
+        full_text_logits = torch.zeros(
+            batch_size, self.text_seq_len, self.num_text_tokens
+        ).to(device)
+
+        # Use scatter_ to fill the tensor with 1 values at the indices given by the image tensor
+        full_text_logits = full_text_logits.scatter_(
+            dim=-1, index=text.unsqueeze(-1), value=1
+        )
+        # Use scatter_ to fill the tensor with 1 values at the indices given by the image tensor
+        full_image_logits = torch.zeros(
+            batch_size, self.image_seq_len, self.num_image_tokens + 1
+        ).to(device)
+
+        # Remove the last token from each image sequence by setting full_image_logits to its first num_image_tokens elements
+        full_image_logits = full_image_logits.scatter_(
+            dim=-1, index=image.unsqueeze(-1), value=1
+        )
+
+        # cut off mask token
+        full_image_logits = full_image_logits[:, :, : self.num_image_tokens]
+
+        count = 0
+
+        for timestep in progress(torch.linspace(0, 1, timesteps)):
+            # Create masks for the text, condition, and image tensors
+            text_mask = text == self.text_mask_token
+            cond_mask = condition == self.cond_mask_token
+            image_mask = image == self.image_mask_token
+
+            # Calculate logits and samples using the calculate_logits function
+            logits, sample = calculate_logits(
+                [text, condition, image],
+                [text_mask, cond_mask, image_mask],
+                self.embed_and_transform,
+                filter_thres,
+                temperature,
+            )
+
+            # Calculate the number of masked tokens in the text and image tensors
+            num_masked_text_tokens = torch.sum(text_mask, dim=1)[0]
+            num_masked_image_tokens = torch.sum(image_mask, dim=1)[0]
+
+            # If there are masked text tokens, unmask them using unmask_tokens and fill the full text logits tensor with -inf for unmasked tokens
+            if num_masked_text_tokens.any() > 0:
+                text, full_text_logits = unmask_tokens(
+                    text,
+                    text_mask,
+                    num_masked_text_tokens,
+                    logits[:, : self.text_seq_len, : self.num_text_tokens],
+                    sample[:, : self.text_seq_len, : self.num_text_tokens],
+                    timestep,
+                    timesteps,
+                    self.gamma,
+                    suppress_invalid_text_tokens,
+                    self.pad_token,
+                    self.text_mask_token,
+                    force_aas=force_aas,
+                )
+                full_text_logits = full_text_logits.masked_fill(
+                    ~text_mask.unsqueeze(-1), -torch.inf
+                )
+
+            # If there are masked image tokens, unmask them using unmask_tokens and fill the full image logits tensor with -inf for unmasked tokens
+            if num_masked_image_tokens > 0:
+                image, full_image_logits = unmask_tokens(
+                    image,
+                    image_mask,
+                    num_masked_image_tokens,
+                    logits[:, -self.image_seq_len :, -(self.num_image_tokens + 1) : -1],
+                    sample[:, -self.image_seq_len :, -(self.num_image_tokens + 1) : -1],
+                    timestep,
+                    timesteps,
+                    self.gamma,
+                )
+                full_text_logits = full_text_logits.masked_fill(
+                    ~text_mask.unsqueeze(-1), -torch.inf
+                )
+
+        # Generate heatmap
+        with torch.no_grad():
+            # Normalize full image logits tensor
+            full_image_logits /= torch.max(
+                torch.abs(full_image_logits), dim=-1, keepdim=True
+            ).values
+
+            # Apply quantize embedding to full image logits tensor
+            full_image_logits = torch.matmul(
+                full_image_logits, self.vae.model.quantize.embedding.weight
+            )
+
+            # Rearrange full image logits tensor
+            h = int(self.image_seq_len**0.5)
+            full_image_logits = rearrange(
+                full_image_logits, "b (h w) c -> b c h w", h=h
+            )
+
+            # Decode full image logits tensor
+            full_image_logits = self.vae.model.decode(full_image_logits)
+
+            # Add clipping to full image logits tensor
+            max_val = torch.max(full_image_logits.view(batch_size, -1), dim=-1)[0]
+            min_val = torch.min(full_image_logits.view(batch_size, -1), dim=-1)[0]
+            full_image_logits += torch.clip(1 - max_val, 0, float("inf")).view(
+                batch_size, 1, 1, 1
+            )
+            full_image_logits += torch.clip(0 - min_val, float("-inf"), 0).view(
+                batch_size, 1, 1, 1
+            )
+
+            # Clip full image logits tensor values to the range [0, 1]
+            full_image_logits = torch.clip(full_image_logits, 0, 1)
+
+        # Return text tensor, detokenized text tensor, full text logits tensor,
+        # binary image tensor, and full image logits tensor
+        return (
+            text,
+            detokenize_text(self.text_embedding, text),
+            full_text_logits,
+            1.0 * (vae.decode(image) > 0.5),
+            full_image_logits,
+        )
+
+    @torch.no_grad()
+    @eval_decorator
+    def sample_text(
+        self,
+        text=False,
+        condition=False,
+        image=False,
+        temperature=1.0,
+        filter_thres=0.9,
+        progress=False,
+        n_unmask=1,
+        place_amino=True,
+        force_aas=False,
+    ):
+        # set model and VAEs to evaluation mode
+        self.eval()
+
+        # ensure that at least one of text, condition, or image is supplied
+        assert (
+            isinstance(text, torch.Tensor)
+            or isinstance(condition, torch.Tensor)
+            or isinstance(image, torch.Tensor)
+        ), "some data must be supplied"
+
+        # convert text, condition, and image to tensors if they aren't already
+        text, condition, image = self.create_tensors(text, condition, image)
+
+        # determine the maximum batch size of the input tensors
+        batch_size = max(text.shape[0], condition.shape[0], image.shape[0])
+
+        # match the batch sizes of text, condition, and image
+        text, condition, image = match_batch_size(text, condition, image, batch_size)
+
+        # determine the device of the tensors
+        device = next(
+            filter(lambda x: isinstance(x, torch.Tensor), [text, condition, image]),
+            None,
+        ).device
+
+        assert text.shape[0] == condition.shape[0] == image.shape[0]
+
+        # Create a tensor of zeros of size (batch_size, image_seq_len, num_image_tokens + 1) and set it to device
+
+        # full_text_logits = torch.zeros(batch_size, self.text_seq_len, self.num_text_tokens+3).to(device)
+        full_text_logits = torch.zeros(
+            batch_size, self.text_seq_len, self.num_text_tokens
+        ).to(device)
+
+        # Use scatter_ to fill the tensor with 1 values at the indices given by the image tensor
+        full_text_logits = full_text_logits.scatter_(
+            dim=-1, index=text.unsqueeze(-1), value=1
+        )
+
+        text_mask = text == self.text_mask_token
+        cond_mask = condition == self.cond_mask_token
+        image_mask = image == self.image_mask_token
+
+        mask_indices = text_mask.nonzero()
+        non_mask_indices = (~text_mask).nonzero()
+
+        # figure out the center of the amino acids to determine generation direction
+        central_protein_index = torch.tensor(
+            [
+                torch.median(
+                    non_mask_indices[torch.where(non_mask_indices[:, 0] == idx)][:, -1]
+                )
+                for idx in range(batch_size)
+            ]
+        )
+
+        count = 1
+
+        run_mask = text_mask
+        if progress:
+            pbar = progress(total=torch.sum(run_mask).item())
+        while torch.sum(run_mask) > 0:
+            logits, sample = calculate_logits(
+                [text, condition, image],
+                [text_mask, cond_mask, image_mask],
+                self.embed_and_transform,
+                filter_thres,
+                temperature,
+            )
+
+            # sub_sample: [batch_size, text_seq_len, num_text_tokens]
+            sub_sample = sample[:, : self.text_seq_len, : self.num_text_tokens]
+            sub_sample = sub_sample.masked_fill(~text_mask.unsqueeze(-1), -torch.inf)
+            sub_sample = suppress_invalid_text_tokens(
+                text, sub_sample, 0, 2, self.pad_token, self.text_mask_token, force_aas
+            )
+            # calculate % to unmasked
+            # get most likely token and probability for each position
+
+            for idx in range(batch_size):
+                selected_mask_indices = mask_indices[
+                    torch.where(mask_indices[:, 0] == idx)
+                ][:, -1]
+
+                # Generate to the left
+                if selected_mask_indices[-count] < central_protein_index[idx]:
+                    unmask_index = selected_mask_indices[-count]
+                    left_sample = max(0, (unmask_index + 1) - n_unmask)
+                    right_sample = min(unmask_index + 1, self.text_seq_len - 1)
+                    central_protein_index[idx] = max(
+                        0, central_protein_index[idx] - 0.5 * n_unmask
+                    )
+
+                # Generate to the right
+                elif selected_mask_indices[count - 1] > central_protein_index[idx]:
+                    unmask_index = selected_mask_indices[count - 1]
+                    left_sample = max(0, unmask_index)
+                    right_sample = min(unmask_index + n_unmask, self.text_seq_len - 1)
+                    central_protein_index[idx] = min(
+                        central_protein_index[idx] + 0.5 * n_unmask,
+                        self.text_seq_len - 1,
+                    )
+
+                # save logits for relevant position
+                full_text_logits[
+                    idx, left_sample:right_sample, : self.text_seq_len - 1
+                ] = logits[idx, left_sample:right_sample, : self.num_text_tokens]
+
+                run_mask[idx, left_sample:right_sample] = False
+
+                # you may want to resample the amino acids or calculate marginal probs
+                # if so, set place_amino to false
+                if place_amino:
+                    text[idx, left_sample:right_sample] = torch.where(
+                        text[idx, left_sample:right_sample] == self.text_mask_token,
+                        sub_sample[
+                            idx, left_sample:right_sample, : self.num_text_tokens
+                        ].argmax(dim=-1),
+                        text[idx, left_sample:right_sample],
+                    )
+
+            text_mask = run_mask
+
+            count += n_unmask
+
+            if progress:
+                pbar.update(n_unmask)
+        if progress:
+            pbar.close()
+
+        return (
+            text,
+            detokenize_text(self.text_embedding, text),
+            full_text_logits,
+        )
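As a side note (not part of the commit), the sample loop above reveals masked tokens iteratively: gamma_func supplies the masking schedule and calc_unmask_probs turns it into the fraction of still-masked tokens revealed at each timestep. A small self-contained sketch of that schedule, assuming the default cosine mode and an illustrative 16 x 16 image-token grid:

# Standalone sketch of the unmasking schedule used by CELLE.sample (illustrative only).
import torch
from math import ceil

def cosine_gamma(r):
    # mirrors gamma_func("cosine") from celle/utils.py
    return torch.cos(r * torch.pi / 2)

num_masked = 256   # e.g. a fully masked 16 x 16 grid of image tokens (assumed size)
timesteps = 5
for t in torch.linspace(0, 1, timesteps):
    # same rule as calc_unmask_probs: reveal everything on the final step
    unmask_prob = 1.0 if float(t) == 1.0 else 1.0 - float(cosine_gamma(t))
    n_unmask = max(1, ceil(unmask_prob * num_masked))
    print(f"t={float(t):.2f}: unmask {n_unmask} of {num_masked} masked tokens")
    num_masked = max(num_masked - n_unmask, 0)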
celle/utils.py
ADDED
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torchvision import transforms
|
3 |
+
from PIL import Image, ImageSequence
|
4 |
+
from math import pi
|
5 |
+
import torchvision.transforms.functional as TF
|
6 |
+
|
7 |
+
|
8 |
+
# Define helper functions
|
9 |
+
def exists(val):
|
10 |
+
"""Check if a variable exists"""
|
11 |
+
return val is not None
|
12 |
+
|
13 |
+
|
14 |
+
def uniq(arr):
|
15 |
+
return {el: True for el in arr}.keys()
|
16 |
+
|
17 |
+
|
18 |
+
def default(val, d):
|
19 |
+
"""If a value exists, return it; otherwise, return a default value"""
|
20 |
+
return val if exists(val) else d
|
21 |
+
|
22 |
+
|
23 |
+
def max_neg_value(t):
|
24 |
+
return -torch.finfo(t.dtype).max
|
25 |
+
|
26 |
+
|
27 |
+
def cast_tuple(val, depth=1):
|
28 |
+
if isinstance(val, list):
|
29 |
+
val = tuple(val)
|
30 |
+
return val if isinstance(val, tuple) else (val,) * depth
|
31 |
+
|
32 |
+
|
33 |
+
def is_empty(t):
|
34 |
+
"""Check if a tensor is empty"""
|
35 |
+
# Return True if the number of elements in the tensor is zero, else False
|
36 |
+
return t.nelement() == 0
|
37 |
+
|
38 |
+
|
39 |
+
def masked_mean(t, mask, dim=1):
|
40 |
+
"""
|
41 |
+
Compute the mean of a tensor, masked by a given mask
|
42 |
+
|
43 |
+
Args:
|
44 |
+
t (torch.Tensor): input tensor of shape (batch_size, seq_len, hidden_dim)
|
45 |
+
mask (torch.Tensor): mask tensor of shape (batch_size, seq_len)
|
46 |
+
dim (int): dimension along which to compute the mean (default=1)
|
47 |
+
|
48 |
+
Returns:
|
49 |
+
torch.Tensor: masked mean tensor of shape (batch_size, hidden_dim)
|
50 |
+
"""
|
51 |
+
t = t.masked_fill(~mask[:, :, None], 0.0)
|
52 |
+
return t.sum(dim=1) / mask.sum(dim=1)[..., None]
|
53 |
+
|
54 |
+
|
55 |
+
def set_requires_grad(model, value):
|
56 |
+
"""
|
57 |
+
Set whether or not the model's parameters require gradients
|
58 |
+
|
59 |
+
Args:
|
60 |
+
model (torch.nn.Module): the PyTorch model to modify
|
61 |
+
value (bool): whether or not to require gradients
|
62 |
+
"""
|
63 |
+
for param in model.parameters():
|
64 |
+
param.requires_grad = value
|
65 |
+
|
66 |
+
|
67 |
+
def eval_decorator(fn):
|
68 |
+
"""
|
69 |
+
Decorator function to evaluate a given function
|
70 |
+
|
71 |
+
Args:
|
72 |
+
fn (callable): function to evaluate
|
73 |
+
|
74 |
+
Returns:
|
75 |
+
callable: the decorated function
|
76 |
+
"""
|
77 |
+
|
78 |
+
def inner(model, *args, **kwargs):
|
79 |
+
was_training = model.training
|
80 |
+
model.eval()
|
81 |
+
out = fn(model, *args, **kwargs)
|
82 |
+
model.train(was_training)
|
83 |
+
return out
|
84 |
+
|
85 |
+
return inner
|
86 |
+
|
87 |
+
|
88 |
+
def log(t, eps=1e-20):
|
89 |
+
"""
|
90 |
+
Compute the natural logarithm of a tensor
|
91 |
+
|
92 |
+
Args:
|
93 |
+
t (torch.Tensor): input tensor
|
94 |
+
eps (float): small value to add to prevent taking the log of 0 (default=1e-20)
|
95 |
+
|
96 |
+
Returns:
|
97 |
+
torch.Tensor: the natural logarithm of the input tensor
|
98 |
+
"""
|
99 |
+
return torch.log(t + eps)
|
100 |
+
|
101 |
+
|
102 |
+
def gumbel_noise(t):
|
103 |
+
"""
|
104 |
+
Generate Gumbel noise
|
105 |
+
|
106 |
+
Args:
|
107 |
+
t (torch.Tensor): input tensor
|
108 |
+
|
109 |
+
Returns:
|
110 |
+
torch.Tensor: a tensor of Gumbel noise with the same shape as the input tensor
|
111 |
+
"""
|
112 |
+
noise = torch.zeros_like(t).uniform_(0, 1)
|
113 |
+
return -log(-log(noise))
|
114 |
+
|
115 |
+
|
116 |
+
def gumbel_sample(t, temperature=0.9, dim=-1):
|
117 |
+
"""
|
118 |
+
Sample from a Gumbel-softmax distribution
|
119 |
+
|
120 |
+
Args:
|
121 |
+
t (torch.Tensor): input tensor of shape (batch_size, num_classes)
|
122 |
+
temperature (float): temperature for the Gumbel-softmax distribution (default=0.9)
|
123 |
+
dim (int): dimension along which to sample (default=-1)
|
124 |
+
|
125 |
+
Returns:
|
126 |
+
torch.Tensor: a tensor of samples from the Gumbel-softmax distribution with the same shape as the input tensor
|
127 |
+
"""
|
128 |
+
return (t / max(temperature, 1e-10)) + gumbel_noise(t)
|
129 |
+
|
130 |
+
|
131 |
+
def top_k(logits, thres=0.5):
|
132 |
+
"""
|
133 |
+
Return a tensor where all but the top k values are set to negative infinity
|
134 |
+
|
135 |
+
Args:
|
136 |
+
logits (torch.Tensor): input tensor of shape (batch_size, num_classes)
|
137 |
+
thres (float): threshold for the top k values (default=0.5)
|
138 |
+
|
139 |
+
Returns:
|
140 |
+
torch.Tensor: a tensor with the same shape as the input tensor, where all but the top k values are set to negative infinity
|
141 |
+
"""
|
142 |
+
num_logits = logits.shape[-1]
|
143 |
+
k = max(int((1 - thres) * num_logits), 1)
|
144 |
+
val, ind = torch.topk(logits, k)
|
145 |
+
probs = torch.full_like(logits, float("-inf"))
|
146 |
+
probs.scatter_(-1, ind, val)
|
147 |
+
return probs
|
148 |
+
|
149 |
+
|
150 |
+
def gamma_func(mode="cosine", scale=0.15):
|
151 |
+
"""Return a function that takes a single input r and returns a value based on the selected mode"""
|
152 |
+
|
153 |
+
# Define a different function based on the selected mode
|
154 |
+
if mode == "linear":
|
155 |
+
return lambda r: 1 - r
|
156 |
+
elif mode == "cosine":
|
157 |
+
return lambda r: torch.cos(r * pi / 2)
|
158 |
+
elif mode == "square":
|
159 |
+
return lambda r: 1 - r**2
|
160 |
+
elif mode == "cubic":
|
161 |
+
return lambda r: 1 - r**3
|
162 |
+
elif mode == "scaled-cosine":
|
163 |
+
return lambda r: scale * (torch.cos(r * pi / 2))
|
164 |
+
else:
|
165 |
+
# Raise an error if the selected mode is not implemented
|
166 |
+
raise NotImplementedError
|
167 |
+
|
168 |
+
|
169 |
+
class always:
|
170 |
+
"""Helper class to always return a given value"""
|
171 |
+
|
172 |
+
def __init__(self, val):
|
173 |
+
self.val = val
|
174 |
+
|
175 |
+
def __call__(self, x, *args, **kwargs):
|
176 |
+
return self.val
|
177 |
+
|
178 |
+
|
179 |
+
class DivideMax(torch.nn.Module):
|
180 |
+
def __init__(self, dim):
|
181 |
+
super().__init__()
|
182 |
+
self.dim = dim
|
183 |
+
|
184 |
+
def forward(self, x):
|
185 |
+
maxes = x.amax(dim=self.dim, keepdim=True).detach()
|
186 |
+
return x / maxes
|
187 |
+
|
188 |
+
def replace_outliers(image, percentile=0.0001):
|
189 |
+
|
190 |
+
lower_bound, upper_bound = torch.quantile(image, percentile), torch.quantile(
|
191 |
+
image, 1 - percentile
|
192 |
+
)
|
193 |
+
mask = (image <= upper_bound) & (image >= lower_bound)
|
194 |
+
|
195 |
+
valid_pixels = image[mask]
|
196 |
+
|
197 |
+
image[~mask] = torch.clip(image[~mask], min(valid_pixels), max(valid_pixels))
|
198 |
+
|
199 |
+
return image
|
200 |
+
|
201 |
+
|
202 |
+
def process_image(image, dataset, image_type=None):
|
203 |
+
image = TF.to_tensor(image)[0].unsqueeze(0).unsqueeze(0)
|
204 |
+
image /= image.max()
|
205 |
+
|
206 |
+
if dataset == "HPA":
|
207 |
+
if image_type == 'nucleus':
|
208 |
+
normalize = (0.0655, 0.0650)
|
209 |
+
|
210 |
+
elif image_type == 'protein':
|
211 |
+
normalize = (0.1732, 0.1208)
|
212 |
+
|
213 |
+
elif dataset == "OpenCell":
|
214 |
+
|
215 |
+
if image_type == 'nucleus':
|
216 |
+
normalize = (0.0272, 0.0244)
|
217 |
+
|
218 |
+
elif image_type == 'protein':
|
219 |
+
normalize = (0.0486, 0.0671)
|
220 |
+
|
221 |
+
t_forms = []
|
222 |
+
|
223 |
+
t_forms.append(transforms.RandomCrop(256))
|
224 |
+
|
225 |
+
# t_forms.append(transforms.Normalize(normalize[0],normalize[1]))
|
226 |
+
|
227 |
+
|
228 |
+
image = transforms.Compose(t_forms)(image)
|
229 |
+
|
230 |
+
return image
|
dataloader.py
ADDED
@@ -0,0 +1,308 @@
import os
import numpy as np
from PIL import Image, ImageSequence
import json
import pandas as pd

import torch
from torch.utils.data import Dataset
from torchvision import transforms
import torchvision.transforms.functional as TF

from celle.utils import replace_outliers


def simple_conversion(seq):
    """Create 26-dim embedding"""
    chars = [
        "-",
        "M",
        "R",
        "H",
        "K",
        "D",
        "E",
        "S",
        "T",
        "N",
        "Q",
        "C",
        "U",
        "G",
        "P",
        "A",
        "V",
        "I",
        "F",
        "Y",
        "W",
        "L",
        "O",
        "X",
        "Z",
        "B",
        "J",
    ]

    nums = range(len(chars))

    seqs_x = np.zeros(len(seq))

    for idx, char in enumerate(seq):
        lui = chars.index(char)
        seqs_x[idx] = nums[lui]

    return torch.tensor([seqs_x]).long()


class CellLoader(Dataset):
    """imports mined opencell images with protein sequence"""

    def __init__(
        self,
        data_csv=None,
        dataset=None,
        split_key=None,
        resize=600,
        crop_size=600,
        crop_method="random",
        sequence_mode="simple",
        vocab="bert",
        threshold="median",
        text_seq_len=0,
        pad_mode="random",
    ):
        self.data_csv = data_csv
        self.dataset = dataset
        self.image_folders = []
        self.crop_method = crop_method
        self.resize = resize
        self.crop_size = crop_size
        self.sequence_mode = sequence_mode
        self.threshold = threshold
        self.text_seq_len = int(text_seq_len)
        self.vocab = vocab
        self.pad_mode = pad_mode

        if self.sequence_mode == "embedding" or self.sequence_mode == "onehot":
            if self.vocab == "esm1b" or self.vocab == "esm2":
                from esm import Alphabet

                self.tokenizer = Alphabet.from_architecture(
                    "ESM-1b"
                ).get_batch_converter()
                self.text_seq_len += 2

        if data_csv:
            data = pd.read_csv(data_csv)

            self.parent_path = os.path.dirname(data_csv).split(data_csv)[0]

            if split_key == "train":
                self.data = data[data["split"] == "train"]
            elif split_key == "val":
                self.data = data[data["split"] == "val"]
            else:
                self.data = data

            self.data = self.data.reset_index(drop=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(
        self,
        idx,
        get_sequence=True,
        get_images=True,
    ):
        if get_sequence and self.text_seq_len > 0:
            protein_vector = self.get_protein_vector(idx)
        else:
            protein_vector = torch.zeros((1, 1))

        if get_images:
            nucleus, target, threshold = self.get_images(idx, self.dataset)
        else:
            nucleus, target, threshold = torch.zeros((3, 1))

        data_dict = {
            "nucleus": nucleus.float(),
            "target": target.float(),
            "threshold": threshold.float(),
            "sequence": protein_vector.long(),
        }

        return data_dict

    def get_protein_vector(self, idx):
        if "protein_sequence" not in self.data.columns:
            metadata = self.retrieve_metadata(idx)
            protein_sequence = metadata["sequence"]
        else:
            protein_sequence = self.data.iloc[idx]["protein_sequence"]

        protein_vector = self.tokenize_sequence(protein_sequence)

        return protein_vector

    def get_images(self, idx, dataset):
        if dataset == "HPA":
            nucleus = Image.open(
                os.path.join(
                    self.parent_path, self.data.iloc[idx]["nucleus_image_path"]
                )
            )

            target = Image.open(
                os.path.join(self.parent_path, self.data.iloc[idx]["target_image_path"])
            )

            nucleus = TF.to_tensor(nucleus)[0]
            target = TF.to_tensor(target)[0]

            image = torch.stack([nucleus, target], axis=0)

            normalize = (0.0655, 0.0650), (0.1732, 0.1208)

        elif dataset == "OpenCell":
            image = Image.open(
                os.path.join(self.parent_path, self.data.iloc[idx]["image_path"])
            )
            nucleus, target = [page.copy() for page in ImageSequence.Iterator(image)]

            nucleus = replace_outliers(torch.divide(TF.to_tensor(nucleus), 65536))[0]
            target = replace_outliers(torch.divide(TF.to_tensor(target), 65536))[0]

            image = torch.stack([nucleus, target], axis=0)

            normalize = (
                (0.0272, 0.0244),
                (0.0486, 0.0671),
            )

        # from https://discuss.pytorch.org/t/how-to-apply-same-transform-on-a-pair-of-picture/14914

        t_forms = [transforms.Resize(self.resize, antialias=None)]

        if self.crop_method == "random":
            t_forms.append(transforms.RandomCrop(self.crop_size))
            t_forms.append(transforms.RandomHorizontalFlip(p=0.5))
            t_forms.append(transforms.RandomVerticalFlip(p=0.5))

        elif self.crop_method == "center":
            t_forms.append(transforms.CenterCrop(self.crop_size))

        t_forms.append(transforms.Normalize(normalize[0], normalize[1]))

        image = transforms.Compose(t_forms)(image)

        nucleus, target = image

        nucleus /= torch.abs(nucleus).max()
        target -= target.min()
        target /= target.max()

        nucleus = nucleus.unsqueeze(0)
        target = target.unsqueeze(0)

        threshold = target

        if self.threshold == "mean":
            threshold = 1.0 * (threshold > (torch.mean(threshold)))

        elif self.threshold == "median":
            threshold = 1.0 * (threshold > (torch.median(threshold)))

        elif self.threshold == "1090_IQR":
            p10 = torch.quantile(threshold, 0.1, None)
            p90 = torch.quantile(threshold, 0.9, None)
            threshold = torch.clip(threshold, p10, p90)

        nucleus = torch.nan_to_num(nucleus, 0.0, 1.0, 0.0)
        target = torch.nan_to_num(target, 0.0, 1.0, 0.0)
        threshold = torch.nan_to_num(threshold, 0.0, 1.0, 0.0)

        return nucleus, target, threshold

    def retrieve_metadata(self, idx):
        with open(
            os.path.join(self.parent_path, self.data.iloc[idx]["metadata_path"])
        ) as f:
            metadata = json.load(f)

        return metadata

    def tokenize_sequence(self, protein_sequence):
        pad_token = 0

        if self.sequence_mode == "simple":
            protein_vector = simple_conversion(protein_sequence)

        elif self.sequence_mode == "center":
            protein_sequence = protein_sequence.center(self.text_seq_len, "-")
            protein_vector = simple_conversion(protein_sequence)

        elif self.sequence_mode == "alternating":
            protein_sequence = protein_sequence.center(self.text_seq_len, "-")
            protein_sequence = protein_sequence[::18]
            protein_sequence = protein_sequence.center(
                int(self.text_seq_len / 18) + 1, "-"
            )
            protein_vector = simple_conversion(protein_sequence)

        elif self.sequence_mode == "embedding":
            if self.vocab == "esm1b" or self.vocab == "esm2":
                pad_token = 1
                protein_vector = self.tokenizer([("", protein_sequence)])[-1]

        if protein_vector.shape[-1] < self.text_seq_len:
            diff = self.text_seq_len - protein_vector.shape[-1]

            if self.pad_mode == "end":
                protein_vector = torch.nn.functional.pad(
                    protein_vector, (0, diff), "constant", pad_token
                )
            elif self.pad_mode == "random":
                split = diff - np.random.randint(0, diff + 1)

                protein_vector = torch.cat(
                    [torch.ones(1, split) * 0, protein_vector], dim=1
                )

                protein_vector = torch.nn.functional.pad(
                    protein_vector, (0, diff - split), "constant", pad_token
                )

        elif protein_vector.shape[-1] > self.text_seq_len:
            start_int = np.random.randint(
                0, protein_vector.shape[-1] - self.text_seq_len
            )

            protein_vector = protein_vector[
                :, start_int : start_int + self.text_seq_len
            ]

        return protein_vector.long()
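
Usage sketch (illustrative only, not part of the committed files): the CSV path and batch size below are assumptions; the constructor arguments and the returned dictionary keys follow the CellLoader definition above.

from torch.utils.data import DataLoader
from dataloader import CellLoader

# Hypothetical CSV path; the file is expected to provide a "split" column plus the
# image/metadata path columns used in get_images() and retrieve_metadata().
dataset = CellLoader(
    data_csv="data/HPA_data.csv",
    dataset="HPA",
    split_key="train",
    resize=600,
    crop_size=256,
    crop_method="random",
    sequence_mode="embedding",
    vocab="esm2",
    threshold="median",
    text_seq_len=1000,
)

loader = DataLoader(dataset, batch_size=4, shuffle=True)
batch = next(iter(loader))
# batch["nucleus"], batch["target"], batch["threshold"] are (B, 1, crop, crop) image tensors;
# batch["sequence"] holds the padded/truncated tokenized protein sequence.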
requirements.txt
ADDED
@@ -0,0 +1,13 @@
os
torch
torchvision
huggingface_hub
gradio
OmegaConf
axial-positional-embedding
einops
rotary_embedding_torch
fair-esm
tqdm
importlib
pytorch-lightning==1.9.0