Update app.py
app.py
CHANGED
@@ -1,11 +1,93 @@
 import gradio as gr
 import transformers
-from transformers import AutoTokenizer,
+from transformers import AutoTokenizer, AutoModel, BertModel, BertTokenizer
+import torch
+import torch.nn as nn
+
+
+class LogisticRegressionTorch(nn.Module):
+
+    def __init__(self,
+                 input_dim: int,
+                 output_dim: int):
+
+        super(LogisticRegressionTorch, self).__init__()
+        self.batch_norm = nn.BatchNorm1d(num_features=input_dim)
+        self.linear = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x):
+
+        x = self.batch_norm(x)
+        out = self.linear(x)
+        return out
+
+class BertClassifier(nn.Module):
+
+    def __init__(self,
+                 bert_model: BertModel,
+                 classifier: LogisticRegressionTorch,
+                 num_labels: int):
+
+        super(BertClassifier, self).__init__()
+        self.bert = bert_model  # Assume bert_model is an instance of a pre-trained BertModel
+        self.classifier = classifier
+        self.num_labels = num_labels
+
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor = None,
+                token_type_ids: torch.Tensor = None, labels: torch.Tensor = None) -> tuple[torch.Tensor, torch.Tensor]:
+        # Extract outputs from the BERT model
+        outputs = self.bert(input_ids, attention_mask=attention_mask)
+
+        # Sanity check for outputs
+        assert 'hidden_states' in outputs, "BERT model output does not contain 'hidden_states'."
+
+        # Take the hidden states from the last layer and extract the hidden state of the first token ([CLS]) for each element in the batch
+        pooled_output = outputs.hidden_states[-1][:, 0, :]
+
+        assert pooled_output.shape == (input_ids.shape[0], 768), f"Expected shape ({input_ids.shape[0]}, 768), but got {pooled_output.shape}"
+        # to-do later!
+
+        # Pass the pooled output to the classifier to get the logits
+        logits = self.classifier(pooled_output)
+
+        # Compute loss if labels are provided (using CrossEntropyLoss for classification)
+        loss = None
+
+        if labels is not None:
+
+            loss_fct = nn.CrossEntropyLoss()
+            pred = logits.view(-1, self.num_labels)
+            observed = labels.view(-1)
+            loss = loss_fct(pred, observed)
+            # assert loss_fct(float(observed), observed) < 1e-6
+
+        # Return the loss and logits
+        return loss, logits
+
+
 
 # Load the Hugging Face model and tokenizer
-
-
-
+import torch.nn as nn
+from transformers import AutoTokenizer
+
+metadata_features = 0
+N_UNIQUE_CLASSES = 38
+
+
+base_model = AutoModel.from_pretrained('AIRI-Institute/gena-lm-bert-base-lastln-t2t', trust_remote_code=True, output_hidden_states=True)
+tokenizer = AutoTokenizer.from_pretrained('AIRI-Institute/gena-lm-bert-base-lastln-t2t', trust_remote_code=True)
+
+# Initialize the classifier
+input_size = 768 + metadata_features  # featurizer output size + metadata size
+log_reg = LogisticRegressionTorch(input_dim=input_size, output_dim=N_UNIQUE_CLASSES)
+
+# Load weights
+model_weights_path = '/your_model_weights.pth'
+weights = torch.load(model_weights_path, map_location=torch.device('cpu'))
+
+model = BertClassifier(base_model, log_reg, num_labels=N_UNIQUE_CLASSES)
+base_model.load_state_dict(weights['model_state_dict'])
+log_reg.load_state_dict(weights['log_reg_state_dict'])
 
 # Define a function to process the DNA sequence
 def analyze_dna(sequence):
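
The hunk's trailing context cuts off at the signature of analyze_dna. For orientation, here is a minimal sketch of how such a function could run inference with the model and tokenizer loaded above; the 512-token truncation, the top-5 cutoff, and the class_{i} label format are illustrative assumptions, not part of this commit:

import torch
import torch.nn.functional as F

def analyze_dna(sequence):
    # Tokenize the raw DNA string into input IDs and an attention mask
    inputs = tokenizer(sequence, return_tensors='pt', truncation=True, max_length=512)

    # eval() matters here: the classifier's BatchNorm1d cannot run in
    # training mode on a batch of size 1
    model.eval()
    with torch.no_grad():
        # BertClassifier.forward returns (loss, logits); loss is None when no labels are passed
        _, logits = model(input_ids=inputs['input_ids'],
                          attention_mask=inputs['attention_mask'])

    # Turn logits into class probabilities and report the top 5 classes
    probs = F.softmax(logits, dim=-1).squeeze(0)
    top = torch.topk(probs, k=5)
    return {f'class_{i.item()}': p.item() for p, i in zip(top.values, top.indices)}

A gr.Interface(fn=analyze_dna, inputs='text', outputs='json').launch() wiring would be the usual next step in a Gradio app.py, but that part lies outside this hunk.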
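
The loading code expects the checkpoint at model_weights_path to hold two keys, 'model_state_dict' and 'log_reg_state_dict'. A checkpoint with that layout could have been produced as below; this is a sketch of the assumed save-side counterpart, not code from the commit (the path is the same placeholder the commit uses):

import torch

# Hypothetical save-side counterpart of the torch.load() call above:
# one file holding both state dicts under the keys the app reads back
torch.save({
    'model_state_dict': base_model.state_dict(),
    'log_reg_state_dict': log_reg.state_dict(),
}, '/your_model_weights.pth')

Note that loading the state dicts into base_model and log_reg after they have been wrapped in BertClassifier still works: the wrapper stores references to the same modules, so the weights loaded here are the ones the wrapped model uses.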