Spaces:

mawairon
/

NOOTestspace

Sleeping

mawairon committed on Jun 26, 2024

Commit

1f65033

•

1 Parent(s): 9f4c137

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -83,12 +83,16 @@ log_reg.load_state_dict(weights['log_reg_state_dict'])
 model = BertClassifier(base_model, log_reg, num_labels=N_UNIQUE_CLASSES)
 model.eval()
 # Define a function to process the DNA sequence
 def analyze_dna(sequence):
     # Preprocess the input sequence
     inputs = tokenizer(sequence, truncation=True, padding='max_length', max_length=512, return_tensors="pt", return_token_type_ids=False)
-    print("tokenization done.")
     # Get model predictions
     _, logits = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
@@ -97,13 +101,16 @@ def analyze_dna(sequence):
     # Convert logits to probabilities
     probabilities = torch.nn.functional.softmax(logits, dim=-1).squeeze().tolist()
-    print("Probabilities, done.")
     # Get the top 5 most likely classes
     top_5_indices = sorted(range(len(probabilities)), key=lambda i: probabilities[i], reverse=True)[:5]
     top_5_probs = [probabilities[i] for i in top_5_indices]
-    # Prepare the output as a list of tuples (class_index, probability)
-    result = [(index, prob) for index, prob in zip(top_5_indices, top_5_probs)]
     return result
@@ -112,3 +119,5 @@ demo = gr.Interface(fn=analyze_dna, inputs="text", outputs="json")
 # Launch the interface
 demo.launch()

 model = BertClassifier(base_model, log_reg, num_labels=N_UNIQUE_CLASSES)
 model.eval()
+# Dictionary to decode model predictions.
+# pandas' loader is `read_pickle`; `pd.read_pkl` does not exist and would
+# raise AttributeError at startup, before the interface is ever launched.
+# NOTE(review): presumably the pickle holds a {label_name: class_index} mapping - confirm.
+label_to_int = pd.read_pickle('label_to_int.pkl')
+# Invert the mapping so a predicted class index can be looked up to get its label name.
+int_to_label = {v: k for k, v in label_to_int.items()}
 # Define a function to process the DNA sequence
 # Returns the five most probable classes for `sequence` as a list of
 # (label_name, probability) tuples, most probable first.
 def analyze_dna(sequence):
     # Preprocess the input sequence
     # (truncated and padded to max_length=512, returned as PyTorch tensors,
     # token_type_ids not requested)
     inputs = tokenizer(sequence, truncation=True, padding='max_length', max_length=512, return_tensors="pt", return_token_type_ids=False)
+    print("Tokenization done.")
     # Get model predictions
     # (the first element of the model's return value is not used here)
     _, logits = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
     # NOTE(review): this diff view skips two lines of app.py at this point
     # (hunk boundary) - confirm nothing hidden there modifies `logits`.
     # Convert logits to probabilities
     # NOTE(review): squeeze() presumably drops a batch dimension of 1 so that
     # tolist() yields a flat list of per-class probabilities - confirm.
     probabilities = torch.nn.functional.softmax(logits, dim=-1).squeeze().tolist()
+    print("Probabilities done.")
     # Get the top 5 most likely classes
     # (class indices sorted by probability, descending; first five kept)
     top_5_indices = sorted(range(len(probabilities)), key=lambda i: probabilities[i], reverse=True)[:5]
     top_5_probs = [probabilities[i] for i in top_5_indices]
+    # Map indices to label names
+    top_5_labels = [int_to_label[i] for i in top_5_indices]
+    # Prepare the output as a list of tuples (label_name, probability)
+    result = [(label, prob) for label, prob in zip(top_5_labels, top_5_probs)]
     return result
 # Launch the interface
 # (`demo` is the gr.Interface wrapping analyze_dna with text input and JSON
 # output; its definition is only visible in the diff's hunk header)
 demo.launch()