Spaces:

mawairon
/

NOOTestspace

Sleeping

mawairon committed on Jun 26, 2024

Commit

04805af

verified ·

1 Parent(s): 40aaf6e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,7 +5,11 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # Load the Hugging Face model and tokenizer
 model_name = 'AIRI-Institute/gena-lm-bert-base-lastln-t2t'  # Replace with the actual model name
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
 # Define a function to process the DNA sequence
 def analyze_dna(sequence):
@@ -13,12 +17,21 @@ def analyze_dna(sequence):
     inputs = tokenizer(sequence, return_tensors='pt')
     # Get model predictions
     outputs = model(**inputs)
-    predictions = outputs.logits.argmax(dim=-1).item()
-    return f"Prediction: {predictions}"
 # Create a Gradio interface
-demo = gr.Interface(fn=analyze_dna, inputs="text", outputs="text")
 # Launch the interface
 demo.launch()

 # Load the Hugging Face model and tokenizer
 model_name = 'AIRI-Institute/gena-lm-bert-base-lastln-t2t'
 # Number of output classes the classification head is configured with.
 EXPECTED_NUM_LABELS = 38
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 # NOTE(review): if this checkpoint does not ship a classification head of this
 # size, transformers will presumably create one with freshly initialised
 # weights -- confirm that fine-tuned weights are actually being loaded.
 model = AutoModelForSequenceClassification.from_pretrained(
     model_name, num_labels=EXPECTED_NUM_LABELS
 )
 # Ensure the model has the correct number of classes. An explicit raise is used
 # instead of `assert` so the check still runs when Python is started with -O.
 num_classes = model.config.num_labels
 if num_classes != EXPECTED_NUM_LABELS:
     raise ValueError(
         f"The model has {num_classes} classes, but {EXPECTED_NUM_LABELS} were expected."
     )
 # Define a function to process the DNA sequence
 def analyze_dna(sequence):
     """Return the most probable classes for a DNA sequence.

     Args:
         sequence: DNA sequence text to tokenize and classify.

     Returns:
         A list of up to five ``(class_index, probability)`` tuples, ordered
         from most to least probable.
     """
     # truncation=True asks the tokenizer to cut the input down to its
     # configured maximum length (when one is set) instead of handing an
     # over-long sequence to the model.
     inputs = tokenizer(sequence, return_tensors='pt', truncation=True)
     # Inference only: skip autograd bookkeeping for the forward pass.
     with torch.no_grad():
         outputs = model(**inputs)
     # Convert logits to probabilities. One sequence was tokenized, so take the
     # first row explicitly rather than relying on squeeze(), which would also
     # collapse the class dimension if it ever had size 1.
     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
     # Get the top 5 most likely classes (fewer if the model has fewer classes).
     top_k = min(5, probabilities.numel())
     top_probs, top_indices = torch.topk(probabilities, k=top_k)
     # Prepare the output as a list of tuples (class_index, probability)
     return list(zip(top_indices.tolist(), top_probs.tolist()))
 # Build the Gradio UI: text in, JSON out, backed by analyze_dna.
 demo = gr.Interface(
     fn=analyze_dna,
     inputs="text",
     outputs="json",
 )
 # Start serving the app.
 demo.launch()