File size: 1,263 Bytes
c1e6692
088c2ad
5f8dde1
c1e6692
5f8dde1
 
 
6432c04
c1e6692
5f8dde1
 
 
 
 
 
04805af
 
 
 
 
 
 
 
 
 
 
 
5f8dde1
 
04805af
5f8dde1
 
c1e6692
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the Hugging Face tokenizer and sequence-classification model once, at
# import time; analyze_dna() below reads these module-level objects on every
# call instead of reloading them.
# NOTE(review): the checkpoint name looks like a pretrained base model rather
# than a fine-tuned classifier — if it ships no trained classification head,
# the predicted class probabilities will not be meaningful. Confirm against
# the model card.
model_name = 'AIRI-Institute/gena-lm-bert-base-lastln-t2t'  # Hub model ID passed to both from_pretrained calls
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define a function to process the DNA sequence
def analyze_dna(sequence, top_k=5):
    """Return the most probable classes the model predicts for a DNA sequence.

    Args:
        sequence: Text to classify, as typed into the Gradio text box.
        top_k: Maximum number of classes to return, most probable first.
            Defaults to 5; values below 1 yield an empty list.

    Returns:
        A list of ``(class_index, probability)`` tuples sorted by descending
        probability. The list is empty when ``sequence`` is ``None``, empty,
        or whitespace only.
    """
    # Nothing to classify: skip the tokenizer/model entirely instead of
    # scoring an input that contains no sequence.
    if sequence is None or not sequence.strip():
        return []

    # Preprocess the input sequence. truncation=True asks the tokenizer to cut
    # over-long inputs down to its configured maximum length rather than
    # handing the model more tokens than it accepts.
    inputs = tokenizer(sequence, return_tensors='pt', truncation=True)

    # Inference only: no_grad() avoids building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities. A single sequence was tokenized, so the
    # batch dimension is indexed explicitly; a bare squeeze() would also drop
    # the class dimension for a single-label head and return a scalar.
    probabilities = torch.softmax(outputs.logits, dim=-1)[0].tolist()

    # Rank (class_index, probability) pairs by probability, highest first.
    # sorted() is stable, so tied classes keep ascending index order.
    ranked = sorted(enumerate(probabilities), key=lambda pair: pair[1], reverse=True)

    # max() guards against a negative top_k turning into a "drop from the
    # end" slice.
    return ranked[:max(top_k, 0)]

# Build the web UI: one free-text input is passed to analyze_dna and the
# value it returns is rendered as JSON.
demo = gr.Interface(
    fn=analyze_dna,
    inputs="text",
    outputs="json",
)

# Start serving the app
demo.launch()