"""Gradio demo that classifies a DNA sequence with a Hugging Face model.

The script loads a tokenizer and a sequence-classification model once at
import time, exposes ``analyze_dna`` through a simple text-in / JSON-out
Gradio interface, and launches that interface when run as a script.
"""

from operator import itemgetter

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the Hugging Face model and tokenizer once; every request reuses them.
# NOTE(review): some Hub checkpoints ship custom modeling code and need
# ``trust_remote_code=True`` in ``from_pretrained`` -- confirm for this one.
model_name = 'AIRI-Institute/gena-lm-bert-base-lastln-t2t'  # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


def analyze_dna(sequence: str) -> list:
    """Return the most probable classes for a DNA sequence.

    Args:
        sequence: Raw sequence text as typed into the Gradio text box.

    Returns:
        A list of at most five ``(class_index, probability)`` tuples,
        ordered from most to least probable. Fewer than five entries are
        returned when the model has fewer than five output classes.
    """
    # truncation=True caps the input at the tokenizer's configured maximum
    # length so an over-long sequence does not overflow the encoder.
    inputs = tokenizer(sequence, return_tensors='pt', truncation=True)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities. Index the batch dimension explicitly
    # (instead of squeeze()) so a single-label head still yields a list
    # rather than a bare float.
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    # Rank (class_index, probability) pairs by probability, highest first.
    # sorted() is stable, so ties keep ascending class-index order.
    ranked = sorted(enumerate(probabilities), key=itemgetter(1), reverse=True)
    return ranked[:5]


# Create a Gradio interface: free-text input, JSON-rendered output.
demo = gr.Interface(fn=analyze_dna, inputs="text", outputs="json")

# Launch the interface only when executed as a script, so importing this
# module (e.g. to reuse analyze_dna) does not start a web server.
if __name__ == "__main__":
    demo.launch()