|
from api_prediction import AptaBLE_Pipeline |
|
import gradio as gr |
|
import pandas as pd |
|
import torch |
|
import tempfile |
|
from tabulate import tabulate |
|
import itertools |
|
import os |
|
import random |
|
|
|
|
|
# Tell Gradio which host name to bind its server to; '0.0.0.0' is the
# wildcard address, so the app is not limited to localhost.
os.environ['GRADIO_SERVER_NAME'] = '0.0.0.0'

# Display title for the app.
title = 'DNAptaBLE Model Inference'

# Description shown in the Gradio interface. Written as adjacent string
# literals, which Python joins into a single string at compile time.
desc = (
    'AptaBLE (cross-attention network), trained to predict the likelihood '
    'a DNA aptamer will form a complex with a target protein!\n\n'
    'Pass in a FASTA-formatted file of all aptamers and input your protein '
    'target amino acid sequence. Your output scores are available for '
    'download via an Excel file.'
)
|
|
|
# Shared inference pipeline, constructed once when the module is loaded and
# reused by every request handled by get_scores().
#
# A module-level name is already global, so no `global` statement is needed
# here (at module scope that statement has no effect).
#
# NOTE(review): the arguments passed as None (weight decay, epochs, model
# type/version, save paths, tensorboard dir) look like training-only
# settings that inference does not need -- confirm against AptaBLE_Pipeline.
pipeline = AptaBLE_Pipeline(
    lr=1e-6,
    weight_decay=None,
    epochs=None,
    model_type=None,
    model_version=None,
    model_save_path=None,
    accelerate_save_path=None,
    tensorboard_logdir=None,
    d_model=128,
    d_ff=512,
    n_layers=6,
    n_heads=8,
    dropout=0.1,
    load_best_pt=True,  # presumably loads saved weights -- verify in AptaBLE_Pipeline
    device='cuda',
    seed=1004,
)
|
|
|
def comparison(protein, aptamer_file, analysis=None):
    """Score every aptamer in a FASTA file against a single protein sequence.

    Args:
        protein: Protein sequence text entered by the user; it is paired
            with every aptamer read from ``aptamer_file``.
        aptamer_file: Path to a FASTA file of aptamers.
        analysis: Only logged, never otherwise used. It defaults to ``None``
            because the Gradio interface in this file supplies just two
            inputs; without a default every call would raise ``TypeError``.

    Returns:
        A ``(text, excel_path)`` tuple. ``text`` is always the empty string
        (nothing is rendered into the text output), and ``excel_path`` is the
        path of a temporary ``.xlsx`` file containing the results table.
    """
    print('analysis: ', analysis)

    r_names, aptamers = read_fasta(aptamer_file)
    # The same protein sequence is scored against each aptamer.
    proteins = [protein] * len(aptamers)
    scores = get_scores(aptamers, proteins)

    df = pd.DataFrame(columns=['Protein', 'Protein Seq', 'Aptamer', 'Aptamer Seq', 'Score'])
    df['Protein'] = ['protein_prov.'] * len(aptamers)
    df['Aptamer'] = r_names
    df['Protein Seq'] = proteins
    df['Aptamer Seq'] = aptamers
    df['Score'] = scores

    # Reserve a unique path and close the handle *before* writing: re-opening
    # a NamedTemporaryFile by name while it is still open fails on Windows.
    # delete=False keeps the file on disk so it can be returned for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        temp_file_path = temp_file.name
    with pd.ExcelWriter(temp_file_path, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)

    # A second copy is written next to the input file; unlike the download
    # copy, this one includes the DataFrame index as its first column.
    print('Saving to excel!')
    df.to_excel(f'{aptamer_file}.xlsx')

    # Ask PyTorch to release cached GPU memory now that scoring is done.
    torch.cuda.empty_cache()

    return '', temp_file_path
|
|
|
def read_fasta(file_path):
    """Parse a FASTA file into parallel lists of headers and sequences.

    Unlike a strict "header line, sequence line" reader, this handles
    sequences wrapped over several lines and tolerates blank lines, so
    header/sequence pairs cannot drift out of alignment.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        A ``(headers, sequences)`` tuple of equal-length lists. Each header
        keeps its leading ``'>'``; each sequence is the concatenation of all
        non-blank lines that follow its header. Lines appearing before the
        first header are ignored, and a header with no sequence lines is
        dropped rather than paired with an empty sequence.
    """
    records = []  # [header, [sequence fragments]] in file order
    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                records.append([line, []])
            elif records:
                # Continuation of the most recent record's sequence.
                records[-1][1].append(line)

    headers = []
    sequences = []
    for header, fragments in records:
        if fragments:
            headers.append(header)
            sequences.append(''.join(fragments))
    return headers, sequences
|
|
|
def get_scores(aptamers, proteins):
    """Run the shared pipeline on aptamer/protein pairs and return its scores.

    The model is moved to the GPU only for the duration of the call and is
    always moved back to the CPU afterwards, even when inference raises, so
    a failed request does not leave the model occupying GPU memory.

    Args:
        aptamers: Aptamer sequences to score.
        proteins: Protein sequences, one per aptamer.

    Returns:
        Whatever ``pipeline.inference`` returns for these inputs.
    """
    pipeline.model.to('cuda')
    try:
        # The third argument is a list of zeros, one per aptamer --
        # presumably placeholder labels; confirm against pipeline.inference.
        return pipeline.inference(aptamers, proteins, [0] * len(aptamers))
    finally:
        pipeline.model.to('cpu')
|
|
|
|
|
# Web UI: a protein sequence text box plus a FASTA upload go in; a text box
# and a downloadable Excel file come out. The inputs map positionally onto
# the first two parameters of comparison().
iface = gr.Interface(
    fn=comparison,
    inputs=[
        gr.Textbox(lines=2, placeholder="Protein"),
        # type="filepath" hands the callback a path string, not a file object.
        gr.File(type="filepath"),
    ],
    outputs=[
        gr.Textbox(placeholder="Scores"),
        gr.File(label="Download Excel"),
    ],
    # `title` is defined near the top of the file but was never passed in.
    title=title,
    description=desc,
)

iface.launch()
|
|