# NOTE(review): the lines below are Hugging Face Spaces page chrome (status,
# file size, git blame hashes, column ruler) captured by the scrape — not code.
# Commented out so the file is valid Python; original text preserved verbatim.
# Spaces: Sleeping Sleeping
# File size: 1,711 Bytes
# 0ae121b 19ff66f 0ae121b f050150 0ae121b db25cc4 fd2a35c 0ae121b 19ff66f 77d468f 19ff66f fd2a35c cc2c186 0ae121b fd2a35c 0ae121b cc2c186 f050150 0805962 15a14e4 0ae121b
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
import gradio as gr
from transformers import AutoModel, AutoTokenizer, AutoModelForTokenClassification
import torch
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Load the fine-tuned DR-BERT checkpoint (token-classification head) and its
# tokenizer from the local checkpoint directory bundled with this Space.
tokenizer = AutoTokenizer.from_pretrained("./checkpoint-final/")
# .eval() disables dropout/batch-norm updates for deterministic inference.
model = AutoModelForTokenClassification.from_pretrained("./checkpoint-final/").eval()

# One example amino-acid sequence shown in the Gradio UI.
examples = [
    ["GSHMSDNEDNFDGDDFDDVEEDEGLDDLENAEEEGQENVEILPSGERPQANQKRITTPYMTKYERARVLGTRALQIAMCAPVMVELEGETDPLLIAMKELKARKIPIIIRRYLPDGSYEDWGVDELIITD"],
]
def get_out(sent):
    """Predict per-residue disorder scores for an amino-acid sequence.

    Args:
        sent: Amino-acid sequence as a plain string (one letter per residue).

    Returns:
        A ``(fig, text)`` tuple: a matplotlib figure plotting the DR-BERT
        score along the sequence, and a comma-joined string of the scores
        (prefixed with a truncation warning when the input was shortened).
    """
    prefix = ""
    # The model's positional limit: keep only the first 1022 residues
    # (plus the two special tokens added by the tokenizer -> 1024 positions).
    if len(sent) > 1022:
        sent = sent[:1022]
        prefix = "Your protein was longer than 1022 AAs. We are working on including longer sequences but in the meantime, here are the scores for the first 1022 AAs: \n "
    # Server-side log of the (possibly truncated) input sequence.
    print(sent)
    encoded = tokenizer.encode_plus(sent, return_tensors="pt")
    with torch.no_grad():
        output = model(**encoded)
    # Fix: pass dim explicitly — F.softmax without `dim` relies on deprecated
    # implicit-dimension behavior. dim=-1 normalizes over the label classes.
    # Column 1 is taken as the "disordered" class probability; [1:-1] drops
    # the special [CLS]/[SEP] token positions. `.detach()` is unnecessary
    # inside torch.no_grad(), so it was removed.
    probs = F.softmax(torch.squeeze(output['logits']), dim=-1)[1:-1, 1].numpy()
    fig = plt.figure()
    plt.plot(probs)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.xlabel('Sequence position', fontsize=15)
    plt.ylabel('DR-BERT score', fontsize=15)
    scores_csv = ','.join(str(x) for x in probs)
    return (fig, prefix + scores_csv)
# Assemble the Gradio demo: one textbox in, a score plot plus the raw
# comma-separated scores out, then serve it.
demo = gr.Interface(
    fn=get_out,
    inputs=[
        gr.components.Textbox(
            label="Input Amino Acid Sequence",
            placeholder=" Amino acid sequence here ...",
        ),
    ],
    outputs=["plot", "text"],
    examples=examples,
    title="DR-BERT: A Protein Language Model to Predict Disordered Regions",
    description="The app uses DR-BERT to predict disordered regions in proteins. Outputs generated are the probability that a residue is disordered.",
)
demo.launch()