File size: 1,711 Bytes
0ae121b
 
 
 
 
19ff66f
 
0ae121b
 
 
 
 
f050150
 
 
0ae121b
db25cc4
fd2a35c
 
 
 
0ae121b
 
 
 
19ff66f
 
 
77d468f
 
 
 
19ff66f
fd2a35c
cc2c186
0ae121b
fd2a35c
0ae121b
 
 
 
 
cc2c186
f050150
0805962
15a14e4
0ae121b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
from transformers import AutoModel, AutoTokenizer, AutoModelForTokenClassification
import torch
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt


# The tokenizer and the token-classification model are both loaded from the
# same local checkpoint directory.
_CHECKPOINT_DIR = "./checkpoint-final/"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT_DIR)
# eval() returns the module itself, so the call can be chained onto the load.
model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT_DIR).eval()

# Sample sequence offered in the UI. `examples` is a list of rows, each row
# holding one value for the single text input.
_EXAMPLE_SEQUENCE = "GSHMSDNEDNFDGDDFDDVEEDEGLDDLENAEEEGQENVEILPSGERPQANQKRITTPYMTKYERARVLGTRALQIAMCAPVMVELEGETDPLLIAMKELKARKIPIIIRRYLPDGSYEDWGVDELIITD"

examples = [[_EXAMPLE_SEQUENCE]]

def get_out(sent):
    """Score every position of an amino-acid sequence with the loaded model.

    Args:
        sent: Amino-acid sequence as a string. Whitespace (spaces, tabs, line
            breaks from pasted input) is removed before scoring, and ``None``
            is treated as an empty sequence. Input longer than 1022
            characters is truncated to its first 1022 characters.

    Returns:
        A ``(figure, text)`` tuple: a Matplotlib figure plotting the score at
        each sequence position, and the same scores as a comma-separated
        string, preceded by a notice when the input was truncated.
    """
    max_residues = 1022  # longest input that is scored in one pass

    # Pasted sequences often carry line breaks or spaces. They are not
    # residues, and would otherwise be tokenized and counted toward the limit.
    sent = "".join((sent or "").split())

    prefix = ""
    if len(sent) > max_residues:
        sent = sent[:max_residues]
        prefix = "Your protein was longer than 1022 AAs. We are working on including longer sequences but in the meantime, here are the scores for the first 1022 AAs: \n "
    print(sent)

    encoded = tokenizer.encode_plus(sent, return_tensors="pt")
    with torch.no_grad():
        # A single sequence is encoded, so index the one batch entry
        # explicitly instead of squeezing every size-1 axis.
        # assumes logits is (1, tokens, labels) -- TODO confirm against model
        logits = model(**encoded)['logits'][0]
        # Normalise over the label axis; an explicit dim avoids the deprecated
        # implicit-dimension behaviour of softmax.
        probs = F.softmax(logits, dim=-1)
    # Drop the first and last token positions (presumably the special tokens
    # added by the tokenizer) and keep label column 1, which the app presents
    # as the per-residue disorder probability.
    scores = probs[1:-1, 1].numpy()

    # Draw through the Axes object rather than pyplot's global "current
    # figure", so concurrent requests cannot draw onto each other's plots.
    fig, ax = plt.subplots()
    ax.plot(scores)
    ax.tick_params(axis="both", labelsize=15)
    ax.set_xlabel('Sequence position', fontsize=15)
    ax.set_ylabel('DR-BERT score', fontsize=15)
    # Unregister the figure from pyplot so figures do not pile up over the
    # lifetime of the server; the Figure object itself remains renderable.
    plt.close(fig)

    text = ','.join(map(str, scores))
    return (fig, prefix + text)


# Web UI around get_out: one text input, with a plot and a text output.
sequence_box = gr.components.Textbox(
    label="Input Amino Acid Sequence",
    placeholder=" Amino acid sequence here ...",
)

demo = gr.Interface(
    fn=get_out,
    inputs=[sequence_box],
    outputs=["plot", "text"],
    examples=examples,
    title="DR-BERT: A Protein Language Model to Predict Disordered Regions",
    description="The app uses DR-BERT to predict disordered regions in proteins. Outputs generated are the probability that a residue is disordered.",
)
demo.launch()