Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- Glydentify.py +501 -0
- requirements.txt +329 -0
Glydentify.py
ADDED
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from IPython.display import clear_output
|
3 |
+
import torch
|
4 |
+
from transformers import EsmForSequenceClassification, AdamW, AutoTokenizer
|
5 |
+
from torch.utils.data import DataLoader, TensorDataset, random_split
|
6 |
+
from sklearn.preprocessing import LabelEncoder
|
7 |
+
from tqdm import tqdm
|
8 |
+
import numpy as np
|
9 |
+
import seaborn as sns
|
10 |
+
from sklearn.model_selection import train_test_split
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
import pickle
|
13 |
+
import torch.nn.functional as F
|
14 |
+
import gradio as gr
|
15 |
+
import io
|
16 |
+
from PIL import Image
|
17 |
+
import Bio
|
18 |
+
from Bio import SeqIO
|
19 |
+
import zipfile
|
20 |
+
import os
|
21 |
+
|
22 |
+
# Load the family-classification model (ESM-2 650M) and its label vocabulary.
# NOTE(review): pickle.load can execute arbitrary code; 'family_labels.pkl'
# must only ever be a trusted file shipped with the app.
with open('family_labels.pkl', 'rb') as filefam:
    yfam = pickle.load(filefam)

tokenizerfam = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
label_encoderfam = LabelEncoder()
encoded_labelsfam = label_encoderfam.fit_transform(yfam)
labelsfam = torch.tensor(encoded_labelsfam)
# `device` is computed here, but the model is explicitly placed on the CPU below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

modelfam = EsmForSequenceClassification.from_pretrained(
    "facebook/esm2_t33_650M_UR50D",
    num_labels=len(set(labelsfam.tolist())),
)
modelfam = modelfam.to('cpu')

# The checkpoint used to be reachable only through a developer's absolute home
# path. That path is kept as the default, but it can now be overridden with the
# GLYDENTIFY_FAMILY_WEIGHTS environment variable on other machines.
family_weights_path = os.environ.get(
    "GLYDENTIFY_FAMILY_WEIGHTS",
    "/home/aarya/Documents/paper3/model_650M.pth",
)
# map_location='cpu' lets a checkpoint saved on a GPU machine load on a
# CPU-only host instead of failing while deserializing CUDA tensors.
modelfam.load_state_dict(torch.load(family_weights_path, map_location='cpu'))
modelfam.eval()

# Start-up smoke test: run one known sequence through the family model.
x_testfam = ["""MAEVLRTLAGKPKCHALRPMILFLIMLVLVLFGYGVLSPRSLMPGSLERGFCMAVREPDH
LQRVSLPRMVYPQPKVLTPCRKDVLVVTPWLAPIVWEGTFNIDILNEQFRLQNTTIGLTV
FAIKKYVAFLKLFLETAEKHFMVGHRVHYYVFTDQPAAVPRVTLGTGRQLSVLEVRAYKR
WQDVSMRRMEMISDFCERRFLSEVDYLVCVDVDMEFRDHVGVEILTPLFGTLHPGFYGSS
REAFTYERRPQSQAYIPKDEGDFYYLGGFFGGSVQEVQRLTRACHQAMMVDQANGIEAVW
HDESHLNKYLLRHKPTKVLSPEYLWDQQLLGWPAVLRKLRFTAVPKNHQAVRNP
"""]

encoded_inputfam = tokenizerfam(x_testfam, padding=True, truncation=True, max_length=512, return_tensors="pt")
input_idsfam = encoded_inputfam["input_ids"]
attention_maskfam = encoded_inputfam["attention_mask"]

with torch.no_grad():
    outputfam = modelfam(input_idsfam, attention_mask=attention_maskfam)
logitsfam = outputfam.logits
probabilitiesfam = F.softmax(logitsfam, dim=1)
_, predicted_labelsfam = torch.max(logitsfam, dim=1)

decoded_labelsfam = label_encoderfam.inverse_transform(predicted_labelsfam.tolist())
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
# Load the donor-classification model (ESM-2 35M) and its pickled label encoder.
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t12_35M_UR50D")

# NOTE(review): pickle.load can execute arbitrary code; 'donorslabels.pkl'
# must only ever be a trusted file shipped with the app.
with open('donorslabels.pkl', 'rb') as file:
    label_encoder = pickle.load(file)

# `device` is computed here, but the model is explicitly placed on the CPU below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = EsmForSequenceClassification.from_pretrained(
    "facebook/esm2_t12_35M_UR50D",
    num_labels=len(label_encoder.classes_),
)
model = model.to('cpu')

# map_location='cpu' lets a checkpoint saved on a GPU machine load on a
# CPU-only host instead of failing while deserializing CUDA tensors.
model.load_state_dict(torch.load("best_model_35M_t12_5v5.pth", map_location='cpu'))
model.eval()

# Start-up smoke test: run one known sequence through the donor model.
x_test = ["""MAEVLRTLAGKPKCHALRPMILFLIMLVLVLFGYGVLSPRSLMPGSLERGFCMAVREPDH
LQRVSLPRMVYPQPKVLTPCRKDVLVVTPWLAPIVWEGTFNIDILNEQFRLQNTTIGLTV
FAIKKYVAFLKLFLETAEKHFMVGHRVHYYVFTDQPAAVPRVTLGTGRQLSVLEVRAYKR
WQDVSMRRMEMISDFCERRFLSEVDYLVCVDVDMEFRDHVGVEILTPLFGTLHPGFYGSS
REAFTYERRPQSQAYIPKDEGDFYYLGGFFGGSVQEVQRLTRACHQAMMVDQANGIEAVW
HDESHLNKYLLRHKPTKVLSPEYLWDQQLLGWPAVLRKLRFTAVPKNHQAVRNP
"""]

encoded_input = tokenizer(x_test, padding=True, truncation=True, max_length=512, return_tensors="pt")
input_ids = encoded_input["input_ids"]
attention_mask = encoded_input["attention_mask"]

with torch.no_grad():
    output = model(input_ids, attention_mask=attention_mask)
logits = output.logits
probabilities = F.softmax(logits, dim=1)
_, predicted_labels = torch.max(logits, dim=1)

decoded_labels = label_encoder.inverse_transform(predicted_labels.tolist())
|
101 |
+
|
102 |
+
|
103 |
+
def _gt_entry(cazy_name, mechanism, clade, url_family=None):
    """Build one ``glycosyltransferase_db`` record.

    Every record shares the same field order, an empty 'Alternative Name' and
    fold 'A'. ``url_family`` overrides the family used in the CAZy link when
    it differs from ``cazy_name``.
    """
    return {
        'CAZy Name': cazy_name,
        'Alternative Name': '',
        'Fold': 'A',
        'Mechanism': mechanism,
        'Clade': clade,
        'More Info': 'http://www.cazy.org/{}.html'.format(url_family or cazy_name),
    }


# Reference data for each family label, keyed by the label string.
# Values no longer carry column-alignment padding ('GT2 ', '8 ', ' '); a clade
# that was blank in the original table is stored as an empty string.
glycosyltransferase_db = {
    "GT31-chsy": _gt_entry('GT31', 'Inverting', '8'),
    "GT2-CesA2": _gt_entry('GT2', 'Inverting', '1'),
    "GT43-arath": _gt_entry('GT43', 'Inverting', 'N/A'),
    "GT8-Met1": _gt_entry('GT8', 'Retaining', '9'),
    "GT32-higher": _gt_entry('GT32', 'Retaining', 'N/A'),
    "GT40": _gt_entry('GT40', 'Inverting', 'N/A'),
    "GT16": _gt_entry('GT16', 'Inverting', '6'),
    "GT27": _gt_entry('GT27', 'Retaining', '5'),
    "GT55": _gt_entry('GT55', 'Retaining', '2'),
    "GT8-Glycogenin": _gt_entry('GT8', 'Retaining', '9'),
    "GT8-1": _gt_entry('GT8', 'Retaining', '9'),
    "GT25": _gt_entry('GT25', 'Inverting', '6'),
    "GT2-DPM_like": _gt_entry('GT2', 'Inverting', '2'),
    "GT31-fringe": _gt_entry('GT31', 'Inverting', '8'),
    "GT2-Bact_puta": _gt_entry('GT2', 'Inverting', 'N/A'),
    "GT84": _gt_entry('GT84', 'Retaining', '1'),
    "GT13": _gt_entry('GT13', 'Inverting', '6'),
    "GT43-cele": _gt_entry('GT43', 'Inverting', 'N/A'),
    # NOTE(review): the CAZy name says GT92 while the key and the link both
    # say GT2. Preserved as found; confirm which one is intended.
    "GT2-Bact_LPS1": _gt_entry('GT92', 'Inverting', 'N/A', url_family='GT2'),
    "GT2-Bact_Oant": _gt_entry('GT2', 'Inverting', ''),
    "GT67": _gt_entry('GT67', 'Inverting', '8'),
    "GT2-HAS": _gt_entry('GT2', 'Inverting', '1'),
    "GT82": _gt_entry('GT82', 'Inverting', '7'),
    "GT24": _gt_entry('GT24', 'Retaining', '9'),
    "GT31-plant": _gt_entry('GT31', 'Inverting', '8'),
    "GT81-Bact": _gt_entry('GT81', 'Retaining', '2'),
    "GT2-Bact_gt25Me": _gt_entry('GT2', 'Inverting', ''),
    "GT2-B3GntL": _gt_entry('GT2', 'Inverting', '4'),
    "GT49": _gt_entry('GT49', 'Inverting', 'N/A'),
    "GT34": _gt_entry('GT34', 'Retaining', 'N/A'),
    "GT45": _gt_entry('GT45', 'Retaining', 'N/A'),
    "GT32-lower": _gt_entry('GT32', 'Retaining', 'N/A'),
    "GT88": _gt_entry('GT88', 'Retaining', '9'),
    "GT21": _gt_entry('GT21', 'Inverting', '1'),
    "GT2-DPG_synt": _gt_entry('GT2', 'Inverting', '2'),
    "GT43-b3gat2": _gt_entry('GT43', 'Inverting', 'N/A'),
    "GT2-Chitin_synt": _gt_entry('GT2', 'Inverting', '5'),
    "GT8-Bact": _gt_entry('GT8', 'Retaining', 'N/A'),
    "GT8-Met2": _gt_entry('GT8', 'Retaining', 'N/A'),
    "GT2-Bact_Chlor1": _gt_entry('GT2', 'Inverting', ''),
    "GT54": _gt_entry('GT54', 'Inverting', '6'),
    "GT2-Cel_bre3": _gt_entry('GT2', 'Inverting', '1'),
    "GT2-Bact_Rham": _gt_entry('GT2', 'Inverting', 'N/A'),
    "GT6": _gt_entry('GT6', 'Retaining', 'N/A'),
    "GT2-Bact_puta2": _gt_entry('GT2', 'Inverting', ''),
    "GT7-1": _gt_entry('GT7', 'Inverting', '5'),
    "GT2-Csl": _gt_entry('GT2', 'Inverting', '4'),
    "GT2-ExoU": _gt_entry('GT2', 'Inverting', ''),
    "GT2-Csl2": _gt_entry('GT2', 'Inverting', '4'),
    "GT64": _gt_entry('GT64', 'Retaining', 'N/A'),
    "GT2-Bact_Chlor2": _gt_entry('GT2', 'Inverting', ''),
    "GT78": _gt_entry('GT78', 'Retaining', '2'),
    "GT12": _gt_entry('GT12', 'Inverting', 'N/A'),
    "GT31-gnt": _gt_entry('GT31', 'Inverting', '8'),
    "GT2-Bact_CHS": _gt_entry('GT2', 'Inverting', '5'),
    "GT62": _gt_entry('GT62', 'Retaining', '3'),
    "GT8-Met_Pla": _gt_entry('GT8', 'Retaining', 'N/A'),
    "GT15": _gt_entry('GT15', 'Retaining', '8'),
    "GT43-b3gat1": _gt_entry('GT43', 'Inverting', 'N/A'),
    "GT31-b3glt": _gt_entry('GT31', 'Inverting', '8'),
    "GT2-CesA1": _gt_entry('GT2', 'Inverting', '1'),
    "GT60": _gt_entry('GT60', 'Retaining', '5'),
    "GT14": _gt_entry('GT14', 'Inverting', '7'),
    "GT2-Bact_DPM_sy": _gt_entry('GT2', 'Inverting', '2'),
    "GT17": _gt_entry('GT17', 'Inverting', '7'),
    "GT2-Bact_LPS2": _gt_entry('GT2', 'Inverting', '3'),
    "GT77": _gt_entry('GT77', 'Retaining', '9'),
    "GT2-Bact_EpsO": _gt_entry('GT2', 'Inverting', ''),
    "GT43-b3gat3": _gt_entry('GT43', 'Inverting', 'N/A'),
    "GT8-Fun": _gt_entry('GT8', 'Retaining', '9'),
    "GT75": _gt_entry('GT75', 'Inverting', 'N/A'),
    "GT2-Bact_GlfT": _gt_entry('GT2', 'Inverting', 'N/A'),
}
|
178 |
+
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
+
|
183 |
+
def _field_label(key):
    """Return the display label for a ``glycosyltransferase_db`` field key."""
    # Keys such as 'CAZy Name' are already display-ready; str.title() would
    # turn them into 'Cazy Name'. Only lowercase snake_case keys are reformatted.
    if key.islower():
        return key.replace("_", " ").title()
    return key


def get_family_info(family_name):
    """Return a Markdown summary of one entry in ``glycosyltransferase_db``.

    Each field is rendered as a bold label followed by its value, one field
    per line. The 'More Info' field (also accepted as 'more_info') is rendered
    as a clickable link; its value may be a single URL string or an iterable
    of URLs.

    Args:
        family_name: Key to look up in ``glycosyltransferase_db``.

    Returns:
        The Markdown text, or an empty string when ``family_name`` is unknown.
    """
    family_info = glycosyltransferase_db.get(family_name, {})
    rendered = []
    for key, value in family_info.items():
        # The original test compared against "more_info", which never matched
        # the 'More Info' key, so links were emitted as plain text.
        if key.lower().replace(" ", "_") == "more_info":
            # A bare string must not be iterated character by character.
            links = [value] if isinstance(value, str) else list(value)
            text = " ".join("[{0}]({0})".format(str(link).strip()) for link in links)
        else:
            # strip() drops any alignment padding stored in the table values.
            text = str(value).strip()
        rendered.append("**{}:** {}".format(_field_label(key), text))
    # Two trailing spaces form a Markdown hard line break; a lone newline is a
    # soft break and would run all fields together on one line.
    return "".join(entry + "  \n" for entry in rendered)
|
195 |
+
|
196 |
+
def fig_to_img(fig):
    """Render a matplotlib figure as PNG in memory and open it as a PIL Image."""
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png', bbox_inches='tight')
    # Rewind so the image reader starts at the first byte of the PNG.
    png_buffer.seek(0)
    return Image.open(png_buffer)
|
203 |
+
|
204 |
+
|
205 |
+
def process_family_sequence(protein_fasta):
    """Predict the family of a single protein sequence and plot the top 5 classes.

    Args:
        protein_fasta: Sequence text, optionally preceded by one FASTA header
            line starting with '>'.

    Returns:
        A 4-tuple ``(label, image, None, info)``. ``label`` is the predicted
        family, ``image`` is a PIL image of the top-5 probability bar chart
        and ``info`` is Markdown from ``get_family_info`` (prefixed with a
        warning for sequences shorter than 100 residues). When the input is
        rejected, the tuple is ``(None, None, None, error_message)`` so callers
        can always unpack four values.
    """
    # Strip each line so Windows line endings and stray spaces do not make an
    # otherwise valid sequence fail the character check.
    lines = [line.strip() for line in protein_fasta.split('\n')]

    headers = [line for line in lines if line.startswith('>')]
    if len(headers) > 1:
        return None, None, None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence."

    protein_sequence = ''.join(line for line in lines if not line.startswith('>'))

    # Check for invalid characters (anything outside the 20 standard amino acids).
    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")
    if not set(protein_sequence).issubset(valid_characters):
        return None, None, None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?"

    # Lowercase residues are accepted above; normalise the case so they reach
    # the tokenizer as the same residues as their uppercase form.
    protein_sequence = protein_sequence.upper()

    # Use the tokenizer that was loaded alongside the family model.
    encoded_input = tokenizerfam([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")

    with torch.no_grad():
        outputfam = modelfam(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
    logitsfam = outputfam.logits
    probabilitiesfam = F.softmax(logitsfam, dim=1)[0].tolist()
    _, predicted_labelsfam = torch.max(logitsfam, dim=1)

    decoded_labelsfam = label_encoderfam.inverse_transform(predicted_labelsfam.tolist())
    family_info = get_family_info(decoded_labelsfam[0])

    # Rank every class by probability and keep the five most likely.
    ranked = sorted(zip(label_encoderfam.classes_, probabilitiesfam), key=lambda pair: pair[1], reverse=True)
    labels_top5, probabilities_top5 = zip(*ranked[:5])
    y_posfam = np.arange(len(labels_top5))

    # Draw on the figure's own axes rather than pyplot's implicit "current
    # figure", and close the figure only after it has been rendered.
    figfam, ax = plt.subplots(figsize=(10, 5))
    ax.barh(y_posfam, [prob * 100 for prob in probabilities_top5], align='center', alpha=0.5)
    ax.set_yticks(y_posfam)
    ax.set_yticklabels(labels_top5)
    ax.set_xlabel('Probability (%)')
    ax.set_title('Top 5 Family Class Probabilities')
    ax.set_xlim(0, 100)
    img = fig_to_img(figfam)
    plt.close(figfam)

    if len(protein_sequence) < 100:
        return decoded_labelsfam[0], img, None, f"**Warning:** The sequence is relatively short. Fragmentary and partial sequences may result in incorrect predictions. \n\n {family_info}"

    return decoded_labelsfam[0], img, None, family_info
|
263 |
+
|
264 |
+
|
265 |
+
def process_single_sequence(protein_fasta):
    """Predict the donor class of a single protein sequence and plot the top 3.

    Args:
        protein_fasta: Sequence text, optionally preceded by one FASTA header
            line starting with '>'.

    Returns:
        A 4-tuple ``(label, image, None, info)``. ``label`` is the predicted
        donor, ``image`` is a PIL image of the top-3 probability bar chart and
        ``info`` is a short-sequence warning, or ``None`` when no warning
        applies. When the input is rejected, the tuple is
        ``(None, None, None, error_message)`` so callers can always unpack
        four values.
    """
    # Strip each line so Windows line endings and stray spaces do not make an
    # otherwise valid sequence fail the character check.
    lines = [line.strip() for line in protein_fasta.split('\n')]

    headers = [line for line in lines if line.startswith('>')]
    if len(headers) > 1:
        return None, None, None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence."

    protein_sequence = ''.join(line for line in lines if not line.startswith('>'))

    # Check for invalid characters (anything outside the 20 standard amino acids).
    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")
    if not set(protein_sequence).issubset(valid_characters):
        return None, None, None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?"

    # Lowercase residues are accepted above; normalise the case so they reach
    # the tokenizer as the same residues as their uppercase form.
    protein_sequence = protein_sequence.upper()

    encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")

    with torch.no_grad():
        output = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
    logits = output.logits
    dprobabilities = F.softmax(logits, dim=1)[0].tolist()
    _, predicted_labels = torch.max(logits, dim=1)

    decoded_labels = label_encoder.inverse_transform(predicted_labels.tolist())

    # Rank every class by probability and keep the three most likely.
    ranked = sorted(zip(label_encoder.classes_, dprobabilities), key=lambda pair: pair[1], reverse=True)
    labels_top3, probabilities_top3 = zip(*ranked[:3])
    y_pos = np.arange(len(labels_top3))

    # Draw on the figure's own axes rather than pyplot's implicit "current
    # figure", and close the figure only after it has been rendered.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.barh(y_pos, [prob * 100 for prob in probabilities_top3], align='center', alpha=0.5)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(labels_top3)
    ax.set_xlabel('Probability (%)')
    ax.set_title('Top 3 Donor Class Probabilities')
    ax.set_xlim(0, 100)
    img = fig_to_img(fig)
    plt.close(fig)

    if len(protein_sequence) < 100:
        # The previous code appended a family-database lookup keyed by the
        # donor label here; a donor label is not a family key, so only the
        # warning itself is returned.
        return decoded_labels[0], img, None, "**Warning:** The sequence is relatively short. Fragmentary and partial sequences may result in incorrect predictions."

    return decoded_labels[0], img, None, None
|
322 |
+
|
323 |
+
def process_sequence_file(protein_file):
    """Run donor prediction for every record of an uploaded FASTA file.

    For record ``k`` (1-based) a report ``results/result_k.txt`` is written,
    plus ``results/result_k.png`` with the probability chart when the sequence
    is valid. Everything in ``results/`` is then zipped.

    Args:
        protein_file: Uploaded file object; only its ``.name`` path is read.

    Returns:
        The path ``'predicted_results.zip'``, or the error text if the file
        could not be parsed.
    """
    try:
        records = list(SeqIO.parse(protein_file.name, "fasta"))
    except Exception as e:
        # The parse error text is returned in place of the zip path.
        return str(e)

    os.makedirs('results', exist_ok=True)
    # Remove reports left over from a previous upload; otherwise a shorter
    # file would be zipped together with stale result_* files.
    for stale in os.listdir('results'):
        if stale.startswith('result_'):
            os.remove(os.path.join('results', stale))

    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")

    for idx, record in enumerate(records, start=1):
        protein_sequence = str(record.seq)
        report_path = f'results/result_{idx}.txt'

        if not set(protein_sequence).issubset(valid_characters):
            with open(report_path, 'w') as file:
                file.write("Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?")
            continue

        label, img, _, info = process_single_sequence(protein_sequence)
        img.save(f'results/result_{idx}.png')
        with open(report_path, 'w') as file:
            # info is None when there is no warning; do not write the text "None".
            file.write(f'Predicted Donor: {label}\n\n{info or ""}')

    # Create a zip file w/ results -- To Do: Figure out how to improve compression for large files
    with zipfile.ZipFile('predicted_results.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk('results/'):
            for file in files:
                zipf.write(os.path.join(root, file))

    return 'predicted_results.zip'
|
357 |
+
|
358 |
+
def mask_residue(sequence, position):
    """Return ``sequence`` with the residue at ``position`` replaced by 'X'.

    Args:
        sequence: The residue string.
        position: Index of the residue to mask. Negative values count from
            the end, as in ordinary Python indexing.

    Returns:
        A new string of the same length as ``sequence``.

    Raises:
        IndexError: If ``position`` is outside the sequence. Plain slicing
            would otherwise append an extra 'X' (position too large) or
            duplicate part of the sequence (negative position).
    """
    length = len(sequence)
    if not -length <= position < length:
        raise IndexError(
            "position {} is out of range for a sequence of length {}".format(position, length)
        )
    index = position + length if position < 0 else position
    return sequence[:index] + 'X' + sequence[index + 1:]
|
361 |
+
|
362 |
+
def generate_heatmap(protein_fasta, group_size=10):
    """Plot how masking each group of residues changes the donor probabilities.

    The sequence is scored once unmasked. Then each consecutive group of
    ``group_size`` residues is replaced with 'X' in turn and scored again.
    The absolute change in every class probability is drawn as a heatmap
    (rows are residue groups, columns are donor classes).

    Args:
        protein_fasta: Sequence text, optionally preceded by one FASTA header
            line starting with '>'.
        group_size: Number of residues masked together. Defaults to 10.

    Returns:
        A PIL image of the heatmap, or ``None`` when the input is empty,
        holds more than one FASTA record or contains invalid characters. The
        caller passes the result to an image output, which cannot display an
        error tuple.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be at least 1")

    # Strip each line so Windows line endings and stray spaces do not make an
    # otherwise valid sequence fail the character check.
    lines = [line.strip() for line in protein_fasta.split('\n')]

    headers = [line for line in lines if line.startswith('>')]
    if len(headers) > 1:
        return None

    protein_sequence = ''.join(line for line in lines if not line.startswith('>'))

    # Check for invalid characters (anything outside the 20 standard amino acids).
    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")
    if not set(protein_sequence).issubset(valid_characters):
        return None
    if not protein_sequence:
        # Nothing to mask; an empty score matrix cannot be plotted.
        return None

    # Lowercase residues are accepted above; normalise the case so they reach
    # the tokenizer as the same residues as their uppercase form.
    protein_sequence = protein_sequence.upper()

    # Tokenize and predict for the original sequence.
    encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        original_output = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
    original_probabilities = F.softmax(original_output.logits, dim=1).cpu().numpy()[0]

    # One row per residue group. The code previously referred to an undefined
    # name (original_sequence) here and raised NameError on every call.
    group_starts = range(0, len(protein_sequence), group_size)
    num_groups = len(group_starts)
    importance_scores = np.zeros((num_groups, len(original_probabilities)))

    for group_index, start in enumerate(group_starts):
        # Mask the residues at positions [start, start + group_size).
        end = min(start + group_size, len(protein_sequence))
        masked_sequence = protein_sequence[:start] + 'X' * (end - start) + protein_sequence[end:]

        # Tokenize and predict for the masked sequence.
        # NOTE(review): truncation at max_length=512 presumably means groups
        # beyond the limit never reach the model and score about 0; confirm.
        encoded_input = tokenizer([masked_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
        with torch.no_grad():
            masked_output = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
        masked_probabilities = F.softmax(masked_output.logits, dim=1).cpu().numpy()[0]

        # The change in probabilities is the importance score of the group.
        importance_scores[group_index, :] = np.abs(original_probabilities - masked_probabilities)

        progress = (group_index + 1) / num_groups * 100
        print(f"Progress: {progress:.2f}%")

    figmap, ax = plt.subplots(figsize=(20, 20))
    sns.heatmap(importance_scores, annot=True, cmap="coolwarm", xticklabels=label_encoder.classes_, yticklabels=[f"{i}-{i+group_size-1}" for i in group_starts], ax=ax)
    ax.set_xlabel("Predicted Labels")
    ax.set_ylabel("Residue Position Groups")

    img = fig_to_img(figmap)
    # Release the figure once rendered so repeated requests do not accumulate
    # open figures.
    plt.close(figmap)

    return img
|
422 |
+
|
423 |
+
|
424 |
+
def main_function_single(sequence):
    """Run family prediction, donor prediction and the donor heatmap for one sequence.

    Args:
        sequence: Sequence text entered by the user.

    Returns:
        A 6-tuple ``(family_label, family_img, family_info, donor_label,
        donor_img, heatmap_img)``. When the sequence is rejected by
        validation, every slot is ``None`` except ``family_info``, which
        carries the error message.
    """
    family_result = process_family_sequence(sequence)
    if family_result[0] is None:
        # Validation failed. A failure is reported with a None label and the
        # message somewhere in the tuple, whose length may differ from the
        # success layout, so unpacking four values here could raise
        # ValueError. Surface the message in the info slot and skip the donor
        # and heatmap passes, which would reject the same input.
        message = next((item for item in family_result if isinstance(item, str)), "")
        return None, None, message, None, None, None

    # Process seq, and return outputs for both fam and don.
    family_label, family_img, _, family_info = family_result
    donor_label, donor_img, *_ = process_single_sequence(sequence)
    figmap = generate_heatmap(sequence)
    return family_label, family_img, family_info, donor_label, donor_img, figmap
|
430 |
+
|
431 |
+
def main_function_upload(protein_file):
    """Hand the uploaded file to ``process_sequence_file`` and return its result.

    Args:
        protein_file: The value delivered by the file-upload component.

    Returns:
        Whatever ``process_sequence_file`` returns for that file.
    """
    # Progress reporting through gr.Progress() is not wired in.
    outcome = process_sequence_file(protein_file)
    return outcome
|
433 |
+
|
434 |
+
# Output image components. They are created here, outside of any layout
# context, so that event handlers can reference them before the point where
# they are placed in the page with .render().
# gr.Image is used directly instead of the deprecated gr.outputs.Image wrapper.
prediction_imagefam = gr.Image(type='pil', label="Family prediction graph")
prediction_imagedonor = gr.Image(type='pil', label="Donor prediction graph")
prediction_explain = gr.Image(type='pil', label="Donor prediction explanation")
|
437 |
+
|
438 |
+
|
439 |
+
# Gradio user interface: one tab for a single pasted sequence, one tab for a
# FASTA upload. The app is launched at import time.
# NOTE(review): the nesting below was reconstructed from a copy of the file in
# which indentation had been lost -- confirm the layout against the live app.
with gr.Blocks() as app:
    gr.Markdown("# Glydentify")

    with gr.Tab("Single Sequence Prediction"):
        with gr.Row().style(equal_height=True):
            with gr.Column():
                sequence = gr.Textbox(lines=15, placeholder='Enter Protein Sequence Here...', label="Protein Sequence")
            with gr.Column():
                with gr.Accordion("Example:"):
                    # "\\>" produces the same text as the former "\>" (a
                    # backslash followed by ">") without relying on an invalid
                    # string escape sequence. The text is kept flush-left so
                    # no leading whitespace is added to the Markdown source.
                    gr.Markdown("""
\\>Q9LTZ9|GALS2_ARATH Galactan beta-1,4-galactosyltransferase GALS2
MAKERDQNTKDKNLLICFLWNFSAELKLALMALLVLCTLATLLPFLPSSFSISASELRFC
ISRIAVNSTSVNFTTVVEKPVLDNAVKLTEKPVLDNGVTKQPLTEEKVLNNGVIKRTFTG
YGWAAYNFVLMNAYRGGVNTFAVIGLSSKPLHVYSHPTYRCEWIPLNQSDNRILTDGTKI
LTDWGYGRVYTTVVVNCTFPSNTVINPKNTGGTLLLHATTGDTDRNITDSIPVLTETPNT
VDFALYESNLRRREKYDYLYCGSSLYGNLSPQRIREWIAYHVRFFGERSHFVLHDAGGIT
EEVFEVLKPWIELGRVTVHDIREQERFDGYYHNQFMVVNDCLHRYRFMAKWMFFFDVDEF
IYVPAKSSISSVMVSLEEYSQFTIEQMPMSSQLCYDGDGPARTYRKWGFEKLAYRDVKKV
PRRDRKYAVQPRNVFATGVHMSQHLQGKTYHRAEGKIRYFHYHGSISQRREPCRHLYNGT
RIVHENN
""")
                family_prediction = gr.Textbox(label="Predicted family")
                donor_prediction = gr.Textbox(label="Predicted donor")
                info_markdown = gr.Markdown()

        # Predict button
        with gr.Row().style(equal_height=True):
            with gr.Column():
                predict_button = gr.Button("Predict")
                # Output order must match the tuple returned by main_function_single.
                predict_button.click(main_function_single, inputs=sequence,
                                     outputs=[family_prediction, prediction_imagefam, info_markdown,
                                              donor_prediction, prediction_imagedonor, prediction_explain])

        # Family & Donor Section
        with gr.Row().style(equal_height=True):
            with gr.Column():
                with gr.Accordion("Prediction Bar Graphs:"):
                    # These image components are created at module level and
                    # placed into the layout here.
                    prediction_imagefam.render()
                    prediction_imagedonor.render()

            # Explain Section
            with gr.Column():
                with gr.Accordion("Donor explanation"):
                    prediction_explain.render()

    with gr.Tab("Multiple Sequence Prediction"):
        with gr.Row().style(equal_height=True):
            with gr.Column():
                protein_file = gr.File(label="Upload FASTA file")
            with gr.Column():
                result_file = gr.File(label="Download predictions of uploaded sequences")
        with gr.Row().style(equal_height=True):
            with gr.Column():
                process_button = gr.Button("Process")
                process_button.click(main_function_upload, inputs=protein_file, outputs=[result_file])
            with gr.Column():
                clear = gr.Button("Clear")
                # Reset both file components; the previous handler returned
                # None with no outputs attached, so the button had no effect.
                clear.click(lambda: (None, None), inputs=None, outputs=[protein_file, result_file])

app.launch(show_error=True)
|
500 |
+
|
501 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: glydentify
|
2 |
+
channels:
|
3 |
+
- conda-forge
|
4 |
+
- bioconda
|
5 |
+
dependencies:
|
6 |
+
- _libgcc_mutex=0.1=conda_forge
|
7 |
+
- _openmp_mutex=4.5=2_gnu
|
8 |
+
- aiofiles=22.1.0=pyhd8ed1ab_0
|
9 |
+
- aiohttp=3.7.4.post0=py39h3811e60_1
|
10 |
+
- aiosqlite=0.18.0=pyhd8ed1ab_0
|
11 |
+
- altair=4.2.2=pyhd8ed1ab_0
|
12 |
+
- anyio=3.6.2=pyhd8ed1ab_0
|
13 |
+
- aom=3.5.0=h27087fc_0
|
14 |
+
- argon2-cffi=21.3.0=pyhd8ed1ab_0
|
15 |
+
- argon2-cffi-bindings=21.2.0=py39hb9d737c_3
|
16 |
+
- asttokens=2.2.1=pyhd8ed1ab_0
|
17 |
+
- async-timeout=3.0.1=py_1000
|
18 |
+
- attrs=22.2.0=pyh71513ae_0
|
19 |
+
- aws-c-auth=0.7.0=hf8751d9_2
|
20 |
+
- aws-c-cal=0.6.0=h93469e0_0
|
21 |
+
- aws-c-common=0.8.23=hd590300_0
|
22 |
+
- aws-c-compression=0.2.17=h862ab75_1
|
23 |
+
- aws-c-event-stream=0.3.1=h9599702_1
|
24 |
+
- aws-c-http=0.7.11=hbe98c3e_0
|
25 |
+
- aws-c-io=0.13.28=h3870b5a_0
|
26 |
+
- aws-c-mqtt=0.8.14=h2e270ba_2
|
27 |
+
- aws-c-s3=0.3.13=heb0bb06_2
|
28 |
+
- aws-c-sdkutils=0.1.11=h862ab75_1
|
29 |
+
- aws-checksums=0.1.16=h862ab75_1
|
30 |
+
- aws-crt-cpp=0.20.3=he9c0e7f_4
|
31 |
+
- aws-sdk-cpp=1.10.57=hbc2ea52_17
|
32 |
+
- babel=2.12.1=pyhd8ed1ab_1
|
33 |
+
- backcall=0.2.0=pyh9f0ad1d_0
|
34 |
+
- backports=1.0=pyhd8ed1ab_3
|
35 |
+
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
|
36 |
+
- beautifulsoup4=4.12.2=pyha770c72_0
|
37 |
+
- biopython=1.81=py39h72bdee0_0
|
38 |
+
- bleach=6.0.0=pyhd8ed1ab_0
|
39 |
+
- brotli=1.0.9=h166bdaf_8
|
40 |
+
- brotli-bin=1.0.9=h166bdaf_8
|
41 |
+
- brotlipy=0.7.0=py39hb9d737c_1005
|
42 |
+
- bzip2=1.0.8=h7f98852_4
|
43 |
+
- c-ares=1.19.1=hd590300_0
|
44 |
+
- ca-certificates=2023.5.7=hbcca054_0
|
45 |
+
- certifi=2023.5.7=pyhd8ed1ab_0
|
46 |
+
- cffi=1.15.1=py39he91dace_3
|
47 |
+
- chardet=4.0.0=py39hf3d152e_3
|
48 |
+
- charset-normalizer=3.1.0=pyhd8ed1ab_0
|
49 |
+
- click=8.1.3=unix_pyhd8ed1ab_2
|
50 |
+
- colorama=0.4.6=pyhd8ed1ab_0
|
51 |
+
- comm=0.1.3=pyhd8ed1ab_0
|
52 |
+
- contourpy=1.0.7=py39h4b4f3f3_0
|
53 |
+
- cryptography=40.0.2=py39h079d5ae_0
|
54 |
+
- cycler=0.11.0=pyhd8ed1ab_0
|
55 |
+
- dataclasses=0.8=pyhc8e2a94_3
|
56 |
+
- datasets=2.13.1=pyhd8ed1ab_0
|
57 |
+
- debugpy=1.6.7=py39h227be39_0
|
58 |
+
- decorator=5.1.1=pyhd8ed1ab_0
|
59 |
+
- defusedxml=0.7.1=pyhd8ed1ab_0
|
60 |
+
- dill=0.3.6=pyhd8ed1ab_1
|
61 |
+
- entrypoints=0.4=pyhd8ed1ab_0
|
62 |
+
- executing=1.2.0=pyhd8ed1ab_0
|
63 |
+
- expat=2.5.0=hcb278e6_1
|
64 |
+
- fastapi=0.95.1=pyhd8ed1ab_0
|
65 |
+
- ffmpeg=5.1.2=gpl_h8dda1f0_106
|
66 |
+
- ffmpy=0.3.0=pyhb6f538c_0
|
67 |
+
- filelock=3.12.0=pyhd8ed1ab_0
|
68 |
+
- flit-core=3.8.0=pyhd8ed1ab_0
|
69 |
+
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
|
70 |
+
- font-ttf-inconsolata=3.000=h77eed37_0
|
71 |
+
- font-ttf-source-code-pro=2.038=h77eed37_0
|
72 |
+
- font-ttf-ubuntu=0.83=hab24e00_0
|
73 |
+
- fontconfig=2.14.2=h14ed4e7_0
|
74 |
+
- fonts-conda-ecosystem=1=0
|
75 |
+
- fonts-conda-forge=1=0
|
76 |
+
- fonttools=4.39.3=py39h72bdee0_0
|
77 |
+
- freetype=2.12.1=hca18f0e_1
|
78 |
+
- fsspec=2023.4.0=pyh1a96a4e_0
|
79 |
+
- gflags=2.2.2=he1b5a44_1004
|
80 |
+
- glog=0.6.0=h6f12383_0
|
81 |
+
- gmp=6.2.1=h58526e2_0
|
82 |
+
- gnutls=3.7.8=hf3e180e_0
|
83 |
+
- gradio=3.23.0=pyhd8ed1ab_0
|
84 |
+
- h11=0.14.0=pyhd8ed1ab_0
|
85 |
+
- h2=4.1.0=pyhd8ed1ab_0
|
86 |
+
- hpack=4.0.0=pyh9f0ad1d_0
|
87 |
+
- httpcore=0.17.0=pyhd8ed1ab_0
|
88 |
+
- httpx=0.24.0=pyhd8ed1ab_1
|
89 |
+
- huggingface_hub=0.16.4=pyhd8ed1ab_0
|
90 |
+
- hyperframe=6.0.1=pyhd8ed1ab_0
|
91 |
+
- icu=72.1=hcb278e6_0
|
92 |
+
- idna=3.4=pyhd8ed1ab_0
|
93 |
+
- importlib-metadata=6.5.0=pyha770c72_0
|
94 |
+
- importlib-resources=5.12.0=pyhd8ed1ab_0
|
95 |
+
- importlib_metadata=6.5.0=hd8ed1ab_0
|
96 |
+
- importlib_resources=5.12.0=pyhd8ed1ab_0
|
97 |
+
- ipykernel=6.22.0=pyh210e3f2_0
|
98 |
+
- ipython=8.12.0=pyh41d4057_0
|
99 |
+
- ipython_genutils=0.2.0=py_1
|
100 |
+
- jedi=0.18.2=pyhd8ed1ab_0
|
101 |
+
- jinja2=3.1.2=pyhd8ed1ab_1
|
102 |
+
- json5=0.9.5=pyh9f0ad1d_0
|
103 |
+
- jsonschema=4.17.3=pyhd8ed1ab_0
|
104 |
+
- jupyter_client=8.2.0=pyhd8ed1ab_0
|
105 |
+
- jupyter_core=5.3.0=py39hf3d152e_0
|
106 |
+
- jupyter_events=0.6.3=pyhd8ed1ab_0
|
107 |
+
- jupyter_server=2.5.0=pyhd8ed1ab_0
|
108 |
+
- jupyter_server_fileid=0.9.0=pyhd8ed1ab_0
|
109 |
+
- jupyter_server_terminals=0.4.4=pyhd8ed1ab_1
|
110 |
+
- jupyter_server_ydoc=0.8.0=pyhd8ed1ab_0
|
111 |
+
- jupyter_ydoc=0.2.3=pyhd8ed1ab_0
|
112 |
+
- jupyterlab=3.6.3=pyhd8ed1ab_0
|
113 |
+
- jupyterlab_pygments=0.2.2=pyhd8ed1ab_0
|
114 |
+
- jupyterlab_server=2.22.1=pyhd8ed1ab_0
|
115 |
+
- keyutils=1.6.1=h166bdaf_0
|
116 |
+
- kiwisolver=1.4.4=py39hf939315_1
|
117 |
+
- krb5=1.21.1=h659d440_0
|
118 |
+
- lame=3.100=h166bdaf_1003
|
119 |
+
- lcms2=2.15=haa2dc70_1
|
120 |
+
- ld_impl_linux-64=2.40=h41732ed_0
|
121 |
+
- lerc=4.0.0=h27087fc_0
|
122 |
+
- libabseil=20230125.3=cxx17_h59595ed_0
|
123 |
+
- libarrow=12.0.1=h657c46f_5_cpu
|
124 |
+
- libblas=3.9.0=16_linux64_openblas
|
125 |
+
- libbrotlicommon=1.0.9=h166bdaf_8
|
126 |
+
- libbrotlidec=1.0.9=h166bdaf_8
|
127 |
+
- libbrotlienc=1.0.9=h166bdaf_8
|
128 |
+
- libcblas=3.9.0=16_linux64_openblas
|
129 |
+
- libcrc32c=1.1.2=h9c3ff4c_0
|
130 |
+
- libcurl=8.2.0=hca28451_0
|
131 |
+
- libdeflate=1.18=h0b41bf4_0
|
132 |
+
- libdrm=2.4.114=h166bdaf_0
|
133 |
+
- libedit=3.1.20191231=he28a2e2_2
|
134 |
+
- libev=4.33=h516909a_1
|
135 |
+
- libevent=2.1.12=hf998b51_1
|
136 |
+
- libexpat=2.5.0=hcb278e6_1
|
137 |
+
- libffi=3.4.2=h7f98852_5
|
138 |
+
- libgcc-ng=12.2.0=h65d4601_19
|
139 |
+
- libgfortran-ng=12.2.0=h69a702a_19
|
140 |
+
- libgfortran5=12.2.0=h337968e_19
|
141 |
+
- libgomp=12.2.0=h65d4601_19
|
142 |
+
- libgoogle-cloud=2.12.0=h840a212_1
|
143 |
+
- libgrpc=1.56.2=h3905398_0
|
144 |
+
- libiconv=1.17=h166bdaf_0
|
145 |
+
- libidn2=2.1.0=h470a237_0
|
146 |
+
- libjpeg-turbo=2.1.5.1=h0b41bf4_0
|
147 |
+
- liblapack=3.9.0=16_linux64_openblas
|
148 |
+
- libnghttp2=1.52.0=h61bc06f_0
|
149 |
+
- libnsl=2.0.0=h7f98852_0
|
150 |
+
- libnuma=2.0.16=h0b41bf4_1
|
151 |
+
- libopenblas=0.3.21=pthreads_h78a6416_3
|
152 |
+
- libopus=1.3.1=h7f98852_1
|
153 |
+
- libpciaccess=0.17=h166bdaf_0
|
154 |
+
- libpng=1.6.39=h753d276_0
|
155 |
+
- libprotobuf=4.23.3=hd1fb520_0
|
156 |
+
- libsodium=1.0.18=h36c2ea0_1
|
157 |
+
- libsqlite=3.40.0=h753d276_0
|
158 |
+
- libssh2=1.11.0=h0841786_0
|
159 |
+
- libstdcxx-ng=12.2.0=h46fd767_19
|
160 |
+
- libtasn1=4.19.0=h166bdaf_0
|
161 |
+
- libthrift=0.18.1=h8fd135c_2
|
162 |
+
- libtiff=4.5.0=ha587672_6
|
163 |
+
- libutf8proc=2.8.0=h166bdaf_0
|
164 |
+
- libuuid=2.38.1=h0b41bf4_0
|
165 |
+
- libva=2.18.0=h0b41bf4_0
|
166 |
+
- libvpx=1.11.0=h9c3ff4c_3
|
167 |
+
- libwebp-base=1.3.0=h0b41bf4_0
|
168 |
+
- libxcb=1.13=h7f98852_1004
|
169 |
+
- libxml2=2.10.4=hfdac1af_0
|
170 |
+
- libzlib=1.2.13=h166bdaf_4
|
171 |
+
- linkify-it-py=2.0.0=pyhd8ed1ab_0
|
172 |
+
- lz4-c=1.9.4=hcb278e6_0
|
173 |
+
- markdown-it-py=2.2.0=pyhd8ed1ab_0
|
174 |
+
- markupsafe=2.1.2=py39h72bdee0_0
|
175 |
+
- matplotlib-base=3.7.1=py39he190548_0
|
176 |
+
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
|
177 |
+
- mdit-py-plugins=0.3.3=pyhd8ed1ab_0
|
178 |
+
- mdurl=0.1.0=pyhd8ed1ab_0
|
179 |
+
- mistune=2.0.5=pyhd8ed1ab_0
|
180 |
+
- multidict=6.0.4=py39h72bdee0_0
|
181 |
+
- multiprocess=0.70.14=py39hb9d737c_3
|
182 |
+
- munkres=1.1.4=pyh9f0ad1d_0
|
183 |
+
- nbclassic=0.5.5=pyhb4ecaf3_1
|
184 |
+
- nbclient=0.7.3=pyhd8ed1ab_0
|
185 |
+
- nbconvert=7.3.1=pyhd8ed1ab_0
|
186 |
+
- nbconvert-core=7.3.1=pyhd8ed1ab_0
|
187 |
+
- nbconvert-pandoc=7.3.1=pyhd8ed1ab_0
|
188 |
+
- nbformat=5.8.0=pyhd8ed1ab_0
|
189 |
+
- ncurses=6.3=h27087fc_1
|
190 |
+
- nest-asyncio=1.5.6=pyhd8ed1ab_0
|
191 |
+
- nettle=3.8.1=hc379101_1
|
192 |
+
- notebook=6.5.4=pyha770c72_0
|
193 |
+
- notebook-shim=0.2.2=pyhd8ed1ab_0
|
194 |
+
- numpy=1.24.2=py39h7360e5f_0
|
195 |
+
- openh264=2.3.1=hcb278e6_2
|
196 |
+
- openjpeg=2.5.0=hfec8fc6_2
|
197 |
+
- openssl=3.1.1=hd590300_1
|
198 |
+
- orc=1.9.0=h385abfd_1
|
199 |
+
- orjson=3.8.10=py39hd8b4457_0
|
200 |
+
- p11-kit=0.24.1=hc5aa10d_0
|
201 |
+
- packaging=23.1=pyhd8ed1ab_0
|
202 |
+
- pandas=2.0.0=py39h2ad29b5_0
|
203 |
+
- pandoc=2.19.2=h32600fe_2
|
204 |
+
- pandocfilters=1.5.0=pyhd8ed1ab_0
|
205 |
+
- parso=0.8.3=pyhd8ed1ab_0
|
206 |
+
- patsy=0.5.3=pyhd8ed1ab_0
|
207 |
+
- pexpect=4.8.0=pyh1a96a4e_2
|
208 |
+
- pickleshare=0.7.5=py_1003
|
209 |
+
- pillow=9.5.0=py39h7207d5c_0
|
210 |
+
- pip=23.1=pyhd8ed1ab_0
|
211 |
+
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0
|
212 |
+
- platformdirs=3.2.0=pyhd8ed1ab_0
|
213 |
+
- pooch=1.7.0=pyha770c72_3
|
214 |
+
- prometheus_client=0.16.0=pyhd8ed1ab_0
|
215 |
+
- prompt-toolkit=3.0.38=pyha770c72_0
|
216 |
+
- prompt_toolkit=3.0.38=hd8ed1ab_0
|
217 |
+
- psutil=5.9.5=py39h72bdee0_0
|
218 |
+
- pthread-stubs=0.4=h36c2ea0_1001
|
219 |
+
- ptyprocess=0.7.0=pyhd3deb0d_0
|
220 |
+
- pure_eval=0.2.2=pyhd8ed1ab_0
|
221 |
+
- pyarrow=12.0.1=py39hfbd5978_5_cpu
|
222 |
+
- pycparser=2.21=pyhd8ed1ab_0
|
223 |
+
- pydantic=1.10.7=py39h72bdee0_0
|
224 |
+
- pydub=0.25.1=pyhd8ed1ab_0
|
225 |
+
- pygments=2.15.1=pyhd8ed1ab_0
|
226 |
+
- pyopenssl=23.1.1=pyhd8ed1ab_0
|
227 |
+
- pyparsing=3.0.9=pyhd8ed1ab_0
|
228 |
+
- pyrsistent=0.19.3=py39h72bdee0_0
|
229 |
+
- pysocks=1.7.1=pyha2e5f31_6
|
230 |
+
- python=3.9.16=h2782a2a_0_cpython
|
231 |
+
- python-dateutil=2.8.2=pyhd8ed1ab_0
|
232 |
+
- python-fastjsonschema=2.16.3=pyhd8ed1ab_0
|
233 |
+
- python-json-logger=2.0.7=pyhd8ed1ab_0
|
234 |
+
- python-multipart=0.0.6=pyhd8ed1ab_0
|
235 |
+
- python-tzdata=2023.3=pyhd8ed1ab_0
|
236 |
+
- python-xxhash=3.2.0=py39h72bdee0_0
|
237 |
+
- python_abi=3.9=3_cp39
|
238 |
+
- pytz=2023.3=pyhd8ed1ab_0
|
239 |
+
- pyyaml=6.0=py39hb9d737c_5
|
240 |
+
- pyzmq=25.0.2=py39h0be026e_0
|
241 |
+
- rdma-core=28.9=h59595ed_1
|
242 |
+
- re2=2023.03.02=h8c504da_0
|
243 |
+
- readline=8.2=h8228510_1
|
244 |
+
- regex=2023.6.3=py39hd1e30aa_0
|
245 |
+
- requests=2.28.2=pyhd8ed1ab_1
|
246 |
+
- responses=0.18.0=pyhd8ed1ab_0
|
247 |
+
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
|
248 |
+
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
|
249 |
+
- s2n=1.3.46=h06160fa_0
|
250 |
+
- sacremoses=0.0.53=pyhd8ed1ab_0
|
251 |
+
- safetensors=0.3.1=py39h9fdd4d6_0
|
252 |
+
- scipy=1.10.1=py39he83f1e1_0
|
253 |
+
- seaborn=0.12.2=hd8ed1ab_0
|
254 |
+
- seaborn-base=0.12.2=pyhd8ed1ab_0
|
255 |
+
- semantic_version=2.10.0=pyhd8ed1ab_0
|
256 |
+
- send2trash=1.8.0=pyhd8ed1ab_0
|
257 |
+
- setuptools=67.6.1=pyhd8ed1ab_0
|
258 |
+
- six=1.16.0=pyh6c4a22f_0
|
259 |
+
- snappy=1.1.10=h9fff704_0
|
260 |
+
- sniffio=1.3.0=pyhd8ed1ab_0
|
261 |
+
- soupsieve=2.3.2.post1=pyhd8ed1ab_0
|
262 |
+
- stack_data=0.6.2=pyhd8ed1ab_0
|
263 |
+
- starlette=0.26.1=pyhd8ed1ab_0
|
264 |
+
- statsmodels=0.13.5=py39h2ae25f5_2
|
265 |
+
- svt-av1=1.4.1=hcb278e6_0
|
266 |
+
- terminado=0.17.1=pyh41d4057_0
|
267 |
+
- tinycss2=1.2.1=pyhd8ed1ab_0
|
268 |
+
- tk=8.6.12=h27826a3_0
|
269 |
+
- tokenizers=0.13.3=py39h585fa2d_0
|
270 |
+
- tomli=2.0.1=pyhd8ed1ab_0
|
271 |
+
- toolz=0.12.0=pyhd8ed1ab_0
|
272 |
+
- tornado=6.3=py39h72bdee0_0
|
273 |
+
- tqdm=4.65.0=pyhd8ed1ab_1
|
274 |
+
- traitlets=5.9.0=pyhd8ed1ab_0
|
275 |
+
- transformers=4.31.0
|
276 |
+
- typing-extensions=4.5.0=hd8ed1ab_0
|
277 |
+
- typing_extensions=4.5.0=pyha770c72_0
|
278 |
+
- tzdata=2023c=h71feb2d_0
|
279 |
+
- uc-micro-py=1.0.1=pyhd8ed1ab_0
|
280 |
+
- ucx=1.14.1=hf587318_2
|
281 |
+
- unicodedata2=15.0.0=py39hb9d737c_0
|
282 |
+
- urllib3=1.26.15=pyhd8ed1ab_0
|
283 |
+
- uvicorn=0.21.1=py39hf3d152e_0
|
284 |
+
- wcwidth=0.2.6=pyhd8ed1ab_0
|
285 |
+
- webencodings=0.5.1=py_1
|
286 |
+
- websocket-client=1.5.1=pyhd8ed1ab_0
|
287 |
+
- websockets=11.0.2=py39h72bdee0_0
|
288 |
+
- wheel=0.40.0=pyhd8ed1ab_0
|
289 |
+
- wordcloud=1.8.2.2=py39hb9d737c_1
|
290 |
+
- x264=1!164.3095=h166bdaf_2
|
291 |
+
- x265=3.5=h924138e_3
|
292 |
+
- xorg-fixesproto=5.0=h7f98852_1002
|
293 |
+
- xorg-kbproto=1.0.7=h7f98852_1002
|
294 |
+
- xorg-libx11=1.8.4=h0b41bf4_0
|
295 |
+
- xorg-libxau=1.0.9=h7f98852_0
|
296 |
+
- xorg-libxdmcp=1.1.3=h7f98852_0
|
297 |
+
- xorg-libxext=1.3.4=h0b41bf4_2
|
298 |
+
- xorg-libxfixes=5.0.3=h7f98852_1004
|
299 |
+
- xorg-xextproto=7.3.0=h0b41bf4_1003
|
300 |
+
- xorg-xproto=7.0.31=h7f98852_1007
|
301 |
+
- xxhash=0.8.1=h0b41bf4_0
|
302 |
+
- xz=5.2.6=h166bdaf_0
|
303 |
+
- y-py=0.5.9=py39h50f1755_0
|
304 |
+
- yaml=0.2.5=h7f98852_2
|
305 |
+
- yarl=1.8.2=py39hb9d737c_0
|
306 |
+
- ypy-websocket=0.8.2=pyhd8ed1ab_0
|
307 |
+
- zeromq=4.3.4=h9c3ff4c_1
|
308 |
+
- zipp=3.15.0=pyhd8ed1ab_0
|
309 |
+
- zstd=1.5.2=h3eb15da_6
|
310 |
+
- pip:
|
311 |
+
- cmake==3.25.0
|
312 |
+
- cssselect2==0.7.0
|
313 |
+
- glypy==1.0.8
|
314 |
+
- hjson==3.1.0
|
315 |
+
- joblib==1.3.1
|
316 |
+
- lit==15.0.7
|
317 |
+
- lxml==4.9.2
|
318 |
+
- mpmath==1.2.1
|
319 |
+
- networkx==3.0
|
320 |
+
- pmw-py3==2.1
|
321 |
+
- preppy==4.2.1
|
322 |
+
- scikit-learn==1.3.0
|
323 |
+
- svglib==1.4.1
|
324 |
+
- sympy==1.11.1
|
325 |
+
- threadpoolctl==3.2.0
|
326 |
+
- torch==2.0.1
|
327 |
+
- torchaudio==2.0.2
|
328 |
+
- torchvision==0.15.2
|
329 |
+
- triton==2.0.0
|