import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Text passed to gr.Interface below as the page title, description and article.
title = "Protein Sequence Classification 🧬."
description = "Predicts the subcellular location of the protein sequence between two classes: Cytoplasm and Membrane"
article = 'Created from finetuning ESM2_150M'

# The sequence-classification weights are read from the local ./Model
# directory; the tokenizer is loaded from the
# facebook/esm2_t30_150M_UR50D checkpoint.
model = AutoModelForSequenceClassification.from_pretrained('./Model')
tokenizer = AutoTokenizer.from_pretrained('facebook/esm2_t30_150M_UR50D')

# Example inputs offered in the UI. Each inner list holds the arguments for
# one call of predict(), i.e. a single protein sequence string.
example_list = [
    ['MKIIILLGFLGATLSAPLIPQRLMSASNSNELLLNLNNGQLLPLQLQGPLNSWIPPFSGILQQQQQAQIPGLSQFSLSALDQFAGLLPNQIPLTGEASFAQGAQAGQVDPLQLQTPPQTQPGPSHVMPYVFSFKMPQEQGQMFQYYPVYMVLPWEQPQQTVPRSPQQTRQQQYEEQIPFYAQFGYIPQLAEPAISGGQQQLAFDPQLGTAPEIAVMSTGEEIPYLQKEAINFRHDSAGVFMPSTSPKPSTTNVFTSAVDQTITPELPEEKDKTDSLREP'],
    ['MSSGNYQQSEALSKPTFSEEQASALVESVFGLKVSKVRPLPSYDDQNFHVYVSKTKDGPTEYVLKISNTKASKNPDLIEVQNHIIMFLKAAGFPTASVCHTKGDNTASLVSVDSGSEIKSYLVRLLTYLPGRPIAELPVSPQLLYEIGKLAAKLDKTLQRFHHPKLSSLHRENFIWNLKNVPLLEKYLYALGQNRNREIVEHVIHLFKEEVMTKLSHFRECINHGDLNDHNILIESSKSASGNAEYQVSGILDFGDMSYGYYVFEVAITIMYMMIESKSPIQVGGHVLAGFESITPLTAVEKGALFLLVCSRFCQSLVMAAYSCQLYPENKDYLMVTAKTGWKHLQQMFDMGQKAVEEIWFETAKSYESGISM'],
    ['MMNNTDFLMLNNPWNKLCLVSMDFCFPLDFVSNLFWIFASKFIIVTGQIKADFKRTSWEAKAEGSLEPGRLKLQLASIVPLYSSLVTAGPASKIIILKRTSLPTVSPSNERAYLLPVSFTDLAHVFYLSYFSINAKSNSFSLDIIIALGIPHNTQAHFNH'],
    ['MNKHNLRLVQLASELILIEIIPKLFLSQVTTISHIKREKIPPNHRKGILCMFPWQCVVYVFSNFVWLVIHRFSNGFIQFLGEPYRLMTASGTHGRIKFMVDIPIIKNTQVLRIPVLKDPKMLSKKH'],
]

def predict(ProtienSequence):
    """Classify a protein sequence as 'Cytoplasm' or 'Membrane'.

    Args:
        ProtienSequence: Amino-acid sequence as text. Whitespace (such as
            line breaks in pasted text) is removed and letters are
            upper-cased before tokenization.

    Returns:
        'Cytoplasm' when the highest-scoring logit is at index 0,
        otherwise 'Membrane'.

    Raises:
        gr.Error: If the input is empty or contains only whitespace, so the
            UI shows a message instead of a label for a blank sequence.
    """
    # Normalise the raw textbox value. The ESM-2 vocabulary is expected to
    # use upper-case residue letters, so lower-case input would otherwise
    # tokenize as unknown tokens.
    sequence = "".join((ProtienSequence or "").split()).upper()
    if not sequence:
        raise gr.Error("Please enter a protein sequence.")

    encoded = tokenizer(sequence, return_tensors='pt')

    # No gradients are needed for prediction.
    with torch.inference_mode():
        logits = model(**encoded).logits
        predicted_class = int(logits.argmax(dim=1)[0])

    return 'Cytoplasm' if predicted_class == 0 else 'Membrane'


# Build the web UI around predict() and start serving it.
iface = gr.Interface(
    fn=predict,
    # Explicit label: with the bare 'text' shortcut the input box would be
    # labelled after predict()'s parameter name, which is misspelled.
    inputs=gr.Text(label='Protein sequence'),
    outputs=gr.Text(label='Subcellular location'),
    title=title,
    description=description,
    article=article,
    examples=example_list,
)

iface.launch()