Update README.md
Browse files
README.md
CHANGED
@@ -7,4 +7,85 @@ tags:
|
|
7 |
|
8 |
https://huggingface.co/AmelieSchreiber/esm2_t6_8M_UR50D_sequence_classifier_v1 with ONNX weights to be compatible with Transformers.js.
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
|
|
|
7 |
|
8 |
https://huggingface.co/AmelieSchreiber/esm2_t6_8M_UR50D_sequence_classifier_v1 with ONNX weights to be compatible with Transformers.js.
|
9 |
|
10 |
+
|
11 |
+
|
12 |
+
## Usage (Transformers.js)
|
13 |
+
|
14 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
|
15 |
+
```bash
|
16 |
+
npm i @xenova/transformers
|
17 |
+
```
|
18 |
+
|
19 |
+
**Example:** Protein sequence classification with `Xenova/esm2_t6_8M_UR50D_sequence_classifier_v1`.
|
20 |
+
```js
|
21 |
+
import { pipeline } from '@xenova/transformers';
|
22 |
+
|
23 |
+
// Create text classification pipeline
|
24 |
+
const classifier = await pipeline('text-classification', 'Xenova/esm2_t6_8M_UR50D_sequence_classifier_v1');
|
25 |
+
|
26 |
+
// Suppose these are your new sequences that you want to classify
|
27 |
+
// Additional Family 0: Enzymes
|
28 |
+
const new_sequences_0 = [
|
29 |
+
'ACGYLKTPKLADPPVLRGDSSVTKAICKPDPVLEK',
|
30 |
+
'GVALDECKALDYLPGKPLPMDGKVCQCGSKTPLRP',
|
31 |
+
'VLPGYTCGELDCKPGKPLPKCGADKTQVATPFLRG',
|
32 |
+
'TCGALVQYPSCADPPVLRGSDSSVKACKKLDPQDK',
|
33 |
+
'GALCEECKLCPGADYKPMDGDRLPAAATSKTRPVG',
|
34 |
+
'PAVDCKKALVYLPKPLPMDGKVCRGSKTPKTRPYG',
|
35 |
+
'VLGYTCGALDCKPGKPLPKCGADKTQVATPFLRGA',
|
36 |
+
'CGALVQYPSCADPPVLRGSDSSVKACKKLDPQDKT',
|
37 |
+
'ALCEECKLCPGADYKPMDGDRLPAAATSKTRPVGK',
|
38 |
+
'AVDCKKALVYLPKPLPMDGKVCRGSKTPKTRPYGR',
|
39 |
+
]
|
40 |
+
|
41 |
+
// Additional Family 1: Receptor Proteins
|
42 |
+
const new_sequences_1 = [
|
43 |
+
'VGQRFYGGRQKNRHCELSPLPSACRGSVQGALYTD',
|
44 |
+
'KDQVLTVPTYACRCCPKMDSKGRVPSTLRVKSARS',
|
45 |
+
'PLAGVACGRGLDYRCPRKMVPGDLQVTPATQRPYG',
|
46 |
+
'CGVRLGYPGCADVPLRGRSSFAPRACMKKDPRVTR',
|
47 |
+
'RKGVAYLYECRKLRCRADYKPRGMDGRRLPKASTT',
|
48 |
+
'RPTGAVNCKQAKVYRGLPLPMMGKVPRVCRSRRPY',
|
49 |
+
'RLDGGYTCGQALDCKPGRKPPKMGCADLKSTVATP',
|
50 |
+
'LGTCRKLVRYPQCADPPVMGRSSFRPKACCRQDPV',
|
51 |
+
'RVGYAMCSPKLCSCRADYKPPMGDGDRLPKAATSK',
|
52 |
+
'QPKAVNCRKAMVYRPKPLPMDKGVPVCRSKRPRPY',
|
53 |
+
]
|
54 |
+
|
55 |
+
// Additional Family 2: Structural Proteins
|
56 |
+
const new_sequences_2 = [
|
57 |
+
'VGKGFRYGSSQKRYLHCQKSALPPSCRRGKGQGSAT',
|
58 |
+
'KDPTVMTVGTYSCQCPKQDSRGSVQPTSRVKTSRSK',
|
59 |
+
'PLVGKACGRSSDYKCPGQMVSGGSKQTPASQRPSYD',
|
60 |
+
'CGKKLVGYPSSKADVPLQGRSSFSPKACKKDPQMTS',
|
61 |
+
'RKGVASLYCSSKLSCKAQYSKGMSDGRSPKASSTTS',
|
62 |
+
'RPKSAASCEQAKSYRSLSLPSMKGKVPSKCSRSKRP',
|
63 |
+
'RSDVSYTSCSQSKDCKPSKPPKMSGSKDSSTVATPS',
|
64 |
+
'LSTCSKKVAYPSSKADPPSSGRSSFSMKACKKQDPPV',
|
65 |
+
'RVGSASSEPKSSCSVQSYSKPSMSGDSSPKASSTSK',
|
66 |
+
'QPSASNCEKMSSYRPSLPSMSKGVPSSRSKSSPPYQ',
|
67 |
+
]
|
68 |
+
|
69 |
+
// Merge all sequences
|
70 |
+
const new_sequences = [...new_sequences_0, ...new_sequences_1, ...new_sequences_2];
|
71 |
+
|
72 |
+
// Get the predicted class for each sequence
|
73 |
+
const predictions = await classifier(new_sequences);
|
74 |
+
|
75 |
+
// Output the predicted class for each sequence
|
76 |
+
for (let i = 0; i < predictions.length; ++i) {
|
77 |
+
console.log(`Sequence: ${new_sequences[i]}, Predicted class: '${predictions[i].label}'`)
|
78 |
+
}
|
79 |
+
// Sequence: ACGYLKTPKLADPPVLRGDSSVTKAICKPDPVLEK, Predicted class: 'Enzymes'
|
80 |
+
// ... (truncated)
|
81 |
+
// Sequence: AVDCKKALVYLPKPLPMDGKVCRGSKTPKTRPYGR, Predicted class: 'Enzymes'
|
82 |
+
// Sequence: VGQRFYGGRQKNRHCELSPLPSACRGSVQGALYTD, Predicted class: 'Receptor Proteins'
|
83 |
+
// ... (truncated)
|
84 |
+
// Sequence: QPKAVNCRKAMVYRPKPLPMDKGVPVCRSKRPRPY, Predicted class: 'Receptor Proteins'
|
85 |
+
// Sequence: VGKGFRYGSSQKRYLHCQKSALPPSCRRGKGQGSAT, Predicted class: 'Structural Proteins'
|
86 |
+
// ... (truncated)
|
87 |
+
// Sequence: QPSASNCEKMSSYRPSLPSMSKGVPSSRSKSSPPYQ, Predicted class: 'Structural Proteins'
|
88 |
+
```
|
89 |
+
|
90 |
+
|
91 |
Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
|