Update README.md
Browse files
README.md
CHANGED
@@ -46,11 +46,18 @@ We evaluate the benefits of pretraining DNA FM 7B by conducting a comprehensive
|
|
46 |
<center><img src="circle_benchmarks.png" alt="Downstream results of DNA FM 7B" style="width:70%; height:auto;" /></center>
|
47 |
|
48 |
## How to Use
|
49 |
-
### Build any downstream models from this backbone
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
#### Embedding
|
51 |
```python
|
52 |
-
from
|
53 |
-
model = Embed.from_config({"model.backbone": "
|
54 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
55 |
embedding = model(collated_batch)
|
56 |
print(embedding.shape)
|
@@ -59,8 +66,8 @@ print(embedding)
|
|
59 |
#### Sequence Level Classification
|
60 |
```python
|
61 |
import torch
|
62 |
-
from
|
63 |
-
model = SequenceClassification.from_config({"model.backbone": "
|
64 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
65 |
logits = model(collated_batch)
|
66 |
print(logits)
|
@@ -69,8 +76,8 @@ print(torch.argmax(logits, dim=-1))
|
|
69 |
#### Token Level Classification
|
70 |
```python
|
71 |
import torch
|
72 |
-
from
|
73 |
-
model = TokenClassification.from_config({"model.backbone": "
|
74 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
75 |
logits = model(collated_batch)
|
76 |
print(logits)
|
@@ -78,18 +85,12 @@ print(torch.argmax(logits, dim=-1))
|
|
78 |
```
|
79 |
#### Regression
|
80 |
```python
|
81 |
-
from
|
82 |
-
model = SequenceRegression.from_config({"model.backbone": "
|
83 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
84 |
logits = model(collated_batch)
|
85 |
print(logits)
|
86 |
```
|
87 |
-
#### Or use our one-liner CLI to finetune or evaluate any of the above!
|
88 |
-
```
|
89 |
-
gbft fit --model SequenceClassification --model.backbone dnafm --data SequenceClassification --data.path <hf_or_local_path_to_your_dataset>
|
90 |
-
gbft test --model SequenceClassification --model.backbone dnafm --data SequenceClassification --data.path <hf_or_local_path_to_your_dataset>
|
91 |
-
```
|
92 |
-
For more information, visit: [Model Generator](https://github.com/genbio-ai/modelgenerator)
|
93 |
|
94 |
|
95 |
## Citation
|
@@ -101,7 +102,4 @@ author={Caleb Ellington, Ning Sun, Nicholas Ho, Tianhua Tao, Sazan Mahbub, Yongh
|
|
101 |
booktitle={NeurIPS 2024 Workshop on AI for New Drug Modalities},
|
102 |
year={2024}
|
103 |
}
|
104 |
-
```
|
105 |
-
|
106 |
-
## License
|
107 |
-
@Hongyi TODO
|
|
|
46 |
<center><img src="circle_benchmarks.png" alt="Downstream results of DNA FM 7B" style="width:70%; height:auto;" /></center>
|
47 |
|
48 |
## How to Use
|
49 |
+
### Build any downstream model from this backbone with ModelGenerator
|
50 |
+
For more information, visit [ModelGenerator](https://github.com/genbio-ai/modelgenerator).
|
51 |
+
```bash
|
52 |
+
mgen fit --model SequenceClassification --model.backbone aido_dna_7b --data SequenceClassificationDataModule --data.path <hf_or_local_path_to_your_dataset>
|
53 |
+
mgen test --model SequenceClassification --model.backbone aido_dna_7b --data SequenceClassificationDataModule --data.path <hf_or_local_path_to_your_dataset>
|
54 |
+
```
|
55 |
+
|
56 |
+
### Or use it directly in Python
|
57 |
#### Embedding
|
58 |
```python
|
59 |
+
from modelgenerator.tasks import Embed
|
60 |
+
model = Embed.from_config({"model.backbone": "aido_dna_7b"}).eval()
|
61 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
62 |
embedding = model(collated_batch)
|
63 |
print(embedding.shape)
|
|
|
66 |
#### Sequence Level Classification
|
67 |
```python
|
68 |
import torch
|
69 |
+
from modelgenerator.tasks import SequenceClassification
|
70 |
+
model = SequenceClassification.from_config({"model.backbone": "aido_dna_7b", "model.n_classes": 2}).eval()
|
71 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
72 |
logits = model(collated_batch)
|
73 |
print(logits)
|
|
|
76 |
#### Token Level Classification
|
77 |
```python
|
78 |
import torch
|
79 |
+
from modelgenerator.tasks import TokenClassification
|
80 |
+
model = TokenClassification.from_config({"model.backbone": "aido_dna_7b", "model.n_classes": 3}).eval()
|
81 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
82 |
logits = model(collated_batch)
|
83 |
print(logits)
|
|
|
85 |
```
|
86 |
#### Regression
|
87 |
```python
|
88 |
+
from modelgenerator.tasks import SequenceRegression
|
89 |
+
model = SequenceRegression.from_config({"model.backbone": "aido_dna_7b"}).eval()
|
90 |
collated_batch = model.collate({"sequences": ["ACGT", "AGCT"]})
|
91 |
logits = model(collated_batch)
|
92 |
print(logits)
|
93 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
|
96 |
## Citation
|
|
|
102 |
booktitle={NeurIPS 2024 Workshop on AI for New Drug Modalities},
|
103 |
year={2024}
|
104 |
}
|
105 |
+
```
|
|
|
|
|
|