---
language:
- en
library_name: transformers
co2_eq_emissions: 2010
datasets:
- google/jigsaw_toxicity_pred
- SetFit/toxic_conversations
- heegyu/toxic_conversations_balanced
license: cc-by-nc-sa-4.0
---
# Tiny-Toxic-Detector
A tiny comment-toxicity classifier with only 2M parameters. Requiring only ~8 MB of VRAM (hardware dependent) and offering fast inference, it is one of the best toxicity classifiers available, outperforming models more than 50 times its size.
You can find the paper here: https://doi.org/10.48550/arXiv.2409.02114.
You can join us on Discord by clicking [here](https://discord.gg/45MHwt5rBM).
## Usage
This model uses a custom architecture and requires some extra custom code to work. Below you can find the architecture and a fully usable example.
<details>
<summary>
Architecture
</summary>
```python
import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig, AutoTokenizer


# Define TinyTransformer model
class TinyTransformer(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.pos_encoding = nn.Parameter(torch.zeros(1, 512, embed_dim))
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dim_feedforward=ff_dim, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x) + self.pos_encoding[:, :x.size(1), :]
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)
        return self.sigmoid(x)


class TinyTransformerConfig(PretrainedConfig):
    model_type = "tiny_transformer"

    def __init__(self, vocab_size=30522, embed_dim=64, num_heads=2, ff_dim=128, num_layers=4, max_position_embeddings=512, **kwargs):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.num_layers = num_layers
        self.max_position_embeddings = max_position_embeddings


class TinyTransformerForSequenceClassification(PreTrainedModel):
    config_class = TinyTransformerConfig

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 1
        self.transformer = TinyTransformer(
            config.vocab_size,
            config.embed_dim,
            config.num_heads,
            config.ff_dim,
            config.num_layers
        )

    def forward(self, input_ids, attention_mask=None):
        outputs = self.transformer(input_ids)
        return {"logits": outputs}
```
</details>
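Optionally (this is a convenience sketch, not part of the original example), once the classes above have been defined you can register them with the `transformers` Auto classes so the model loads through the usual factory methods:
```python
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

# Register the custom architecture (assumes the classes from the block above are defined).
AutoConfig.register("tiny_transformer", TinyTransformerConfig)
AutoModelForSequenceClassification.register(TinyTransformerConfig, TinyTransformerForSequenceClassification)

model = AutoModelForSequenceClassification.from_pretrained("AssistantsLab/Tiny-Toxic-Detector")
tokenizer = AutoTokenizer.from_pretrained("AssistantsLab/Tiny-Toxic-Detector")
```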
<details>
<summary>
Full example
</summary>
```python
import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig, AutoTokenizer


# Define TinyTransformer model
class TinyTransformer(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.pos_encoding = nn.Parameter(torch.zeros(1, 512, embed_dim))
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dim_feedforward=ff_dim, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x) + self.pos_encoding[:, :x.size(1), :]
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)
        return self.sigmoid(x)


class TinyTransformerConfig(PretrainedConfig):
    model_type = "tiny_transformer"

    def __init__(self, vocab_size=30522, embed_dim=64, num_heads=2, ff_dim=128, num_layers=4, max_position_embeddings=512, **kwargs):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.num_layers = num_layers
        self.max_position_embeddings = max_position_embeddings


class TinyTransformerForSequenceClassification(PreTrainedModel):
    config_class = TinyTransformerConfig

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 1
        self.transformer = TinyTransformer(
            config.vocab_size,
            config.embed_dim,
            config.num_heads,
            config.ff_dim,
            config.num_layers
        )

    def forward(self, input_ids, attention_mask=None):
        outputs = self.transformer(input_ids)
        return {"logits": outputs}


# Load the Tiny-Toxic-Detector model and tokenizer
def load_model_and_tokenizer():
    device = torch.device("cpu")  # Due to GPU overhead, inference is faster on CPU!

    # Load Tiny-toxic-detector
    config = TinyTransformerConfig.from_pretrained("AssistantsLab/Tiny-Toxic-Detector")
    model = TinyTransformerForSequenceClassification.from_pretrained("AssistantsLab/Tiny-Toxic-Detector", config=config).to(device)
    tokenizer = AutoTokenizer.from_pretrained("AssistantsLab/Tiny-Toxic-Detector")

    return model, tokenizer, device


# Prediction function
def predict_toxicity(text, model, tokenizer, device):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128, padding="max_length").to(device)
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs["logits"].squeeze()
    prediction = "Toxic" if logits > 0.5 else "Not Toxic"
    return prediction


def main():
    model, tokenizer, device = load_model_and_tokenizer()
    while True:
        print("Enter text to classify (or type 'exit' to quit):")
        text = input()
        if text.lower() == 'exit':
            print("Exiting...")
            break
        if text:
            prediction = predict_toxicity(text, model, tokenizer, device)
            print(f"Prediction: {prediction}")
        else:
            print("No text provided. Please enter some text.")


if __name__ == "__main__":
    main()
```
</details>
Once the architecture is defined and the model and tokenizer are loaded, toxicity can be predicted with the following snippet:
```python
# Define architecture before this!
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128, padding="max_length").to(device)
if "token_type_ids" in inputs:
    del inputs["token_type_ids"]
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs["logits"].squeeze()
prediction = "Toxic" if logits > 0.5 else "Not Toxic"
```
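Because the network ends in a sigmoid, the value stored under `"logits"` is already a probability between 0 and 1, so you can also keep the raw score rather than only the hard label. A minimal sketch, reusing `model` and `inputs` from the snippet above:
```python
with torch.no_grad():
    outputs = model(**inputs)

# The sigmoid output is a probability in [0, 1]; 0.5 is the same threshold used above.
toxicity_score = outputs["logits"].squeeze().item()
prediction = "Toxic" if toxicity_score > 0.5 else "Not Toxic"
print(f"{prediction} (score: {toxicity_score:.3f})")
```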
## Benchmarks
The Tiny-Toxic-Detector achieves 90.97% on the Toxigen benchmark and 86.98% on the Jigsaw-Toxic-Comment-Classification-Challenge. Below we compare these results against other toxicity-classification models:
| Model | Size (parameters) | Toxigen (%) | Jigsaw (%) | Average (%) |
| --------------------------------- | ----------------- | ----------- | ---------- | ----------- |
| lmsys/toxicchat-t5-large-v1.0 | 738M | 72.67 | 88.82 | 80.745 |
| s-nlp/roberta_toxicity_classifier | 124M | *88.41* | **94.92** | **91.665** |
| mohsenfayyaz/toxicity-classifier | 109M | 81.50 | 83.31 | 82.405 |
| martin-ha/toxic-comment-model | *67M* | 68.02 | *91.56* | 79.790 |
| **Tiny-toxic-detector** | **2M** | **90.97** | 86.98 | *88.975* |
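For reference, here is a minimal sketch of how an accuracy number like those above could be computed on your own labeled data. This is not the exact evaluation setup from the paper; `samples` is a placeholder list of `(text, label)` pairs with `label` equal to 1 for toxic and 0 for non-toxic:
```python
def accuracy(samples, model, tokenizer, device):
    # samples: list of (text, label) pairs, label 1 = toxic, 0 = non-toxic (placeholder data).
    correct = 0
    for text, label in samples:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128, padding="max_length").to(device)
        inputs.pop("token_type_ids", None)  # The model's forward() does not accept token_type_ids
        with torch.no_grad():
            score = model(**inputs)["logits"].squeeze().item()
        correct += int((score > 0.5) == bool(label))
    return 100.0 * correct / len(samples)
```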
## Usage and Limitations
Toxicity classification models always have certain limitations you should be aware of, and this model is no different.
### Intended Usage
The Tiny-toxic-detector is designed to classify comments for toxicity. It is particularly useful in scenarios where minimal resource usage and rapid inference are essential. Key features include:
* Low Resource Consumption: Requiring only roughly 10 MB of RAM and 8 MB of VRAM, this model is well suited for environments with limited hardware resources.
* Fast Inference: The model provides high-speed inference and significantly outperforms larger models on CPU-based systems. Because GPU inference carries extra overhead, small models processing relatively few input tokens are often faster on CPU, and the Tiny-toxic-detector is one of them (a rough timing sketch follows this list).
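As a rough way to check the CPU-latency claim on your own hardware, here is a hypothetical timing sketch that reuses `load_model_and_tokenizer` and `predict_toxicity` from the full example above; the warm-up and run counts are arbitrary:
```python
import time

model, tokenizer, device = load_model_and_tokenizer()
text = "This is a perfectly friendly example sentence."

# Warm-up runs so one-time setup costs are not included in the timing
for _ in range(3):
    predict_toxicity(text, model, tokenizer, device)

runs = 50
start = time.perf_counter()
for _ in range(runs):
    predict_toxicity(text, model, tokenizer, device)
elapsed_ms = (time.perf_counter() - start) / runs * 1000
print(f"Average CPU inference time: {elapsed_ms:.1f} ms per comment")
```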
### Limitations
* Training Data
* The Tiny-toxic-detector has been trained exclusively on English-language data, limiting its ability to classify toxicity in other languages.
* Maximum Context Length
* The model can handle up to 512 input tokens; comments exceeding this length are outside the scope of this model (a simple truncation workaround is sketched after this list).
* While extending the context length is possible, such modifications have not been trained for or validated. Early tests with a 4096-token context resulted in a performance drop of over 10% on the Toxigen benchmark.
* Language Ambiguity
* The Tiny-toxic-detector may struggle with ambiguous or nuanced language, as any model would. Even though benchmarks like Toxigen evaluate the model's performance on ambiguous language, it may still misclassify comments where toxicity is not clearly defined.
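If longer comments must be handled anyway, one simple (and, as noted above, untrained and unvalidated) workaround is to truncate the input to the model's 512-token maximum; everything past the truncation point is simply ignored. A sketch, assuming `model`, `tokenizer` and `device` from the example above and a placeholder `long_text`:
```python
# Truncate long comments to the 512-token maximum supported by the positional encoding.
inputs = tokenizer(long_text, return_tensors="pt", truncation=True, max_length=512, padding="max_length").to(device)
inputs.pop("token_type_ids", None)
with torch.no_grad():
    score = model(**inputs)["logits"].squeeze().item()
print("Toxic" if score > 0.5 else "Not Toxic")
```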
### Summary
This model is a great fit when resources are constrained or fast inference is important, but like any AI classification model it can be wrong. As such, we discourage using this model in an automated system with no human oversight. As outlined in the paper, there is also a risk of the model over-relying on individual words rather than the context as a whole, so please keep this in mind as well.