jarodrigues
committed on
Commit
·
3d5f085
1
Parent(s):
a0f19e8
Update README.md
Browse files
README.md
CHANGED
@@ -119,7 +119,7 @@ You can use this model directly with a pipeline for masked language modeling:
|
|
119 |
|
120 |
```python
|
121 |
>>> from transformers import pipeline
|
122 |
-
>>> unmasker = pipeline('fill-mask', model='PORTULAN/albertina-
|
123 |
>>> unmasker("Países como [MASK] falam a língua portuguesa.")
|
124 |
|
125 |
[{'score': 0.5964823365211487, 'token': 34214, 'token_str': 'Angola', 'sequence': 'Países como Angola falam a língua portuguesa.'},
|
@@ -136,8 +136,8 @@ The model can be used by fine-tuning it for a specific task:
|
|
136 |
>>> from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
|
137 |
>>> from datasets import load_dataset
|
138 |
|
139 |
-
>>> model = AutoModelForSequenceClassification.from_pretrained("PORTULAN/albertina-
|
140 |
-
>>> tokenizer = AutoTokenizer.from_pretrained("PORTULAN/albertina-
|
141 |
>>> dataset = load_dataset("PORTULAN/glueptpt", "rte")
|
142 |
|
143 |
>>> def tokenize_function(examples):
|
@@ -145,7 +145,7 @@ The model can be used by fine-tuning it for a specific task:
|
|
145 |
|
146 |
>>> tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
147 |
|
148 |
-
>>> training_args = TrainingArguments(output_dir="albertina-
|
149 |
>>> trainer = Trainer(
|
150 |
... model=model,
|
151 |
... args=training_args,
|
|
|
119 |
|
120 |
```python
|
121 |
>>> from transformers import pipeline
|
122 |
+
>>> unmasker = pipeline('fill-mask', model='PORTULAN/albertina-ptpt')
|
123 |
>>> unmasker("Países como [MASK] falam a língua portuguesa.")
|
124 |
|
125 |
[{'score': 0.5964823365211487, 'token': 34214, 'token_str': 'Angola', 'sequence': 'Países como Angola falam a língua portuguesa.'},
|
|
|
136 |
>>> from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
|
137 |
>>> from datasets import load_dataset
|
138 |
|
139 |
+
>>> model = AutoModelForSequenceClassification.from_pretrained("PORTULAN/albertina-ptpt", num_labels=2)
|
140 |
+
>>> tokenizer = AutoTokenizer.from_pretrained("PORTULAN/albertina-ptpt")
|
141 |
>>> dataset = load_dataset("PORTULAN/glueptpt", "rte")
|
142 |
|
143 |
>>> def tokenize_function(examples):
|
|
|
145 |
|
146 |
>>> tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
147 |
|
148 |
+
>>> training_args = TrainingArguments(output_dir="albertina-ptpt-rte", evaluation_strategy="epoch")
|
149 |
>>> trainer = Trainer(
|
150 |
... model=model,
|
151 |
... args=training_args,
|