# modelo.py
# Install the required libraries (run this in a shell; the notebook-style
# "!pip" magic does not work in a plain .py file):
# pip install transformers datasets huggingface_hub

from datasets import load_dataset
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, Trainer, TrainingArguments
from huggingface_hub import HfApi, login
# Load the IMDb dataset
dataset = load_dataset('imdb')
# Load the tokenizer and the model (defaults to 2 labels, matching IMDb's binary sentiment task)
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['text'])
tokenized_datasets.set_format('torch')
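# Optional sketch (not in the original script): the full IMDb train split has
# 25,000 examples, so 3 epochs can take a while without a GPU. Subsampling with
# the datasets library's shuffle/select is one way to iterate faster; the sizes
# below are arbitrary assumptions.
# small_train = tokenized_datasets['train'].shuffle(seed=42).select(range(2000))
# small_eval = tokenized_datasets['test'].shuffle(seed=42).select(range(500))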
# Configure the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)
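# Optional sketch (an assumption, not part of the original script): with
# evaluation_strategy='epoch' the Trainer only reports the eval loss unless a
# metrics function is supplied. A function like this, passed to the Trainer
# below via compute_metrics=, would also report accuracy per epoch.
import numpy as np

def compute_metrics(eval_pred):
    # eval_pred carries the model logits and the gold labels for the eval set
    predictions = np.argmax(eval_pred.predictions, axis=-1)
    return {'accuracy': (predictions == eval_pred.label_ids).mean()}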
# Create the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
)
# Train the model
trainer.train()
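# Optional (an assumption, not in the original script): a final evaluation pass
# over the test split, printed for reference.
# print(trainer.evaluate())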
# Save the model and tokenizer locally
model.save_pretrained("imdb-distilbert")
tokenizer.save_pretrained("imdb-distilbert")
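# Optional sketch (an assumption, not in the original script): a quick sanity
# check that the saved files can be reloaded for inference with the pipeline API.
# from transformers import pipeline
# classifier = pipeline('text-classification', model='imdb-distilbert')
# print(classifier('This movie was fantastic!'))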
# Log in to Hugging Face (replace 'your-token' with your access token;
# the notebook-style "!huggingface-cli" call does not work in a plain .py file)
login(token="your-token")
# Upload the model to the Hugging Face Hub (replace 'your-username' with your Hub username)
api = HfApi()
api.upload_folder(
    folder_path="imdb-distilbert",
    path_in_repo="",
    repo_id="your-username/imdb-distilbert",
    repo_type="model"
)
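# Optional sketch (an assumption): once uploaded, the model can be loaded back
# from the Hub by its repo id, e.g.:
# hub_model = DistilBertForSequenceClassification.from_pretrained('your-username/imdb-distilbert')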
print("Deployment complete! Check your model on Hugging Face for details.")