# modelo.py
# Install the required libraries
!pip install transformers datasets huggingface_hub

from datasets import load_dataset
from transformers import (
    DistilBertForSequenceClassification,
    DistilBertTokenizer,
    Trainer,
    TrainingArguments,
)
from huggingface_hub import HfApi

# Load the IMDb dataset
dataset = load_dataset('imdb')

# Load the tokenizer and the model
# (IMDb is binary sentiment classification, so the head has num_labels=2)
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', num_labels=2
)

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['text'])
tokenized_datasets.set_format('torch')

# Configure the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Create the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
)

# Train the model
trainer.train()

# Save the model and tokenizer locally
model.save_pretrained("imdb-distilbert")
tokenizer.save_pretrained("imdb-distilbert")

# Log in to Hugging Face (replace 'your-token' with your access token)
!huggingface-cli login --token your-token

# Upload the model to the Hugging Face Hub
# (create the repo first so upload_folder does not fail on a missing repo)
api = HfApi()
api.create_repo(repo_id="your-username/imdb-distilbert", exist_ok=True)
api.upload_folder(
    folder_path="imdb-distilbert",
    path_in_repo="",
    repo_id="your-username/imdb-distilbert",
    repo_type="model",
)

print("Deployment complete! Visit your model on Hugging Face for details.")
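
# --- Quick inference check (a minimal sketch, not part of the original script) ---
# This assumes the upload above succeeded and that 'your-username/imdb-distilbert'
# is your actual repo id; swap in your own username before running. Label names
# will be the defaults (LABEL_0 / LABEL_1) since id2label was not customized.
from transformers import pipeline

# Load the fine-tuned model straight from the Hub and classify a sample review.
classifier = pipeline('text-classification', model='your-username/imdb-distilbert')
print(classifier('A surprisingly moving film with excellent performances.'))
# Expected output shape: [{'label': 'LABEL_1', 'score': ...}], where LABEL_1
# corresponds to the positive class in the IMDb label encoding.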