# my-trained-model/main.py
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Use a GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Load the training data
with open('train.txt', 'r') as f:
    text = f.read()

# Tokenize the training data, truncating to GPT-2's 1,024-token context window
input_ids = tokenizer.encode(text, return_tensors='pt',
                             truncation=True, max_length=1024).to(device)
# Train the model: 100 passes over the single-batch training text
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
model.train()
for i in range(100):
    # With labels=input_ids, the model shifts the targets internally and
    # returns the language-modeling cross-entropy loss
    outputs = model(input_ids, labels=input_ids)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(f'Epoch {i+1}, Loss: {loss.item():.4f}')
# Save the fine-tuned model and tokenizer (both are needed to reload it later)
model.save_pretrained('my_gpt_model')
tokenizer.save_pretrained('my_gpt_model')
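
# Minimal sanity check (a sketch): reload the saved model and sample a short
# continuation. The prompt string here is an arbitrary example.
model = GPT2LMHeadModel.from_pretrained('my_gpt_model').to(device)
tokenizer = GPT2Tokenizer.from_pretrained('my_gpt_model')
model.eval()
prompt_ids = tokenizer.encode('Once upon a time', return_tensors='pt').to(device)
with torch.no_grad():
    # GPT-2 has no pad token, so reuse eos_token_id to silence the warning
    sample_ids = model.generate(prompt_ids, max_length=50, do_sample=True,
                                top_k=50, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(sample_ids[0], skip_special_tokens=True))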