sombochea committed on
Commit
a9a59bf
1 Parent(s): 3e435ab

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -0
main.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fine-tune the pretrained GPT-2 language model on the text in ``train.txt``.

The script reads ``train.txt`` from the current working directory, tokenizes
it with the ``gpt2`` tokenizer, runs a fixed number of passes over the data
with Adam, prints the loss after every pass and finally writes the tuned
weights to the ``my_gpt_model`` directory.
"""
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Training hyper-parameters.
NUM_EPOCHS = 100
LEARNING_RATE = 5e-5

# Load the GPT2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Load the training data.  The encoding is pinned so that decoding does not
# depend on the platform's locale default.
with open('train.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Tokenize the training data
input_ids = tokenizer.encode(text, return_tensors='pt')

# The model only has ``n_positions`` position embeddings, so a corpus longer
# than that cannot be fed through it in a single forward pass.  Split the
# token sequence into context-sized chunks.  Chunks with fewer than two
# tokens are dropped: the labels are shifted by one position inside the
# model, so such a chunk has no target token left to predict.
block_size = model.config.n_positions
chunks = [
    chunk
    for chunk in input_ids.split(block_size, dim=1)
    if chunk.size(1) > 1
]
if not chunks:
    raise ValueError('train.txt does not contain enough text to train on')

# Train the model
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
model.train()
for i in range(NUM_EPOCHS):
    total_loss = 0.0
    for chunk in chunks:
        # Passing the inputs as labels asks the model for the language
        # modelling loss, which is the first element of the output.
        outputs = model(chunk, labels=chunk)
        loss = outputs[0]
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()

    # Mean loss over the chunks; with a single chunk this is simply that
    # chunk's loss.
    print(f'Epoch {i+1}, Loss: {total_loss / len(chunks)}')

# Save the trained model
model.save_pretrained('my_gpt_model')