samwell committed
Commit d775f55 · verified · 1 Parent(s): efeb0bd

Update app.py

Files changed (1)
  1. app.py +119 -3
app.py CHANGED
@@ -1,8 +1,124 @@
-
  import gradio as gr
  import torch
  import tiktoken
- from supplementary import GPTModel, generate_text_simple
+ import math
+
+ class LayerNorm(torch.nn.Module):
+     def __init__(self, ndim, bias):
+         super().__init__()
+         self.weight = torch.nn.Parameter(torch.ones(ndim))
+         self.bias = torch.nn.Parameter(torch.zeros(ndim)) if bias else None
+
+     def forward(self, input):
+         return torch.nn.functional.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+ class CausalSelfAttention(torch.nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config["emb_dim"] % config["n_heads"] == 0
+         self.c_attn = torch.nn.Linear(config["emb_dim"], 3 * config["emb_dim"], bias=config["qkv_bias"])
+         self.c_proj = torch.nn.Linear(config["emb_dim"], config["emb_dim"], bias=True)
+         self.attn_dropout = torch.nn.Dropout(config["drop_rate"])
+         self.resid_dropout = torch.nn.Dropout(config["drop_rate"])
+         self.n_heads = config["n_heads"]
+         self.n_embd = config["emb_dim"]
+         self.dropout = config["drop_rate"]
+         self.register_buffer("bias", torch.tril(torch.ones(config["context_length"], config["context_length"]))
+                              .view(1, 1, config["context_length"], config["context_length"]))
+
+     def forward(self, x):
+         B, T, C = x.size()
+         q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+         k = k.view(B, T, self.n_heads, C // self.n_heads).transpose(1, 2)
+         q = q.view(B, T, self.n_heads, C // self.n_heads).transpose(1, 2)
+         v = v.view(B, T, self.n_heads, C // self.n_heads).transpose(1, 2)
+         att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+         att = att.masked_fill(self.bias[:,:,:T,:T] == 0, float('-inf'))
+         att = torch.nn.functional.softmax(att, dim=-1)
+         att = self.attn_dropout(att)
+         y = att @ v
+         y = y.transpose(1, 2).contiguous().view(B, T, C)
+         y = self.resid_dropout(self.c_proj(y))
+         return y
+
+ class MLP(torch.nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.c_fc = torch.nn.Linear(config["emb_dim"], 4 * config["emb_dim"], bias=True)
+         self.gelu = torch.nn.GELU()
+         self.c_proj = torch.nn.Linear(4 * config["emb_dim"], config["emb_dim"], bias=True)
+         self.dropout = torch.nn.Dropout(config["drop_rate"])
+
+     def forward(self, x):
+         x = self.c_fc(x)
+         x = self.gelu(x)
+         x = self.c_proj(x)
+         x = self.dropout(x)
+         return x
+
+ class Block(torch.nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.ln_1 = LayerNorm(config["emb_dim"], bias=True)
+         self.attn = CausalSelfAttention(config)
+         self.ln_2 = LayerNorm(config["emb_dim"], bias=True)
+         self.mlp = MLP(config)
+
+     def forward(self, x):
+         x = x + self.attn(self.ln_1(x))
+         x = x + self.mlp(self.ln_2(x))
+         return x
+
+ class GPTModel(torch.nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.config = config
+         self.transformer = torch.nn.ModuleDict(dict(
+             wte = torch.nn.Embedding(config["vocab_size"], config["emb_dim"]),
+             wpe = torch.nn.Embedding(config["context_length"], config["emb_dim"]),
+             drop = torch.nn.Dropout(config["drop_rate"]),
+             h = torch.nn.ModuleList([Block(config) for _ in range(config["n_layers"])]),
+             ln_f = LayerNorm(config["emb_dim"], bias=True)
+         ))
+         self.lm_head = torch.nn.Linear(config["emb_dim"], config["vocab_size"], bias=False)
+         self.transformer.wte.weight = self.lm_head.weight
+         self.apply(self._init_weights)
+
+     def _init_weights(self, module):
+         if isinstance(module, torch.nn.Linear):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+             if module.bias is not None:
+                 torch.nn.init.zeros_(module.bias)
+         elif isinstance(module, torch.nn.Embedding):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+     def forward(self, idx, targets=None):
+         device = idx.device
+         b, t = idx.size()
+         pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
+         tok_emb = self.transformer.wte(idx)
+         pos_emb = self.transformer.wpe(pos)
+         x = self.transformer.drop(tok_emb + pos_emb)
+         for block in self.transformer.h:
+             x = block(x)
+         x = self.transformer.ln_f(x)
+         logits = self.lm_head(x)
+
+         loss = None
+         if targets is not None:
+             loss = torch.nn.functional.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
+
+         return logits, loss
+
+ def generate_text_simple(model, idx, max_new_tokens, context_size):
+     for _ in range(max_new_tokens):
+         idx_cond = idx[:, -context_size:]
+         logits, _ = model(idx_cond)
+         logits = logits[:, -1, :]
+         probs = torch.nn.functional.softmax(logits, dim=-1)
+         idx_next = torch.multinomial(probs, num_samples=1)
+         idx = torch.cat((idx, idx_next), dim=1)
+     return idx

  # Load model configuration
  GPT_CONFIG_124M = {
@@ -46,4 +162,4 @@ iface = gr.Interface(
  description="Enter a prompt to generate text with the custom language model."
  )

- iface.launch()
+ iface.launch()
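
Note: the hunks above skip the new file's lines 125–161, where the config values, tokenizer, and Gradio interface are wired together. A minimal sketch of that glue code, assuming the GPTModel and generate_text_simple definitions added in this commit, could look like the following; the concrete config values, the generate wrapper, and the inputs/outputs arguments are assumptions, since only GPT_CONFIG_124M = {, iface = gr.Interface(, the description string, and iface.launch() appear in the diff context.

# Sketch only -- not the committed code. Assumes GPTModel and
# generate_text_simple from the diff above are in scope.
import gradio as gr
import torch
import tiktoken

# Keys inferred from how the classes above index the config dict;
# the values shown are the usual GPT-2 124M settings, not necessarily
# the ones used in this commit.
GPT_CONFIG_124M = {
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
    "qkv_bias": False,
}

tokenizer = tiktoken.get_encoding("gpt2")
model = GPTModel(GPT_CONFIG_124M)
model.eval()  # the real app presumably also loads trained weights here

def generate(prompt):
    # Encode the prompt, sample new tokens with the inlined helper,
    # and decode the result back to text.
    idx = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
    out = generate_text_simple(model, idx, max_new_tokens=50,
                               context_size=GPT_CONFIG_124M["context_length"])
    return tokenizer.decode(out.squeeze(0).tolist())

iface = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
    description="Enter a prompt to generate text with the custom language model."
)

iface.launch()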