sudhakar272 committed
Commit 92cb323 · verified · 1 Parent(s): 00d5cc1

Upload 4 files

Files changed (4)
  1. README.md +6 -5
  2. app.py +89 -0
  3. requirements.txt +5 -0
  4. transformer.py +113 -0
README.md CHANGED
@@ -1,13 +1,14 @@
 ---
-title: Shakespheretextgenerator
-emoji: 🔥
-colorFrom: purple
-colorTo: blue
+title: Transformer Basic
+emoji: 🏆
+colorFrom: pink
+colorTo: pink
 sdk: gradio
 sdk_version: 5.12.0
 app_file: app.py
 pinned: false
-short_description: shakesphere text generator
+license: mit
+short_description: A first implementation of a transformer
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,84 @@
+import gradio as gr
+import torch
+import torch.nn.functional as F
+import tiktoken
+from huggingface_hub import hf_hub_download
+from transformer import GPT, GPTConfig  # model classes defined in transformer.py
+
+# Pick a device and load the trained checkpoint from the Hugging Face Hub.
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+def load_model_from_hf():
+    # Replace with your Hugging Face model ID (username/model-name)
+    model_id = "satyanayak/transformer-basic"
+    checkpoint_path = hf_hub_download(repo_id=model_id, filename="trained_model.pt")
+
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+    config = checkpoint['config']
+    model = GPT(config)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    model.to(device)
+    model.eval()  # set to evaluation mode
+
+    # Disable gradient computation
+    for param in model.parameters():
+        param.requires_grad = False
+
+    return model
+
+model = load_model_from_hf()
+
+def generate_text(prompt, max_length=100, num_samples=1, temperature=0.8):
+    enc = tiktoken.get_encoding('gpt2')
+    tokens = enc.encode(prompt)
+    tokens = torch.tensor(tokens, dtype=torch.long)
+    tokens = tokens.unsqueeze(0).repeat(num_samples, 1)
+    tokens = tokens.to(device)
+
+    with torch.no_grad():
+        for _ in range(max_length):
+            if tokens.size(1) >= 1024:  # GPT context length
+                break
+
+            logits, _ = model(tokens)  # model returns (logits, loss)
+            logits = logits[:, -1, :] / temperature
+            probs = F.softmax(logits, dim=-1)
+
+            # Top-k sampling: sample only from the 50 most likely tokens
+            topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
+            ix = torch.multinomial(topk_probs, 1)
+            next_token = torch.gather(topk_indices, -1, ix)
+
+            tokens = torch.cat((tokens, next_token), dim=1)
+
+    # Decode each sample and join them with a separator
+    generated_texts = []
+    for i in range(num_samples):
+        text = enc.decode(tokens[i].tolist())
+        generated_texts.append(text)
+
+    return '\n\n---\n\n'.join(generated_texts)
+
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(label="Prompt", value="We are accounted poor citizens, the"),
+        gr.Slider(minimum=10, maximum=200, value=100, step=1, label="Max Length"),
+        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of Samples"),
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Shakespeare-style Text Generator",
+    description="Enter a prompt to generate a Shakespeare-style text continuation",
+    examples=[
+        ["O Romeo, Romeo, wherefore art thou", 100, 1],
+        ["To be, or not to be, that is", 60, 2],
+        ["Friends, Romans, countrymen, lend me", 50, 3],
+        ["All the world's a stage, and all the", 100, 1],
+        ["Now is the winter of our discontent", 100, 1],
+        ["If music be the food of love,", 100, 1],
+    ]
+)
+
+if __name__ == "__main__":
+    iface.launch()
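
A quick local smoke test for app.py (a sketch, not part of the commit): it assumes the requirements below are installed and that the checkpoint repo satyanayak/transformer-basic is reachable, since importing app downloads the weights and builds the interface.

# Hypothetical smoke test; run from the repo root so app.py and transformer.py resolve.
from app import generate_text

# One short continuation; max_length counts generated tokens, not characters.
print(generate_text("To be, or not to be, that is", max_length=40, num_samples=1))
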
requirements.txt ADDED
@@ -0,0 +1,5 @@
+torch
+gradio
+tiktoken
+transformers
+huggingface_hub
transformer.py ADDED
@@ -0,0 +1,119 @@
+import math
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from dataclasses import dataclass
+
+class CausalSelfAttention(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        assert config.n_embd % config.n_head == 0
+        # query, key, value projections for all heads in a single linear layer
+        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
+        self.c_proj = nn.Linear(config.n_embd, config.n_embd)
+        self.c_proj.NANOGPT_SCALE_INIT = 1
+        self.n_head = config.n_head
+        self.n_embd = config.n_embd
+        # causal mask: lower-triangular matrix of ones, shaped for broadcasting
+        self.register_buffer("bias", torch.tril(torch.ones(config.block_size, config.block_size)).view(1, 1, config.block_size, config.block_size))
+
+    def forward(self, x):
+        B, T, C = x.size()
+        qkv = self.c_attn(x)
+        q, k, v = qkv.split(self.n_embd, dim=2)
+        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+
+        # scaled dot-product attention with the causal mask applied
+        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
+        att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float('-inf'))
+        att = F.softmax(att, dim=-1)
+        y = att @ v
+        y = y.transpose(1, 2).contiguous().view(B, T, C)  # re-assemble head outputs
+        y = self.c_proj(y)
+        return y
+
+class MLP(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd)
+        self.gelu = nn.GELU(approximate='tanh')
+        self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd)
+        self.c_proj.NANOGPT_SCALE_INIT = 1
+
+    def forward(self, x):
+        x = self.c_fc(x)
+        x = self.gelu(x)
+        x = self.c_proj(x)
+        return x
+
+class Block(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.ln_1 = nn.LayerNorm(config.n_embd)
+        self.attn = CausalSelfAttention(config)
+        self.ln_2 = nn.LayerNorm(config.n_embd)
+        self.mlp = MLP(config)
+
+    def forward(self, x):
+        # pre-norm residual connections around attention and MLP
+        x = x + self.attn(self.ln_1(x))
+        x = x + self.mlp(self.ln_2(x))
+        return x
+
+@dataclass
+class GPTConfig:
+    block_size: int = 1024
+    vocab_size: int = 50257
+    n_layer: int = 12
+    n_head: int = 12
+    n_embd: int = 768
+
+class GPT(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+
+        self.transformer = nn.ModuleDict(dict(
+            wte = nn.Embedding(config.vocab_size, config.n_embd),
+            wpe = nn.Embedding(config.block_size, config.n_embd),
+            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+            ln_f = nn.LayerNorm(config.n_embd),
+        ))
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+        # weight sharing between token embedding and output head
+        self.transformer.wte.weight = self.lm_head.weight
+        self.apply(self._init_weights)
+
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            std = 0.02
+            if hasattr(module, 'NANOGPT_SCALE_INIT'):
+                # scale residual projections down with depth
+                std *= (2 * self.config.n_layer) ** -0.5
+            torch.nn.init.normal_(module.weight, mean=0.0, std=std)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+    def forward(self, idx, targets=None):
+        B, T = idx.size()
+        assert T <= self.config.block_size, f"Cannot forward sequence of length {T}, block size is only {self.config.block_size}"
+
+        pos = torch.arange(0, T, dtype=torch.long, device=idx.device)
+        pos_emb = self.transformer.wpe(pos)  # position embeddings (T, n_embd)
+        tok_emb = self.transformer.wte(idx)  # token embeddings (B, T, n_embd)
+        x = tok_emb + pos_emb
+
+        for block in self.transformer.h:
+            x = block(x)
+
+        x = self.transformer.ln_f(x)
+        logits = self.lm_head(x)
+
+        loss = None
+        if targets is not None:
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
+        return logits, loss
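
A minimal shape check for the GPT module above (a sketch under assumed toy sizes; the actual checkpoint uses the GPTConfig defaults):

# Toy config (2 layers, 2 heads, 64-dim embeddings): illustrative values only.
import torch
from transformer import GPT, GPTConfig

config = GPTConfig(block_size=64, vocab_size=50257, n_layer=2, n_head=2, n_embd=64)
model = GPT(config)
idx = torch.randint(0, config.vocab_size, (2, 16))  # (B, T) batch of token ids
logits, loss = model(idx, targets=idx)              # loss is returned when targets are given
print(logits.shape)   # torch.Size([2, 16, 50257])
print(loss.item())    # scalar cross-entropy on the random batch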