PrarthanaTS committed on
Commit
5c1bfe9
·
1 Parent(s): ec5288c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -46
app.py CHANGED
@@ -157,40 +157,11 @@ with open('input.txt', 'r', encoding='utf-8') as f:
157
  shakespeare_text = f.read()
158
 
159
 
160
- # Reading wikipedia data
161
- DATA_PATH = 'wikisent2.txt'
162
- # load wikipedia sentences
163
- with open(DATA_PATH, 'r') as f:
164
- lines = f.read().splitlines()
165
-
166
- # Selecting 250k lines from the dataset.
167
- random.seed(42)
168
- texts = random.choices(lines, k=250000)
169
- del lines
170
-
171
- def preprocess(text):
172
- text = re.sub('@.*?\s+', '', text) # Remove mentions
173
- text = re.sub('#.*?\s+', '', text) # Remove hashtags
174
- text = re.sub(r'https?:\/\/.*[\r\n]*', '', text) # Remove URLs
175
- text = re.sub(r'[^\w\s\'.]', '', text) # Remove special characters except for single quotes and periods
176
- text = re.sub('\s+', ' ', text) # Replace multiple spaces with a single space
177
- text = re.sub('^\d+\s*|^\d+\.\d+\s*|^\d+\.\d+\.\d+\s*', '', text) # Remove digits at the start of sentences
178
- text = text.strip() # Remove leading and trailing whitespace
179
- return text
180
-
181
- wiki_text = [preprocess(t) for t in texts]
182
- wiki_text = '\n'.join(wiki_text)
183
-
184
  # Load the shakespeaere model
185
  shakespeare_model = BigramLanguageModel(shakespeare_text, n_embd).to(device) # Initialize an instance of your model
186
- shakespeare_model.load_state_dict(torch.load('shakespeaere_language_model.pth', map_location=torch.device('cpu')))
187
  shakespeare_model.eval() # Set the model to evaluation mode
188
 
189
- # Load the wikipedia model
190
- wikipedia_model = BigramLanguageModel(wiki_text, n_embd).to(device) # Initialize an instance of your model
191
- wikipedia_model.load_state_dict(torch.load('wikipedia_language_model.pth', map_location=torch.device('cpu')))
192
- wikipedia_model.eval() # Set the model to evaluation mode
193
-
194
 
195
  def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
196
  if prompt:
@@ -201,14 +172,7 @@ def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
201
  return text_output
202
 
203
 
204
- def generate_wikipedia_outputs(prompt=None, max_new_tokens=2000):
205
- if prompt:
206
- context = torch.tensor(wikipedia_model.encode(prompt), dtype=torch.long, device=device).view(1, -1)
207
- else:
208
- context = torch.zeros((1, 1), dtype=torch.long, device=device)
209
- text_output = wikipedia_model.decode(wikipedia_model.generate(context, max_new_tokens=max_new_tokens)[0].tolist())
210
- return text_output
211
-
212
 
213
  title = "Nano GPT"
214
 
@@ -219,15 +183,9 @@ shakespeare_interface = gr.Interface(generate_shakespeare_outputs,
219
  gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
220
  outputs=gr.Textbox(label="Output generated", type="text"), description=description1)
221
 
222
- description2 = "Nano GPT trained on <a href='https://github.com/karpathy/char-rnn/blob/6f9487a6fe5b420b7ca9afb0d7c078e37c1d1b4e/data/tinyshakespeare/input.txt'>Wikipedia dataset</a>. It is trained on a very small amount of data to understand how GPT's are trained and built. The implementation can be found <a href='https://github.com/karpathy/nanoGPT'>here.</a>"
223
-
224
- wiki_interface = gr.Interface(generate_wikipedia_outputs,
225
- inputs=[gr.Textbox(label="Enter any prompt ", type="text", value="James Bond"),
226
- gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
227
- outputs=gr.Textbox(label="Output generated", type="text"), description=description2)
228
-
229
- demo = gr.TabbedInterface([shakespeare_interface, wiki_interface], tab_names=["Shakespeare Data", "Wikipedia Data"],
230
  title=title)
231
 
232
 
 
233
  demo.launch()
 
157
  shakespeare_text = f.read()
158
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  # Load the shakespeaere model
161
  shakespeare_model = BigramLanguageModel(shakespeare_text, n_embd).to(device) # Initialize an instance of your model
162
+ shakespeare_model.load_state_dict(torch.load('GPT_Shakespeare_language_model.pth', map_location=torch.device('cpu')))
163
  shakespeare_model.eval() # Set the model to evaluation mode
164
 
 
 
 
 
 
165
 
166
  def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
167
  if prompt:
 
172
  return text_output
173
 
174
 
175
+
 
 
 
 
 
 
 
176
 
177
  title = "Nano GPT"
178
 
 
183
  gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
184
  outputs=gr.Textbox(label="Output generated", type="text"), description=description1)
185
 
186
+ demo = gr.TabbedInterface([shakespeare_interface], tab_names=["Shakespeare Data"],
 
 
 
 
 
 
 
187
  title=title)
188
 
189
 
190
+
191
  demo.launch()