Spaces:

PrarthanaTS
/

nanogpt

Sleeping

App Files Files Community

PrarthanaTS committed on Nov 3, 2023

Commit

5c1bfe9

1 Parent(s): ec5288c

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -46

app.py CHANGED Viewed

@@ -157,40 +157,11 @@ with open('input.txt', 'r', encoding='utf-8') as f:
     shakespeare_text = f.read()
-# Reading wikipedia data
-DATA_PATH = 'wikisent2.txt'
-# load wikipedia sentences
-with open(DATA_PATH, 'r') as f:
-    lines = f.read().splitlines()
-# Selecting 250k lines from the dataset.
-random.seed(42)
-texts = random.choices(lines, k=250000)
-del lines
-def preprocess(text):
-    text = re.sub('@.*?\s+', '', text)  # Remove mentions
-    text = re.sub('#.*?\s+', '', text)  # Remove hashtags
-    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)  # Remove URLs
-    text = re.sub(r'[^\w\s\'.]', '', text)  # Remove special characters except for single quotes and periods
-    text = re.sub('\s+', ' ', text)  # Replace multiple spaces with a single space
-    text = re.sub('^\d+\s*|^\d+\.\d+\s*|^\d+\.\d+\.\d+\s*', '', text)  # Remove digits at the start of sentences
-    text = text.strip()  # Remove leading and trailing whitespace
-    return text
-wiki_text = [preprocess(t) for t in texts]
-wiki_text = '\n'.join(wiki_text)
 # Load the Shakespeare model
 shakespeare_model = BigramLanguageModel(shakespeare_text, n_embd).to(device)  # Initialize an instance of your model
-shakespeare_model.load_state_dict(torch.load('shakespeaere_language_model.pth', map_location=torch.device('cpu')))
 shakespeare_model.eval()  # Set the model to evaluation mode
-# Load the wikipedia model
-wikipedia_model = BigramLanguageModel(wiki_text, n_embd).to(device)  # Initialize an instance of your model
-wikipedia_model.load_state_dict(torch.load('wikipedia_language_model.pth', map_location=torch.device('cpu')))
-wikipedia_model.eval()  # Set the model to evaluation mode
 def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
   if prompt:
@@ -201,14 +172,7 @@ def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
   return text_output
-def generate_wikipedia_outputs(prompt=None, max_new_tokens=2000):
-  if prompt:
-    context = torch.tensor(wikipedia_model.encode(prompt), dtype=torch.long, device=device).view(1, -1)
-  else:
-    context = torch.zeros((1, 1), dtype=torch.long, device=device)
-  text_output = wikipedia_model.decode(wikipedia_model.generate(context, max_new_tokens=max_new_tokens)[0].tolist())
-  return text_output
 title = "Nano GPT"
@@ -219,15 +183,9 @@ shakespeare_interface = gr.Interface(generate_shakespeare_outputs,
                             gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
                     outputs=gr.Textbox(label="Output generated", type="text"), description=description1)
-description2 = "Nano GPT trained on <a href='https://github.com/karpathy/char-rnn/blob/6f9487a6fe5b420b7ca9afb0d7c078e37c1d1b4e/data/tinyshakespeare/input.txt'>Wikipedia dataset</a>. It is trained on a very small amount of data to understand how GPT's are trained and built. The implementation can be found <a href='https://github.com/karpathy/nanoGPT'>here.</a>"
-wiki_interface = gr.Interface(generate_wikipedia_outputs,
-                    inputs=[gr.Textbox(label="Enter any prompt ", type="text", value="James Bond"),
-                            gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
-                    outputs=gr.Textbox(label="Output generated", type="text"), description=description2)
-demo = gr.TabbedInterface([shakespeare_interface, wiki_interface], tab_names=["Shakespeare Data", "Wikipedia Data"],
                           title=title)
 demo.launch()

     shakespeare_text = f.read()
 # Load the Shakespeare model
 shakespeare_model = BigramLanguageModel(shakespeare_text, n_embd).to(device)  # Initialize an instance of your model
+shakespeare_model.load_state_dict(torch.load('GPT_Shakespeare_language_model.pth', map_location=torch.device('cpu')))
 shakespeare_model.eval()  # Set the model to evaluation mode
 def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
   if prompt:
   return text_output
 title = "Nano GPT"
                             gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
                     outputs=gr.Textbox(label="Output generated", type="text"), description=description1)
+demo = gr.TabbedInterface([shakespeare_interface], tab_names=["Shakespeare Data"],
                           title=title)
 demo.launch()