Spaces:
Sleeping
Sleeping
Commit
·
5c1bfe9
1
Parent(s):
ec5288c
Update app.py
Browse files
app.py
CHANGED
@@ -157,40 +157,11 @@ with open('input.txt', 'r', encoding='utf-8') as f:
|
|
157 |
shakespeare_text = f.read()
|
158 |
|
159 |
|
160 |
-
# Reading wikipedia data
|
161 |
-
DATA_PATH = 'wikisent2.txt'
|
162 |
-
# load wikipedia sentences
|
163 |
-
with open(DATA_PATH, 'r') as f:
|
164 |
-
lines = f.read().splitlines()
|
165 |
-
|
166 |
-
# Selecting 250k lines from the dataset.
|
167 |
-
random.seed(42)
|
168 |
-
texts = random.choices(lines, k=250000)
|
169 |
-
del lines
|
170 |
-
|
171 |
-
def preprocess(text):
|
172 |
-
text = re.sub('@.*?\s+', '', text) # Remove mentions
|
173 |
-
text = re.sub('#.*?\s+', '', text) # Remove hashtags
|
174 |
-
text = re.sub(r'https?:\/\/.*[\r\n]*', '', text) # Remove URLs
|
175 |
-
text = re.sub(r'[^\w\s\'.]', '', text) # Remove special characters except for single quotes and periods
|
176 |
-
text = re.sub('\s+', ' ', text) # Replace multiple spaces with a single space
|
177 |
-
text = re.sub('^\d+\s*|^\d+\.\d+\s*|^\d+\.\d+\.\d+\s*', '', text) # Remove digits at the start of sentences
|
178 |
-
text = text.strip() # Remove leading and trailing whitespace
|
179 |
-
return text
|
180 |
-
|
181 |
-
wiki_text = [preprocess(t) for t in texts]
|
182 |
-
wiki_text = '\n'.join(wiki_text)
|
183 |
-
|
184 |
# Load the Shakespeare model
|
185 |
shakespeare_model = BigramLanguageModel(shakespeare_text, n_embd).to(device) # Initialize an instance of your model
|
186 |
-
shakespeare_model.load_state_dict(torch.load('
|
187 |
shakespeare_model.eval() # Set the model to evaluation mode
|
188 |
|
189 |
-
# Load the wikipedia model
|
190 |
-
wikipedia_model = BigramLanguageModel(wiki_text, n_embd).to(device) # Initialize an instance of your model
|
191 |
-
wikipedia_model.load_state_dict(torch.load('wikipedia_language_model.pth', map_location=torch.device('cpu')))
|
192 |
-
wikipedia_model.eval() # Set the model to evaluation mode
|
193 |
-
|
194 |
|
195 |
def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
|
196 |
if prompt:
|
@@ -201,14 +172,7 @@ def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
|
|
201 |
return text_output
|
202 |
|
203 |
|
204 |
-
|
205 |
-
if prompt:
|
206 |
-
context = torch.tensor(wikipedia_model.encode(prompt), dtype=torch.long, device=device).view(1, -1)
|
207 |
-
else:
|
208 |
-
context = torch.zeros((1, 1), dtype=torch.long, device=device)
|
209 |
-
text_output = wikipedia_model.decode(wikipedia_model.generate(context, max_new_tokens=max_new_tokens)[0].tolist())
|
210 |
-
return text_output
|
211 |
-
|
212 |
|
213 |
title = "Nano GPT"
|
214 |
|
@@ -219,15 +183,9 @@ shakespeare_interface = gr.Interface(generate_shakespeare_outputs,
|
|
219 |
gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
|
220 |
outputs=gr.Textbox(label="Output generated", type="text"), description=description1)
|
221 |
|
222 |
-
|
223 |
-
|
224 |
-
wiki_interface = gr.Interface(generate_wikipedia_outputs,
|
225 |
-
inputs=[gr.Textbox(label="Enter any prompt ", type="text", value="James Bond"),
|
226 |
-
gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
|
227 |
-
outputs=gr.Textbox(label="Output generated", type="text"), description=description2)
|
228 |
-
|
229 |
-
demo = gr.TabbedInterface([shakespeare_interface, wiki_interface], tab_names=["Shakespeare Data", "Wikipedia Data"],
|
230 |
title=title)
|
231 |
|
232 |
|
|
|
233 |
demo.launch()
|
|
|
157 |
shakespeare_text = f.read()
|
158 |
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
# Load the Shakespeare model
|
161 |
shakespeare_model = BigramLanguageModel(shakespeare_text, n_embd).to(device) # Initialize an instance of your model
|
162 |
+
shakespeare_model.load_state_dict(torch.load('GPT_Shakespeare_language_model.pth', map_location=torch.device('cpu')))
|
163 |
shakespeare_model.eval() # Set the model to evaluation mode
|
164 |
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
def generate_shakespeare_outputs(prompt=None, max_new_tokens=2000):
|
167 |
if prompt:
|
|
|
172 |
return text_output
|
173 |
|
174 |
|
175 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
title = "Nano GPT"
|
178 |
|
|
|
183 |
gr.Slider(minimum=100, maximum=5000, step=100, value=2000, label="Max new tokens")],
|
184 |
outputs=gr.Textbox(label="Output generated", type="text"), description=description1)
|
185 |
|
186 |
+
demo = gr.TabbedInterface([shakespeare_interface], tab_names=["Shakespeare Data"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
title=title)
|
188 |
|
189 |
|
190 |
+
|
191 |
demo.launch()
|