arjunpatel committed
Commit ef09c1c
1 Parent(s): 19c6000
Typos

gradio_demo.py CHANGED (+35 -37)
@@ -18,15 +18,7 @@ seed_text = "This move is called "
 tf.random.set_seed(0)
 
 
-# need a function to sanitize imputs
-# - remove extra spaces
-# - make sure each word is capitalized
-# - format the moves such that it's clearer when each move is listed
-# - play with the max length parameter abit, and try to remove sentences that don't end in periods.
-
 def update_history(df, move_name, move_desc, generation, parameters):
-    # needs to format each move description with new lines to cut down on width
-
     new_row = [{"Move Name": move_name,
                 "Move Description": move_desc,
                 "Generation Type": generation,
@@ -84,7 +76,8 @@ with demo:
     decoding methods in the process! Each tab aims to explain each generation methodology available for the
     model. The dataframe below allows you to keep track of each move generated, to compare!""")
     gr.Markdown("<h3> How does text generation work? <h3>")
-    gr.Markdown("""Roughly, text generation models accept an input sequence of words (or parts of words,
+    gr.Markdown("""Roughly, text generation models accept an input sequence of words (or parts of words,
+    known as tokens.
     These models then output a corresponding set of words or tokens. Given the input, the model
     estimates the probability of another possible word or token appearing right after the given sequence. In
     other words, the model estimates conditional probabilities and ranks them in order to generate sequences
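
The "conditional probabilities" described in that Markdown block are directly inspectable. A minimal sketch, assuming the stock distilgpt2 checkpoint from the Hub rather than this repo's fine-tuned weights:

import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = TFAutoModelForCausalLM.from_pretrained("distilgpt2")

inputs = tokenizer("This move is called ", return_tensors="tf")
logits = model(**inputs).logits                   # shape: (batch, seq_len, vocab_size)
next_token_probs = tf.nn.softmax(logits[0, -1])   # distribution over the next token
top = tf.math.top_k(next_token_probs, k=5)
for token_id, prob in zip(top.indices, top.values):
    print(repr(tokenizer.decode([int(token_id)])), float(prob))

Every decoding method in the tabs below is just a different rule for walking this distribution one step at a time.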
@@ -121,18 +114,19 @@ with demo:
             text_output_greedy = gr.Textbox(label="Move Description")
             text_button_greedy = gr.Button("Create my move!")
         with gr.TabItem("Beam Search"):
-            gr.Markdown("This tab lets you learn about using beam search!")
             gr.Markdown("""Beam search is an improvement on Greedy Search. Instead of directly grabbing the word that
             maximizes probability, we conduct a search with B number of candidates. We then try to find the next word
             that would most likely follow each beam, and we grab the top B candidates of that search. This may
             eliminate one of the original beams we started with, and that's okay! That is how the algorithm decides
-            on an optimal candidate. Eventually, the beam sequence terminate or are eliminated due to being too
+            on an optimal candidate. Eventually, the beam sequences terminate or are eliminated due to being too
+            improbable.
 
-            Increasing the number of beams will increase model generation time, but also result in a more thorough
-            Decreasing the number of beams will decrease decoding time, but it may not find an optimal
+            Increasing the number of beams will increase model generation time, but also result in a more thorough
+            search. Decreasing the number of beams will decrease decoding time, but it may not find an optimal
+            sentence.
 
             Play around with the num_beams parameter to experiment! """
-
+            )
             with gr.Row():
                 num_beams = gr.Slider(minimum=2, maximum=10, value=2, step=1,
                                       label="Number of Beams")
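
For reference, the num_beams slider ultimately feeds transformers' generate API. A sketch of beam search decoding, reusing the tokenizer and model loaded in the sketch above (the demo's own helper functions may differ):

input_ids = tokenizer("This move is called ", return_tensors="tf").input_ids
beam_output = model.generate(
    input_ids,
    max_length=60,
    num_beams=4,           # keep B=4 candidate sequences alive at each step
    early_stopping=True,   # stop once all beams reach an end-of-sequence token
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))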
@@ -140,21 +134,23 @@ with demo:
             text_output_beam = gr.Textbox(label="Move Description")
             text_button_beam = gr.Button("Create my move!")
         with gr.TabItem("Sampling and Temperature Search"):
-            gr.Markdown("This tab lets you experiment with adjusting the temperature of the generator")
             gr.Markdown(
-                """
-
-                but when generating cool move descriptions, we want some more variety!
+                """Greedy Search and Beam Search were both good at finding sequences that are likely to follow our
+                input text, but when generating cool move descriptions, we want some more variety!
 
                 Instead of choosing the word or token that is most likely to follow a given sequence, we can instead
-                ask the model to sample across the probability distribution of likely words.
-
+                ask the model to sample across the probability distribution of likely words.
+
+                It's kind of like walking into the tall grass and finding a Pokemon encounter.
+                There are different encounter rates, which allow
                 for the most common mons to appear (looking at you, Zubat), but also account for surprise, like shinys!
 
-                We might even want to go further, though. We can rescale the probability distributions directly
-                allowing for rare words to temporarily become more frequently. We do this using the
+                We might even want to go further, though. We can rescale the probability distributions directly
+                instead, allowing for rare words to temporarily become more frequent. We do this using the
+                temperature parameter.
 
-                Turn the temperature up, and rare tokens become very likely! Cool down, and we approach more sensible
+                Turn the temperature up, and rare tokens become very likely! Cool down, and we approach more sensible
+                output.
 
                 Experiment with turning sampling on and off, and by varying temperature below!.
                 """)
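
Concretely, temperature divides the logits by T before the softmax, so T > 1 flattens the distribution (boosting rare tokens) and T < 1 sharpens it. A sketch of sampling with temperature, again reusing tokenizer, model, and input_ids from the sketches above:

sample_output = model.generate(
    input_ids,
    max_length=60,
    do_sample=True,    # sample from the distribution instead of taking the argmax
    temperature=1.3,   # softmax(logits / 1.3): rare tokens become more likely
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))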
@@ -168,12 +164,12 @@ with demo:
             text_button_temp = gr.Button("Create my move!")
         with gr.TabItem("Top K and Top P Sampling"):
             gr.Markdown(
-                """
-
+                """When we want more control over the words we get to sample from, we turn to Top K and Top P
+                decoding methods!
 
 
-                The Top K sampling method selects the K most probable words given a sequence, and then samples from
-                rather than the whole vocabulary. This effectively cuts out low probability words.
+                The Top K sampling method selects the K most probable words given a sequence, and then samples from
+                that subset, rather than the whole vocabulary. This effectively cuts out low probability words.
 
 
                 Top P also reduces the available vocabulary to sample from, but instead of choosing the number of
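
Both truncation strategies are also plain generate kwargs and can be combined; a sketch, with the same assumed tokenizer, model, and input_ids as above:

truncated_output = model.generate(
    input_ids,
    max_length=60,
    do_sample=True,
    top_k=50,    # sample only among the 50 most probable next tokens
    top_p=0.92,  # ...then only from the smallest set covering 92% of the probability mass
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(truncated_output[0], skip_special_tokens=True))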
@@ -198,24 +194,26 @@ with demo:
     gr.Markdown("<h3> Generation History <h3>")
     # Displays a dataframe with the history of moves generated, with parameters
     history = gr.Dataframe(headers=["Move Name", "Move Description", "Generation Type", "Parameters"])
-    with gr.
+    with gr.Box():
         gr.Markdown("<h3>How did you make this?<h3>")
         gr.Markdown("""
-        I collected the dataset from Serebii (https://www.serebii.net) , a news source and aggregator of Pokemon info.
+        I collected the dataset from [Serebii](https://www.serebii.net), a news source and aggregator of Pokemon info.
 
 
-        I then added a seed phrase "This move is called" just before each move in order to assist the model in
+        I then added a seed phrase "This move is called" just before each move in order to assist the model in
+        generation.
 
 
-        I then followed HuggingFace's handy language_modeling.ipynb for fine-tuning distillgpt2 on this tiny dataset,
-        it surprisingly worked!
+        I then followed HuggingFace's handy language_modeling.ipynb for fine-tuning distilgpt2 on this tiny dataset,
+        and it surprisingly worked!
 
 
-        I learned all about text generation using the book Natural Language Processing with Transformers
-
-
-
-        """)
+        I learned all about text generation using the book [Natural Language Processing with Transformers](
+        https://www.oreilly.com/library/view/natural-language-processing/9781098103231/) by Lewis Tunstall,
+        Leandro von Werra and Thomas Wolf, as well as [this fantastic article](
+        https://huggingface.co/blog/how-to-generate) by Patrick von Platen. Thanks to all of these folks for creating
+        these learning materials, and thanks to the Hugging Face team for developing this product! """)
+
     text_button_baseline.click(create_move, inputs=[text_input_baseline, history],
                                outputs=[text_output_baseline, history])
     text_button_greedy.click(create_greedy_search_move, inputs=[text_input_greedy, history],
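
The seed-phrase trick described in that last block is just string prepending before tokenization; a hypothetical illustration (the move data and variable names here are invented, not taken from this repo):

seed_text = "This move is called "  # same seed phrase the demo uses
# Invented example rows standing in for the scraped Serebii data.
moves = [
    ("Thunder Wave", "A weak electric charge that paralyzes the target."),
    ("Aqua Jet", "The user lunges at the target so fast it always strikes first."),
]
training_texts = [f"{seed_text}{name}. {desc}" for name, desc in moves]

Each such string then becomes one training example for the causal-LM fine-tuning notebook mentioned above.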