m-ric HF staff committed on
Commit
73610f4
·
verified ·
1 Parent(s): b5988eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -27
app.py CHANGED
@@ -130,7 +130,7 @@ STYLE = """
130
  margin-top: -5px;
131
  transform: rotate(315deg);
132
  }
133
- .box {
134
  border: 1px solid var(--body-text-color);
135
  padding: 5px;
136
  border-radius: 5px;
@@ -141,19 +141,18 @@ STYLE = """
141
  align-items: center;
142
  justify-content: space-between;
143
  overflow: hidden;
144
- cursor: pointer;
145
  }
146
- .box span {
147
  padding: 5px;
148
  font-size: 12px;
149
  letter-spacing: 1px;
150
  font-weight: 500;
151
  }
152
  /*Hover-Section*/
153
- .box:hover, .box:hover+ul li .box {
154
  background: var(--primary-500);
155
  }
156
- .box:hover+ul li::after, .box:hover+ul li::before, .box:hover+ul::before, .box:hover+ul ul::before, .box:hover+ul .box::before {
157
  border-color: var(--primary-500);
158
  }
159
  .chosen-token {
@@ -175,9 +174,6 @@ STYLE = """
175
  .nonselected-sequence {
176
  background-color: var(--primary-500);
177
  }
178
- .nomargin {
179
- padding-left: 0!important;
180
- }
181
  """
182
 
183
 
@@ -220,14 +216,14 @@ def generate_nodes(node, step):
220
  selected_class = "selected-sequence"
221
  else:
222
  selected_class = "nonselected-sequence"
223
- return f"<li> <div class='box end-of-text child {selected_class}'> <span> <b>{clean(token)}</b> <br>Total score: {node.total_score:.2f}</span> </div> </li>"
224
 
225
  html_content = (
226
- f"<li> <div class='box nonfinal child'> <span> <b>{clean(token)}</b> </span>"
227
  )
228
  if node.table is not None:
229
  html_content += node.table
230
- html_content += "</div>"
231
 
232
  if len(node.children.keys()) > 0:
233
  html_content += "<ul> "
@@ -241,15 +237,16 @@ def generate_nodes(node, step):
241
 
242
  def generate_html(start_sentence, original_tree):
243
  html_output = f"""<div class="custom-container">
244
- <div class="tree"> <ul class="nomargin"><li class="nomargin">
245
- <div class="box" id='root'> <span> <b>{start_sentence}</b> </span> {original_tree.table} </div>"""
246
  html_output += "<ul> "
247
  for subnode in original_tree.children.values():
248
  html_output += generate_nodes(subnode, step=1)
249
  html_output += "</ul>"
250
  html_output += """
251
- </li></ul></div>
252
- </div>
 
253
  """
254
  return html_output
255
 
@@ -272,7 +269,7 @@ class BeamNode:
272
  is_selected_sequence: bool
273
 
274
 
275
- def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
276
  input_length = len(tokenizer([start_sentence], return_tensors="pt"))
277
  original_tree = BeamNode(
278
  cumulative_score=0,
@@ -289,6 +286,8 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
289
  beam_trees = [original_tree] * n_beams
290
 
291
  for step, step_scores in enumerate(scores):
 
 
292
  (
293
  top_token_indexes,
294
  top_cumulative_scores,
@@ -296,7 +295,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
296
  current_sequence,
297
  top_tokens,
298
  ) = ([], [], [], [], [])
299
- for beam_ix in range(n_beams): # Get possible descendants for each beam
300
  current_beam = beam_trees[beam_ix]
301
 
302
  # skip if the beam is already final
@@ -316,7 +315,6 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
316
  current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
317
  top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
318
 
319
-
320
  top_df = pd.DataFrame.from_dict(
321
  {
322
  "token_index": top_token_indexes,
@@ -336,6 +334,9 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
336
  top_df_selected = top_df.sort_values("cumulative_score", ascending=False).iloc[
337
  :n_beams
338
  ]
 
 
 
339
 
340
  # Write the scores table - one per beam source
341
  for beam_ix in reversed(list(range(n_beams))):
@@ -352,14 +353,13 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
352
  )
353
  beam_trees[beam_ix].table = markdown_table
354
 
355
- # Add new children for each beam
356
  cumulative_scores = [beam.cumulative_score for beam in beam_trees]
357
- for beam_ix in range(n_beams):
358
- current_token_choice_ix = top_df_selected.iloc[beam_ix]["token_index"]
359
- current_token_choice = tokenizer.decode([current_token_choice_ix])
360
-
361
  # Update the source tree
362
- source_beam_ix = int(top_df_selected.iloc[beam_ix]["beam_index"])
 
 
363
 
364
  cumulative_score = (
365
  cumulative_scores[source_beam_ix]
@@ -368,6 +368,9 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
368
  current_sequence = (
369
  beam_trees[source_beam_ix].current_sequence + current_token_choice
370
  )
 
 
 
371
  beam_trees[source_beam_ix].children[current_token_choice_ix] = BeamNode(
372
  current_token_ix=current_token_choice_ix,
373
  table=None,
@@ -387,7 +390,8 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
387
  ),
388
  )
389
 
390
- # Reassign all beams at once
 
391
  beam_trees = [
392
  beam_trees[int(top_df_selected.iloc[beam_ix]["beam_index"])]
393
  for beam_ix in range(n_beams)
@@ -400,7 +404,6 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences):
400
 
401
  return original_tree
402
 
403
-
404
  @spaces.GPU
405
  def get_beam_search_html(
406
  input_text, number_steps, number_beams, length_penalty, num_return_sequences
@@ -432,6 +435,7 @@ def get_beam_search_html(
432
  outputs.scores[:],
433
  length_penalty,
434
  decoded_sequences,
 
435
  )
436
  html = generate_html(input_text, original_tree)
437
  return html, markdown
@@ -466,7 +470,7 @@ This parameter will not impact the beam search paths, but only influence the cho
466
  )
467
  text = gr.Textbox(
468
  label="Sentence to decode from",
469
- value="Conclusion: thanks a lot. This article was originally published on",
470
  )
471
  with gr.Row():
472
  n_steps = gr.Slider(
 
130
  margin-top: -5px;
131
  transform: rotate(315deg);
132
  }
133
+ .tree li a {
134
  border: 1px solid var(--body-text-color);
135
  padding: 5px;
136
  border-radius: 5px;
 
141
  align-items: center;
142
  justify-content: space-between;
143
  overflow: hidden;
 
144
  }
145
+ .tree li a span {
146
  padding: 5px;
147
  font-size: 12px;
148
  letter-spacing: 1px;
149
  font-weight: 500;
150
  }
151
  /*Hover-Section*/
152
+ .tree li a:hover, .tree li a:hover+ul li a {
153
  background: var(--primary-500);
154
  }
155
+ .tree li a:hover+ul li::after, .tree li a:hover+ul li::before, .tree li a:hover+ul::before, .tree li a:hover+ul ul::before, .tree li a:hover+ul a::before {
156
  border-color: var(--primary-500);
157
  }
158
  .chosen-token {
 
174
  .nonselected-sequence {
175
  background-color: var(--primary-500);
176
  }
 
 
 
177
  """
178
 
179
 
 
216
  selected_class = "selected-sequence"
217
  else:
218
  selected_class = "nonselected-sequence"
219
+ return f"<li> <a href='#' class='end-of-text child {selected_class}'> <span> <b>{clean(token)}</b> <br>Total score: {node.total_score:.2f}</span> </a> </li>"
220
 
221
  html_content = (
222
+ f"<li> <a href='#' class='nonfinal child'> <span> <b>{clean(token)}</b> </span>"
223
  )
224
  if node.table is not None:
225
  html_content += node.table
226
+ html_content += "</a>"
227
 
228
  if len(node.children.keys()) > 0:
229
  html_content += "<ul> "
 
237
 
238
  def generate_html(start_sentence, original_tree):
239
  html_output = f"""<div class="custom-container">
240
+ <div class="tree">
241
+ <ul> <li> <a href='#' id='root'> <span> <b>{start_sentence}</b> </span> {original_tree.table} </a>"""
242
  html_output += "<ul> "
243
  for subnode in original_tree.children.values():
244
  html_output += generate_nodes(subnode, step=1)
245
  html_output += "</ul>"
246
  html_output += """
247
+ </li> </ul>
248
+ </div>
249
+ </body>
250
  """
251
  return html_output
252
 
 
269
  is_selected_sequence: bool
270
 
271
 
272
+ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, beam_indexes_source):
273
  input_length = len(tokenizer([start_sentence], return_tensors="pt"))
274
  original_tree = BeamNode(
275
  cumulative_score=0,
 
286
  beam_trees = [original_tree] * n_beams
287
 
288
  for step, step_scores in enumerate(scores):
289
+
290
+ # Gather all possible descendants for each beam
291
  (
292
  top_token_indexes,
293
  top_cumulative_scores,
 
295
  current_sequence,
296
  top_tokens,
297
  ) = ([], [], [], [], [])
298
+ for beam_ix in range(n_beams):
299
  current_beam = beam_trees[beam_ix]
300
 
301
  # skip if the beam is already final
 
315
  current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
316
  top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
317
 
 
318
  top_df = pd.DataFrame.from_dict(
319
  {
320
  "token_index": top_token_indexes,
 
334
  top_df_selected = top_df.sort_values("cumulative_score", ascending=False).iloc[
335
  :n_beams
336
  ]
337
+ if any(["you enjoyed" in el for el in top_df["current_sequence"]]):
338
+ print("Displaying debug info:::")
339
+ display(top_df_selected)
340
 
341
  # Write the scores table - one per beam source
342
  for beam_ix in reversed(list(range(n_beams))):
 
353
  )
354
  beam_trees[beam_ix].table = markdown_table
355
 
356
+ # Add new children to each beam
357
  cumulative_scores = [beam.cumulative_score for beam in beam_trees]
358
+ for _, row in top_df_selected.iterrows():
 
 
 
359
  # Update the source tree
360
+ source_beam_ix = int(row["beam_index"])
361
+ current_token_choice_ix = row["token_index"]
362
+ current_token_choice = tokenizer.decode([current_token_choice_ix])
363
 
364
  cumulative_score = (
365
  cumulative_scores[source_beam_ix]
 
368
  current_sequence = (
369
  beam_trees[source_beam_ix].current_sequence + current_token_choice
370
  )
371
+ if current_token_choice_ix == 340:
372
+ print("Found info:")
373
+ print(f"We generate token '{current_token_choice}', and the total sequence is '{current_sequence}'")
374
  beam_trees[source_beam_ix].children[current_token_choice_ix] = BeamNode(
375
  current_token_ix=current_token_choice_ix,
376
  table=None,
 
390
  ),
391
  )
392
 
393
+
394
+ # Swap all beams by descending cumul score, so that n°1 has the highest cumulative score, and so on
395
  beam_trees = [
396
  beam_trees[int(top_df_selected.iloc[beam_ix]["beam_index"])]
397
  for beam_ix in range(n_beams)
 
404
 
405
  return original_tree
406
 
 
407
  @spaces.GPU
408
  def get_beam_search_html(
409
  input_text, number_steps, number_beams, length_penalty, num_return_sequences
 
435
  outputs.scores[:],
436
  length_penalty,
437
  decoded_sequences,
438
+ outputs.beam_indices,
439
  )
440
  html = generate_html(input_text, original_tree)
441
  return html, markdown
 
470
  )
471
  text = gr.Textbox(
472
  label="Sentence to decode from",
473
+ value="Conclusion: thanks a lot. That's all for today",
474
  )
475
  with gr.Row():
476
  n_steps = gr.Slider(