CISCai committed on
Commit
ee7df4f
1 Parent(s): 9246717

Added token autocompletion lookup

Browse files

When editing or adding token_id metadata, you will now get an extra token lookup field to quickly find the correct token.

Files changed (1) hide show
  1. app.py +88 -15
app.py CHANGED
@@ -106,12 +106,11 @@ with gr.Blocks(
106
  )
107
 
108
  with gr.Row():
109
- # Too slow unfortunately, needs a proper search box
110
- # meta_lookup = gr.Dropdown(
111
- # label = 'Lookup token',
112
- # type = 'index',
113
- # visible = False,
114
- # )
115
 
116
  meta_number = gr.Number(
117
  visible = False,
@@ -364,8 +363,11 @@ with gr.Blocks(
364
  typ = None
365
  if (val := meta.key.get(key, standard_metadata.get(key))) is not None:
366
  typ = GGUFValueType(val[0]).name
367
- elif key and key.startswith('tokenizer.chat_template.'):
368
- typ = GGUFValueType.STRING.name
 
 
 
369
 
370
  return {
371
  meta_types: gr.Dropdown(
@@ -391,6 +393,7 @@ with gr.Blocks(
391
  ],
392
  outputs = [
393
  meta_boolean,
 
394
  meta_number,
395
  meta_string,
396
  meta_array,
@@ -402,6 +405,8 @@ with gr.Blocks(
402
  typ: int,
403
  ):
404
  val = None
 
 
405
  if (data := meta.key.get(key, standard_metadata.get(key))) is not None:
406
  typ = data[0]
407
  val = data[1]
@@ -423,6 +428,11 @@ with gr.Blocks(
423
  value = val if typ == GGUFValueType.BOOL and data is not None else False,
424
  visible = True if typ == GGUFValueType.BOOL else False,
425
  ),
 
 
 
 
 
426
  meta_number: gr.Number(
427
  value = val if is_number and data is not None else 0,
428
  precision = 10 if typ == GGUFValueType.FLOAT32 or typ == GGUFValueType.FLOAT64 else 0,
@@ -537,6 +547,36 @@ with gr.Blocks(
537
  )
538
 
539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  def add_metadata(
541
  meta: MetadataState,
542
  key: str,
@@ -550,7 +590,7 @@ with gr.Blocks(
550
 
551
  return {
552
  meta_changes: gr.HighlightedText(
553
- )
554
  }
555
 
556
  if key in meta.rem:
@@ -570,6 +610,45 @@ with gr.Blocks(
570
  )
571
 
572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  meta_boolean.input(
574
  add_metadata,
575
  inputs = [
@@ -582,12 +661,6 @@ with gr.Blocks(
582
  ] + state_change_components,
583
  )
584
 
585
- # meta_lookup.input(
586
- # lambda token: gr.Number(value = token),
587
- # inputs = meta_lookup,
588
- # outputs = meta_number,
589
- # )
590
-
591
  meta_number.submit(
592
  add_metadata,
593
  inputs = [
 
106
  )
107
 
108
  with gr.Row():
109
+ meta_lookup = gr.Dropdown(
110
+ label = 'Lookup token',
111
+ allow_custom_value = True,
112
+ visible = False,
113
+ )
 
114
 
115
  meta_number = gr.Number(
116
  visible = False,
 
363
  typ = None
364
  if (val := meta.key.get(key, standard_metadata.get(key))) is not None:
365
  typ = GGUFValueType(val[0]).name
366
+ elif key:
367
+ if key.startswith('tokenizer.chat_template.'):
368
+ typ = GGUFValueType.STRING.name
369
+ elif key.endswith('_token_id'):
370
+ typ = GGUFValueType.UINT32.name
371
 
372
  return {
373
  meta_types: gr.Dropdown(
 
393
  ],
394
  outputs = [
395
  meta_boolean,
396
+ meta_lookup,
397
  meta_number,
398
  meta_string,
399
  meta_array,
 
405
  typ: int,
406
  ):
407
  val = None
408
+ tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
409
+
410
  if (data := meta.key.get(key, standard_metadata.get(key))) is not None:
411
  typ = data[0]
412
  val = data[1]
 
428
  value = val if typ == GGUFValueType.BOOL and data is not None else False,
429
  visible = True if typ == GGUFValueType.BOOL else False,
430
  ),
431
+ meta_lookup: gr.Dropdown(
432
+ None,
433
+ value = tokens[val] if is_number and data is not None and key.endswith('_token_id') and val < len(tokens) else '',
434
+ visible = True if is_number and key.endswith('_token_id') else False,
435
+ ),
436
  meta_number: gr.Number(
437
  value = val if is_number and data is not None else 0,
438
  precision = 10 if typ == GGUFValueType.FLOAT32 or typ == GGUFValueType.FLOAT64 else 0,
 
547
  )
548
 
549
 
550
+ @gr.on(
551
+ triggers = [
552
+ meta_lookup.key_up,
553
+ ],
554
+ inputs = [
555
+ meta_state,
556
+ ],
557
+ outputs = [
558
+ meta_lookup,
559
+ ],
560
+ show_progress = 'hidden',
561
+ trigger_mode = 'always_last',
562
+ )
563
+ def token_lookup(
564
+ meta: MetadataState,
565
+ keyup: gr.KeyUpData,
566
+ ):
567
+ found = []
568
+ value = keyup.input_value.lower()
569
+ tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
570
+
571
+ any(((found.append(t), len(found) > 5)[1] for i, t in enumerate(tokens) if value in t.lower()))
572
+
573
+ return {
574
+ meta_lookup: gr.Dropdown(
575
+ found,
576
+ ),
577
+ }
578
+
579
+
580
  def add_metadata(
581
  meta: MetadataState,
582
  key: str,
 
590
 
591
  return {
592
  meta_changes: gr.HighlightedText(
593
+ ),
594
  }
595
 
596
  if key in meta.rem:
 
610
  )
611
 
612
 
613
+ def token_to_id(
614
+ meta: MetadataState,
615
+ token: str,
616
+ ):
617
+ tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
618
+
619
+ try:
620
+ found = tokens.index(token)
621
+ except Exception as e:
622
+ raise gr.Error('Token not found')
623
+
624
+ return {
625
+ meta_number: gr.Number(
626
+ found,
627
+ ),
628
+ }
629
+
630
+
631
+ meta_lookup.input(
632
+ token_to_id,
633
+ inputs = [
634
+ meta_state,
635
+ meta_lookup,
636
+ ],
637
+ outputs = [
638
+ meta_number,
639
+ ],
640
+ ).success(
641
+ add_metadata,
642
+ inputs = [
643
+ meta_state,
644
+ meta_keys,
645
+ meta_types,
646
+ meta_number,
647
+ ],
648
+ outputs = [
649
+ ] + state_change_components,
650
+ )
651
+
652
  meta_boolean.input(
653
  add_metadata,
654
  inputs = [
 
661
  ] + state_change_components,
662
  )
663
 
 
 
 
 
 
 
664
  meta_number.submit(
665
  add_metadata,
666
  inputs = [