Spaces:
Sleeping
Sleeping
Implemented edit support for tokenizer.ggml.scores and tokenizer.ggml.token_type
Browse files
- _hf_explorer.py +2 -2
- _hf_gguf.py +50 -0
- app.py +207 -26
_hf_explorer.py
CHANGED
@@ -33,7 +33,7 @@ class FileExplorer(Component):
|
|
33 |
value: str | list[str] | Callable | None = None,
|
34 |
file_count: Literal["single", "multiple"] = "multiple",
|
35 |
root_dir: str = None,
|
36 |
-
branch: str =
|
37 |
token: str | None = None,
|
38 |
ignore_glob: str | None = None,
|
39 |
label: str | None = None,
|
@@ -76,7 +76,7 @@ class FileExplorer(Component):
|
|
76 |
key: if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.
|
77 |
"""
|
78 |
self.root_dir = root_dir
|
79 |
-
self.branch = branch
|
80 |
self.fs = HfFileSystem(token = token)
|
81 |
self.glob = glob
|
82 |
self.ignore_glob = ignore_glob
|
|
|
33 |
value: str | list[str] | Callable | None = None,
|
34 |
file_count: Literal["single", "multiple"] = "multiple",
|
35 |
root_dir: str = None,
|
36 |
+
branch: str | None = None,
|
37 |
token: str | None = None,
|
38 |
ignore_glob: str | None = None,
|
39 |
label: str | None = None,
|
|
|
76 |
key: if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.
|
77 |
"""
|
78 |
self.root_dir = root_dir
|
79 |
+
self.branch = branch or "main"
|
80 |
self.fs = HfFileSystem(token = token)
|
81 |
self.glob = glob
|
82 |
self.ignore_glob = ignore_glob
|
_hf_gguf.py
CHANGED
@@ -4,6 +4,56 @@ from fsspec.spec import AbstractBufferedFile
|
|
4 |
from typing import Any, Iterator, NamedTuple
|
5 |
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
class GGUFValueType(IntEnum):
|
8 |
UINT8 = 0
|
9 |
INT8 = 1
|
|
|
4 |
from typing import Any, Iterator, NamedTuple
|
5 |
|
6 |
|
7 |
+
class TokenType(IntEnum):
    """Integer codes for the entries of ``tokenizer.ggml.token_type``.

    Codes start at 1, so a zero-based position in ``list(TokenType)`` is
    always ``code - 1``.
    """

    NORMAL = 1
    UNKNOWN = 2
    CONTROL = 3
    USER_DEFINED = 4
    UNUSED = 5
    BYTE = 6
|
14 |
+
|
15 |
+
|
16 |
+
class LlamaFileType(IntEnum):
    """Integer codes used to label the ``general.file_type`` metadata value.

    Member names are shown to the user next to the raw number; a number with
    no member here is displayed as UNKNOWN by the caller.
    """

    # Unquantized / half precision
    ALL_F32 = 0
    MOSTLY_F16 = 1

    # Q4/Q5/Q8 variants
    MOSTLY_Q4_0 = 2
    MOSTLY_Q4_1 = 3
    MOSTLY_Q4_1_SOME_F16 = 4
    MOSTLY_Q4_2 = 5
    MOSTLY_Q4_3 = 6
    MOSTLY_Q8_0 = 7
    MOSTLY_Q5_0 = 8
    MOSTLY_Q5_1 = 9

    # K variants
    MOSTLY_Q2_K = 10
    MOSTLY_Q3_K_S = 11
    MOSTLY_Q3_K_M = 12
    MOSTLY_Q3_K_L = 13
    MOSTLY_Q4_K_S = 14
    MOSTLY_Q4_K_M = 15
    MOSTLY_Q5_K_S = 16
    MOSTLY_Q5_K_M = 17
    MOSTLY_Q6_K = 18

    # IQ variants (plus Q2_K_S, which shares this numeric range)
    MOSTLY_IQ2_XXS = 19
    MOSTLY_IQ2_XS = 20
    MOSTLY_Q2_K_S = 21
    MOSTLY_IQ3_XS = 22
    MOSTLY_IQ3_XXS = 23
    MOSTLY_IQ1_S = 24
    MOSTLY_IQ4_NL = 25
    MOSTLY_IQ3_S = 26
    MOSTLY_IQ3_M = 27
    MOSTLY_IQ2_S = 28
    MOSTLY_IQ2_M = 29
    MOSTLY_IQ4_XS = 30
    MOSTLY_IQ1_M = 31

    # BF16, Q4_0 layout variants and TQ variants
    MOSTLY_BF16 = 32
    MOSTLY_Q4_0_4_4 = 33
    MOSTLY_Q4_0_4_8 = 34
    MOSTLY_Q4_0_8_8 = 35
    MOSTLY_TQ1_0 = 36
    MOSTLY_TQ2_0 = 37
|
55 |
+
|
56 |
+
|
57 |
class GGUFValueType(IntEnum):
|
58 |
UINT8 = 0
|
59 |
INT8 = 1
|
app.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Annotated, Any, NamedTuple
|
|
9 |
from urllib.parse import urlencode
|
10 |
|
11 |
from _hf_explorer import FileExplorer
|
12 |
-
from _hf_gguf import standard_metadata, GGUFValueType, HuggingGGUFstream
|
13 |
|
14 |
|
15 |
hfapi = HfApi()
|
@@ -49,6 +49,14 @@ def human_readable_metadata(
|
|
49 |
val = str(val[:8])[:-1] + ', ...]'
|
50 |
else:
|
51 |
val = str(val)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
elif key.endswith('_token_id'):
|
53 |
tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
|
54 |
|
@@ -113,8 +121,23 @@ with gr.Blocks(
|
|
113 |
)
|
114 |
|
115 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
meta_lookup = gr.Dropdown(
|
117 |
label = 'Lookup token',
|
|
|
118 |
allow_custom_value = True,
|
119 |
visible = False,
|
120 |
)
|
@@ -169,6 +192,8 @@ with gr.Blocks(
|
|
169 |
# BUG: For some reason using gr.State initial value turns tuple to list?
|
170 |
meta_state.value = init_state()
|
171 |
|
|
|
|
|
172 |
file_change_components = [
|
173 |
meta_changes,
|
174 |
file_meta,
|
@@ -400,6 +425,8 @@ with gr.Blocks(
|
|
400 |
],
|
401 |
outputs = [
|
402 |
meta_boolean,
|
|
|
|
|
403 |
meta_lookup,
|
404 |
meta_number,
|
405 |
meta_string,
|
@@ -420,7 +447,21 @@ with gr.Blocks(
|
|
420 |
elif not key:
|
421 |
typ = None
|
422 |
|
423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
# TODO: Support arrays?
|
425 |
typ = GGUFValueType.ARRAY
|
426 |
|
@@ -435,15 +476,25 @@ with gr.Blocks(
|
|
435 |
value = val if typ == GGUFValueType.BOOL and data is not None else False,
|
436 |
visible = True if typ == GGUFValueType.BOOL else False,
|
437 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
meta_lookup: gr.Dropdown(
|
439 |
None,
|
440 |
-
value = tokens[val] if is_number and data is not None and
|
441 |
-
visible = True if is_number and
|
442 |
),
|
443 |
meta_number: gr.Number(
|
444 |
-
value = val if is_number and data is not None else 0,
|
445 |
precision = 10 if typ == GGUFValueType.FLOAT32 or typ == GGUFValueType.FLOAT64 else 0,
|
446 |
-
|
|
|
447 |
),
|
448 |
meta_string: gr.Textbox(
|
449 |
value = val if typ == GGUFValueType.STRING else '',
|
@@ -483,8 +534,9 @@ with gr.Blocks(
|
|
483 |
changes = [(k, 'rem') for k in meta.rem]
|
484 |
|
485 |
for k, v in meta.add.items():
|
|
|
486 |
changes.append((k, 'add'))
|
487 |
-
changes.append((str(
|
488 |
|
489 |
m = []
|
490 |
for k, v in meta.key.items():
|
@@ -498,7 +550,7 @@ with gr.Blocks(
|
|
498 |
link += '&' + urlencode(
|
499 |
{
|
500 |
'rem': meta.rem,
|
501 |
-
'add': [json.dumps([k, *v], ensure_ascii = False) for k, v in meta.add.items()],
|
502 |
},
|
503 |
doseq = True,
|
504 |
safe = '[]{}:"\',',
|
@@ -554,6 +606,97 @@ with gr.Blocks(
|
|
554 |
)
|
555 |
|
556 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
557 |
@gr.on(
|
558 |
triggers = [
|
559 |
meta_lookup.key_up,
|
@@ -563,6 +706,7 @@ with gr.Blocks(
|
|
563 |
],
|
564 |
outputs = [
|
565 |
meta_lookup,
|
|
|
566 |
],
|
567 |
show_progress = 'hidden',
|
568 |
trigger_mode = 'always_last',
|
@@ -571,16 +715,13 @@ with gr.Blocks(
|
|
571 |
meta: MetadataState,
|
572 |
keyup: gr.KeyUpData,
|
573 |
):
|
574 |
-
found =
|
575 |
-
value = keyup.input_value.lower()
|
576 |
-
tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
|
577 |
-
|
578 |
-
any(((found.append(t), len(found) > 5)[1] for i, t in enumerate(tokens) if value in t.lower()))
|
579 |
|
580 |
return {
|
581 |
meta_lookup: gr.Dropdown(
|
582 |
-
found,
|
583 |
),
|
|
|
584 |
}
|
585 |
|
586 |
|
@@ -590,6 +731,8 @@ with gr.Blocks(
|
|
590 |
typ: int | None,
|
591 |
val: Any,
|
592 |
request: gr.Request,
|
|
|
|
|
593 |
):
|
594 |
if not key or typ is None:
|
595 |
if key:
|
@@ -603,7 +746,18 @@ with gr.Blocks(
|
|
603 |
if key in meta.rem:
|
604 |
meta.rem.remove(key)
|
605 |
|
606 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
|
608 |
if key.startswith('tokenizer.chat_template.'):
|
609 |
template = key[24:]
|
@@ -617,29 +771,25 @@ with gr.Blocks(
|
|
617 |
)
|
618 |
|
619 |
|
620 |
-
def
|
621 |
-
|
622 |
-
|
623 |
):
|
624 |
-
|
625 |
-
|
626 |
-
try:
|
627 |
-
found = tokens.index(token)
|
628 |
-
except Exception as e:
|
629 |
raise gr.Error('Token not found')
|
630 |
|
631 |
return {
|
632 |
meta_number: gr.Number(
|
633 |
-
|
634 |
),
|
635 |
}
|
636 |
|
637 |
|
638 |
meta_lookup.input(
|
639 |
-
|
640 |
inputs = [
|
641 |
-
meta_state,
|
642 |
meta_lookup,
|
|
|
643 |
],
|
644 |
outputs = [
|
645 |
meta_number,
|
@@ -668,6 +818,20 @@ with gr.Blocks(
|
|
668 |
] + state_change_components,
|
669 |
)
|
670 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
671 |
meta_number.submit(
|
672 |
add_metadata,
|
673 |
inputs = [
|
@@ -675,6 +839,8 @@ with gr.Blocks(
|
|
675 |
meta_keys,
|
676 |
meta_types,
|
677 |
meta_number,
|
|
|
|
|
678 |
],
|
679 |
outputs = [
|
680 |
] + state_change_components,
|
@@ -736,9 +902,24 @@ def stream_repo_file(
|
|
736 |
for k in rem_meta:
|
737 |
gguf.remove_metadata(k)
|
738 |
|
|
|
739 |
for k in add_meta:
|
740 |
k = json.loads(k)
|
741 |
if isinstance(k, list) and len(k) == 3:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
742 |
gguf.add_metadata(*k)
|
743 |
|
744 |
yield gguf.filesize
|
|
|
9 |
from urllib.parse import urlencode
|
10 |
|
11 |
from _hf_explorer import FileExplorer
|
12 |
+
from _hf_gguf import standard_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
|
13 |
|
14 |
|
15 |
hfapi = HfApi()
|
|
|
49 |
val = str(val[:8])[:-1] + ', ...]'
|
50 |
else:
|
51 |
val = str(val)
|
52 |
+
elif isinstance(val, dict):
|
53 |
+
val = '[' + ', '.join((f'{k}: {v}' for k, v in val.items())) + ']'
|
54 |
+
elif key == 'general.file_type':
|
55 |
+
try:
|
56 |
+
ftype = LlamaFileType(val).name
|
57 |
+
except:
|
58 |
+
ftype = 'UNKNOWN'
|
59 |
+
val = f'{ftype} ({val})'
|
60 |
elif key.endswith('_token_id'):
|
61 |
tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
|
62 |
|
|
|
121 |
)
|
122 |
|
123 |
with gr.Row():
|
124 |
+
meta_token_select = gr.Dropdown(
|
125 |
+
label = 'Select token',
|
126 |
+
type = 'index',
|
127 |
+
allow_custom_value = True,
|
128 |
+
visible = False,
|
129 |
+
)
|
130 |
+
|
131 |
+
meta_token_type = gr.Dropdown(
|
132 |
+
[e.name for e in TokenType],
|
133 |
+
label = 'Token type',
|
134 |
+
type = 'index',
|
135 |
+
visible = False,
|
136 |
+
)
|
137 |
+
|
138 |
meta_lookup = gr.Dropdown(
|
139 |
label = 'Lookup token',
|
140 |
+
type = 'index',
|
141 |
allow_custom_value = True,
|
142 |
visible = False,
|
143 |
)
|
|
|
192 |
# BUG: For some reason using gr.State initial value turns tuple to list?
|
193 |
meta_state.value = init_state()
|
194 |
|
195 |
+
token_select_indices = gr.State([])
|
196 |
+
|
197 |
file_change_components = [
|
198 |
meta_changes,
|
199 |
file_meta,
|
|
|
425 |
],
|
426 |
outputs = [
|
427 |
meta_boolean,
|
428 |
+
meta_token_select,
|
429 |
+
meta_token_type,
|
430 |
meta_lookup,
|
431 |
meta_number,
|
432 |
meta_string,
|
|
|
447 |
elif not key:
|
448 |
typ = None
|
449 |
|
450 |
+
do_select_token = False
|
451 |
+
do_lookup_token = False
|
452 |
+
do_token_type = False
|
453 |
+
match key:
|
454 |
+
case 'tokenizer.ggml.scores':
|
455 |
+
do_select_token = True
|
456 |
+
case 'tokenizer.ggml.token_type':
|
457 |
+
do_select_token = True
|
458 |
+
do_token_type = True
|
459 |
+
case s if s.endswith('_token_id'):
|
460 |
+
do_lookup_token = True
|
461 |
+
case _:
|
462 |
+
pass
|
463 |
+
|
464 |
+
if isinstance(val, list) and not do_select_token:
|
465 |
# TODO: Support arrays?
|
466 |
typ = GGUFValueType.ARRAY
|
467 |
|
|
|
476 |
value = val if typ == GGUFValueType.BOOL and data is not None else False,
|
477 |
visible = True if typ == GGUFValueType.BOOL else False,
|
478 |
),
|
479 |
+
meta_token_select: gr.Dropdown(
|
480 |
+
None,
|
481 |
+
value = '',
|
482 |
+
visible = True if do_select_token else False,
|
483 |
+
),
|
484 |
+
meta_token_type: gr.Dropdown(
|
485 |
+
interactive = False,
|
486 |
+
visible = True if do_token_type else False,
|
487 |
+
),
|
488 |
meta_lookup: gr.Dropdown(
|
489 |
None,
|
490 |
+
value = tokens[val] if is_number and data is not None and do_lookup_token and val < len(tokens) else '',
|
491 |
+
visible = True if is_number and do_lookup_token else False,
|
492 |
),
|
493 |
meta_number: gr.Number(
|
494 |
+
value = val if is_number and data is not None and not do_select_token else 0,
|
495 |
precision = 10 if typ == GGUFValueType.FLOAT32 or typ == GGUFValueType.FLOAT64 else 0,
|
496 |
+
interactive = False if do_select_token else True,
|
497 |
+
visible = True if is_number and not do_token_type else False,
|
498 |
),
|
499 |
meta_string: gr.Textbox(
|
500 |
value = val if typ == GGUFValueType.STRING else '',
|
|
|
534 |
changes = [(k, 'rem') for k in meta.rem]
|
535 |
|
536 |
for k, v in meta.add.items():
|
537 |
+
key, typ, val = human_readable_metadata(meta, k, *v)
|
538 |
changes.append((k, 'add'))
|
539 |
+
changes.append((str(val), None))
|
540 |
|
541 |
m = []
|
542 |
for k, v in meta.key.items():
|
|
|
550 |
link += '&' + urlencode(
|
551 |
{
|
552 |
'rem': meta.rem,
|
553 |
+
'add': [json.dumps([k, *v], ensure_ascii = False, separators = (',', ':')) for k, v in meta.add.items()],
|
554 |
},
|
555 |
doseq = True,
|
556 |
safe = '[]{}:"\',',
|
|
|
606 |
)
|
607 |
|
608 |
|
609 |
+
def token_search(
    meta: MetadataState,
    name: str,
    limit: int = 7,
):
    """Find tokens whose text contains ``name``, ignoring case.

    The token list is read from ``meta.key['tokenizer.ggml.tokens']`` (second
    element of the stored tuple); a missing key is treated as an empty list.

    Args:
        meta: metadata state exposing a ``key`` mapping.
        name: substring to look for.
        limit: maximum number of matches to collect. The default of 7 is the
            number the previous ``any()``-based implementation produced.

    Returns:
        Dict mapping token index to token text, in vocabulary order.
    """
    needle = name.lower()
    tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]

    found = {}
    if limit <= 0:
        return found

    for index, token in enumerate(tokens):
        if needle not in token.lower():
            continue

        found[index] = token
        # Stop early; vocabularies are large and only a few suggestions are shown
        if len(found) >= limit:
            break

    return found
|
620 |
+
|
621 |
+
|
622 |
+
@gr.on(
    triggers = [
        meta_token_select.key_up,
    ],
    inputs = [
        meta_state,
    ],
    outputs = [
        meta_token_select,
        token_select_indices,
    ],
    show_progress = 'hidden',
    trigger_mode = 'always_last',
)
def token_select(
    meta: MetadataState,
    keyup: gr.KeyUpData,
):
    """Refresh the token selector's choices from the text typed so far.

    The matching token texts become the dropdown choices, and their token
    indices are stored in the same order in ``token_select_indices`` so a
    later choice position can be mapped back to a token index.
    """
    matches = token_search(meta, keyup.input_value)
    labels = list(matches.values())
    token_ids = list(matches.keys())

    updates = {}
    updates[meta_token_select] = gr.Dropdown(
        labels,
    )
    updates[token_select_indices] = token_ids

    return updates
|
648 |
+
|
649 |
+
|
650 |
+
@gr.on(
|
651 |
+
triggers = [
|
652 |
+
meta_token_select.input,
|
653 |
+
],
|
654 |
+
inputs = [
|
655 |
+
meta_state,
|
656 |
+
meta_keys,
|
657 |
+
meta_token_select,
|
658 |
+
token_select_indices,
|
659 |
+
],
|
660 |
+
outputs = [
|
661 |
+
meta_token_type,
|
662 |
+
meta_number,
|
663 |
+
],
|
664 |
+
)
|
665 |
+
def token_selected(
|
666 |
+
meta: MetadataState,
|
667 |
+
key: str,
|
668 |
+
choice: int,
|
669 |
+
indices: list[int],
|
670 |
+
):
|
671 |
+
if choice < 0 or choice >= len(indices) or (token := indices[choice]) < 0:
|
672 |
+
raise gr.Error('Token not found')
|
673 |
+
|
674 |
+
tokens = meta.key.get('tokenizer.ggml.tokens', (-1, []))[1]
|
675 |
+
|
676 |
+
if token >= len(tokens):
|
677 |
+
raise gr.Error('Invalid token')
|
678 |
+
|
679 |
+
data = meta.key.get(key, (-1, []))[1]
|
680 |
+
|
681 |
+
match key:
|
682 |
+
case 'tokenizer.ggml.scores':
|
683 |
+
return {
|
684 |
+
meta_number: gr.Number(
|
685 |
+
value = data[token] if data and len(data) > token else 0.0,
|
686 |
+
interactive = True,
|
687 |
+
),
|
688 |
+
}
|
689 |
+
case 'tokenizer.ggml.token_type':
|
690 |
+
return {
|
691 |
+
meta_token_type: gr.Dropdown(
|
692 |
+
value = TokenType(data[token]).name if data and len(data) > token else TokenType.NORMAL.name,
|
693 |
+
interactive = True,
|
694 |
+
),
|
695 |
+
}
|
696 |
+
case _:
|
697 |
+
raise gr.Error('Invalid metadata key')
|
698 |
+
|
699 |
+
|
700 |
@gr.on(
|
701 |
triggers = [
|
702 |
meta_lookup.key_up,
|
|
|
706 |
],
|
707 |
outputs = [
|
708 |
meta_lookup,
|
709 |
+
token_select_indices,
|
710 |
],
|
711 |
show_progress = 'hidden',
|
712 |
trigger_mode = 'always_last',
|
|
|
715 |
meta: MetadataState,
|
716 |
keyup: gr.KeyUpData,
|
717 |
):
|
718 |
+
found = token_search(meta, keyup.input_value)
|
|
|
|
|
|
|
|
|
719 |
|
720 |
return {
|
721 |
meta_lookup: gr.Dropdown(
|
722 |
+
list(found.values()),
|
723 |
),
|
724 |
+
token_select_indices: list(found.keys()),
|
725 |
}
|
726 |
|
727 |
|
|
|
731 |
typ: int | None,
|
732 |
val: Any,
|
733 |
request: gr.Request,
|
734 |
+
choice: int | None = None,
|
735 |
+
indices: list[int] | None = None,
|
736 |
):
|
737 |
if not key or typ is None:
|
738 |
if key:
|
|
|
746 |
if key in meta.rem:
|
747 |
meta.rem.remove(key)
|
748 |
|
749 |
+
match key:
|
750 |
+
case 'tokenizer.ggml.scores' | 'tokenizer.ggml.token_type':
|
751 |
+
if choice >= 0 and choice < len(indices) and (token := indices[choice]) >= 0:
|
752 |
+
tok = meta.add.setdefault(key, (typ, {}))[1]
|
753 |
+
tok[str(token)] = val + 1 if key == 'tokenizer.ggml.token_type' else val
|
754 |
+
|
755 |
+
data = meta.key.setdefault(key, (typ, [0.0 if key == 'tokenizer.ggml.scores' else int(TokenType.NORMAL)] * len(meta.key.get('tokenizer.ggml.tokens', (-1, []))[1])))[1]
|
756 |
+
if data:
|
757 |
+
for k, v in tok.items():
|
758 |
+
data[int(k)] = v
|
759 |
+
case _:
|
760 |
+
meta.key[key] = meta.add[key] = (typ, val)
|
761 |
|
762 |
if key.startswith('tokenizer.chat_template.'):
|
763 |
template = key[24:]
|
|
|
771 |
)
|
772 |
|
773 |
|
774 |
+
def token_select_to_id(
    choice: int | None,
    indices: list[int],
):
    """Translate a dropdown choice position into its token index.

    Args:
        choice: position of the chosen entry within the dropdown's choices,
            or None when the dropdown has no matching choice.
        indices: token index for each dropdown choice, in the same order.

    Returns:
        A ``meta_number`` update carrying the resolved token index.

    Raises:
        gr.Error: ``choice`` is None, out of range, or maps to a negative index.
    """
    # NOTE(review): a Dropdown with type = 'index' is expected to deliver None
    # for custom text that is not one of its choices; guard before comparing.
    if choice is None or choice < 0 or choice >= len(indices) or (token := indices[choice]) < 0:
        raise gr.Error('Token not found')

    return {
        meta_number: gr.Number(
            token,
        ),
    }
|
786 |
|
787 |
|
788 |
meta_lookup.input(
|
789 |
+
token_select_to_id,
|
790 |
inputs = [
|
|
|
791 |
meta_lookup,
|
792 |
+
token_select_indices,
|
793 |
],
|
794 |
outputs = [
|
795 |
meta_number,
|
|
|
818 |
] + state_change_components,
|
819 |
)
|
820 |
|
821 |
+
meta_token_type.input(
|
822 |
+
add_metadata,
|
823 |
+
inputs = [
|
824 |
+
meta_state,
|
825 |
+
meta_keys,
|
826 |
+
meta_types,
|
827 |
+
meta_token_type,
|
828 |
+
meta_token_select,
|
829 |
+
token_select_indices,
|
830 |
+
],
|
831 |
+
outputs = [
|
832 |
+
] + state_change_components,
|
833 |
+
)
|
834 |
+
|
835 |
meta_number.submit(
|
836 |
add_metadata,
|
837 |
inputs = [
|
|
|
839 |
meta_keys,
|
840 |
meta_types,
|
841 |
meta_number,
|
842 |
+
meta_token_select,
|
843 |
+
token_select_indices,
|
844 |
],
|
845 |
outputs = [
|
846 |
] + state_change_components,
|
|
|
902 |
for k in rem_meta:
|
903 |
gguf.remove_metadata(k)
|
904 |
|
905 |
+
tokens = gguf.metadata.get('tokenizer.ggml.tokens')
|
906 |
for k in add_meta:
|
907 |
k = json.loads(k)
|
908 |
if isinstance(k, list) and len(k) == 3:
|
909 |
+
if isinstance(k[2], dict):
|
910 |
+
if tokens:
|
911 |
+
if (data := gguf.metadata.get(k[0])):
|
912 |
+
data = data.value
|
913 |
+
else:
|
914 |
+
data = [0.0 if k[0] == 'tokenizer.ggml.scores' else int(TokenType.NORMAL)] * len(tokens.value)
|
915 |
+
|
916 |
+
for i, v in k[2].items():
|
917 |
+
data[int(i)] = v
|
918 |
+
|
919 |
+
k[2] = data
|
920 |
+
else:
|
921 |
+
k[2] = []
|
922 |
+
|
923 |
gguf.add_metadata(*k)
|
924 |
|
925 |
yield gguf.filesize
|