GPTQ model commit
Browse files- config.json +64 -0
- generation_config.json +6 -0
- handler.py +200 -0
- model.safetensors +3 -0
- quantize_config.json +16 -0
- special_tokens_map.json +16 -0
- tokenizer.json +0 -0
- tokenizer_config.json +12 -0
config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/workspace/process/openllm-france_claire-7b-0.1/source",
|
3 |
+
"alibi": false,
|
4 |
+
"apply_residual_connection_post_layernorm": false,
|
5 |
+
"architectures": [
|
6 |
+
"FalconForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"auto_map": {
|
10 |
+
"AutoConfig": "configuration_falcon.FalconConfig",
|
11 |
+
"AutoModel": "modeling_falcon.FalconModel",
|
12 |
+
"AutoModelForCausalLM": "modeling_falcon.FalconForCausalLM",
|
13 |
+
"AutoModelForQuestionAnswering": "modeling_falcon.FalconForQuestionAnswering",
|
14 |
+
"AutoModelForSequenceClassification": "modeling_falcon.FalconForSequenceClassification",
|
15 |
+
"AutoModelForTokenClassification": "modeling_falcon.FalconForTokenClassification"
|
16 |
+
},
|
17 |
+
"bias": false,
|
18 |
+
"bos_token_id": 11,
|
19 |
+
"eos_token_id": 11,
|
20 |
+
"hidden_dropout": 0.0,
|
21 |
+
"hidden_size": 4544,
|
22 |
+
"initializer_range": 0.02,
|
23 |
+
"layer_norm_epsilon": 1e-05,
|
24 |
+
"max_position_embeddings": 2048,
|
25 |
+
"model_type": "falcon",
|
26 |
+
"multi_query": true,
|
27 |
+
"new_decoder_architecture": false,
|
28 |
+
"num_attention_heads": 71,
|
29 |
+
"num_hidden_layers": 32,
|
30 |
+
"num_kv_heads": 71,
|
31 |
+
"pad_token_id": 0,
|
32 |
+
"parallel_attn": true,
|
33 |
+
"pretraining_tp": 1,
|
34 |
+
"quantization_config": {
|
35 |
+
"batch_size": 1,
|
36 |
+
"bits": 4,
|
37 |
+
"block_name_to_quantize": "transformer.h",
|
38 |
+
"cache_block_outputs": true,
|
39 |
+
"damp_percent": 0.1,
|
40 |
+
"desc_act": true,
|
41 |
+
"exllama_config": {
|
42 |
+
"version": 1
|
43 |
+
},
|
44 |
+
"group_size": 128,
|
45 |
+
"max_input_length": null,
|
46 |
+
"model_seqlen": 2048,
|
47 |
+
"module_name_preceding_first_block": [
|
48 |
+
"transformer.word_embeddings"
|
49 |
+
],
|
50 |
+
"pad_token_id": null,
|
51 |
+
"quant_method": "gptq",
|
52 |
+
"sym": true,
|
53 |
+
"tokenizer": null,
|
54 |
+
"true_sequential": true,
|
55 |
+
"use_cuda_fp16": false,
|
56 |
+
"use_exllama": false
|
57 |
+
},
|
58 |
+
"rope_scaling": null,
|
59 |
+
"rope_theta": 10000.0,
|
60 |
+
"torch_dtype": "bfloat16",
|
61 |
+
"transformers_version": "4.35.0",
|
62 |
+
"use_cache": true,
|
63 |
+
"vocab_size": 65024
|
64 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 11,
|
4 |
+
"eos_token_id": 11,
|
5 |
+
"transformers_version": "4.34.0"
|
6 |
+
}
|
handler.py
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch, transformers
|
2 |
+
from typing import Any, Dict
|
3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
class EndpointHandler:
    """Inference endpoint wrapper around a ``text-generation`` pipeline.

    The tokenizer and model are loaded once from ``path``. Each call
    normalizes the prompt(s) with ``claire_text_preproc``, runs the pipeline
    and maps rewritten speaker tags in the output back to the caller's
    original spelling.
    """

    def __init__(self, path):
        """Load the tokenizer and model found at ``path`` and build the pipeline."""
        tokenizer = AutoTokenizer.from_pretrained(path)
        # NOTE(review): the config.json committed alongside this handler
        # declares a GPTQ ``quantization_config``; ``load_in_4bit=True``
        # presumably requests an additional bitsandbytes quantization --
        # confirm that this combination loads as intended.
        model = AutoModelForCausalLM.from_pretrained(
            path, device_map="auto", torch_dtype=torch.bfloat16, load_in_4bit=True
        )
        self.pipeline = transformers.pipeline(
            "text-generation", model=model, tokenizer=tokenizer
        )

    def __call__(self, data: Dict[str, Any]) -> list:
        """Generate a continuation for the prompt(s) carried by ``data``.

        Args:
            data: request payload. ``data["inputs"]`` is a prompt string or a
                list/tuple of prompt strings (the payload itself is used when
                the key is absent). ``data["parameters"]``, when present,
                overrides the default generation parameters. Both keys are
                popped from ``data``.

        Returns:
            A list holding one ``{"generated_text": ...}`` dict per prompt.

        Raises:
            TypeError: if the inputs are neither a string nor a list/tuple.
        """
        # process input
        inputs = data.pop("inputs", data)

        # default parameters
        parameters = {
            "max_new_tokens": 128,
            "do_sample": True,
            "top_k": 10,
            "temperature": 1.0,
            "return_full_text": False,
        }

        # user parameters (an explicit None is treated like "no overrides")
        parameters.update(data.pop("parameters", None) or {})

        # Reject unsupported payloads up front: unpacking the preprocessing
        # result of e.g. a two-key dict would otherwise silently succeed and
        # feed one of its keys to the model as the prompt.
        if not isinstance(inputs, (str, list, tuple)):
            raise TypeError(
                "inputs must be a string or a list of strings, got "
                + type(inputs).__name__
            )

        unique = isinstance(inputs, str)
        inputs, denormalize_funcs = claire_text_preproc(inputs)

        sequences = self.pipeline(inputs, **parameters)

        if unique:
            # Single prompt: ``denormalize_funcs`` is one callable.
            return [{"generated_text": denormalize_funcs(sequences[0]["generated_text"])}]

        # Batch: one callable and one result list per prompt.
        assert len(denormalize_funcs) == len(sequences)
        return [
            {"generated_text": denormalize_func(seq[0]["generated_text"])}
            for denormalize_func, seq in zip(denormalize_funcs, sequences)
        ]
|
43 |
+
|
44 |
+
|
45 |
+
def claire_text_preproc(text):
    """Normalize prompt text and build the matching denormalizer.

    Args:
        text: a string, or a non-empty list/tuple of strings.

    Returns:
        A ``(normalized, denormalize)`` pair. For a string, ``normalized`` is
        the cleaned-up string and ``denormalize`` is a callable that replaces
        rewritten speaker tags in a generated string by their original
        spelling. For a list/tuple, both members are lists with one entry per
        element. Any other value is passed through unchanged, paired with an
        identity callable, so that the return shape is always a pair.
    """
    global _reverse_tag_transfo

    if isinstance(text, (list, tuple)):
        assert len(text)
        # Apply to every element, then transpose the (text, func) pairs
        texts, denormalize_funcs = zip(*[claire_text_preproc(t) for t in text])
        return list(texts), list(denormalize_funcs)

    if not isinstance(text, str):
        # Keep the (value, callable) contract: callers unpack the result.
        return text, lambda x: x

    text = format_special_characters(text)

    # text = remove_ligatures(text)

    # Drop dashes that stand alone between spaces or at either end.
    text = re.sub(" - | -$|^- ", " ", text.strip(" "))

    # format_special_tags records its tag rewrites in this module-level dict,
    # so start from an empty one for each text.
    _reverse_tag_transfo = {}
    text = format_special_tags(text)

    text = collapse_whitespaces(text)

    if not _reverse_tag_transfo:
        return text, lambda x: x

    # Snapshot the mapping: the global is reset by the next call.
    reverse_tag_transfo = _reverse_tag_transfo.copy()

    def denormalize_func(t):
        for k, v in reverse_tag_transfo.items():
            t = t.replace(k, v)
        return t

    return text, denormalize_func
|
79 |
+
|
80 |
+
|
81 |
+
# Any "[...]" tag; group 1 is the text between the brackets.
_brackets = re.compile(r"\[([^\]]*)\]")
# Bracket content that begins like a speaker label: some text followed by ":".
_pattern_speaker = re.compile(r"[^\]]+:")
# Control characters; tab, line feed and carriage return are not in the class.
_non_printable_pattern = r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]"

# Module-level scratch state: remembers which tag normalizations were applied
# so that they can be undone on the generated text.
_reverse_tag_transfo = dict()
_anonymized_prefix = None
|
88 |
+
|
89 |
+
def collapse_whitespaces(text):
    """Squeeze redundant whitespace out of ``text``.

    Runs of spaces and runs of newlines are each reduced to a single
    character, a space directly before "." or "," is removed, then all
    leading whitespace and trailing spaces are stripped.
    """
    substitutions = (
        (r" +", " "),
        (r"\n+", "\n"),
        (r" ([\.,])", r"\1"),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    without_leading = text.lstrip()
    # Only spaces are trimmed on the right; a trailing newline is kept.
    return without_leading.rstrip(" ")
|
94 |
+
|
95 |
+
|
96 |
+
def format_special_tags(text):
    """Rewrite every bracketed tag of ``text`` through ``_format_special_tags``.

    Side effect: when an anonymized speaker prefix was recorded during the
    substitution, a generic "[Intervenant " entry pointing to that prefix is
    added to the module-level ``_reverse_tag_transfo`` mapping.
    """
    global _reverse_tag_transfo, _anonymized_prefix
    # Start every run without a remembered prefix.
    _anonymized_prefix = None
    rewritten = _brackets.sub(_format_special_tags, text)
    # At last the generic anonymization
    if _anonymized_prefix:
        _reverse_tag_transfo["[Intervenant "] = _anonymized_prefix
    return rewritten
|
104 |
+
|
105 |
+
|
106 |
+
def _format_special_tags(match):
    """Substitution callback for one bracketed tag.

    Tags whose content starts like a speaker label are handed to
    ``_format_tag``; every other tag is replaced by the empty string.
    """
    inner = match.group(1)
    looks_like_speaker = _pattern_speaker.match(inner) is not None
    if not looks_like_speaker:
        return ""
    return _format_tag(match.group())
|
112 |
+
|
113 |
+
def _format_tag(text):
    """Normalize one bracketed tag, brackets included.

    Speaker tags (ending with ":]"):
      * anonymized ones such as "[speaker001:]", "[spk 2:]" or "[locuteur3:]"
        become "[Intervenant N:]"; when the part after the prefix is not an
        integer the tag is returned unchanged;
      * any other speaker name is capitalized with ``capitalize``.
      Rewritten speaker tags are returned with a leading newline and the
      rewrite is recorded in the module-level ``_reverse_tag_transfo``.

    Other tags: "[PII]", "[NOISE]" and "[LAUGHTER]" are translated; anything
    else yields None.

    The first anonymized tag seen also sets ``_anonymized_prefix`` to the
    leading part of the tag up to the significant digits (prefix plus any
    spaces and zero padding). It is sliced from the original tag so that the
    caller's casing is kept when tags are mapped back.
    """
    global _reverse_tag_transfo, _anonymized_prefix
    if text.endswith(":]"):
        anonymized_spk_prefixes = ("speaker", "spk", "locuteur")
        lowered = text.lower()
        # Conversion "[speaker001:]" -> "[Intervenant 1:]"
        for prefix in anonymized_spk_prefixes:
            if not lowered.startswith("[" + prefix):
                continue
            start = len(prefix) + 1  # skip "[" and the prefix
            try:
                index = int(text[start:-2])
            except ValueError:
                return text
            new_spk_tag = f"[Intervenant {index}:]"
            _reverse_tag_transfo[new_spk_tag] = text
            if _anonymized_prefix is None:
                end = start
                while end < len(text) and text[end] in " 0":
                    end += 1
                _anonymized_prefix = text[:end]
            return "\n" + new_spk_tag

        # Capitalize speaker name
        speaker = capitalize(text[1:-2])
        new_spk_tag = f"[{speaker}:]"
        if text != new_spk_tag:
            _reverse_tag_transfo[new_spk_tag] = text
        return "\n" + new_spk_tag

    if text == "[PII]":
        return "[Nom]"
    if text == "[NOISE]":
        return "[bruit]"
    if text == "[LAUGHTER]":
        return "[rire]"
    return None
|
147 |
+
|
148 |
+
|
149 |
+
def capitalize(text):
    """Capitalize a first/last name word by word.

    Words are separated by single spaces. An all-uppercase word of at most
    two characters is kept as is; every other word is capitalized. Inside a
    word, each part delimited by "-" or "'" is capitalized too, unless that
    part is already all uppercase.
    """
    # Custom capitalization for first and last names
    words = text.split(" ")
    words = [w.capitalize() if (not w.isupper() or len(w) > 2) else w for w in words]
    for i, w in enumerate(words):
        if "-" in w or "'" in w:
            # Split on both separators at once (keeping them) so that a word
            # mixing "-" and "'" gets every part capitalized.
            parts = re.split(r"([-'])", w)
            words[i] = "".join(p if p.isupper() else p.capitalize() for p in parts)
    return " ".join(words)
|
160 |
+
|
161 |
+
|
162 |
+
def format_special_characters(text):
|
163 |
+
for before, after in [
|
164 |
+
("â", "â"),
|
165 |
+
("à", "à"),
|
166 |
+
("á", "á"),
|
167 |
+
("ê", "ê"),
|
168 |
+
("é", "é"),
|
169 |
+
("è", "è"),
|
170 |
+
("ô", "ô"),
|
171 |
+
("û", "û"),
|
172 |
+
("î", "î"),
|
173 |
+
("\x92", "'"),
|
174 |
+
("…", "..."),
|
175 |
+
(r"[«“][^\S\r\n]*", '"'),
|
176 |
+
(r"[^\S\r\n]*[»”″„]", '"'),
|
177 |
+
(r"(``|'')", '"'),
|
178 |
+
(r"[’‘‛ʿ]", "'"),
|
179 |
+
("‚", ","),
|
180 |
+
(r"–", "-"),
|
181 |
+
("[ ]", " "), # weird whitespace
|
182 |
+
(_non_printable_pattern, ""), # non-printable characters
|
183 |
+
("·", "."),
|
184 |
+
(r"ᵉʳ", "er"),
|
185 |
+
(r"ᵉ", "e"),
|
186 |
+
]:
|
187 |
+
text = re.sub(before, after, text)
|
188 |
+
|
189 |
+
return text
|
190 |
+
|
191 |
+
|
192 |
+
def remove_ligatures(text):
    """Replace typographic ligatures in ``text`` by their plain letters.

    The ligature characters are written as Unicode escapes so that the
    mapping survives editors or tools that normalize the source file.
    """
    ligatures = {
        "\u0153": "oe",  # LATIN SMALL LIGATURE OE
        "\u00e6": "ae",  # LATIN SMALL LETTER AE
        "\ufb01": "fi",  # LATIN SMALL LIGATURE FI
        "\ufb02": "fl",  # LATIN SMALL LIGATURE FL
        "\u0133": "ij",  # LATIN SMALL LIGATURE IJ
        "\u0152": "Oe",  # LATIN CAPITAL LIGATURE OE
        "\u00c6": "Ae",  # LATIN CAPITAL LETTER AE
    }
    # Single pass over the text; no replacement introduces another ligature.
    return text.translate(str.maketrans(ligatures))
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34de5983730ed8dd74bf9eb49757c6a12d96ba168216e6caac9165f3a4f18580
|
3 |
+
size 4041221888
|
quantize_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": [
|
3 |
+
4
|
4 |
+
],
|
5 |
+
"group_size": [
|
6 |
+
128
|
7 |
+
],
|
8 |
+
"damp_percent": [
|
9 |
+
0.1
|
10 |
+
],
|
11 |
+
"desc_act": [
|
12 |
+
true
|
13 |
+
],
|
14 |
+
"sym": true,
|
15 |
+
"true_sequential": true
|
16 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
">>TITLE<<",
|
4 |
+
">>ABSTRACT<<",
|
5 |
+
">>INTRODUCTION<<",
|
6 |
+
">>SUMMARY<<",
|
7 |
+
">>COMMENT<<",
|
8 |
+
">>ANSWER<<",
|
9 |
+
">>QUESTION<<",
|
10 |
+
">>DOMAIN<<",
|
11 |
+
">>PREFIX<<",
|
12 |
+
">>SUFFIX<<",
|
13 |
+
">>MIDDLE<<"
|
14 |
+
],
|
15 |
+
"eos_token": "<|endoftext|>"
|
16 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"model_input_names": [
|
5 |
+
"input_ids",
|
6 |
+
"attention_mask"
|
7 |
+
],
|
8 |
+
"model_max_length": 2048,
|
9 |
+
"name_or_path": "tiiuae/falcon_tokenizer",
|
10 |
+
"special_tokens_map_file": null,
|
11 |
+
"tokenizer_class": "PreTrainedTokenizerFast"
|
12 |
+
}
|