GPTQ model commit

Browse files

Files changed (8) hide show

config.json +64 -0
generation_config.json +6 -0
handler.py +200 -0
model.safetensors +3 -0
quantize_config.json +16 -0
special_tokens_map.json +16 -0
tokenizer.json +0 -0
tokenizer_config.json +12 -0

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+    "_name_or_path": "/workspace/process/openllm-france_claire-7b-0.1/source",
+    "alibi": false,
+    "apply_residual_connection_post_layernorm": false,
+    "architectures": [
+        "FalconForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "auto_map": {
+        "AutoConfig": "configuration_falcon.FalconConfig",
+        "AutoModel": "modeling_falcon.FalconModel",
+        "AutoModelForCausalLM": "modeling_falcon.FalconForCausalLM",
+        "AutoModelForQuestionAnswering": "modeling_falcon.FalconForQuestionAnswering",
+        "AutoModelForSequenceClassification": "modeling_falcon.FalconForSequenceClassification",
+        "AutoModelForTokenClassification": "modeling_falcon.FalconForTokenClassification"
+    },
+    "bias": false,
+    "bos_token_id": 11,
+    "eos_token_id": 11,
+    "hidden_dropout": 0.0,
+    "hidden_size": 4544,
+    "initializer_range": 0.02,
+    "layer_norm_epsilon": 1e-05,
+    "max_position_embeddings": 2048,
+    "model_type": "falcon",
+    "multi_query": true,
+    "new_decoder_architecture": false,
+    "num_attention_heads": 71,
+    "num_hidden_layers": 32,
+    "num_kv_heads": 71,
+    "pad_token_id": 0,
+    "parallel_attn": true,
+    "pretraining_tp": 1,
+    "quantization_config": {
+        "batch_size": 1,
+        "bits": 4,
+        "block_name_to_quantize": "transformer.h",
+        "cache_block_outputs": true,
+        "damp_percent": 0.1,
+        "desc_act": true,
+        "exllama_config": {
+            "version": 1
+        },
+        "group_size": 128,
+        "max_input_length": null,
+        "model_seqlen": 2048,
+        "module_name_preceding_first_block": [
+            "transformer.word_embeddings"
+        ],
+        "pad_token_id": null,
+        "quant_method": "gptq",
+        "sym": true,
+        "tokenizer": null,
+        "true_sequential": true,
+        "use_cuda_fp16": false,
+        "use_exllama": false
+    },
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.35.0",
+    "use_cache": true,
+    "vocab_size": 65024
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 11,
+  "eos_token_id": 11,
+  "transformers_version": "4.34.0"
+}

handler.py ADDED Viewed

	@@ -0,0 +1,200 @@

+import re
+from typing import Any, Dict, List
+import torch
+import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
+class EndpointHandler:
+    def __init__(self, path):
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(
+            path, device_map="auto", torch_dtype=torch.bfloat16, load_in_4bit=True
+        )
+        self.pipeline = transformers.pipeline(
+            "text-generation", model=model, tokenizer=tokenizer
+        )
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
+        # process input
+        inputs = data.pop("inputs", data)
+        # default parameters
+        parameters = {
+            "max_new_tokens": 128,
+            "do_sample": True,
+            "top_k": 10,
+            "temperature": 1.0,
+            "return_full_text": False,
+        }
+        # user parameters
+        parameters.update(data.pop("parameters", {}))
+        unique = isinstance(inputs, str)
+        inputs, denormalize_funcs = claire_text_preproc(inputs)
+        sequences = self.pipeline(inputs, **parameters)
+        if unique:
+            return [{"generated_text": denormalize_funcs(sequences[0]["generated_text"])}]
+        else:
+            assert len(denormalize_funcs) == len(sequences)
+            return [{"generated_text": denormalize_func(seq[0]["generated_text"])} for denormalize_func, seq in zip(denormalize_funcs, sequences)]
+def claire_text_preproc(text):
+    if isinstance(text, (list, tuple)):
+        assert len(text)
+        # Apply and transpose
+        texts, denormalize_funcs = zip(*[claire_text_preproc(t) for t in text])
+        return list(texts), list(denormalize_funcs)
+    if not isinstance(text, str):
+        return text
+    text = format_special_characters(text)
+    # text = remove_ligatures(text)
+    text = re.sub(" - | -$|^- ", " ", text.strip(" "))
+    global _reverse_tag_transfo
+    _reverse_tag_transfo = {}
+    text = format_special_tags(text)
+    text = collapse_whitespaces(text)
+    if _reverse_tag_transfo:
+        reverse_tag_transfo = _reverse_tag_transfo.copy()
+        def denormalize_func(t):
+            for k, v in reverse_tag_transfo.items():
+                if k in t:
+                    t = t.replace(k, v)
+            return t
+        return text, lambda x: denormalize_func(x)
+    else:
+        return text, lambda x: x
+# Any "[...]" span; group 1 is the text between the brackets.
+_brackets = re.compile(r"\[([^\]]*)\]")
+# One or more characters other than "]" followed by a colon (used with re.match,
+# so it only has to match at the start of the bracket content).
+_pattern_speaker = re.compile(r"[^\]]+:")
+# Control characters \x00-\x1F and \x7F-\x9F, except tab (\x09), LF (\x0A) and CR (\x0D).
+_non_printable_pattern = r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]"
+# Global variable to remember some normalizations that were done and apply it back
+_reverse_tag_transfo = {}
+# Original spelling of the first anonymized speaker prefix met by _format_tag
+# (None until one is found).
+_anonymized_prefix = None
+def collapse_whitespaces(text):
+    text = re.sub(r" +", " ", text)
+    text = re.sub(r"\n+", "\n", text)
+    text = re.sub(r" ([\.,])", r"\1", text)
+    return text.lstrip().rstrip(" ")
+def format_special_tags(text):
+    """Rewrite every ``[...]`` span of ``text`` through ``_format_special_tags``.
+
+    Side effects: resets the module-level ``_anonymized_prefix`` and adds
+    entries to the module-level ``_reverse_tag_transfo`` (it does not clear
+    that dict itself).
+    """
+    global _reverse_tag_transfo, _anonymized_prefix
+    # Forget the prefix found for a previous text; _format_tag sets it again
+    # when it meets an anonymized speaker tag.
+    _anonymized_prefix = None
+    text = re.sub(_brackets, _format_special_tags, text)
+    # At last the generic anonymization
+    if _anonymized_prefix:
+        _reverse_tag_transfo["[Intervenant "] = _anonymized_prefix
+    return text
+def _format_special_tags(match):
+    content_within_brackets = match.group(1)
+    if re.match(_pattern_speaker, content_within_brackets):
+        return _format_tag(match.group())
+    else:
+        return ""
+def _format_tag(text):
+    global _reverse_tag_transfo, _anonymized_prefix
+    if text.endswith(":]"):
+        anonymized_spk_prefixes = ["speaker", "spk", "locuteur"]
+        # Conversion "[speaker001:]" -> "[Intervenant 1:]"
+        for prefix in anonymized_spk_prefixes:
+            if text.lower().startswith("["+prefix):
+                try:
+                    index = int(text[len(prefix)+1:-2])
+                except ValueError:
+                    return text
+                new_spk_tag = f"[Intervenant {index}:]"
+                _reverse_tag_transfo[new_spk_tag] = text
+                if _anonymized_prefix is None:
+                    prefix = "["+prefix
+                    while len(prefix) < len(text) and text[len(prefix)] in " 0":
+                        prefix += text[len(prefix)]
+                    _anonymized_prefix = prefix
+                return "\n" + new_spk_tag
+        # Capitalize speaker name
+        speaker = text[1:-2]
+        speaker = capitalize(speaker)
+        new_spk_tag = f"[{speaker}:]"
+        if text != new_spk_tag:
+            _reverse_tag_transfo[new_spk_tag] = text
+        return "\n" + new_spk_tag
+    if text == "[PII]":
+        return "[Nom]"
+    if text == "[NOISE]":
+        return "[bruit]"
+    if text == "[LAUGHTER]":
+        return "[rire]"
+def capitalize(text):
+    # Custom capitalization for first and last names
+    words = text.split(" ")
+    words = [w.capitalize() if (not w.isupper() or len(w) > 2) else w for w in words]
+    for i, w in enumerate(words):
+        for sep in "-", "'":
+            if sep in w:
+                words[i] = sep.join(
+                    [x.capitalize() if not x.isupper() else x for x in w.split(sep)]
+                )
+    return " ".join(words)
+def format_special_characters(text):
+    """Apply a fixed, ordered list of ``re.sub`` substitutions to ``text``.
+
+    Every ``before`` entry of the table below is used as a regular expression.
+    The table covers accented letters, quotes and apostrophes, dashes,
+    unusual whitespace, the characters matched by ``_non_printable_pattern``
+    (removed) and a few superscript letters. Returns the rewritten string.
+
+    NOTE(review): several table entries look identical on screen; they
+    presumably differ in Unicode composition or code point -- do not retype
+    them, and confirm against the raw file.
+    """
+    for before, after in [
+        ("â", "â"),
+        ("à", "à"),
+        ("á", "á"),
+        ("ê", "ê"),
+        ("é", "é"),
+        ("è", "è"),
+        ("ô", "ô"),
+        ("û", "û"),
+        ("î", "î"),
+        ("\x92", "'"),
+        ("…", "..."),
+        (r"[«“][^\S\r\n]*", '"'),
+        (r"[^\S\r\n]*[»”″„]", '"'),
+        (r"(``|'')", '"'),
+        (r"[’‘‛ʿ]", "'"),
+        ("‚", ","),
+        (r"–", "-"),
+        ("[  ]", " "),  # weird whitespace
+        (_non_printable_pattern, ""),  # non-printable characters
+        ("·", "."),
+        (r"ᵉʳ", "er"),
+        (r"ᵉ", "e"),
+    ]:
+        text = re.sub(before, after, text)
+    return text
+def remove_ligatures(text):
+    text = re.sub(r"œ", "oe", text)
+    text = re.sub(r"æ", "ae", text)
+    text = re.sub(r"ﬁ", "fi", text)
+    text = re.sub(r"ﬂ", "fl", text)
+    text = re.sub("ĳ", "ij", text)
+    text = re.sub(r"Œ", "Oe", text)
+    text = re.sub(r"Æ", "Ae", text)
+    return text

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34de5983730ed8dd74bf9eb49757c6a12d96ba168216e6caac9165f3a4f18580
+size 4041221888

quantize_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "bits": [
+        4
+    ],
+    "group_size": [
+        128
+    ],
+    "damp_percent": [
+        0.1
+    ],
+    "desc_act": [
+        true
+    ],
+    "sym": true,
+    "true_sequential": true
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<"
+  ],
+  "eos_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "add_prefix_space": false,
+  "eos_token": "<|endoftext|>",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 2048,
+  "name_or_path": "tiiuae/falcon_tokenizer",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}