{
  "api_key": null,
  "verify_url": "http://johnrachwan.pythonanywhere.com",
  "smash_config": {
    "pruners": "None",
    "factorizers": "None",
    "quantizers": "['llm-int8']",
    "compilers": "None",
    "task": "text_text_generation",
    "device": "cuda",
    "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelscam53e_h",
    "batch_size": 1,
    "model_name": "NousResearch/Nous-Hermes-llama-2-7b",
    "pruning_ratio": 0.0,
    "n_quantization_bits": 4,
    "output_deviation": 0.005,
    "max_batch_size": 1,
    "qtype_weight": "torch.qint8",
    "qtype_activation": "torch.quint8",
    "qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
    "qscheme": "torch.per_tensor_symmetric",
    "qconfig": "x86",
    "group_size": 128,
    "damp_percent": 0.1,
    "save_load_fn": "bitsandbytes"
  }
}