{ "api_key": null, "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": { "pruners": "None", "factorizers": "None", "quantizers": "['llm-int8']", "compilers": "None", "task": "text_text_generation", "device": "cuda", "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsbk0sasu4", "batch_size": 1, "model_name": "JackFram/llama-68m", "pruning_ratio": 0.0, "n_quantization_bits": 8, "output_deviation": 0.005, "max_batch_size": 1, "qtype_weight": "torch.qint8", "qtype_activation": "torch.quint8", "qobserver": "", "qscheme": "torch.per_tensor_symmetric", "qconfig": "x86", "group_size": 128, "damp_percent": 0.1, "save_load_fn": "bitsandbytes" } }