Spaces:
Runtime error
Runtime error
File size: 1,049 Bytes
ee21b96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# This file defines example configuration arguments for quantizing
# a transformer model with product quantization
# Number of Centroids for Product Quantization, by default 256 (byte-aligned)
n_centroids:
Linear:
key: in_features
value: {"*": 256}
Embedding:
key: embedding_dim
value: {"*": 256}
# Block Sizes for Product Quantization
# We suggest: 8 for FFN, 4 for ATTN, 4 for embedding projections, 8 for embeddings
block_sizes:
Linear:
key: fuzzy_name
value: {fc: 8, attn: 4, emb: 4}
Embedding:
key: fuzzy_name
value: {emb: 8}
# Layers to Quantize Sequentially
# We suggest: first FFN, then EMB, then ATTN
layers_to_quantize:
- decoder\\.layers\\.\d+\\.fc[12]
- decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01]
- decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj)
|