File size: 4,604 Bytes
803ccbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# coding=utf-8
# Copyright 2022 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper routines for quantization."""

from typing import Any

import chex
import jax.numpy as jnp
from flax import struct


# pylint:disable=no-value-for-parameter
@struct.dataclass
class QuantizedValue:
    """State associated with quantized value."""

    quantized: chex.Array
    diagonal: chex.Array  # Diagonal (if extract_diagonal is set)
    bucket_size: chex.Array
    quantized_dtype: jnp.dtype = struct.field(
        pytree_node=False
    )  # Dtype for the quantized value.
    extract_diagonal: bool = struct.field(pytree_node=False)  # In case its centered.
    shape: Any = struct.field(pytree_node=False)  # Shape of the tensor.

    @classmethod
    def from_float_value(cls, fvalue, quantized_dtype, extract_diagonal=False):
        if isinstance(fvalue, list) and not fvalue:
            return QuantizedValue([], [], [], quantized_dtype, extract_diagonal, [])
        quantized, diagonal_fvalue, bucket_size = QuantizedValue.quantize(
            fvalue, quantized_dtype, extract_diagonal
        )
        return QuantizedValue(
            quantized,
            diagonal_fvalue,
            bucket_size,
            quantized_dtype,
            extract_diagonal,
            list(quantized.shape),
        )

    # Quantization is from Lingvo JAX optimizers.
    # We extend it for int16 quantization of PSD matrices.
    @classmethod
    def quantize(cls, fvalue, quantized_dtype, extract_diagonal=False):
        """Returns quantized value and the bucket."""
        if quantized_dtype == jnp.float32:
            return fvalue, [], []
        elif quantized_dtype == jnp.bfloat16:
            return fvalue.astype(jnp.bfloat16), [], []

        float_dtype = fvalue.dtype
        if quantized_dtype == jnp.int8:
            # value -128 is not used.
            num_buckets = jnp.array(127.0, dtype=float_dtype)
        elif quantized_dtype == jnp.int16:
            # value -32768 is not used.
            num_buckets = jnp.array(32767.0, dtype=float_dtype)
        else:
            raise ValueError(f"Quantized dtype {quantized_dtype} not supported.")
        # max value is mapped to num_buckets

        if extract_diagonal and fvalue.ndim != 2:
            raise ValueError(
                f"Input array {fvalue} must be 2D to work with extract_diagonal."
            )

        diagonal_fvalue = []
        if extract_diagonal:
            diagonal_fvalue = jnp.diag(fvalue)
            # Remove the diagonal entries.
            fvalue = fvalue - jnp.diag(diagonal_fvalue)

        # TODO(rohananil): Extend this by making use of information about the blocks
        # SM3 style which will be useful for diagonal statistics
        # We first decide the scale.
        if fvalue.ndim < 1:
            raise ValueError(
                f"Input array {fvalue} must have a strictly positive number of "
                "dimensions."
            )

        max_abs = jnp.max(jnp.abs(fvalue), axis=0)
        bucket_size = max_abs / num_buckets
        bs_expanded = bucket_size[jnp.newaxis, Ellipsis]
        # To avoid divide by 0.0
        bs_nonzero = jnp.where(
            bs_expanded > 0.0, bs_expanded, jnp.ones_like(bs_expanded)
        )
        ratio = fvalue / bs_nonzero
        # We use rounding to remove bias.
        quantized = jnp.round(ratio)
        return quantized.astype(quantized_dtype), diagonal_fvalue, bucket_size

    def to_float(self):
        """Returns the float value."""
        if isinstance(self.quantized, list) and not self.quantized:
            return self.quantized

        if self.quantized_dtype == jnp.float32:
            return self.quantized

        if self.quantized_dtype == jnp.bfloat16:
            return self.quantized.astype(jnp.float32)

        float_dtype = self.bucket_size.dtype
        bucket_size = self.bucket_size[jnp.newaxis, Ellipsis]
        val = self.quantized.astype(float_dtype) * bucket_size
        if self.extract_diagonal:
            val += jnp.diag(self.diagonal)
        return val