/*
 * Copyright 2021 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Various utility functions related to reading and writing files, vectors, etc.
// Would be much simpler if Android supported File.

#ifndef LYRA_CODEC_SPARSE_MATMUL_LAYERS_UTILS_H_
#define LYRA_CODEC_SPARSE_MATMUL_LAYERS_UTILS_H_

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <vector>

#include "absl/status/status.h"
#include "absl/strings/cord.h"
#include "absl/strings/substitute.h"
#include "include/ghc/filesystem.hpp"
#include "sparse_matmul/layers/errno_mapping.h"
#include "sparse_matmul/layers/masked_sparse_matrix.h"
#include "sparse_matmul/layers/read_array_ifstream.h"
#include "sparse_matmul/layers/sparse_linear_layer.h"
#include "sparse_matmul/layers/status_macros.h"
#include "sparse_matmul/numerics/fixed_types.h"
#include "sparse_matmul/numerics/float16_types.h"
#include "sparse_matmul/numerics/type_utils.h"
#include "sparse_matmul/vector/cache_aligned_vector.h"
#include "sparse_matmul/zlib_wrapper/zlibwrapper.h"

namespace csrblocksparse {

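// If |array| (holding |st_size| valid bytes) begins with a gzip header,
// decompresses it in place. Otherwise checks that |st_size| is a whole number
// of |T| elements. Decompression uses a buffer of at most kMaxBufferSize.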
template <typename T>
void unzip(int64_t st_size, std::vector<T>* array) {
  ZLib z;
  z.SetGzipHeaderMode();
  if (z.HasGzipHeader(reinterpret_cast<char*>(array->data()), st_size)) {
    const std::size_t kMaxBufferSize = 1 << 27;  // 128MB

    Bytef* dest;
    uLongf dest_len = kMaxBufferSize;
    CHECK_EQ(z.UncompressGzipAndAllocate(
                 &dest, &dest_len, reinterpret_cast<Bytef*>(array->data()),
                 st_size),
             Z_OK);
    CHECK_EQ(dest_len % sizeof(T), 0);
    array->assign(reinterpret_cast<T*>(dest),
                  reinterpret_cast<T*>(dest + dest_len));
    free(dest);
  } else {
    CHECK_EQ(st_size % sizeof(T), 0);
  }
}

// Reads a file that contains an array of a single POD type.  Eventually we
// will replace serialization with protos, but for now this is the easiest way
// to interface with the rest of the pipeline.
//
// StatusOr might be preferred but does not compile on ARM.
// |DiskType| and |ElemType| template types have no effect in this function
// version and are only used to handle fixed_type disk storage.
template <typename T, typename DiskType = T, typename ElemType = T>
typename std::enable_if<!csrblocksparse::IsFixed16Type<DiskType>::value,
                        absl::Status>::type
ReadArrayFromFile(const std::string& file_name, std::vector<T>* array,
                  const std::string& path = "/data/local/tmp/") {
  int64_t length = 0;
  const absl::Status status =
      detail::ReadArrayIfstream(file_name, path, array, &length);
  if (!status.ok()) {
    return status;
  }
  unzip(length, array);

  return absl::OkStatus();
}
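
// Example usage (a sketch; the file and path names are hypothetical):
//   std::vector<float> weights;
//   absl::Status status =
//       ReadArrayFromFile("weights.raw.gz", &weights, "/tmp/");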

// If the metatype |DiskType| is a fixed16 type, we load int16_ts from disk and
// construct |ElemType| from them. |ElemType| is necessary because we need to
// know the mantissa/exponent bit split before casting to float. We need a
// separate function template for the fixed case rather than an if block
// because the compiler would complain about bfloat not having an int16_t
// constructor.
template <typename T, typename DiskType, typename ElemType>
typename std::enable_if<std::is_same<T, float>::value &&
                            csrblocksparse::IsFixed16Type<DiskType>::value,
                        absl::Status>::type
ReadArrayFromFile(const std::string& file_name, std::vector<T>* array,
                  const std::string& path = "/data/local/tmp/") {
  std::vector<int16_t> disk_values;
  SPARSE_MATMUL_RETURN_IF_ERROR(
      ReadArrayFromFile(file_name, &disk_values, path));
  array->resize(disk_values.size());
  std::transform(
      disk_values.begin(), disk_values.end(), array->begin(),
      [](int16_t disk_value) { return static_cast<T>(ElemType(disk_value)); });
  return absl::OkStatus();
}
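
// Example (a sketch, assuming csrblocksparse::fixed16<2> matches the
// mantissa/exponent split used on disk):
//   std::vector<float> values;
//   auto status = ReadArrayFromFile<float, fixed16<2>, fixed16<2>>(
//       "fixed16_weights.raw", &values, "/tmp/");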

// Writes a vector to a binary file.  Eventually serialization will be handled
// with protos.
template <typename T>
absl::Status WriteArrayToFile(const std::vector<T>& array,
                              const std::string& file_name,
                              std::string path = "/data/local/tmp/") {
  path = (ghc::filesystem::path(path) / file_name).string();
  FILE* fp = fopen(path.c_str(), "wb");
  if (fp == nullptr)
    return ErrnoToCanonicalStatus(errno,
                                  absl::Substitute("Error opening $0", path));
  size_t write_count = fwrite(array.data(), sizeof(T), array.size(), fp);
  if (write_count != array.size()) {
    return ErrnoToCanonicalStatus(
        errno,
        absl::Substitute(
            "Error writing array, only wrote $0 of $1 elements for file $2",
            write_count, array.size(), path));
  }
  SPARSE_MATMUL_RETURN_IF_ERROR(ErrnoToCanonicalStatus(
      fclose(fp), absl::Substitute("Error closing $0", path)));
  return absl::OkStatus();
}
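
// Example (a sketch; the file and path names are hypothetical):
//   std::vector<float> bias = {0.1f, 0.2f, 0.3f};
//   absl::Status status = WriteArrayToFile(bias, "bias.raw", "/tmp/");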

// Reads an entire layer that consists of weights, bias and mask as a
// SparseLinearLayer.  Eventually this serialization will be handled with
// protos, but the rest of the system currently does naive serialization.
//
// StatusOr might be preferred but does not compile on ARM.
//
// Here |DiskWeightType| is the metatype used to store the weights on disk,
// usually fixed16_type, float, or bfloat.
// For the |DiskWeightType| = fixed16_type specialization, this loads a file
// with a "fixed16_weights.raw" suffix which stores int16_ts as its element
// datatype. The disk elements should match fixed16<WeightType::kExponentBits>.
// This cuts disk storage of the weights at least in half. For all other types
// it reads the weights as floats.
template <typename WeightType, typename RhsType,
          typename DiskWeightType = float>
absl::Status LoadGenericLayer(
    const std::string& prefix, bool zipped, const std::string& path,
    float default_bias,
    SparseLinearLayer<WeightType, RhsType>* sparse_linear_layer) {
  std::string fixed_prefix =
      csrblocksparse::IsFixed16Type<DiskWeightType>::value ? "fixed16_" : "";
  std::string extension = zipped ? ".gz" : "";
  std::string weight_name =
      absl::StrCat(prefix, fixed_prefix, "weights.raw", extension);
  std::string mask_name = absl::StrCat(prefix, "mask.raw", extension);
  std::string bias_name = absl::StrCat(prefix, "bias.raw", extension);

  std::vector<float> weight_vector;
  std::vector<float> mask_vector;
  std::vector<float> bias_vector;

  const auto status = ReadArrayFromFile<float, DiskWeightType, WeightType>(
      weight_name, &weight_vector, path);
  SPARSE_MATMUL_RETURN_IF_ERROR(status);
  SPARSE_MATMUL_RETURN_IF_ERROR(
      ReadArrayFromFile(mask_name, &mask_vector, path));
  SPARSE_MATMUL_RETURN_IF_ERROR(
      ReadArrayFromFile(bias_name, &bias_vector, path));

  CHECK(weight_vector.size() == mask_vector.size())
      << "Weight and mask must be the same size, weights: "
      << weight_vector.size() << " mask: " << mask_vector.size();
  CHECK(weight_vector.size() % bias_vector.size() == 0)
      << "Weights size must be a multiple of the bias size. Weights: "
      << weight_vector.size() << " bias: " << bias_vector.size()
      << " remainder: " << weight_vector.size() % bias_vector.size();

  int rows = bias_vector.size();
  int cols = weight_vector.size() / rows;

  MaskedSparseMatrix<float> weights_masked(rows, cols, mask_vector.data(),
                                           weight_vector.data());

  weights_masked.template CastWeights<WeightType>();
  using csrmatrix = CsrBlockSparseMatrix<WeightType, RhsType>;

  csrmatrix weights(weights_masked);
  // If the weights were not a multiple of the block size in rows, we need to
  // expand the bias vector to match using the provided default_bias value.
  bias_vector.resize(weights.rows(), default_bias);
  using BiasType = typename TypeOfProduct<WeightType, RhsType>::type;
  CacheAlignedVector<BiasType> bias(bias_vector);

  *sparse_linear_layer = SparseLinearLayer<WeightType, RhsType>(
      std::move(weights), std::move(bias));

  return absl::OkStatus();
}
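
// Loads a SparseLinearLayer, using a zero default bias for any rows added to
// round the layer up to a multiple of the block size.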
template <typename WeightType, typename RhsType,
          typename DiskWeightType = float>
absl::Status LoadSparseLayer(
    const std::string& prefix, bool zipped,
    SparseLinearLayer<WeightType, RhsType>* sparse_linear_layer,
    const std::string& path = "/data/local/tmp/") {
  return LoadGenericLayer<WeightType, RhsType, DiskWeightType>(
      prefix, zipped, path, 0.0f, sparse_linear_layer);
}
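
// Loads a SparseLinearLayer for use as a logit layer, using the lowest float
// value as the default bias so that any padding rows are effectively ignored.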
template <typename WeightType, typename RhsType,
          typename DiskWeightType = float>
absl::Status LoadLogitLayer(
    const std::string& prefix, bool zipped, const std::string& path,
    SparseLinearLayer<WeightType, RhsType>* sparse_linear_layer) {
  return LoadGenericLayer<WeightType, RhsType, DiskWeightType>(
      prefix, zipped, path, std::numeric_limits<float>::lowest(),
      sparse_linear_layer);
}
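
// Example (a sketch; the prefix is hypothetical):
//   SparseLinearLayer<float, float> layer;
//   auto status =
//       LoadSparseLayer<float, float>("lyra_gru_", /*zipped=*/true, &layer);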

// Reads an entire layer that consists of weights, bias and mask as a
// MaskedLinearLayer.  Eventually this serialization will be handled with
// protos, but the rest of the system currently does naive serialization.
//
// StatusOr might be preferred but does not compile on ARM.
template <typename T>
absl::Status LoadMaskedLayer(const std::string& prefix, bool zipped,
                             MaskedLinearLayer<T>* masked_sparse_matrix,
                             const std::string& path = "/data/local/tmp/") {
  std::string extension = zipped ? ".gz" : "";
  std::string weight_name = absl::StrCat(prefix, "weights.raw", extension);
  std::string mask_name = absl::StrCat(prefix, "mask.raw", extension);
  std::string bias_name = absl::StrCat(prefix, "bias.raw", extension);

  std::vector<float> weight_vector;
  std::vector<float> mask_vector;
  std::vector<float> bias_vector;

  SPARSE_MATMUL_RETURN_IF_ERROR(
      ReadArrayFromFile(weight_name, &weight_vector, path));
  SPARSE_MATMUL_RETURN_IF_ERROR(
      ReadArrayFromFile(mask_name, &mask_vector, path));
  SPARSE_MATMUL_RETURN_IF_ERROR(
      ReadArrayFromFile(bias_name, &bias_vector, path));

  CHECK(weight_vector.size() == mask_vector.size())
      << "Weight and mask must be the same size, weights: "
      << weight_vector.size() << " mask: " << mask_vector.size();
  CHECK(weight_vector.size() % bias_vector.size() == 0)
      << "Weights size must be a multiple of the bias size. Weights: "
      << weight_vector.size() << " bias: " << bias_vector.size()
      << " remainder: " << weight_vector.size() % bias_vector.size();

  int rows = bias_vector.size();
  int cols = weight_vector.size() / rows;

  MaskedSparseMatrix<T> weights_masked(rows, cols, mask_vector.data(),
                                       weight_vector.data());
  CacheAlignedVector<T> bias(bias_vector);

  *masked_sparse_matrix =
      MaskedLinearLayer<T>(std::move(weights_masked), std::move(bias));
  return absl::OkStatus();
}
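
// Example (a sketch; the prefix is hypothetical):
//   MaskedLinearLayer<float> layer;
//   auto status = LoadMaskedLayer("lyra_gru_", /*zipped=*/true, &layer);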

// Loads a vector of POD values into a CacheAlignedVector.
//
// StatusOr might be preferred but does not compile on ARM.
template <typename T>
absl::Status LoadVector(const std::string& file_name,
                        CacheAlignedVector<T>* cache_aligned_vector,
                        const std::string& path = "/data/local/tmp/") {
  std::vector<float> values;

  SPARSE_MATMUL_RETURN_IF_ERROR(ReadArrayFromFile(file_name, &values, path));

  *cache_aligned_vector = CacheAlignedVector<T>(values);

  return absl::OkStatus();
}
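
// Example (a sketch; the file name is hypothetical):
//   CacheAlignedVector<float> conditioning;
//   auto status = LoadVector("conditioning.raw.gz", &conditioning);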

// Loads a 2D vector from a file.  One of rows or cols can optionally be
// -1 to indicate that dimension should be inferred.
template <typename T>
absl::Status LoadFatVector(const std::string& file_name, int rows, int cols,
                           FatCacheAlignedVector<T>* fat_cache_aligned_vector,
                           const std::string& path = "/data/local/tmp/") {
  // Neither dimension can be zero.
  CHECK(rows != 0 && cols != 0);
  // At most one of |rows| and |cols| can be -1.
  CHECK(rows != -1 || cols != -1);
  // Any value other than -1 must be positive.
  CHECK(rows >= -1 && cols >= -1);

  CacheAlignedVector<T> values;

  SPARSE_MATMUL_RETURN_IF_ERROR(LoadVector(file_name, &values, path));

  if (rows > 0)
    CHECK_EQ(values.size() % rows, 0);
  else
    rows = values.size() / cols;

  if (cols > 0)
    CHECK_EQ(values.size() % cols, 0);
  else
    cols = values.size() / rows;

  *fat_cache_aligned_vector = FatCacheAlignedVector<T>(values, rows);

  return absl::OkStatus();
}
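
// Example (a sketch; the file name is hypothetical). Passing cols = -1 infers
// the number of columns from the file size:
//   FatCacheAlignedVector<float> embedding;
//   auto status =
//       LoadFatVector("embedding.raw", /*rows=*/128, /*cols=*/-1, &embedding);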

// Returns all files in |path| whose names contain |must_contain|.
// If only File worked on Android and Windows...
absl::Status FilesInDirectory(const std::string& path,
                              const std::string& must_contain,
                              std::vector<std::string>* result);
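
// Example (a sketch):
//   std::vector<std::string> raw_files;
//   auto status = FilesInDirectory("/tmp/", ".raw", &raw_files);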

}  // namespace csrblocksparse

#endif  // LYRA_CODEC_SPARSE_MATMUL_LAYERS_UTILS_H_