dacorvo HF staff committed on
Commit
de9e259
1 Parent(s): 5694f75

Update inference-cache-config/llama3-8b.json

Browse files
inference-cache-config/llama3-8b.json CHANGED
@@ -4,43 +4,43 @@
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 2,
7
- "auto_cast_type": "fp16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 2,
13
- "auto_cast_type": "fp16"
14
  },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
18
  "num_cores": 2,
19
- "auto_cast_type": "fp16"
20
  },
21
  {
22
  "batch_size": 4,
23
  "sequence_length": 4096,
24
  "num_cores": 8,
25
- "auto_cast_type": "fp16"
26
  },
27
  {
28
  "batch_size": 8,
29
  "sequence_length": 4096,
30
  "num_cores": 8,
31
- "auto_cast_type": "fp16"
32
  },
33
  {
34
  "batch_size": 16,
35
  "sequence_length": 4096,
36
  "num_cores": 8,
37
- "auto_cast_type": "fp16"
38
  },
39
  {
40
  "batch_size": 32,
41
  "sequence_length": 4096,
42
  "num_cores": 8,
43
- "auto_cast_type": "fp16"
44
  }
45
  ]
46
  }
 
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 2,
7
+ "auto_cast_type": "bf16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 2,
13
+ "auto_cast_type": "bf16"
14
  },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
18
  "num_cores": 2,
19
+ "auto_cast_type": "bf16"
20
  },
21
  {
22
  "batch_size": 4,
23
  "sequence_length": 4096,
24
  "num_cores": 8,
25
+ "auto_cast_type": "bf16"
26
  },
27
  {
28
  "batch_size": 8,
29
  "sequence_length": 4096,
30
  "num_cores": 8,
31
+ "auto_cast_type": "bf16"
32
  },
33
  {
34
  "batch_size": 16,
35
  "sequence_length": 4096,
36
  "num_cores": 8,
37
+ "auto_cast_type": "bf16"
38
  },
39
  {
40
  "batch_size": 32,
41
  "sequence_length": 4096,
42
  "num_cores": 8,
43
+ "auto_cast_type": "bf16"
44
  }
45
  ]
46
  }