dacorvo HF staff committed on
Commit
6c4c814
1 Parent(s): 320841a

Update inference-cache-config/mistral.json

Browse files
Files changed (1) hide show
  1. inference-cache-config/mistral.json +29 -13
inference-cache-config/mistral.json CHANGED
@@ -4,71 +4,87 @@
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 2,
7
- "auto_cast_type": "fp16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 4,
13
- "auto_cast_type": "fp16"
14
  },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
18
  "num_cores": 2,
19
- "auto_cast_type": "fp16"
20
  },
21
  {
22
  "batch_size": 1,
23
  "sequence_length": 4096,
24
  "num_cores": 8,
25
- "auto_cast_type": "fp16"
26
  },
27
  {
28
  "batch_size": 4,
29
  "sequence_length": 4096,
30
  "num_cores": 2,
31
- "auto_cast_type": "fp16"
32
  },
33
  {
34
  "batch_size": 4,
35
  "sequence_length": 4096,
36
  "num_cores": 8,
37
- "auto_cast_type": "fp16"
38
  },
39
  {
40
  "batch_size": 8,
41
  "sequence_length": 4096,
42
  "num_cores": 8,
43
- "auto_cast_type": "fp16"
44
  },
45
  {
46
  "batch_size": 16,
47
  "sequence_length": 4096,
48
  "num_cores": 8,
49
- "auto_cast_type": "fp16"
50
  },
51
  {
52
  "batch_size": 32,
53
  "sequence_length": 4096,
54
  "num_cores": 8,
55
- "auto_cast_type": "fp16"
56
  }
57
  ],
58
- "mistralai/Mistral-7B-Instruct-v0.2": [
59
  {
60
  "batch_size": 1,
61
  "sequence_length": 4096,
62
  "num_cores": 2,
63
- "auto_cast_type": "fp16"
64
  }
65
  ],
66
- "mistralai/Mistral-7B-Instruct-v0.1": [
67
  {
68
  "batch_size": 1,
69
  "sequence_length": 4096,
70
  "num_cores": 2,
71
- "auto_cast_type": "fp16"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  }
73
  ]
74
  }
 
4
  "batch_size": 1,
5
  "sequence_length": 4096,
6
  "num_cores": 2,
7
+ "auto_cast_type": "bf16"
8
  },
9
  {
10
  "batch_size": 4,
11
  "sequence_length": 4096,
12
  "num_cores": 4,
13
+ "auto_cast_type": "bf16"
14
  },
15
  {
16
  "batch_size": 8,
17
  "sequence_length": 4096,
18
  "num_cores": 2,
19
+ "auto_cast_type": "bf16"
20
  },
21
  {
22
  "batch_size": 1,
23
  "sequence_length": 4096,
24
  "num_cores": 8,
25
+ "auto_cast_type": "bf16"
26
  },
27
  {
28
  "batch_size": 4,
29
  "sequence_length": 4096,
30
  "num_cores": 2,
31
+ "auto_cast_type": "bf16"
32
  },
33
  {
34
  "batch_size": 4,
35
  "sequence_length": 4096,
36
  "num_cores": 8,
37
+ "auto_cast_type": "bf16"
38
  },
39
  {
40
  "batch_size": 8,
41
  "sequence_length": 4096,
42
  "num_cores": 8,
43
+ "auto_cast_type": "bf16"
44
  },
45
  {
46
  "batch_size": 16,
47
  "sequence_length": 4096,
48
  "num_cores": 8,
49
+ "auto_cast_type": "bf16"
50
  },
51
  {
52
  "batch_size": 32,
53
  "sequence_length": 4096,
54
  "num_cores": 8,
55
+ "auto_cast_type": "bf16"
56
  }
57
  ],
58
+ "mistralai/Mistral-7B-Instruct-v0.1": [
59
  {
60
  "batch_size": 1,
61
  "sequence_length": 4096,
62
  "num_cores": 2,
63
+ "auto_cast_type": "bf16"
64
  }
65
  ],
66
+ "mistralai/Mistral-7B-Instruct-v0.2": [
67
  {
68
  "batch_size": 1,
69
  "sequence_length": 4096,
70
  "num_cores": 2,
71
+ "auto_cast_type": "bf16"
72
+ }
73
+ ],
74
+ "mistralai/Mistral-Small-Instruct-v2409": [
75
+ {
76
+ "batch_size": 1,
77
+ "sequence_length": 4096,
78
+ "num_cores": 12,
79
+ "auto_cast_type": "bf16"
80
+ }
81
+ ],
82
+ "mistralai/Mistral-Small-Instruct-v2409": [
83
+ {
84
+ "batch_size": 4,
85
+ "sequence_length": 4096,
86
+ "num_cores": 12,
87
+ "auto_cast_type": "bf16"
88
  }
89
  ]
90
  }