dacorvo HF staff committed on
Commit
219c5fd
·
verified ·
1 Parent(s): afb9fe6

Delete inference-cache-config/llama2-7b-13b.json

Browse files
inference-cache-config/llama2-7b-13b.json DELETED
@@ -1,102 +0,0 @@
1
- {
2
- "meta-llama/Llama-2-7b-chat-hf": [
3
- {
4
- "batch_size": 1,
5
- "sequence_length": 4096,
6
- "num_cores": 2,
7
- "auto_cast_type": "fp16"
8
- },
9
- {
10
- "batch_size": 4,
11
- "sequence_length": 4096,
12
- "num_cores": 2,
13
- "auto_cast_type": "fp16"
14
- },
15
- {
16
- "batch_size": 4,
17
- "sequence_length": 4096,
18
- "num_cores": 8,
19
- "auto_cast_type": "fp16"
20
- },
21
- {
22
- "batch_size": 8,
23
- "sequence_length": 4096,
24
- "num_cores": 8,
25
- "auto_cast_type": "fp16"
26
- },
27
- {
28
- "batch_size": 16,
29
- "sequence_length": 4096,
30
- "num_cores": 8,
31
- "auto_cast_type": "fp16"
32
- },
33
- {
34
- "batch_size": 32,
35
- "sequence_length": 4096,
36
- "num_cores": 8,
37
- "auto_cast_type": "fp16"
38
- }
39
- ],
40
- "meta-llama/Llama-2-13b-chat-hf": [
41
- {
42
- "batch_size": 1,
43
- "sequence_length": 4096,
44
- "num_cores": 12,
45
- "auto_cast_type": "fp16"
46
- },
47
- {
48
- "batch_size": 1,
49
- "sequence_length": 4096,
50
- "num_cores": 24,
51
- "auto_cast_type": "fp16"
52
- },
53
- {
54
- "batch_size": 4,
55
- "sequence_length": 4096,
56
- "num_cores": 12,
57
- "auto_cast_type": "fp16"
58
- },
59
- {
60
- "batch_size": 4,
61
- "sequence_length": 4096,
62
- "num_cores": 24,
63
- "auto_cast_type": "fp16"
64
- },
65
- {
66
- "batch_size": 8,
67
- "sequence_length": 4096,
68
- "num_cores": 12,
69
- "auto_cast_type": "fp16"
70
- },
71
- {
72
- "batch_size": 8,
73
- "sequence_length": 4096,
74
- "num_cores": 24,
75
- "auto_cast_type": "fp16"
76
- },
77
- {
78
- "batch_size": 16,
79
- "sequence_length": 4096,
80
- "num_cores": 12,
81
- "auto_cast_type": "fp16"
82
- },
83
- {
84
- "batch_size": 16,
85
- "sequence_length": 4096,
86
- "num_cores": 24,
87
- "auto_cast_type": "fp16"
88
- },
89
- {
90
- "batch_size": 32,
91
- "sequence_length": 4096,
92
- "num_cores": 12,
93
- "auto_cast_type": "fp16"
94
- },
95
- {
96
- "batch_size": 32,
97
- "sequence_length": 4096,
98
- "num_cores": 24,
99
- "auto_cast_type": "fp16"
100
- }
101
- ]
102
- }