dacorvo HF staff committed on
Commit
d05f579
1 Parent(s): 2af9f87

Update inference-cache-config/llama.json

Browse files
Files changed (1) hide show
  1. inference-cache-config/llama.json +16 -0
inference-cache-config/llama.json CHANGED
@@ -66,5 +66,21 @@
66
  "num_cores": 2,
67
  "auto_cast_type": "bf16"
68
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  ]
70
  }
 
66
  "num_cores": 2,
67
  "auto_cast_type": "bf16"
68
  }
69
+ ],
70
+ "meta-llama/Llama-3.2-1B": [
71
+ {
72
+ "batch_size": 1,
73
+ "sequence_length": 4096,
74
+ "num_cores": 2,
75
+ "auto_cast_type": "bf16"
76
+ }
77
+ ],
78
+ "meta-llama/Llama-3.2-3B": [
79
+ {
80
+ "batch_size": 1,
81
+ "sequence_length": 4096,
82
+ "num_cores": 2,
83
+ "auto_cast_type": "bf16"
84
+ }
85
  ]
86
  }