Update inference-cache-config/llama.json
Browse files
inference-cache-config/llama.json
CHANGED
@@ -42,5 +42,21 @@
|
|
42 |
"num_cores": 8,
|
43 |
"auto_cast_type": "bf16"
|
44 |
}
|
45 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
}
|
|
|
42 |
"num_cores": 8,
|
43 |
"auto_cast_type": "bf16"
|
44 |
}
|
45 |
+
],
|
46 |
+
"meta-llama/Llama-2-7b-hf": [
|
47 |
+
{
|
48 |
+
"batch_size": 1,
|
49 |
+
"sequence_length": 2048,
|
50 |
+
"num_cores": 2,
|
51 |
+
"auto_cast_type": "bf16"
|
52 |
+
}
|
53 |
+
],
|
54 |
+
"meta-llama/Llama-2-13b-hf": [
|
55 |
+
{
|
56 |
+
"batch_size": 1,
|
57 |
+
"sequence_length": 2048,
|
58 |
+
"num_cores": 8,
|
59 |
+
"auto_cast_type": "bf16"
|
60 |
+
}
|
61 |
+
]
|
62 |
}
|