Remove obsolete llama variants
inference-cache-config/llama-variants.json
CHANGED
@@ -13,7 +13,7 @@
             "auto_cast_type": "fp16"
         }
     ],
-
+    "lmsys/vicuna-7b-v1.5": [
         {
             "batch_size": 1,
             "sequence_length": 4096,
@@ -26,109 +26,5 @@
             "num_cores": 2,
             "auto_cast_type": "fp16"
         }
-    ],
-    "m-a-p/OpenCodeInterpreter-DS-6.7B": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        }
-    ],
-    "ibm/labradorite-13b": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 8,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 8,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 8,
-            "sequence_length": 4096,
-            "num_cores": 8,
-            "auto_cast_type": "fp16"
-        }
-    ],
-    "gorilla-llm/gorilla-openfunctions-v2": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        }
-    ],
-    "m-a-p/ChatMusician": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        }
-    ],
-    "LargeWorldModel/LWM-Text-Chat-1M": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 2,
-            "auto_cast_type": "fp16"
-        }
-    ],
-    "01-ai/Yi-34B-200K": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 24,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 24,
-            "auto_cast_type": "fp16"
-        }
-    ],
-    "abacusai/Smaug-72B-v0.1": [
-        {
-            "batch_size": 1,
-            "sequence_length": 4096,
-            "num_cores": 24,
-            "auto_cast_type": "fp16"
-        },
-        {
-            "batch_size": 4,
-            "sequence_length": 4096,
-            "num_cores": 24,
-            "auto_cast_type": "fp16"
-        }
     ]
 }
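Each entry in this file pins the static shapes and precision for which a llama-architecture checkpoint is pre-compiled and cached. As a rough sketch of what one retained variant corresponds to, assuming the JSON keys map one-to-one onto the export arguments of optimum-neuron's NeuronModelForCausalLM (the code that consumes this config is not part of this commit):

    # Hypothetical sketch: what one variant kept by this commit expresses,
    # assuming the JSON keys map directly onto optimum-neuron export arguments.
    from optimum.neuron import NeuronModelForCausalLM

    # First cached configuration of "lmsys/vicuna-7b-v1.5" after this commit.
    variant = {
        "batch_size": 1,
        "sequence_length": 4096,
        "num_cores": 2,            # NeuronCores the model is sharded across
        "auto_cast_type": "fp16",  # precision used when auto-casting weights/ops
    }

    # export=True triggers ahead-of-time compilation for AWS Neuron devices,
    # with the static shapes and precision fixed above.
    model = NeuronModelForCausalLM.from_pretrained(
        "lmsys/vicuna-7b-v1.5",
        export=True,
        **variant,
    )

Under that reading, removing an entry here only means the corresponding compilation artifacts stop being pre-built and cached for these models; the checkpoints themselves can still be exported manually with a call like the one above.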