flyingfishinwater committed on
Commit bafc0de · verified · 1 Parent(s): 4a4b250

Upload models.json

Files changed (1)
  1. models.json +100 -14
models.json CHANGED
@@ -1,12 +1,12 @@
 [
   {
     "id": "Internal",
-    "model_title": "Llama2 Lite",
+    "model_title": "AI Assistant",
     "model_file": "ggml-model-Q8_0.gguf",
     "model_url": "https://",
     "model_info_url": "https://huggingface.co/princeton-nlp/Sheared-LLaMA-1.3B",
     "model_avatar": "ava0",
-    "model_description": "The standard Llama2 based 1.3B LLM.",
+    "model_description": "It is an AI assistant who can talk with you and help solve simple problems. It's based on a lite LLAMA2 model developed by Meta Inc.",
     "developer": "Meta",
     "developer_url": "https://ai.meta.com/llama/",
     "file_size": 1430,
@@ -19,29 +19,45 @@
     "n_batch" : 10,
     "template_name" : "HumanBot",
     "is_ready": true,
-    "is_internal": true
+    "is_internal": true,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   },
   {
     "id": "LiteLlama-460M-1T-Q8",
-    "model_title": "LiteLlama-460M-1T",
+    "model_title": "LiteLlama",
     "model_file": "LiteLlama-460M-1T-Q8_0.gguf",
     "model_url": "https://huggingface.co/flyingfishinwater/goodmodels/resolve/main/LiteLlama-460M-1T-Q8_0.gguf?download=true",
     "model_info_url": "https://huggingface.co/ahxt/LiteLlama-460M-1T",
     "model_avatar": "logo_litellama",
-    "model_description": "We present an open-source reproduction of Meta AI's LLaMa 2. However, with significantly reduced model sizes, LiteLlama-460M-1T has 460M parameters trained with 1T tokens.",
+    "model_description": "It's a very small LLAMA2 model with only 460M parameters trained with 1T tokens. It's best for testing.",
     "developer": "Xiaotian Han from Texas A&M University",
     "developer_url": "https://huggingface.co/ahxt/LiteLlama-460M-1T",
     "file_size": 493,
-    "context" : 4096,
+    "context" : 1024,
     "temp" : 0.6,
-    "prompt_format" : "<|system|>You are a friendly chatbot who always responds in the style of a pirate.</s><|user|>{{prompt}}</s><|assistant|>",
+    "prompt_format" : "<human>: {{prompt}}\n<bot>:",
     "top_k" : 5,
     "top_p" : 0.9,
     "model_inference" : "llama",
     "n_batch" : 10,
     "template_name" : "TinyLlama",
     "is_ready": true,
-    "is_internal": false
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   },
   {
     "id": "tinyllama-1.1B-chat-Q8",
@@ -63,7 +79,15 @@
     "n_batch" : 10,
     "template_name" : "TinyLlama",
     "is_ready": true,
-    "is_internal": false
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   },
   {
     "id": "mistral-7b-instruct-v0.2-Q8",
@@ -85,7 +109,15 @@
     "n_batch" : 10,
     "template_name" : "Mistral",
     "is_ready": true,
-    "is_internal": false
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   },
   {
     "id": "openchat-3.5-1210-Q8",
@@ -107,7 +139,15 @@
     "n_batch" : 10,
     "template_name" : "Mistral",
     "is_ready": true,
-    "is_internal": false
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   },
   {
     "id": "phi-2",
@@ -129,7 +169,15 @@
     "n_batch" : 10,
     "template_name" : "PHI",
     "is_ready": true,
-    "is_internal": false
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   },
   {
     "id": "yi-6b",
@@ -151,6 +199,44 @@
     "n_batch" : 10,
     "template_name" : "yi",
     "is_ready": true,
-    "is_internal": false
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
+  },
+  {
+    "id": "gemma-2b",
+    "model_title": "Google Gemma 2B",
+    "model_file": "gemma-2b-it-q8_0.gguf",
+    "model_url": "https://huggingface.co/flyingfishinwater/goodmodels/resolve/main/gemma-2b-it-q8_0.gguf?download=true",
+    "model_info_url": "https://huggingface.co/google/gemma-2b",
+    "model_avatar": "logo_google",
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models built from the same research and technology used to create the Gemini models. Developed by Google DeepMind and other teams across Google, Gemma is named after the Latin gemma, meaning 'precious stone.' The Gemma model weights are supported by developer tools that promote innovation, collaboration, and the responsible use of artificial intelligence (AI).",
+    "developer": "Google",
+    "developer_url": "https://huggingface.co/google",
+    "file_size": 2669,
+    "context" : 8192,
+    "temp" : 0.6,
+    "prompt_format" : "<bos><start_of_turn>user\n{{prompt}}<end_of_turn>\n<start_of_turn>model\n",
+    "top_k" : 5,
+    "top_p" : 0.9,
+    "model_inference" : "llama",
+    "n_batch" : 10,
+    "template_name" : "gemma",
+    "is_ready": true,
+    "is_internal": false,
+    "use_metal": true,
+    "mlock": false,
+    "mmap": true,
+    "repeat_last_n": 64,
+    "repeat_penalty": 1.2,
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "parse_special_tokens": true
   }
-]
+]
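
For anyone consuming this file: each entry pairs download metadata (model_url, file_size) with a {{prompt}}-templated prompt_format and llama.cpp-style runtime settings; the main change in this commit is adding the use_metal/mlock/mmap, repeat_*, and token-handling keys to every entry, plus the new gemma-2b entry. Below is a minimal Python sketch of a reader based on the schema visible in this diff; the helper names and the ready/non-internal filter are illustrative assumptions, not part of any published API for the app that ships this file.

import json

# Minimal sketch of a models.json consumer. Field semantics are inferred
# from the diff above; nothing here is a documented interface.

def load_downloadable_models(path="models.json"):
    # Keep entries that are flagged ready and hosted remotely; the
    # "Internal" entry (is_internal: true) is assumed to ship with the app.
    with open(path, "r", encoding="utf-8") as f:
        models = json.load(f)
    return [m for m in models if m.get("is_ready") and not m.get("is_internal")]

def render_prompt(model, user_text):
    # prompt_format embeds the user's text at the {{prompt}} placeholder,
    # e.g. LiteLlama's "<human>: {{prompt}}\n<bot>:".
    return model["prompt_format"].replace("{{prompt}}", user_text)

def runtime_options(model):
    # Collect the llama.cpp-style knobs; after this commit every entry
    # carries the repeat_* and memory-mapping keys, so direct indexing is safe.
    return {
        "n_ctx": model["context"],
        "n_batch": model["n_batch"],
        "temperature": model["temp"],
        "top_k": model["top_k"],
        "top_p": model["top_p"],
        "repeat_last_n": model["repeat_last_n"],
        "repeat_penalty": model["repeat_penalty"],
        "use_mlock": model["mlock"],
        "use_mmap": model["mmap"],
    }

if __name__ == "__main__":
    for m in load_downloadable_models():
        print(m["id"], runtime_options(m))
        print(render_prompt(m, "What is a GGUF file?"))

Note that the commit also shrinks LiteLlama's advertised context from 4096 to 1024 and swaps its prompt_format from a Zephyr-style template to the simple <human>/<bot> form, so a reader like the sketch above should always take both values from the file rather than hard-coding per-model defaults.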