|
# Registry of open-weight checkpoints, grouped first by training stage
# ("pretrained" vs. "instruction_tuned") and then by size bucket.
#
# Bucket keys have the form "<=NB"; get_model_params() parses the integer N
# out of the key, so the key format must be preserved when adding buckets.
# A model belongs to the first bucket it is listed under; the helpers below
# locate models purely by list membership.
MODELS = {
    "pretrained": {
        "<=4B": [
            "microsoft/phi-1",
            "microsoft/phi-1_5",
            "microsoft/phi-2",
            "Qwen/Qwen1.5-0.5B",
            "Qwen/Qwen1.5-1.8B",
            "Qwen/Qwen1.5-4B",
            "google/gemma-2b",
            "allenai/OLMo-1B",
        ],
        "<=7B": [
            "google/gemma-7b",
            "mistralai/Mistral-7B-v0.1",
            "Qwen/Qwen1.5-7B",
            "01-ai/Yi-6B",
            "meta-llama/Llama-2-7b-hf",
            "codellama/CodeLlama-7b-hf",
            "EleutherAI/llemma_7b",
            "allenai/OLMo-7B",
            "mistral-community/Mistral-7B-v0.2",
        ],
        "<=14B": [
            "Qwen/Qwen1.5-14B",
            "meta-llama/Llama-2-13b-hf",
            "codellama/CodeLlama-13b-hf",
            "upstage/SOLAR-10.7B-v1.0",
            "meta-llama/Meta-Llama-3-8B",
        ],
        "<=50B": [
            "01-ai/Yi-34B",
            "EleutherAI/llemma_34b",
            "codellama/CodeLlama-34b-hf",
            "mistralai/Mixtral-8x7B-v0.1",
            "Qwen/Qwen1.5-32B",
        ],
        "<=75B": [
            "meta-llama/Llama-2-70b-hf",
            "codellama/CodeLlama-70b-hf",
            "meta-llama/Meta-Llama-3-70B",
            "Qwen/Qwen1.5-72B",
        ],
        "<=175B": [
            "mistral-community/Mixtral-8x22B-v0.1-AWQ",
        ],
    },
    "instruction_tuned": {
        "<=4B": [
            "Qwen/Qwen1.5-0.5B-Chat",
            "Qwen/Qwen1.5-1.8B-Chat",
            "Qwen/Qwen1.5-4B-Chat",
            "google/gemma-2b-it",
            "google/gemma-1.1-2b-it",
            "microsoft/Phi-3-mini-4k-instruct",
            "microsoft/Phi-3-mini-128k-instruct",
        ],
        "<=7B": [
            "google/gemma-7b-it",
            "mistralai/Mistral-7B-Instruct-v0.2",
            "Qwen/Qwen1.5-7B-Chat",
            "01-ai/Yi-6B-Chat",
            "meta-llama/Llama-2-7b-chat-hf",
            "codellama/CodeLlama-7b-Instruct-hf",
            "allenai/OLMo-7B-SFT",
            "allenai/OLMo-7B-Instruct",
            "allenai/tulu-2-7b",
            "allenai/tulu-2-dpo-7b",
            "allenai/codetulu-2-7b",
            "microsoft/Orca-2-7b",
            "openchat/openchat-3.5-0106",
            "teknium/OpenHermes-2-Mistral-7B",
            "teknium/OpenHermes-2.5-Mistral-7B",
            "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
            "HuggingFaceH4/zephyr-7b-beta",
            "berkeley-nest/Starling-LM-7B-alpha",
            "Nexusflow/Starling-LM-7B-beta",
            "kaist-ai/mistral-orpo-alpha",
            "kaist-ai/mistral-orpo-beta",
            "google/gemma-1.1-7b-it",
        ],
        "<=14B": [
            "Qwen/Qwen1.5-14B-Chat",
            "meta-llama/Llama-2-13b-chat-hf",
            "codellama/CodeLlama-13b-Instruct-hf",
            "allenai/tulu-2-13b",
            "allenai/tulu-2-dpo-13b",
            "allenai/codetulu-2-13b",
            "microsoft/Orca-2-13b",
            "upstage/SOLAR-10.7B-Instruct-v1.0",
            "meta-llama/Meta-Llama-3-8B-Instruct",
            "CohereForAI/aya-101",
        ],
        "<=50B": [
            "01-ai/Yi-34B-Chat",
            "codellama/CodeLlama-34b-Instruct-hf",
            "allenai/codetulu-2-34b",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
            "NousResearch/Nous-Hermes-2-Yi-34B",
            "CohereForAI/c4ai-command-r-v01",
            "Qwen/Qwen1.5-32B-Chat",
        ],
        "<=75B": [
            "meta-llama/Llama-2-70b-chat-hf",
            "codellama/CodeLlama-70b-Instruct-hf",
            "Qwen/Qwen1.5-72B-Chat",
            "allenai/tulu-2-dpo-70b",
            "meta-llama/Meta-Llama-3-70B-Instruct",
        ],
        "<=175B": [
            "alpindale/c4ai-command-r-plus-GPTQ",
            "MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ",
            "MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ",
        ],
    },
}
|
|
|
# Models that get_model_type() classifies as "api". They are not listed in
# MODELS, so get_model_params() raises ValueError for them.
API_MODELS = [
    "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-1106",
    "gpt-4-0125-preview",
    "gpt-4-1106-preview",
    "gpt-4-turbo-2024-04-09",
    "gpt-4o-2024-05-13",
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "mistral-large",
    "mistral-medium",
    "gemini-1.0-pro",
    "gemini-pro-1.5",
    "google/gemini-flash-1.5",
    "qwen/qwen-110b-chat",
]
|
|
|
|
|
# Every model in a fixed presentation order: open models first, then the
# API-served ones. MODEL_SHORT_TO_LONG is derived from this list, and the
# entries here are the same strings used as MODEL_MAPPING keys.
#
# NOTE(review): the two Starling entries are written without their org prefix,
# unlike MODELS ("berkeley-nest/Starling-LM-7B-alpha",
# "Nexusflow/Starling-LM-7B-beta"). MODEL_MAPPING uses the same short spelling,
# so they are left as-is here; confirm with consumers before normalising.
ORDERED_MODELS = [
    "microsoft/phi-1",
    "microsoft/phi-1_5",
    "microsoft/phi-2",
    "Qwen/Qwen1.5-0.5B",
    "Qwen/Qwen1.5-1.8B",
    "Qwen/Qwen1.5-4B",
    "google/gemma-2b",
    "allenai/OLMo-1B",
    "Qwen/Qwen1.5-0.5B-Chat",
    "Qwen/Qwen1.5-1.8B-Chat",
    "Qwen/Qwen1.5-4B-Chat",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3-mini-128k-instruct",
    "google/gemma-2b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-7b",
    "mistralai/Mistral-7B-v0.1",
    "mistral-community/Mistral-7B-v0.2",
    "Qwen/Qwen1.5-7B",
    "01-ai/Yi-6B",
    "meta-llama/Llama-2-7b-hf",
    "codellama/CodeLlama-7b-hf",
    "meta-llama/Meta-Llama-3-8B",
    "EleutherAI/llemma_7b",
    "allenai/OLMo-7B",
    "google/gemma-7b-it",
    "google/gemma-1.1-7b-it",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "Qwen/Qwen1.5-7B-Chat",
    "01-ai/Yi-6B-Chat",
    "meta-llama/Llama-2-7b-chat-hf",
    "codellama/CodeLlama-7b-Instruct-hf",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "allenai/OLMo-7B-SFT",
    "allenai/OLMo-7B-Instruct",
    "allenai/tulu-2-7b",
    "allenai/tulu-2-dpo-7b",
    "allenai/codetulu-2-7b",
    "microsoft/Orca-2-7b",
    "openchat/openchat-3.5-0106",
    "teknium/OpenHermes-2-Mistral-7B",
    "teknium/OpenHermes-2.5-Mistral-7B",
    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
    "Starling-LM-7B-alpha",
    "Starling-LM-7B-beta",
    "kaist-ai/mistral-orpo-alpha",
    "kaist-ai/mistral-orpo-beta",
    "HuggingFaceH4/zephyr-7b-beta",
    "Qwen/Qwen1.5-14B",
    "meta-llama/Llama-2-13b-hf",
    "codellama/CodeLlama-13b-hf",
    "upstage/SOLAR-10.7B-v1.0",
    "Qwen/Qwen1.5-14B-Chat",
    "upstage/SOLAR-10.7B-Instruct-v1.0",
    "CohereForAI/aya-101",
    "meta-llama/Llama-2-13b-chat-hf",
    "codellama/CodeLlama-13b-Instruct-hf",
    "allenai/tulu-2-13b",
    "allenai/tulu-2-dpo-13b",
    "allenai/codetulu-2-13b",
    "microsoft/Orca-2-13b",
    "01-ai/Yi-34B",
    "EleutherAI/llemma_34b",
    "Qwen/Qwen1.5-32B",
    "codellama/CodeLlama-34b-hf",
    "mistralai/Mixtral-8x7B-v0.1",
    "01-ai/Yi-34B-Chat",
    "NousResearch/Nous-Hermes-2-Yi-34B",
    "codellama/CodeLlama-34b-Instruct-hf",
    "allenai/codetulu-2-34b",
    "Qwen/Qwen1.5-32B-Chat",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    "CohereForAI/c4ai-command-r-v01",
    "meta-llama/Llama-2-70b-hf",
    "codellama/CodeLlama-70b-hf",
    "mistral-community/Mixtral-8x22B-v0.1-AWQ",
    "meta-llama/Meta-Llama-3-70B",
    "Qwen/Qwen1.5-72B",
    "meta-llama/Llama-2-70b-chat-hf",
    "codellama/CodeLlama-70b-Instruct-hf",
    "allenai/tulu-2-dpo-70b",
    "alpindale/c4ai-command-r-plus-GPTQ",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ",
    "MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ",
    "Qwen/Qwen1.5-72B-Chat",
    "qwen/qwen-110b-chat",
    "gpt-3.5-turbo-1106",
    "gpt-3.5-turbo-0125",
    "gpt-4-1106-preview",
    "gpt-4-0125-preview",
    "gpt-4-turbo-2024-04-09",
    "gpt-4o-2024-05-13",
    "mistral-medium",
    "mistral-large",
    "gemini-1.0-pro",
    "gemini-pro-1.5",
    "google/gemini-flash-1.5",
    "claude-3-haiku-20240307",
    "claude-3-sonnet-20240229",
    "claude-3-opus-20240229",
]
|
|
|
|
|
# Models reported as already trained by is_trained_model() and excluded from
# get_not_trained_models(). Entries must be spelled exactly as in MODELS,
# because both helpers compare by plain string equality.
# NOTE(review): the meaning of the "bgb" prefix is not visible in this file.
bgb_trained_models = [
    "microsoft/phi-1",
    "microsoft/phi-1_5",
    "microsoft/phi-2",
    "Qwen/Qwen1.5-0.5B",
    "Qwen/Qwen1.5-1.8B",
    "Qwen/Qwen1.5-4B",
    "google/gemma-2b",
    "allenai/OLMo-1B",
    "google/gemma-7b",
    "mistralai/Mistral-7B-v0.1",
    "Qwen/Qwen1.5-7B",
    "01-ai/Yi-6B",
    "meta-llama/Llama-2-7b-hf",
    "codellama/CodeLlama-7b-hf",
    "EleutherAI/llemma_7b",
    "allenai/OLMo-7B",
    "Qwen/Qwen1.5-14B",
    "meta-llama/Llama-2-13b-hf",
    "codellama/CodeLlama-13b-hf",
    "upstage/SOLAR-10.7B-v1.0",
    "01-ai/Yi-34B",
    "EleutherAI/llemma_34b",
    "codellama/CodeLlama-34b-hf",
    "mistralai/Mixtral-8x7B-v0.1",
    "meta-llama/Llama-2-70b-hf",
    "codellama/CodeLlama-70b-hf",
    "Qwen/Qwen1.5-72B",
    "Qwen/Qwen1.5-0.5B-Chat",
    "Qwen/Qwen1.5-1.8B-Chat",
    "Qwen/Qwen1.5-4B-Chat",
    "google/gemma-2b-it",
    "google/gemma-7b-it",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "Qwen/Qwen1.5-7B-Chat",
    "01-ai/Yi-6B-Chat",
    "meta-llama/Llama-2-7b-chat-hf",
    "codellama/CodeLlama-7b-Instruct-hf",
    "allenai/OLMo-7B-SFT",
    "allenai/OLMo-7B-Instruct",
    "allenai/tulu-2-7b",
    "allenai/tulu-2-dpo-7b",
    "allenai/codetulu-2-7b",
    "microsoft/Orca-2-7b",
    "openchat/openchat-3.5-0106",
    "teknium/OpenHermes-2-Mistral-7B",
    "teknium/OpenHermes-2.5-Mistral-7B",
    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
    "HuggingFaceH4/zephyr-7b-beta",
    "Qwen/Qwen1.5-14B-Chat",
    "meta-llama/Llama-2-13b-chat-hf",
    "codellama/CodeLlama-13b-Instruct-hf",
    "allenai/tulu-2-13b",
    "allenai/tulu-2-dpo-13b",
    "allenai/codetulu-2-13b",
    "microsoft/Orca-2-13b",
    "01-ai/Yi-34B-Chat",
    "codellama/CodeLlama-34b-Instruct-hf",
    "allenai/codetulu-2-34b",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    # "Mixtral" (not "Mistral") and "70b" (not "72b") below: these are the
    # spellings registered in MODELS; any other spelling never matches.
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    "NousResearch/Nous-Hermes-2-Yi-34B",
    "meta-llama/Llama-2-70b-chat-hf",
    "codellama/CodeLlama-70b-Instruct-hf",
    "Qwen/Qwen1.5-72B-Chat",
    "allenai/tulu-2-dpo-70b",
]
|
|
|
|
|
# Per-model metadata, keyed by the same strings as ORDERED_MODELS.
#
# Each value is a list: [size, kind] or [size, kind, base].
#   size - a float for most open models (looks like parameter count in
#          billions - TODO confirm), or one of the strings "AWQ", "GPTQ",
#          "Proprietary" where no number is recorded.
#   kind - "Base", "Chat" or "Proprietary".
#   base - optional; when present it is the key of a "Base" entry
#          (presumably the checkpoint the model was tuned from - verify
#          against consumers) or None. Entries may omit it entirely, so
#          readers must not assume a third element exists.
MODEL_MAPPING = {
    "microsoft/phi-1": [1.3, "Base"],
    "microsoft/phi-1_5": [1.3, "Base"],
    "microsoft/phi-2": [2.7, "Base"],
    "Qwen/Qwen1.5-0.5B": [0.5, "Base"],
    "Qwen/Qwen1.5-1.8B": [1.8, "Base"],
    "Qwen/Qwen1.5-4B": [4.0, "Base"],
    "google/gemma-2b": [2.0, "Base"],
    "allenai/OLMo-1B": [1.0, "Base"],
    "Qwen/Qwen1.5-0.5B-Chat": [0.5, "Chat", "Qwen/Qwen1.5-0.5B"],
    "Qwen/Qwen1.5-1.8B-Chat": [1.8, "Chat", "Qwen/Qwen1.5-1.8B"],
    "Qwen/Qwen1.5-4B-Chat": [4.0, "Chat", "Qwen/Qwen1.5-4B"],
    "microsoft/Phi-3-mini-4k-instruct": [3.8, "Chat"],
    "microsoft/Phi-3-mini-128k-instruct": [3.8, "Chat"],
    "google/gemma-2b-it": [2.0, "Chat", "google/gemma-2b"],
    "google/gemma-1.1-2b-it": [2.0, "Chat"],
    "google/gemma-7b": [7.0, "Base"],
    "mistralai/Mistral-7B-v0.1": [7.0, "Base"],
    "mistral-community/Mistral-7B-v0.2": [7.0, "Base"],
    "Qwen/Qwen1.5-7B": [7.0, "Base"],
    "01-ai/Yi-6B": [6.0, "Base"],
    "meta-llama/Llama-2-7b-hf": [7.0, "Base"],
    "codellama/CodeLlama-7b-hf": [7.0, "Base"],
    "meta-llama/Meta-Llama-3-8B": [8.0, "Base"],
    "EleutherAI/llemma_7b": [7.0, "Base"],
    "allenai/OLMo-7B": [7.0, "Base"],
    "google/gemma-7b-it": [7.0, "Chat", "google/gemma-7b"],
    "google/gemma-1.1-7b-it": [7.0, "Chat"],
    "mistralai/Mistral-7B-Instruct-v0.2": [7.0, "Chat", "mistral-community/Mistral-7B-v0.2"],
    "Qwen/Qwen1.5-7B-Chat": [7.0, "Chat", "Qwen/Qwen1.5-7B"],
    "01-ai/Yi-6B-Chat": [6.0, "Chat", "01-ai/Yi-6B"],
    "meta-llama/Llama-2-7b-chat-hf": [7.0, "Chat", "meta-llama/Llama-2-7b-hf"],
    "codellama/CodeLlama-7b-Instruct-hf": [7.0, "Chat", "codellama/CodeLlama-7b-hf"],
    "meta-llama/Meta-Llama-3-8B-Instruct": [8.0, "Chat", "meta-llama/Meta-Llama-3-8B"],
    "allenai/OLMo-7B-SFT": [7.0, "Chat", "allenai/OLMo-7B"],
    "allenai/OLMo-7B-Instruct": [7.0, "Chat", "allenai/OLMo-7B"],
    "allenai/tulu-2-7b": [7.0, "Chat", "meta-llama/Llama-2-7b-hf"],
    "allenai/tulu-2-dpo-7b": [7.0, "Chat", "meta-llama/Llama-2-7b-hf"],
    "allenai/codetulu-2-7b": [7.0, "Chat", "codellama/CodeLlama-7b-hf"],
    "microsoft/Orca-2-7b": [7.0, "Chat", "meta-llama/Llama-2-7b-hf"],
    "openchat/openchat-3.5-0106": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    "teknium/OpenHermes-2-Mistral-7B": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    "teknium/OpenHermes-2.5-Mistral-7B": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    # NOTE(review): the Starling keys carry no org prefix, matching
    # ORDERED_MODELS but not MODELS; confirm before normalising.
    "Starling-LM-7B-alpha": [7.0, "Chat"],
    "Starling-LM-7B-beta": [7.0, "Chat"],
    "kaist-ai/mistral-orpo-alpha": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    "kaist-ai/mistral-orpo-beta": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    "HuggingFaceH4/zephyr-7b-beta": [7.0, "Chat", "mistralai/Mistral-7B-v0.1"],
    "Qwen/Qwen1.5-14B": [14.0, "Base"],
    "meta-llama/Llama-2-13b-hf": [13.0, "Base"],
    "codellama/CodeLlama-13b-hf": [13.0, "Base"],
    "upstage/SOLAR-10.7B-v1.0": [10.7, "Base"],
    "Qwen/Qwen1.5-14B-Chat": [14.0, "Chat", "Qwen/Qwen1.5-14B"],
    "upstage/SOLAR-10.7B-Instruct-v1.0": [10.7, "Chat", "upstage/SOLAR-10.7B-v1.0"],
    "CohereForAI/aya-101": [13.0, "Chat"],
    "meta-llama/Llama-2-13b-chat-hf": [13.0, "Chat", "meta-llama/Llama-2-13b-hf"],
    "codellama/CodeLlama-13b-Instruct-hf": [13.0, "Chat", "codellama/CodeLlama-13b-hf"],
    "allenai/tulu-2-13b": [13.0, "Chat", "meta-llama/Llama-2-13b-hf"],
    "allenai/tulu-2-dpo-13b": [13.0, "Chat", "meta-llama/Llama-2-13b-hf"],
    "allenai/codetulu-2-13b": [13.0, "Chat", "codellama/CodeLlama-13b-hf"],
    "microsoft/Orca-2-13b": [13.0, "Chat", "meta-llama/Llama-2-13b-hf"],
    "01-ai/Yi-34B": [34.0, "Base"],
    "EleutherAI/llemma_34b": [34.0, "Base"],
    "Qwen/Qwen1.5-32B": [32.0, "Base"],
    "codellama/CodeLlama-34b-hf": [34.0, "Base"],
    "mistralai/Mixtral-8x7B-v0.1": [46.7, "Base"],
    "01-ai/Yi-34B-Chat": [34.0, "Chat", "01-ai/Yi-34B"],
    "NousResearch/Nous-Hermes-2-Yi-34B": [34.0, "Chat", "01-ai/Yi-34B"],
    "codellama/CodeLlama-34b-Instruct-hf": [34.0, "Chat", "codellama/CodeLlama-34b-hf"],
    "allenai/codetulu-2-34b": [34.0, "Chat", "codellama/CodeLlama-34b-hf"],
    "Qwen/Qwen1.5-32B-Chat": [32.0, "Chat", "Qwen/Qwen1.5-32B"],
    "mistralai/Mixtral-8x7B-Instruct-v0.1": [46.7, "Chat", "mistralai/Mixtral-8x7B-v0.1"],
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT": [46.7, "Chat", "mistralai/Mixtral-8x7B-v0.1"],
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": [46.7, "Chat", "mistralai/Mixtral-8x7B-v0.1"],
    "CohereForAI/c4ai-command-r-v01": [35.0, "Chat"],
    "meta-llama/Llama-2-70b-hf": [70.0, "Base"],
    "codellama/CodeLlama-70b-hf": [70.0, "Base"],
    "mistral-community/Mixtral-8x22B-v0.1-AWQ": ["AWQ", "Base"],
    "meta-llama/Meta-Llama-3-70B": [70.0, "Base"],
    "Qwen/Qwen1.5-72B": [72.0, "Base"],
    "meta-llama/Llama-2-70b-chat-hf": [70.0, "Chat", "meta-llama/Llama-2-70b-hf"],
    "codellama/CodeLlama-70b-Instruct-hf": [70.0, "Chat", "codellama/CodeLlama-70b-hf"],
    "allenai/tulu-2-dpo-70b": [70.0, "Chat", "meta-llama/Llama-2-70b-hf"],
    "alpindale/c4ai-command-r-plus-GPTQ": ["GPTQ", "Chat"],
    "meta-llama/Meta-Llama-3-70B-Instruct": [70.0, "Chat", "meta-llama/Meta-Llama-3-70B"],
    "MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ": ["AWQ", "Chat", "mistral-community/Mixtral-8x22B-v0.1-AWQ"],
    "MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ": ["AWQ", "Chat", "mistral-community/Mixtral-8x22B-v0.1-AWQ"],
    "Qwen/Qwen1.5-72B-Chat": [72.0, "Chat", "Qwen/Qwen1.5-72B"],
    "qwen/qwen-110b-chat": [110.0, "Chat", None],
    "gpt-3.5-turbo-1106": ["Proprietary", "Proprietary"],
    "gpt-3.5-turbo-0125": ["Proprietary", "Proprietary"],
    "gpt-4-1106-preview": ["Proprietary", "Proprietary"],
    "gpt-4-0125-preview": ["Proprietary", "Proprietary"],
    "gpt-4-turbo-2024-04-09": ["Proprietary", "Proprietary"],
    "gpt-4o-2024-05-13": ["Proprietary", "Proprietary"],
    "mistral-medium": ["Proprietary", "Proprietary"],
    "mistral-large": ["Proprietary", "Proprietary"],
    "gemini-1.0-pro": ["Proprietary", "Proprietary"],
    "gemini-pro-1.5": ["Proprietary", "Proprietary"],
    "google/gemini-flash-1.5": ["Proprietary", "Proprietary"],
    "claude-3-haiku-20240307": ["Proprietary", "Proprietary"],
    "claude-3-sonnet-20240229": ["Proprietary", "Proprietary"],
    "claude-3-opus-20240229": ["Proprietary", "Proprietary"],
}
|
|
|
|
|
# Lookup from a model's short name (the text after the last "/") to its full
# ORDERED_MODELS entry. Entries without a "/" map to themselves; if two
# entries ever shared a short name, the later one in ORDERED_MODELS would win.
MODEL_SHORT_TO_LONG = {
    full_name.split("/")[-1]: full_name for full_name in ORDERED_MODELS
}
|
|
|
|
|
def get_model_type(model_name: str) -> str:
    """Classify a model as ``"base"``, ``"instruct"`` or ``"api"``.

    The pretrained buckets of ``MODELS`` are searched first, then the
    instruction-tuned buckets, and finally ``API_MODELS``.

    Args:
        model_name: Exact model identifier as registered in this module.

    Returns:
        ``"base"``, ``"instruct"`` or ``"api"``.

    Raises:
        ValueError: If the model is not registered anywhere.
    """
    # Search order matters only if a name were ever listed twice; the first
    # category containing the name wins.
    for category, label in (("pretrained", "base"), ("instruction_tuned", "instruct")):
        if any(model_name in bucket for bucket in MODELS[category].values()):
            return label

    if model_name in API_MODELS:
        return "api"

    raise ValueError(f"Model {model_name} not found in model_list.py")
|
|
|
|
|
def get_open_model_list() -> list:
    """Return every model registered in ``MODELS`` as one flat list.

    Pretrained models come first, followed by instruction-tuned ones; within
    each category the bucket order and in-bucket order of ``MODELS`` are kept.
    A new list is built on every call.
    """
    collected = []
    for category in ("pretrained", "instruction_tuned"):
        for bucket in MODELS[category].values():
            collected.extend(bucket)
    return collected
|
|
|
|
|
def get_all_model_list() -> list:
    """Return all registered models: the open ones, then ``API_MODELS``.

    The open models appear in the order produced by ``get_open_model_list``
    (pretrained, then instruction-tuned). A new list is built on every call.
    """
    # get_open_model_list() hands back a fresh list, so extending it in place
    # does not touch any module-level data.
    everything = get_open_model_list()
    everything.extend(API_MODELS)
    return everything
|
|
|
|
|
def get_pretrained_models() -> list:
    """Return the models under ``MODELS["pretrained"]`` as one flat list, in bucket order."""
    return [
        name
        for bucket in MODELS["pretrained"].values()
        for name in bucket
    ]
|
|
|
|
|
def get_instruct_models() -> list:
    """Return the models under ``MODELS["instruction_tuned"]`` as one flat list, in bucket order."""
    return [
        name
        for bucket in MODELS["instruction_tuned"].values()
        for name in bucket
    ]
|
|
|
|
|
def get_model_params(model_name: str) -> int:
    """Return the upper bound of the size bucket that contains ``model_name``.

    The value is parsed from the bucket key (``"<=14B"`` -> ``14``); it is the
    bucket limit, not the model's own parameter count. Pretrained buckets are
    searched before instruction-tuned ones.

    Args:
        model_name: Exact model identifier as registered in ``MODELS``.

    Raises:
        ValueError: If the model is in neither category of ``MODELS``
            (this includes every entry of ``API_MODELS``).
    """
    for category in ("pretrained", "instruction_tuned"):
        for bucket_key, members in MODELS[category].items():
            if model_name not in members:
                continue
            # "<=14B" -> "<=14" -> "14"
            limit_text = bucket_key.split("B")[0]
            return int(limit_text.replace("<=", ""))

    raise ValueError(f"Model {model_name} not found in model_list.py")
|
|
|
|
|
def get_model_num_gpus(model_name: str) -> int:
    """Return the GPU count configured for the model's size bucket.

    Args:
        model_name: Exact model identifier as registered in ``MODELS``.

    Raises:
        ValueError: Propagated from ``get_model_params`` for unknown models.
        KeyError: If the bucket limit has no entry in the table below.
    """
    bucket_limit = get_model_params(model_name)
    # Keys are the bucket limits returned by get_model_params().
    # NOTE(review): the 175 bucket is assigned fewer GPUs than the 75 bucket,
    # presumably because its checkpoints are quantized (AWQ/GPTQ) - confirm.
    gpus_by_bucket = {4: 1, 7: 1, 14: 2, 50: 4, 75: 8, 175: 4}
    return gpus_by_bucket[bucket_limit]
|
|
|
|
|
def get_not_trained_models() -> list:
    """Return every registered model that is absent from ``bgb_trained_models``.

    The result keeps the order of ``get_all_model_list()`` (open models first,
    then API models). Matching is by exact string equality.
    """
    # Build the lookup once so each membership test is O(1) instead of
    # scanning the whole trained list for every candidate.
    trained = set(bgb_trained_models)
    return [model for model in get_all_model_list() if model not in trained]
|
|
|
|
|
def is_trained_model(model_name: str) -> bool:
    """Return ``True`` when ``model_name`` appears verbatim in ``bgb_trained_models``."""
    listed = model_name in bgb_trained_models
    return listed
|
|
|
|
|
if __name__ == "__main__":
    # Smoke checks for the lookup helpers. The comparison must be spelled
    # with `==`: in `assert value, expected` the second operand is only the
    # failure message, so such a check passes for any truthy value.
    assert get_model_type("microsoft/phi-1") == "base"
    assert get_model_params("microsoft/phi-2") == 4

    models = get_all_model_list()

    # One quoted model name per line; every entry keeps its trailing newline,
    # so print() leaves a blank line after the listing.
    model_list_str = "".join(f'"{model}"\n' for model in models)
    print(model_list_str)

    print(f"{len(models)} models found in src/model_list.py")

    print(get_not_trained_models())
|
|