Spaces:

kanhatakeyama
/

chatbotarena-ja

Runtime error

App Files Files Community

a100 kh committed on Oct 22, 2024

Commit

0f859ae

1 Parent(s): cac0300

add local

Browse files

Files changed (6) hide show

api_endpoints copy.json → api_endpoints all.json +10 -10
api_endpoints.json +65 -0
api_endpoints_apis.json +120 -0
local/local_setup +34 -0
local/nginx +14 -0
serve/api_provider.py +12 -5

api_endpoints copy.json → api_endpoints all.json RENAMED Viewed

@@ -120,8 +120,8 @@
     "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0": {
         "model_name": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0",
         "api_type": "openai-llama3.1",
-        "api_base": "http://localhost:8010/v1",
-        "api_key": "12345",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
@@ -133,8 +133,8 @@
     "cyberagent/calm3-22b-chat-BitsAndBytes": {
         "model_name": "cyberagent/calm3-22b-chat",
         "api_type": "openai-custom-calm",
-        "api_base": "http://localhost:8011/v1",
-        "api_key": "12345",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
@@ -146,8 +146,8 @@
     "weblab-GENIAC/Tanuki-8B-dpo-v1.0-BitsAndBytes": {
         "model_name": "weblab-GENIAC/Tanuki-8B-dpo-v1.0",
         "api_type": "openai-custom-tanuki",
-        "api_base": "http://localhost:8012/v1",
-        "api_key": "12345",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
@@ -159,8 +159,8 @@
     "llm-jp-3-13b-instruct-Q8_0.gguf": {
         "model_name": "llm-jp-3-13b-instruct-Q8_0.gguf",
         "api_type": "openai-llmjp3",
-        "api_base": "http://localhost:8016/v1",
-        "api_key": "12345",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
@@ -172,8 +172,8 @@
     "tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1-BitsAndBytes": {
         "model_name": "tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1",
         "api_type": "openai-llama3.1",
-        "api_base": "http://localhost:8019/v1",
-        "api_key": "12345",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,

     "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0": {
         "model_name": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0",
         "api_type": "openai-llama3.1",
+        "api_end": "Swallow-8B",
+        "env_api_key": "VLLM_API_KEY",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
     "cyberagent/calm3-22b-chat-BitsAndBytes": {
         "model_name": "cyberagent/calm3-22b-chat",
         "api_type": "openai-custom-calm",
+        "api_end": "calm3-22b-chat",
+        "env_api_key": "VLLM_API_KEY",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
     "weblab-GENIAC/Tanuki-8B-dpo-v1.0-BitsAndBytes": {
         "model_name": "weblab-GENIAC/Tanuki-8B-dpo-v1.0",
         "api_type": "openai-custom-tanuki",
+        "api_end": "Tanuki-8B-dpo",
+        "env_api_key": "VLLM_API_KEY",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
     "llm-jp-3-13b-instruct-Q8_0.gguf": {
         "model_name": "llm-jp-3-13b-instruct-Q8_0.gguf",
         "api_type": "openai-llmjp3",
+        "api_end": "llm-jp-13b",
+        "env_api_key": "VLLM_API_KEY",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,
     "tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1-BitsAndBytes": {
         "model_name": "tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1",
         "api_type": "openai-llama3.1",
+        "api_end": "swallow70",
+        "env_api_key": "VLLM_API_KEY",
         "anony_only": false,
         "recommended_config": {
             "temperature": 0.7,

api_endpoints.json CHANGED Viewed

@@ -116,5 +116,70 @@
         },
         "text-arena": true,
         "vision-arena": false
     }
 }

         },
         "text-arena": true,
         "vision-arena": false
+    },
+    "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0": {
+        "model_name": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0",
+        "api_type": "openai-llama3.1",
+        "api_end": "Swallow-8B",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "cyberagent/calm3-22b-chat-BitsAndBytes": {
+        "model_name": "cyberagent/calm3-22b-chat",
+        "api_type": "openai-custom-calm",
+        "api_end": "calm3-22b-chat",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "weblab-GENIAC/Tanuki-8B-dpo-v1.0-BitsAndBytes": {
+        "model_name": "weblab-GENIAC/Tanuki-8B-dpo-v1.0",
+        "api_type": "openai-custom-tanuki",
+        "api_end": "Tanuki-8B-dpo",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "llm-jp-3-13b-instruct-Q8_0.gguf": {
+        "model_name": "llm-jp-3-13b-instruct-Q8_0.gguf",
+        "api_type": "openai-llmjp3",
+        "api_end": "llm-jp-13b",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1-BitsAndBytes": {
+        "model_name": "tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1",
+        "api_type": "openai-llama3.1",
+        "api_end": "swallow70",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
     }
 }

api_endpoints_apis.json ADDED Viewed

	@@ -0,0 +1,120 @@

+{
+    "claude-3-5-sonnet-20240620": {
+        "model_name": "claude-3-5-sonnet-20240620",
+        "api_type": "anthropic",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "command-r-plus": {
+        "model_name": "command-r-plus",
+        "api_type": "cohere",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "deepseek-chat": {
+        "model_name": "deepseek-chat",
+        "api_type": "openai-custom-deepinfra",
+        "api_base": "https://api.deepseek.com/v1",
+        "env_api_key": "DEEPSEEK_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "mistral-large-latest": {
+        "model_name": "mistral-large-latest",
+        "api_type": "mistral",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "Qwen/Qwen2.5-72B-Instruct": {
+        "model_name": "Qwen/Qwen2.5-72B-Instruct",
+        "api_type": "openai-custom-deepinfra",
+        "api_base": "https://api.deepinfra.com/v1/openai",
+        "env_api_key": "DEEPINFRA_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "google/gemma-2-27b-it": {
+        "model_name": "google/gemma-2-27b-it",
+        "api_type": "openai-custom-deepinfra",
+        "api_base": "https://api.deepinfra.com/v1/openai",
+        "env_api_key": "DEEPINFRA_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "gemini-1.5-flash-latest": {
+        "model_name": "gemini-1.5-flash-latest",
+        "api_type": "gemini",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "gemini-1.5-pro-latest": {
+        "model_name": "gemini-1.5-pro-latest",
+        "api_type": "gemini",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "gpt-4-turbo-2024-04-09": {
+        "model_name": "gpt-4-turbo-2024-04-09",
+        "api_type": "openai",
+        "api_base": "https://api.openai.com/v1",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
+    "gpt-4o-mini-2024-07-18": {
+        "model_name": "gpt-4o-mini-2024-07-18",
+        "api_type": "openai",
+        "api_base": "https://api.openai.com/v1",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    }
+}

local/local_setup ADDED Viewed

	@@ -0,0 +1,34 @@

+#install nginx
+sudo apt update
+sudo apt install nginx
+#launch local server
+export CUDA_VISIBLE_DEVICES=0
+python -m vllm.entrypoints.openai.api_server --model cyberagent/calm3-22b-chat \
+--max-model-len 4096 --port 8011 \
+--gpu-memory-utilization 0.4 --trust-remote-code \
+--quantization bitsandbytes --load-format bitsandbytes \
+--api-key $VLLM_API_KEY
+#vllm tanuki8
+export CUDA_VISIBLE_DEVICES=0
+python -m vllm.entrypoints.openai.api_server --model weblab-GENIAC/Tanuki-8B-dpo-v1.0 --max-model-len 4096 --port 8012 --gpu-memory-utilization 0.2 --trust-remote-code --quantization bitsandbytes --load-format bitsandbytes --api-key $VLLM_API_KEY
+export CUDA_VISIBLE_DEVICES=0
+#llama.cpp swallow 8b
+../llama-server -m tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0.gguf  --n_gpu_layers 100 --port 8010
+#llmjp13b
+export CUDA_VISIBLE_DEVICES=0
+ ../llama-server -m llm-jp-3-13b-instruct-Q8_0.gguf --n_gpu_layers 100 --port 8016
+#swallow70
+export CUDA_VISIBLE_DEVICES=1
+python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1 --max-model-len 4096 --port 8019 --gpu-memory-utilization 0.6 --trust-remote-code --quantization bitsandbytes --load-format bitsandbytes --api-key $VLLM_API_KEY
+#launch ngrok
+ngrok http http://localhost:8765

local/nginx ADDED Viewed

	@@ -0,0 +1,14 @@

+#sudo vi /etc/nginx/sites-available/default
+#sudo systemctl restart nginx
+server {
+    listen 8765;  # combine all backends onto a single port
+    location /swallow70/ {proxy_pass http://localhost:8019/v1/;}
+    location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
+    location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
+    location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
+    location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
+}

serve/api_provider.py CHANGED Viewed

@@ -54,6 +54,13 @@ def get_api_provider_stream_iter(
         else:
             api_key = os.environ[model_api_dict["env_api_key"]]
         messages = conv.to_openai_api_messages()
         stream_iter = openai_api_stream_iter(
             model_api_dict["model_name"],
@@ -61,7 +68,7 @@ def get_api_provider_stream_iter(
             temperature,
             top_p,
             max_new_tokens,
-            api_base=model_api_dict["api_base"],
             api_key=api_key,
             # api_key=os.environ[model_api_dict["env_api_key"]],
             # api_key=model_api_dict["api_key"],
@@ -77,8 +84,8 @@ def get_api_provider_stream_iter(
             temperature,
             top_p,
             max_new_tokens,
-            api_base=model_api_dict["api_base"],
-            api_key=model_api_dict["api_key"],
             stop="<|im_end|>",
         )
     elif model_api_dict["api_type"] == "openai-llmjp3":
@@ -92,8 +99,8 @@ def get_api_provider_stream_iter(
             temperature,
             top_p,
             max_new_tokens,
-            api_base=model_api_dict["api_base"],
-            api_key=model_api_dict["api_key"],
             stop="<|im_end|>",
         )
     elif model_api_dict["api_type"] == "openai_no_stream":

         else:
             api_key = os.environ[model_api_dict["env_api_key"]]
+        if "api_base" in model_api_dict:
+            api_base = model_api_dict["api_base"]
+        elif "api_end" in model_api_dict:
+            api_base = os.environ["LOCAL_LLM_URL"]
+            end = model_api_dict["api_end"]
+            api_base = f"{api_base}/{end}/"
         messages = conv.to_openai_api_messages()
         stream_iter = openai_api_stream_iter(
             model_api_dict["model_name"],
             temperature,
             top_p,
             max_new_tokens,
+            api_base=api_base,
             api_key=api_key,
             # api_key=os.environ[model_api_dict["env_api_key"]],
             # api_key=model_api_dict["api_key"],
             temperature,
             top_p,
             max_new_tokens,
+            api_base=f"{os.environ["LOCAL_LLM_URL"]}/{model_api_dict["api_end"]}/",
+            api_key=os.environ[model_api_dict["env_api_key"]],
             stop="<|im_end|>",
         )
     elif model_api_dict["api_type"] == "openai-llmjp3":
             temperature,
             top_p,
             max_new_tokens,
+            api_base=f"{os.environ["LOCAL_LLM_URL"]}/{model_api_dict["api_end"]}/",
+            api_key=os.environ[model_api_dict["env_api_key"]],
             stop="<|im_end|>",
         )
     elif model_api_dict["api_type"] == "openai_no_stream":