Commit ffb63fa
a100 kh committed
Parent(s): 22f931e

granite

Files changed:
- api_endpoints.json +13 -0
- local/hf_upload.py +9 -0
- local/local_setup +10 -0
- local/nginx +4 -3
api_endpoints.json CHANGED
@@ -245,5 +245,18 @@
         },
         "text-arena": true,
         "vision-arena": false
+    },
+    "ibm-granite/granite-3.0-8b-instruct-fp8": {
+        "model_name": "ibm-granite/granite-3.0-8b-instruct",
+        "api_type": "openai-custom-tanuki",
+        "api_end": "granite-8B",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
     }
 }
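Note: a minimal sketch of how an entry like this might be consumed, assuming the arena reads api_endpoints.json, takes the API key from the named environment variable, and sends OpenAI-compatible chat requests to the api_end route behind the nginx/ngrok front. The localhost:8765 base URL and the use of the openai client here are assumptions for illustration, not the Space's actual code.

import json
import os

from openai import OpenAI  # assumes the openai>=1.0 client is available

# Load the endpoint registry extended in this commit.
with open("api_endpoints.json") as f:
    endpoints = json.load(f)

cfg = endpoints["ibm-granite/granite-3.0-8b-instruct-fp8"]

# Assumed base URL: the nginx config below proxies /granite-8B/ to vLLM's /v1/.
client = OpenAI(
    base_url=f"http://localhost:8765/{cfg['api_end']}/",  # placeholder host/port
    api_key=os.environ[cfg["env_api_key"]],
)

resp = client.chat.completions.create(
    model=cfg["model_name"],
    messages=[{"role": "user", "content": "Hello"}],
    temperature=cfg["recommended_config"]["temperature"],
    top_p=cfg["recommended_config"]["top_p"],
)
print(resp.choices[0].message.content)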
local/hf_upload.py CHANGED
@@ -21,6 +21,15 @@ data["full"]["elo_rating_final"]
 
 df2 = data["full"]["leaderboard_table_df"]
 df2 = df2.sort_values("rating", ascending=False)
+
+# drop rows whose index name appears in drop_list
+drop_list = [
+    "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0",
+    "Llama-3.1-Swallow-70B-v0.1.Q8_0.gguf",
+]
+
+df2 = df2.reset_index()
+df2 = df2[~df2["index"].isin(drop_list)]
 ds2 = datasets.Dataset.from_pandas(df2)
 ds2.push_to_hub("kanhatakeyama/chatbot-arena-ja-elo-rating")
 
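For reference, a self-contained sketch of what the new filtering step does, on made-up toy data:

import pandas as pd

# Toy leaderboard indexed by model name, mirroring leaderboard_table_df.
df2 = pd.DataFrame(
    {"rating": [1200.0, 1150.0, 1100.0]},
    index=["model-a", "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0", "model-b"],
)
df2 = df2.sort_values("rating", ascending=False)

drop_list = [
    "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0",
    "Llama-3.1-Swallow-70B-v0.1.Q8_0.gguf",
]

# reset_index() moves the model names into an "index" column,
# so the rows can be filtered by name before the upload.
df2 = df2.reset_index()
df2 = df2[~df2["index"].isin(drop_list)]
print(df2)  # only model-a and model-b remain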
local/local_setup CHANGED
@@ -39,6 +39,16 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
 export CUDA_VISIBLE_DEVICES=1
 python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
 
+###################
+#server2
+export CUDA_VISIBLE_DEVICES=0
+python -m vllm.entrypoints.openai.api_server --model ibm-granite/granite-3.0-8b-instruct --max-model-len 4096 \
+    --port 8020 --gpu-memory-utilization 0.4 --trust-remote-code \
+    --quantization fp8 \
+    --api-key $VLLM_API_KEY
+
+
+
 #########################
 #launch ngrok
 ngrok http http://localhost:8765
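A quick smoke test against the new server's OpenAI-compatible API might look like the following, hitting the port from the command above; the check itself is illustrative and not part of the repo.

import os
import requests

# vLLM serves an OpenAI-compatible API under /v1 on the port passed above;
# listing the models is a cheap way to confirm the server came up.
resp = requests.get(
    "http://localhost:8020/v1/models",
    headers={"Authorization": f"Bearer {os.environ['VLLM_API_KEY']}"},
    timeout=30,
)
resp.raise_for_status()
print([m["id"] for m in resp.json()["data"]])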
local/nginx CHANGED
@@ -1,6 +1,6 @@
-
-
-
+sudo vi /etc/nginx/sites-available/default
+sudo systemctl stop nginx
+sudo systemctl restart nginx
 
 
 server {
@@ -11,5 +11,6 @@ location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
 location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
 location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
 location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
+location /granite-8B/ {proxy_pass http://192.168.11.2:8020/v1/;}
 }
 
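With the new location block, a request to the /granite-8B/ prefix on the nginx host should be forwarded to vLLM's /v1/ API at 192.168.11.2:8020. An illustrative check, assuming nginx listens on port 8765 (the port ngrok fronts above):

import os
import requests

# /granite-8B/chat/completions on the nginx host should map to
# http://192.168.11.2:8020/v1/chat/completions per the location block above.
resp = requests.post(
    "http://localhost:8765/granite-8B/chat/completions",  # 8765 is an assumed nginx port
    headers={"Authorization": f"Bearer {os.environ['VLLM_API_KEY']}"},
    json={
        "model": "ibm-granite/granite-3.0-8b-instruct",
        "messages": [{"role": "user", "content": "ping"}],
        "max_tokens": 16,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])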