a100 kh committed on
Commit
ffb63fa
1 Parent(s): 22f931e
Files changed (4) hide show
  1. api_endpoints.json +13 -0
  2. local/hf_upload.py +9 -0
  3. local/local_setup +10 -0
  4. local/nginx +4 -3
api_endpoints.json CHANGED
@@ -245,5 +245,18 @@
245
  },
246
  "text-arena": true,
247
  "vision-arena": false
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  }
249
  }
 
245
  },
246
  "text-arena": true,
247
  "vision-arena": false
248
+ },
249
+ "ibm-granite/granite-3.0-8b-instruct-fp8": {
250
+ "model_name": "ibm-granite/granite-3.0-8b-instruct",
251
+ "api_type": "openai-custom-tanuki",
252
+ "api_end": "granite-8B",
253
+ "env_api_key": "VLLM_API_KEY",
254
+ "anony_only": false,
255
+ "recommended_config": {
256
+ "temperature": 0.7,
257
+ "top_p": 1.0
258
+ },
259
+ "text-arena": true,
260
+ "vision-arena": false
261
  }
262
  }
local/hf_upload.py CHANGED
@@ -21,6 +21,15 @@ data["full"]["elo_rating_final"]
21
 
22
  df2 = data["full"]["leaderboard_table_df"]
23
  df2 = df2.sort_values("rating", ascending=False)
 
 
 
 
 
 
 
 
 
24
  ds2 = datasets.Dataset.from_pandas(df2)
25
  ds2.push_to_hub("kanhatakeyama/chatbot-arena-ja-elo-rating")
26
 
 
21
 
22
  df2 = data["full"]["leaderboard_table_df"]
23
  df2 = df2.sort_values("rating", ascending=False)
24
+
25
+ # index名がdrop_listに含まれる行を削除
26
+ drop_list = [
27
+ "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0",
28
+ "Llama-3.1-Swallow-70B-v0.1.Q8_0.gguf",
29
+ ]
30
+
31
+ df2 = df2.reset_index()
32
+ df2 = df2[~df2["index"].isin(drop_list)]
33
  ds2 = datasets.Dataset.from_pandas(df2)
34
  ds2.push_to_hub("kanhatakeyama/chatbot-arena-ja-elo-rating")
35
 
local/local_setup CHANGED
@@ -39,6 +39,16 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
39
  export CUDA_VISIBLE_DEVICES=1
40
  python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
41
 
 
 
 
 
 
 
 
 
 
 
42
  #########################
43
  #launch ngrok
44
  ngrok http http://localhost:8765
 
39
  export CUDA_VISIBLE_DEVICES=1
40
  python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
41
 
42
+ ###################
43
+ #server2
44
+ export CUDA_VISIBLE_DEVICES=0
45
+ python -m vllm.entrypoints.openai.api_server --model ibm-granite/granite-3.0-8b-instruct --max-model-len 4096 \
46
+ --port 8020 --gpu-memory-utilization 0.4 --trust-remote-code \
47
+ --quantization fp8 \
48
+ --api-key $VLLM_API_KEY
49
+
50
+
51
+
52
  #########################
53
  #launch ngrok
54
  ngrok http http://localhost:8765
local/nginx CHANGED
@@ -1,6 +1,6 @@
1
- #sudo vi /etc/nginx/sites-available/default
2
- #sudo systemctl stop nginx
3
- #sudo systemctl restart nginx
4
 
5
 
6
  server {
@@ -11,5 +11,6 @@ location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
11
  location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
12
  location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
13
  location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
 
14
  }
15
 
 
1
+ sudo vi /etc/nginx/sites-available/default
2
+ sudo systemctl stop nginx
3
+ sudo systemctl restart nginx
4
 
5
 
6
  server {
 
11
  location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
12
  location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
13
  location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
14
+ location /granite-8B/ {proxy_pass http://192.168.11.2:8020/v1/;}
15
  }
16