Commit ffb63fa
a100 kh committed
Parent(s): 22f931e

granite

Files changed:
- api_endpoints.json +13 -0
- local/hf_upload.py +9 -0
- local/local_setup +10 -0
- local/nginx +4 -3
api_endpoints.json CHANGED
@@ -245,5 +245,18 @@
         },
         "text-arena": true,
         "vision-arena": false
+    },
+    "ibm-granite/granite-3.0-8b-instruct-fp8": {
+        "model_name": "ibm-granite/granite-3.0-8b-instruct",
+        "api_type": "openai-custom-tanuki",
+        "api_end": "granite-8B",
+        "env_api_key": "VLLM_API_KEY",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
     }
 }
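Note: a minimal sketch of how an entry like this might be consumed, assuming the arena reads api_endpoints.json, takes the API key from the named environment variable, and sends OpenAI-compatible chat requests to the api_end route behind the nginx/ngrok front. The localhost:8765 base URL and the use of the openai client here are assumptions for illustration, not the Space's actual code.

import json
import os

from openai import OpenAI  # assumes the openai>=1.0 client is available

# Load the endpoint registry extended in this commit.
with open("api_endpoints.json") as f:
    endpoints = json.load(f)

cfg = endpoints["ibm-granite/granite-3.0-8b-instruct-fp8"]

# Assumed base URL: the nginx config below proxies /granite-8B/ to vLLM's /v1/.
client = OpenAI(
    base_url=f"http://localhost:8765/{cfg['api_end']}/",  # placeholder host/port
    api_key=os.environ[cfg["env_api_key"]],
)

resp = client.chat.completions.create(
    model=cfg["model_name"],
    messages=[{"role": "user", "content": "Hello"}],
    temperature=cfg["recommended_config"]["temperature"],
    top_p=cfg["recommended_config"]["top_p"],
)
print(resp.choices[0].message.content)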
local/hf_upload.py CHANGED
@@ -21,6 +21,15 @@ data["full"]["elo_rating_final"]
 
 df2 = data["full"]["leaderboard_table_df"]
 df2 = df2.sort_values("rating", ascending=False)
+
+# drop rows whose index name appears in drop_list
+drop_list = [
+    "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0",
+    "Llama-3.1-Swallow-70B-v0.1.Q8_0.gguf",
+]
+
+df2 = df2.reset_index()
+df2 = df2[~df2["index"].isin(drop_list)]
 ds2 = datasets.Dataset.from_pandas(df2)
 ds2.push_to_hub("kanhatakeyama/chatbot-arena-ja-elo-rating")
 
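For reference, a self-contained sketch of what the new filtering step does, on made-up toy data:

import pandas as pd

# Toy leaderboard indexed by model name, mirroring leaderboard_table_df.
df2 = pd.DataFrame(
    {"rating": [1200.0, 1150.0, 1100.0]},
    index=["model-a", "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0", "model-b"],
)
df2 = df2.sort_values("rating", ascending=False)

drop_list = [
    "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0",
    "Llama-3.1-Swallow-70B-v0.1.Q8_0.gguf",
]

# reset_index() moves the model names into an "index" column,
# so the rows can be filtered by name before the upload.
df2 = df2.reset_index()
df2 = df2[~df2["index"].isin(drop_list)]
print(df2)  # only model-a and model-b remain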
local/local_setup CHANGED
@@ -39,6 +39,16 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
 export CUDA_VISIBLE_DEVICES=1
 python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
 
+###################
+#server2
+export CUDA_VISIBLE_DEVICES=0
+python -m vllm.entrypoints.openai.api_server --model ibm-granite/granite-3.0-8b-instruct --max-model-len 4096 \
+    --port 8020 --gpu-memory-utilization 0.4 --trust-remote-code \
+    --quantization fp8 \
+    --api-key $VLLM_API_KEY
+
+
+
 #########################
 #launch ngrok
 ngrok http http://localhost:8765
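A quick smoke test against the new server's OpenAI-compatible API might look like the following, hitting the port from the command above; the check itself is illustrative and not part of the repo.

import os
import requests

# vLLM serves an OpenAI-compatible API under /v1 on the port passed above;
# listing the models is a cheap way to confirm the server came up.
resp = requests.get(
    "http://localhost:8020/v1/models",
    headers={"Authorization": f"Bearer {os.environ['VLLM_API_KEY']}"},
    timeout=30,
)
resp.raise_for_status()
print([m["id"] for m in resp.json()["data"]])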
local/nginx CHANGED
@@ -1,6 +1,6 @@
-
-
-
+sudo vi /etc/nginx/sites-available/default
+sudo systemctl stop nginx
+sudo systemctl restart nginx
 
 
 server {
@@ -11,5 +11,6 @@ location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
 location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
 location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
 location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
+location /granite-8B/ {proxy_pass http://192.168.11.2:8020/v1/;}
 }
 
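With the new location block, a request to the /granite-8B/ prefix on the nginx host should be forwarded to vLLM's /v1/ API at 192.168.11.2:8020. An illustrative check, assuming nginx listens on port 8765 (the port ngrok fronts above):

import os
import requests

# /granite-8B/chat/completions on the nginx host should map to
# http://192.168.11.2:8020/v1/chat/completions per the location block above.
resp = requests.post(
    "http://localhost:8765/granite-8B/chat/completions",  # 8765 is an assumed nginx port
    headers={"Authorization": f"Bearer {os.environ['VLLM_API_KEY']}"},
    json={
        "model": "ibm-granite/granite-3.0-8b-instruct",
        "messages": [{"role": "user", "content": "ping"}],
        "max_tokens": 16,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])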