qiantong-xu committed on
Commit
3176152
·
1 Parent(s): 9dfb4f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -46
app.py CHANGED
@@ -3,52 +3,54 @@ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissi
3
 
4
  import gradio as gr
5
  import pandas as pd
6
- from huggingface_hub import HfApi, repocard
7
 
8
def is_duplicated(space_id: str) -> bool:
    """Return True when the Space's repo card declares a ``duplicated_from`` value.

    Args:
        space_id: Identifier passed to ``RepoCard.load`` with ``repo_type="space"``.

    Returns:
        True if ``card.data.duplicated_from`` exists and is not None,
        False otherwise (including when the attribute is absent).
    """
    card = repocard.RepoCard.load(space_id, repo_type="space")
    # getattr with a default: cards without the field count as "not duplicated".
    return getattr(card.data, "duplicated_from", None) is not None
11
 
12
-
13
-
14
def make_clickable_model(model_name, link=None):
    """Return an HTML anchor that opens ``link`` in a new tab.

    Args:
        model_name: Repository id such as ``"user/space"``; only the part after
            the last ``/`` is used as the visible link text.
        link: Target URL. When None, it defaults to
            ``https://huggingface.co/spaces/<model_name>``.

    Returns:
        A string of the form ``<a target="_blank" href="...">name</a>``.
    """
    import html  # local import keeps this block self-contained

    if link is None:
        link = "https://huggingface.co/" + "spaces/" + model_name
    # Escape both pieces so a quote or angle bracket in the id/URL cannot
    # break out of the attribute or inject markup.
    href = html.escape(link, quote=True)
    label = html.escape(model_name.split("/")[-1])
    return f'<a target="_blank" href="{href}">{label}</a>'
18
-
19
def get_space_ids():
    """Return the Spaces matching the ``making-demos`` filter as a list.

    Despite the name, the elements are whatever objects ``HfApi.list_spaces``
    yields (the caller in this file reads ``.id`` and ``.likes`` from them),
    not bare id strings.

    Returns:
        A list materialised from ``HfApi().list_spaces(filter="making-demos")``.
    """
    api = HfApi()
    # list() materialises the result so callers can iterate it more than once;
    # the former debug print of the raw result was dropped.
    return list(api.list_spaces(filter="making-demos"))
25
-
26
-
27
def make_clickable_user(user_id):
    """Return an HTML anchor pointing at the user's Hugging Face profile.

    Args:
        user_id: Account name; used both in the URL path and as the link text.

    Returns:
        A string of the form
        ``<a target="_blank" href="https://huggingface.co/<user_id>">user_id</a>``.
    """
    import html  # local import keeps this block self-contained

    link = "https://huggingface.co/" + user_id
    # Escape so unexpected characters in the id cannot inject markup.
    href = html.escape(link, quote=True)
    label = html.escape(user_id)
    return f'<a target="_blank" href="{href}">{label}</a>'
30
-
31
def get_submissions():
    """Build the leaderboard table of non-duplicated Spaces ranked by likes.

    Returns:
        A DataFrame with columns ``Rank``, ``User``, ``Space`` and ``Likes``,
        sorted by ``Likes`` in descending order; ``Rank`` counts up from 1.
    """
    # One (user link, space link, likes) tuple per Space that is not a duplicate.
    rows = [
        (
            make_clickable_user(space.id.split("/")[0]),
            make_clickable_model(space.id),
            space.likes,
        )
        for space in get_space_ids()
        if not is_duplicated(space.id)
    ]

    table = pd.DataFrame(data=rows, columns=["User", "Space", "Likes"])
    table = table.sort_values(by=["Likes"], ascending=False)
    # Rank is positional, so it is added after sorting.
    table.insert(0, "Rank", list(range(1, len(table) + 1)))
    return table
51
 
 
52
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
53
  CITATION_BUTTON_TEXT = r"""@misc{xu2023tool,
54
  title={On the Tool Manipulation Capability of Open-source Large Language Models},
@@ -84,14 +86,14 @@ with block:
84
 
85
  with gr.Row():
86
  data = gr.components.Dataframe(
87
- type="pandas", datatype=["number", "markdown", "markdown", "number"]
88
  )
89
- with gr.Row():
90
  data_run = gr.Button("Refresh")
 
91
  data_run.click(
92
- get_submissions, outputs=data
93
  )
94
 
95
- block.load(get_submissions, outputs=data)
96
 
97
  block.launch()
 
3
 
4
  import gradio as gr
5
  import pandas as pd
 
6
 
 
 
 
7
 
8
# Raw leaderboard rows, one model per line. Each line is a markdown link for
# the model followed by nine result fields, separated by "&" and terminated by
# a backslash (the "\\" escapes below each produce a single backslash).
# NOTE(review): the meaning of each result column is not visible here — it is
# given by `column_names`, defined elsewhere; confirm the order matches.
BENCHMARK_RESULTS = '''[gpt4](https://platform.openai.com/docs/models/gpt-4) & 93.0 & 96.0 & 97.0 & 96.7 & 62.9 & 23.0 / 23.5 & 0.0 & 0.0 & 81.0 \\
[text-davinci-003](https://platform.openai.com/docs/models/gpt-3) & 99.0 & 98.0 & 97.0 & 89.2 & 62.9 & 31.0 / 25.1 & 0.0 & 0.0 & 66.7 \\
[gpt-3.5-turbo](https://platform.openai.com/docs/models/gpt-3-5) & 90.0 & 92.0 & 80.0 & 85.8 & 51.4 & 20.0 / 18.9 & 0.0 & 1.8 & 33.3 \\
[text-curie-001](https://platform.openai.com/docs/models/gpt-3) & 8.0 & 58.0 & 6.0 & 6.7 & 1.4 & 12.0 / 4.1 & 0.0 & 0.0 & 1.0 \\
[llama-65b](https://huggingface.co/huggyllama/llama-65b) & 90.0 & 80.0 & 84.0 & 65.8 & 32.9 & 32.0 / 20.3 & 0.0 & 41.2 & 30.5 \\
[llama-30b](https://huggingface.co/huggyllama/llama-30b) & 78.0 & 84.0 & 66.0 & 45.0 & 37.1 & 27.0 / 21.7 & 0.0 & 30.6 & 34.3 \\
[llama-13b](https://huggingface.co/huggyllama/llama-13b) & 70.0 & 74.0 & 45.0 & 35.8 & 5.7 & 28.0 / 18.9 & 0.0 & 27.6 & 17.1 \\
[llama-13b-alpaca](https://huggingface.co/chavinlo/gpt4-x-alpaca) & 62.0 & 43.0 & 44.0 & 40.8 & 11.4 & 1.0 / 1.6 & 0.0 & 2.7 & 9.5 \\
[starcoder](https://huggingface.co/bigcode/starcoder) & 91.0 & 84.0 & 82.0 & 51.7 & 48.0 & 23.0 / 19.4 & 2.6 & 0.0 & 21.9 \\
[starcoderbase](https://huggingface.co/bigcode/starcoderbase) & 90.0 & 86.0 & 79.0 & 63.3 & 42.9 & 24.0 / 16.3 & 5.8 & 23.1 & 17.1 \\
[codegen-16B-nl](https://huggingface.co/Salesforce/codegen-16B-nl) & 51.0 & 75.0 & 37.0 & 21.7 & 7.1 & 43.0 / 18.0 & 0.0 & 0.0 & 16.2 \\
[codegen-16B-multi](https://huggingface.co/Salesforce/codegen-16B-multi) & 56.0 & 75.0 & 47.0 & 7.5 & 21.4 & 31.0 / 14.1 & 0.0 & 0.5 & 8.6 \\
[codegen-16B-mono](https://huggingface.co/Salesforce/codegen-16B-mono) & 63.7 & 72.0 & 52.0 & 28.3 & 31.5 & 28.0 / 15.7 & 1.5 & 6.6 & 15.2 \\
[bloomz](https://huggingface.co/bigscience/bloomz) & 58.0 & 85.0 & 36.0 & 22.5 & 14.3 & 9.0 / 4.9 & 0.0 & 1.0 & 1.0 \\
[opt-iml-30b](https://huggingface.co/facebook/opt-iml-30b) & 44.0 & 48.0 & 5.0 & 3.3 & 2.9 & 13.0 / 8.3 & 0.0 & 0.0 & 1.0 \\
[opt-30b](https://huggingface.co/facebook/opt-30b) & 46.0 & 35.0 & 2.0 & 3.3 & 8.6 & 24.0 / 11.7 & 0.0 & 0.0 & 1.0 \\
[opt-iml-1.3b](https://huggingface.co/facebook/opt-iml-1.3b) & 20.0 & 28.0 & 0.0 & 0.0 & 4.3 & 13.0 / 3.1 & 0.0 & 0.0 & 1.0 \\
[opt-1.3b](https://huggingface.co/facebook/opt-1.3b) & 18.0 & 30.0 & 0.0 & 0.0 & 1.4 & 31.0 / 9.7 & 0.0 & 0.0 & 1.0 \\
[neox-20b](https://huggingface.co/EleutherAI/gpt-neox-20b) & 55.0 & 69.0 & 27.0 & 10.8 & 18.6 & 28.0 / 15.3 & 0.0 & 8.8 & 6.7 \\
[GPT-NeoXT-Chat-Base-20B](https://huggingface.co/togethercomputer/GPT-NeoXT-Chat-Base-20B) & 43.0 & 73.0 & 28.0 & 10.8 & 4.3 & 26.0 / 13.1 & 0.0 & 0.7 & 7.6 \\
[pythia-12b](https://huggingface.co/EleutherAI/pythia-12b) & 53.0 & 65.0 & 12.0 & 0.8 & 11.4 & 17.0 / 12.1 & 0.0 & 0.0 & 1.9 \\
[dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b) & 0.0 & 1.0 & 10.0 & 5.0 & 7.1 & 11.0 / 8.9 & 0.0 & 0.0 & 7.6 \\
[pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b) & 41.0 & 72.0 & 8.0 & 7.5 & 4.3 & 29.0 / 14.0 & 0.0 & 0.0 & 8.6 \\
[pythia-2.8b](https://huggingface.co/EleutherAI/pythia-2.8b) & 49.0 & 54.0 & 7.0 & 3.3 & 12.9 & 24.0 / 14.8 & 0.0 & 0.0 & 7.6 \\
[pythia-1.4b](https://huggingface.co/EleutherAI/pythia-1.4b) & 37.0 & 48.0 & 4.0 & 5.0 & 10.0 & 22.0 / 10.7 & 0.0 & 5.2 & 7.6 \\
[stablelm-base-alpha-7b](https://huggingface.co/stabilityai/stablelm-base-alpha-7b) & 22.0 & 47.0 & 0.0 & 0.0 & 4.3 & 28.0 / 10.3 & 0.0 & 0.0 & 2.9 \\
[stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b) & 23.0 & 38.0 & 0.0 & 0.0 & 1.4 & 26.0 / 7.3 & 0.0 & 0.0 & 3.8 \\
[stablelm-base-alpha-3b](https://huggingface.co/stabilityai/stablelm-base-alpha-3b) & 6.0 & 28.0 & 0.0 & 0.0 & 1.4 & 29.0 / 5.3 & 0.0 & 0.0 & 1.0 \\
[stablelm-tuned-alpha-3b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-3b) & 14.0 & 31.0 & 0.0 & 0.8 & 0.0 & 8.0 / 5.6 & 0.0 & 0.0 & 1.0 \\
[llama-30b-toolbench](https://huggingface.co/sambanovasystems/LLaMA-30b-toolbench) & 100.0 & 94.0 & 87.0 & 85.8 & 2.9 & 16.0/ 24.3& 0.0 & 0.0 & 7.5 \\
[starcoder-toolbench](https://huggingface.co/sambanovasystems/starcoder-toolbench) & 99.0 & 97.0 & 83.0 & 80.8 & 21.2 & 31.0/ 18.4& 0.0 & 0.0 & 13.9 \\
[codegen-16B-mono-toolbench](https://huggingface.co/sambanovasystems/codegen-16B-mono-toolbench) & 97.7 & 99.0 & 82.0 & 77.5 & 19.8 & 29.0/ 17.2& 0.0 & 3.5 & 16.2 \\'''
40
+
41
+
42
def get_baseline_df():
    """Parse ``BENCHMARK_RESULTS`` into a DataFrame, one row per model.

    Every non-blank line of ``BENCHMARK_RESULTS`` is stripped of spaces and of
    leading/trailing backslashes, then split on ``&`` into exactly 10 string
    fields. Fields are kept as strings; no numeric conversion is done.

    Returns:
        A DataFrame built from the parsed rows with ``column_names`` as header.

    Raises:
        ValueError: If a line does not split into exactly 10 fields.
    """
    # NOTE(review): `column_names` is not defined in the visible part of the
    # file — confirm it exists at module level with 10 entries.
    expected_fields = 10
    rows = []
    for line_no, line in enumerate(BENCHMARK_RESULTS.split("\n"), start=1):
        # Spaces are only padding; the backslash is the end-of-row marker.
        fields = line.replace(" ", "").strip("\\").split("&")
        if fields == [""]:
            # Tolerate blank lines (e.g. a trailing newline in the literal).
            continue
        if len(fields) != expected_fields:
            # Explicit raise instead of assert: asserts vanish under `python -O`.
            raise ValueError(
                f"BENCHMARK_RESULTS line {line_no}: expected "
                f"{expected_fields} fields, got {len(fields)}"
            )
        rows.append(fields)
    return pd.DataFrame(rows, columns=column_names)
52
 
53
+
54
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
55
  CITATION_BUTTON_TEXT = r"""@misc{xu2023tool,
56
  title={On the Tool Manipulation Capability of Open-source Large Language Models},
 
86
 
87
  with gr.Row():
88
  data = gr.components.Dataframe(
89
+ type="pandas", datatype=["markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
90
  )
 
91
  data_run = gr.Button("Refresh")
92
+
93
  data_run.click(
94
+ get_baseline_df, outputs=data
95
  )
96
 
97
+ block.load(get_baseline_df, outputs=data)
98
 
99
  block.launch()