Weyaxi committed on
Commit
fb3b924
·
1 Parent(s): 3f4ce84

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -0
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import requests
4
+ import pandas as pd
5
+ from tqdm import tqdm
6
+ from bs4 import BeautifulSoup
7
+ from huggingface_hub import HfApi, list_models, list_datasets, list_spaces
8
+ import gradio as gr
9
+
10
+ api = HfApi()
11
+
12
def get_models(org_name, which_one):
    """Collect id, download and like counts for every repo an author owns.

    Args:
        org_name: Name passed as ``author`` to the Hub listing call.
        which_one: Repo kind to list: ``"models"``, ``"datasets"`` or
            ``"spaces"``.

    Returns:
        A ``pandas.DataFrame`` with one row per listed repo and the columns
        ``id``, ``downloads`` and ``likes``. ``downloads`` is always 0 for
        spaces. The frame has zero rows when nothing is listed.

    Raises:
        ValueError: If ``which_one`` is not one of the three supported kinds.
    """
    if which_one == "models":
        things = api.list_models(author=org_name)
    elif which_one == "datasets":
        things = api.list_datasets(author=org_name)
    elif which_one == "spaces":
        things = api.list_spaces(author=org_name)
    else:
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(
            f"which_one must be 'models', 'datasets' or 'spaces', got {which_one!r}"
        )

    all_list = []
    for thing in things:
        # Missing or None counts fall back to 0 so later sums stay numeric.
        if which_one == "spaces":
            downloads = 0
        else:
            downloads = getattr(thing, "downloads", 0) or 0
        likes = getattr(thing, "likes", 0) or 0
        all_list.append({"id": thing.id, "downloads": downloads, "likes": likes})

    # Fixed columns keep the frame's shape predictable even with no rows.
    return pd.DataFrame(all_list, columns=["id", "downloads", "likes"])
31
+
32
def get_most(df_for_most_function):
    """Return the most-downloaded and the most-liked row of a repo frame.

    Args:
        df_for_most_function: DataFrame with ``id``, ``downloads`` and
            ``likes`` columns and at least one row.

    Returns:
        A dict with the keys ``"Most Download"`` and ``"Most Likes"``, each
        mapping to a dict holding that row's ``id``, ``downloads`` and
        ``likes``.
    """
    def _top_row_by(column):
        # Sort descending on the column and keep the first row.
        ranked = df_for_most_function.sort_values(by=[column], ascending=False)
        top = ranked.iloc[0]
        return {"id": top['id'], "downloads": top['downloads'], "likes": top['likes']}

    return {
        "Most Download": _top_row_by('downloads'),
        "Most Likes": _top_row_by('likes'),
    }
40
+
41
def get_sum(df_for_sum_function):
    """Total the ``downloads`` and ``likes`` columns of a repo frame.

    Args:
        df_for_sum_function: DataFrame with ``downloads`` and ``likes``
            columns.

    Returns:
        A dict with the keys ``"Downloads"`` and ``"Likes"`` holding the
        column totals.
    """
    totals = {}
    for label, column in (("Downloads", 'downloads'), ("Likes", 'likes')):
        values = df_for_sum_function[column].tolist()
        totals[label] = sum(values)
    return totals
46
+
47
def get_openllm_leaderboard():
    """Scrape the Open LLM Leaderboard Space and return its model names in order.

    The page is fetched over HTTP, the JSON config embedded in its second
    ``<script>`` element is parsed, and an ``owner/name`` id is extracted
    from the link found in column 1 of each table row.

    Returns:
        A list of ``"owner/name"`` strings in table order. Extraction stops
        at the first row that is too short or contains no ``href``, so the
        list may be a prefix of the table. An empty list is returned when
        the expected component index does not exist.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        IndexError: If the page has fewer than two ``<script>`` elements.
    """
    url = 'https://huggingfaceh4-open-llm-leaderboard.hf.space/'
    # A timeout keeps a stalled Space from hanging start-up indefinitely.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    script_elements = soup.find_all('script')
    # Fixed-width slice around the JSON payload; presumably it removes
    # `<script>window.gradio_config = ` (31 chars) and `;</script>` (10 chars)
    # -- TODO confirm against the live page.
    data = json.loads(str(script_elements[1])[31:-10])

    component_index = 11  # position of the leaderboard table in the config
    name_column = 1       # row cell that holds the model link HTML
    href_pattern = re.compile(r'href="([^"]*)"')

    try:
        rows = data['components'][component_index]['props']['value']['data']
    except IndexError:
        return []

    result_list = []
    for row in rows:
        try:
            unfiltered = row[name_column].rstrip("\n")
            link = href_pattern.search(unfiltered).group(1)
        except (IndexError, AttributeError):
            # Stop at the first unparseable row so list positions stay
            # aligned with table rows.
            break
        # Keep only the trailing "owner/name" part of the link.
        result_list.append("/".join(link.split("/")[-2:]))
    return result_list
69
+
70
def get_ranking(model_list, target_org):
    """Find the first model in a ranked list that belongs to an organization.

    Args:
        model_list: Ranked iterable of ``"owner/name"`` strings, best first.
        target_org: Organization name, compared case-insensitively with the
            part of each entry before the first ``/``.

    Returns:
        ``[rank, model]`` with a 1-based rank for the first match, or the
        string ``"Not Found"`` when no entry matches.
    """
    matches = (
        [rank, name]
        for rank, name in enumerate(model_list, start=1)
        if name.split("/")[0].lower() == target_org.lower()
    )
    # The generator is lazy, so scanning stops at the first hit.
    return next(matches, "Not Found")
75
+
76
def make_leaderboard(orgs, which_one):
    """Build the per-organization leaderboard table for one repo kind.

    Args:
        orgs: Iterable of organization names.
        which_one: ``"models"``, ``"datasets"`` or ``"spaces"``; selects both
            the Hub listing used and the set of output columns.

    Returns:
        A ``pandas.DataFrame`` with one row per organization that has at
        least one repo of the requested kind, preceded by a 1-based
        ``"Serial Number"`` column. Organizations with no repos are skipped.
    """
    # The Open LLM Leaderboard is only consulted for the models table.
    llm_board = get_openllm_leaderboard() if which_one == "models" else None
    rows = []

    progress = tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True)
    for org in progress:
        org_df = get_models(org, which_one)
        count = len(org_df)
        if count == 0:
            continue

        totals = get_sum(org_df)
        best = get_most(org_df)

        def per_item(total):
            # count is at least 1 here because empty frames were skipped.
            return int(total / count)

        if which_one == "models":
            ranking = get_ranking(llm_board, org)
            if ranking == "Not Found":
                # Both leaderboard columns carry the "Not Found" marker.
                best_rank = best_model = ranking
            else:
                best_rank, best_model = ranking[0], ranking[1]
            row = {
                "Organization Name": org,
                "Total Downloads": totals["Downloads"],
                "Total Likes": totals["Likes"],
                "Number of Models": count,
                "Best Model On Open LLM Leaderboard": best_model,
                "Best Rank On Open LLM Leaderboard": best_rank,
                "Average Downloads per Model": per_item(totals["Downloads"]),
                "Average Likes per Model": per_item(totals["Likes"]),
                "Most Downloaded Model": best["Most Download"]["id"],
                "Most Download Count": best["Most Download"]["downloads"],
                "Most Liked Model": best["Most Likes"]["id"],
                "Most Like Count": best["Most Likes"]["likes"],
            }
        elif which_one == "datasets":
            row = {
                "Organization Name": org,
                "Total Downloads": totals["Downloads"],
                "Total Likes": totals["Likes"],
                "Number of Datasets": count,
                "Average Downloads per Dataset": per_item(totals["Downloads"]),
                "Average Likes per Dataset": per_item(totals["Likes"]),
                "Most Downloaded Dataset": best["Most Download"]["id"],
                "Most Download Count": best["Most Download"]["downloads"],
                "Most Liked Dataset": best["Most Likes"]["id"],
                "Most Like Count": best["Most Likes"]["likes"],
            }
        elif which_one == "spaces":
            # Spaces have no download figures, so only like-based columns appear.
            row = {
                "Organization Name": org,
                "Total Likes": totals["Likes"],
                "Number of Spaces": count,
                "Average Likes per Space": per_item(totals["Likes"]),
                "Most Liked Space": best["Most Likes"]["id"],
                "Most Like Count": best["Most Likes"]["likes"],
            }
        else:
            continue

        rows.append(row)

    leaderboard = pd.DataFrame(rows)
    leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
    return leaderboard
131
+
132
+ """# Start Gradio
133
+
134
+ """
135
+
136
# Organization names to rank, read one per line from org_names.txt.
with open("org_names.txt", "r", encoding="utf-8") as f:
    # Strip surrounding whitespace (including "\r" from CRLF files) and drop
    # blank lines so an empty string is never used as an organization name.
    org_names_in_list = [line.strip() for line in f if line.strip()]
138
+
139
+
140
# Markdown shown at the top of the page. A plain string is enough because the
# text contains no placeholders.
INTRODUCTION_TEXT = """
🎯 The Organization Leaderboard aims to track organizations ranking. This space is inspired by [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)

## Dataframes Available:

- 🏛️ Models

- 📊 Datasets

- 🚀 Spaces

## Backend

🛠️ The leaderboard's backend mainly runs the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).

🛠️ Organization names are being retrieved using web scraping ([Hugging Face Organizations](https://huggingface.co/organizations))

**🌐 Note:** In the models dataframe there are some columns related to [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved with web scraping.

"""
160
+
161
def clickable(x, which_one):
    """Wrap a Hub name in an HTML link to its huggingface.co page.

    Args:
        x: Name or repo id to link.
        which_one: ``"models"`` links to ``https://huggingface.co/<x>``; any
            other value links to ``https://huggingface.co/<which_one>/<x>``.

    Returns:
        The anchor HTML string, or ``"Not Found"`` unchanged when
        ``which_one`` is ``"models"`` and ``x`` is ``"Not Found"``.
    """
    if which_one == "models":
        if x == "Not Found":
            return "Not Found"
        path = x
    else:
        path = f"{which_one}/{x}"
    return f'<a target="_blank" href="https://huggingface.co/{path}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'


def models_df_to_clickable(df, columns, which_one):
    """Turn the given DataFrame columns into HTML links, in place.

    Args:
        df: Leaderboard DataFrame; modified in place and also returned.
        columns: Names of the columns whose values should become links.
        which_one: Repo kind used for the link prefix of every column except
            ``"Organization Name"``.

    Returns:
        The same ``df`` object with the listed columns converted.
    """
    for column in columns:
        # Each column is converted exactly once. "Organization Name" uses the
        # "models" form, which links to https://huggingface.co/<name> without
        # a kind prefix. Converting it a second time would embed an anchor
        # inside another anchor's href.
        kind = "models" if column == "Organization Name" else which_one
        df[column] = df[column].apply(lambda x, kind=kind: clickable(x, kind))
    return df
176
+
177
# Build the three-tab leaderboard page. Each table is computed while the UI is
# being constructed, i.e. once at start-up. The `with` statement below binds
# `demo`, so no separate `gr.Blocks()` instance is created beforehand.
with gr.Blocks() as demo:
    gr.Markdown("""<h1 align="center" id="space-title">🤗 Organization Leaderboard</h1>""")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.TabItem("🏛️ Models", id=1):

        columns_to_convert = [
            "Organization Name",
            "Best Model On Open LLM Leaderboard",
            "Most Downloaded Model",
            "Most Liked Model",
        ]
        models_df = make_leaderboard(org_names_in_list, "models")
        models_df = models_df_to_clickable(models_df, columns_to_convert, "models")

        headers = [
            "🔢 Serial Number",
            "🏢 Organization Name",
            "📥 Total Downloads",
            "👍 Total Likes",
            "🤖 Number of Models",
            "🏆 Best Model On Open LLM Leaderboard",
            "🥇 Best Rank On Open LLM Leaderboard",
            "📊 Average Downloads per Model",
            "📈 Average Likes per Model",
            "🚀 Most Downloaded Model",
            "📈 Most Download Count",
            "❤️ Most Liked Model",
            "👍 Most Like Count",
        ]
        # Link columns are rendered as markdown; everything else as plain text.
        # NOTE(review): this is the only table with interactive=True; the
        # datasets and spaces tables use False -- confirm this is intended.
        gr.Dataframe(
            models_df,
            headers=headers,
            interactive=True,
            datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"],
        )

    with gr.TabItem("📊 Dataset", id=2):
        columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset"]
        dataset_df = make_leaderboard(org_names_in_list, "datasets")
        dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")

        headers = [
            "🔢 Serial Number",
            "🏢 Organization Name",
            "📥 Total Downloads",
            "👍 Total Likes",
            "📊 Number of Datasets",
            "📊 Average Downloads per Dataset",
            "📈 Average Likes per Dataset",
            "🚀 Most Downloaded Dataset",
            "📈 Most Download Count",
            "❤️ Most Liked Dataset",
            "👍 Most Like Count",
        ]
        gr.Dataframe(
            dataset_df,
            headers=headers,
            interactive=False,
            datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str"],
        )

    with gr.TabItem("🚀 Spaces", id=3):
        columns_to_convert = ["Organization Name", "Most Liked Space"]

        spaces_df = make_leaderboard(org_names_in_list, "spaces")
        spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")

        headers = [
            "🔢 Serial Number",
            "🏢 Organization Name",
            "👍 Total Likes",
            "🚀 Number of Spaces",
            "📈 Average Likes per Space",
            "❤️ Most Liked Space",
            "👍 Most Like Count",
        ]
        gr.Dataframe(
            spaces_df,
            headers=headers,
            interactive=False,
            datatype=["str", "markdown", "str", "str", "str", "markdown", "str"],
        )

demo.launch()