Weyaxi committed on
Commit
6ad2cd0
·
verified ·
1 Parent(s): 43cdccf
Files changed (1) hide show
  1. app.py +14 -1
app.py CHANGED
@@ -40,12 +40,14 @@ df_author_copy = df.copy()
40
 
41
  df["author"] = df["author"].apply(lambda x: clickable(x))
42
  df['Total Usage'] = df[['models', 'datasets', 'spaces']].sum(axis=1)
43
- df = df[['Serial Number', "author", "Total Usage", "models", "datasets", "spaces"]]
44
  df = df.sort_values(by='Total Usage', ascending=False)
45
 
 
 
46
  naturalsize_columns = ['Total Usage', 'models', 'datasets', 'spaces']
47
  df[naturalsize_columns] = df[naturalsize_columns].applymap(naturalsize)
48
 
 
49
  df['Serial Number'] = [i for i in range(1, len(df)+1)]
50
 
51
  df = apply_headers(df, ["🔢 Serial Number", "👤 Author", "⚡️ Total Usage", "🏛️ Models", "📊 Datasets", "🚀 Spaces"])
@@ -65,6 +67,17 @@ These 125k authors have been selected based on their [🤗 Huggingface Leaderboa
65
 
66
  - 🚀 Top 50k authors in the spaces category
67
 
 
 
 
 
 
 
 
 
 
 
 
68
  """
69
  # Write note maybe?
70
 
 
40
 
41
  df["author"] = df["author"].apply(lambda x: clickable(x))
42
  df['Total Usage'] = df[['models', 'datasets', 'spaces']].sum(axis=1)
 
43
  df = df.sort_values(by='Total Usage', ascending=False)
44
 
45
+ sum_all_author = naturalsize(sum(merged_df['models'].tolist()+merged_df['datasets'].tolist()+merged_df['spaces'].tolist()))
46
+
47
  naturalsize_columns = ['Total Usage', 'models', 'datasets', 'spaces']
48
  df[naturalsize_columns] = df[naturalsize_columns].applymap(naturalsize)
49
 
50
+ df = df[['Serial Number', "author", "Total Usage", "models", "datasets", "spaces"]]
51
  df['Serial Number'] = [i for i in range(1, len(df)+1)]
52
 
53
  df = apply_headers(df, ["🔢 Serial Number", "👤 Author", "⚡️ Total Usage", "🏛️ Models", "📊 Datasets", "🚀 Spaces"])
 
67
 
68
  - 🚀 Top 50k authors in the spaces category
69
 
70
+ ## 📒 Notes
71
+
72
+ Note that these numbers may not be entirely accurate due to the following reasons:
73
+
74
+ - I only calculated the data usage from the main branch and did not include deleted files that cannot be directly seen.
75
+
76
+ - There may be large datasets/models to which I don't have access (either private or gated).
77
+
78
+ # 📶 Total Data Usage From All Authors
79
+
80
+ According to this leaderboard, there is a total of {sum_all_author} of data on this platform.
81
  """
82
  # Write note maybe?
83