jsulz (HF staff) committed
Commit 3522965 · 1 Parent(s): a57b3d3

minor text tweaks

Files changed (1):
  app.py +12 -2
app.py CHANGED
@@ -209,7 +209,7 @@ with gr.Blocks() as demo:
         with gr.Column(scale=1):
             gr.Markdown("# Repository Growth")
             gr.Markdown(
-                "The cumulative growth of models, spaces, and datasets over time can be seen in the adjacent chart. Beside that is a view of the total change, month to month, of LFS files stored on the hub over 2024. We're averaging nearly 2.3 PBs uploaded to LFS per month!"
+                "The cumulative growth of models, spaces, and datasets over time can be seen in the adjacent chart. Beside that is a view of the total change, month to month, of LFS files stored on the hub over 2024. We're averaging nearly **2.3 PBs uploaded to LFS per month!**"
             )
             gr.Dataframe(last_10_months, height=250)
         with gr.Column(scale=3):
@@ -217,7 +217,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown(
-                "This table shows the total number of files and cumulative size of those files across all repositories on the Hub. These numbers might be hard to grok, so let's try to put them in context. The last [Common Crawl](https://commoncrawl.org/) download was [451 TBs](https://github.com/commoncrawl/cc-crawl-statistics/blob/master/stats/crawler/CC-MAIN-2024-38.json#L31). The Spaces repositories alone outpaces that. Meanwhile, between Datasets and Model repos, the Hub stores 64 Common Crawls."
+                "This table shows the total number of files and cumulative size of those files across all repositories on the Hub. These numbers might be hard to grok, so let's try to put them in context. The last [Common Crawl](https://commoncrawl.org/) download was [451 TBs](https://github.com/commoncrawl/cc-crawl-statistics/blob/master/stats/crawler/CC-MAIN-2024-38.json#L31). The Spaces repositories alone outpaces that. Meanwhile, between Datasets and Model repos, the Hub stores **64 Common Crawls** 🤯."
             )
         with gr.Column(scale=3):
             gr.Dataframe(by_type)
@@ -236,6 +236,16 @@ with gr.Blocks() as demo:
     gr.Plot(plot_total_sum(by_extension_size[["extension", "size"]].values))
     # drop the unnamed: 0 column
     by_extension_size = by_extension_size.drop(columns=["Unnamed: 0"])
+    # average size
+    by_extension_size["Average File Size (MBs)"] = (
+        by_extension_size["size"].astype(float) / by_extension_size["count"]
+    )
+    by_extension_size["Average File Size (MBs)"] = (
+        by_extension_size["Average File Size (MBs)"] / 1e6
+    )
+    by_extension_size["Average File Size (MBs)"] = by_extension_size[
+        "Average File Size (MBs)"
+    ].map("{:.2f}".format)
     # format the size column
     by_extension_size = format_dataframe_size_column(by_extension_size, "size")
     # Rename the other columns