meg-huggingface committed on
Commit
fff0313
2 Parent(s): a52c513 9bb1a4c

Merge branch 'main' of https://huggingface.co/spaces/huggingface/data-measurements-tool-2 into main

Browse files
data_measurements/streamlit_utils.py CHANGED
@@ -434,10 +434,16 @@ def npmi_show(paired_results):
434
  s.index.name = "word"
435
  npmi_cols = s.filter(like="npmi").columns
436
  count_cols = s.filter(like="count").columns
 
 
 
 
 
 
437
  # TODO: This is very different look than the duplicates table above. Should probably standardize.
438
  cm = sns.palplot(sns.diverging_palette(270, 36, s=99, l=48, n=16))
439
  out_df = (
440
- s.style.background_gradient(subset=npmi_cols, cmap=cm)
441
  .format(subset=npmi_cols, formatter="{:,.3f}")
442
  .format(subset=count_cols, formatter=int)
443
  .set_properties(
 
434
  s.index.name = "word"
435
  npmi_cols = s.filter(like="npmi").columns
436
  count_cols = s.filter(like="count").columns
437
+ if s.shape[0] > 10000:
438
+ bias_thres = max(abs(s["npmi-bias"][5000]), abs(s["npmi-bias"][-5000]))
439
+ print(f"filtering with bias threshold: {bias_thres}")
440
+ s_filtered = s[s["npmi-bias"].abs() > bias_thres]
441
+ else:
442
+ s_filtered = s
443
  # TODO: This is very different look than the duplicates table above. Should probably standardize.
444
  cm = sns.palplot(sns.diverging_palette(270, 36, s=99, l=48, n=16))
445
  out_df = (
446
+ s_filtered.style.background_gradient(subset=npmi_cols, cmap=cm)
447
  .format(subset=npmi_cols, formatter="{:,.3f}")
448
  .format(subset=count_cols, formatter=int)
449
  .set_properties(
requirements.txt CHANGED
@@ -10,7 +10,7 @@ iso_639==0.4.5
10
  datasets==1.15.1
11
  powerlaw==1.5
12
  numpy==1.19.5
13
- pandas==1.3.0
14
  dataclasses==0.6
15
  iso639==0.1.4
16
  python_igraph==0.9.6
@@ -23,4 +23,4 @@ numexpr==2.7.3
23
  scikit-learn~=0.24.2
24
  scipy~=1.7.3
25
  tqdm~=4.62.3
26
- pyarrow~=6.0.1
 
10
  datasets==1.15.1
11
  powerlaw==1.5
12
  numpy==1.19.5
13
+ pandas==1.0.0
14
  dataclasses==0.6
15
  iso639==0.1.4
16
  python_igraph==0.9.6
 
23
  scikit-learn~=0.24.2
24
  scipy~=1.7.3
25
  tqdm~=4.62.3
26
+ pyarrow~=6.0.1