victormiller committed on
Commit
2018e3d
1 Parent(s): e93fc1a

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +50 -0
curated.py CHANGED
@@ -856,6 +856,55 @@ fig.update_layout(
856
  # Show the plot
857
  diff_stacked_bar = fig
858
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
859
 
860
  def curated(request):
861
 
@@ -992,6 +1041,7 @@ def curated(request):
992
  plotly2fasthtml(get_chart_new()),
993
  plotly2fasthtml(stacked_bar),
994
  plotly2fasthtml(diff_stacked_bar),
 
995
  H2("Curated Sources Processing"),
996
  filtering_process,
997
  data_preparation_div,
 
856
  # Show the plot
857
  diff_stacked_bar = fig
858
 
859
# Line counts per curated source at each filtering stage. The 'Filter' entry
# holds the stage labels used on the x-axis; every other entry is one source,
# with its four values aligned position-by-position to those labels.
data = {
    'Filter': ['Downloaded Lines', 'Language Filter', 'Min Word Count', 'Unigram Log Probability'],
    'Wikipedia': [61614907, 61614907, 60468491, 60468491],
    'Freelaw': [75971288, 73690766, 68171834, 68123174],
    'DM Maths': [112559888, 112559888, 112559888, 112559888],
    'USPTO': [6880276, 6878964, 6749922, 6749389],
    'PG19': [28752, 28683, 28682, 28632],
    'Hackernews': [2064931, 2010802, 2010488, 2003636],
    'Ubuntu IRC': [37966, 23501, 23468, 23205],
    'Europarl': [69814, 69814, 69814, 69814],
    'StackExchange': [23246548, 23246548, 23246352, 23246352],
    'Arxiv': [1911867, 1869441, 1763840, 1762661],
    'S2ORC': [12963563, 12963563, 12963563, 12963563],
    'S2ORC Abstract': [102324176, 83867601, 82889293, 82777912],
    'Pubmed Central': [5230932, 4830486, 4768310, 4767474],
    'Pubmed Abstract': [25787474, 25784374, 25747955, 25746724],
    'Phil Papers': [49389, 39175, 39175, 39128]
}

# Tabulate the counts: one row per filtering stage, one column per source.
df = pd.DataFrame(data)

# Begin with an empty figure; the bar series are attached one at a time below.
fig = go.Figure()

# Every column except the stage labels contributes its own bar series.
for dataset in df.columns.drop('Filter'):
    fig.add_trace(go.Bar(y=df[dataset], x=df['Filter'], name=dataset))

# Stack the series on top of each other and label the chart.
fig.update_layout(
    title='Stacked Bar Chart of Line Reductions by Filter for Each Dataset',
    barmode='stack',
    xaxis_title='Filter',
    yaxis_title='Number of Lines',
    legend_title='Dataset',
    width=1000,
    height=600
)

# Keep the finished figure under the name the page body passes to
# plotly2fasthtml (nothing is displayed at this point).
diff2_stacked_bar = fig
906
+
907
+
908
 
909
  def curated(request):
910
 
 
1041
  plotly2fasthtml(get_chart_new()),
1042
  plotly2fasthtml(stacked_bar),
1043
  plotly2fasthtml(diff_stacked_bar),
1044
+ plotly2fasthtml(diff2_stacked_bar),
1045
  H2("Curated Sources Processing"),
1046
  filtering_process,
1047
  data_preparation_div,