import ssl import warnings import datasets import subprocess import pandas as pd import urllib.request from time import sleep import streamlit as st from datetime import date import plotly.express as px from urllib.error import HTTPError warnings.simplefilter("ignore", UserWarning) warnings.simplefilter("ignore", FutureWarning) pd.options.display.float_format = '{:.2f}'.format ssl._create_default_https_context = ssl._create_unverified_context st.set_page_config(page_title="Wikipedia Corpora Report", page_icon="https://webspace.clarkson.edu/~alshahsf/images/wikipedia1.png") st.markdown("""
ⓘ Latest Metadata Update: {retrieval_date}
", unsafe_allow_html=True) with col2: download_button = st.download_button(label="Download Metadata", data=wiki_metadata.to_csv().encode('utf-8'), file_name=f'{selected_language.split("(")[0].strip(" ")}-Metadata-{retrieval_date}.csv', mime='text/csv',) fig = px.sunburst(data_frame=wiki_metadata, path=['Wiki','Metric', 'Sub-Metric', 'Editors'], values='Values', branchvalues="total", color_discrete_sequence=['darkgray', 'black'], template='xgridoff') fig.update_traces(textinfo='label+percent parent') fig.update_traces(hovertemplate="Label=%{label}Wikipedia | Totals | Pages | Editors |
{selected_language} | Pages ({total_pages:,}) | Articles ({pages_content_pages:,}) | Bots ({pages_content_bots:,}) |
Humans ({pages_content_humans:,}) | |||
Non-Articles ({pages_non_content_pages:,}) | Bots ({pages_non_content_bots:,}) | ||
Humans ({pages_non_content_humans:,}) | |||
Edits ({total_edits:,}) | Articles ({edits_content_pages:,}) | Bots ({edits_content_bots:,}) | |
Humans ({edits_content_humans:,}) | |||
Non-Articles ({edits_non_content_pages:,}) | Bots ({edits_non_content_bots:,}) | ||
Humans ({edits_non_content_humans:,}) |