import gradio as gr
import pandas as pd
import plotly.graph_objects as go


def get_covered_languages():
    """Return the codes of all languages that already have a lead.

    Reads the language names listed in ``data/covered_languages.txt`` (one or
    more comma-separated names per line) and resolves each name to its code
    using the ``Language`` and ``Code`` columns of
    ``data/merged_language_list_with_duplicates.csv``.

    Returns:
        A list with one code per distinct covered language, in the order the
        languages first appear in the text file.

    Raises:
        ValueError: If a covered language has no entry in the language list.
    """
    # Load data
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')
    # Decode explicitly as UTF-8 (pandas' default for the CSV above) so that
    # non-ASCII language names compare equal regardless of the platform locale.
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split strings with commas and flatten the list. Blank entries (empty
    # lines, trailing commas) are dropped; dict.fromkeys de-duplicates while
    # keeping a deterministic order.
    names = (name.strip() for line in lines for name in line.split(','))
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # Build the name -> code lookup once. The list contains duplicates, so
    # keep the first code seen for each language name.
    code_by_language = (
        all_languages.drop_duplicates(subset='Language')
        .set_index('Language')['Code']
        .to_dict()
    )

    # Validate explicitly: an ``assert`` would be stripped under ``python -O``.
    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            + ", ".join(missing)
        )

    return [code_by_language[lang] for lang in covered_languages]


def build_dataframes(covered_language_codes):
    """Split the clean language list into languages with and without a lead.

    Args:
        covered_language_codes: Codes of the languages that have a lead.

    Returns:
        A ``(languages_with_lead, languages_without_lead)`` tuple of
        DataFrames read from ``data/merged_language_list_clean.csv``, each
        sorted by the ``Code`` column.
    """
    # Load data
    clean_languages = pd.read_csv('data/merged_language_list_clean.csv')

    # Compute the membership mask once and use it for both halves.
    has_lead = clean_languages['Code'].isin(covered_language_codes)
    languages_with_lead = clean_languages[has_lead].sort_values(by='Code')
    languages_without_lead = clean_languages[~has_lead].sort_values(by='Code')

    return languages_with_lead, languages_without_lead


def create_piechart():
    """Build a pie chart comparing how many languages have a lead.

    Reads the module-level ``languages_with_lead`` and
    ``languages_without_lead`` DataFrames.

    Returns:
        A plotly ``Figure`` with one slice per group, labelled with the group
        name and its row count.
    """
    colors = ['#ffd21e', '#0b4a70']

    fig = go.Figure(
        go.Pie(
            labels=["With lead", "Without lead"],
            values=[len(languages_with_lead), len(languages_without_lead)],
            marker=dict(colors=colors),
        )
    )
    fig.update_traces(textposition='inside', textinfo='label+value')

    return fig


def _filter_by_terms(frame, terms):
    """Return the rows of ``frame`` whose Language or Code contains any term."""
    keep = [
        any(term in str(language).lower() or term in str(code).lower() for term in terms)
        for language, code in zip(frame['Language'], frame['Code'])
    ]
    # Index with a boolean Series rather than a plain list: an empty list
    # would be interpreted as "select no columns" on an empty frame.
    return frame[pd.Series(keep, index=frame.index, dtype=bool)]


def filter_dataframes(search_term=None):
    """Filter both language tables by a free-text search.

    The search is case-insensitive. The input is split on whitespace and a
    row is kept when any of the resulting terms is a substring of its
    ``Language`` or ``Code`` value.

    Args:
        search_term: Text typed by the user. ``None``, an empty string or a
            whitespace-only string disables filtering.

    Returns:
        A ``(without_lead, with_lead)`` tuple of DataFrames, in the order the
        UI outputs expect.
    """
    # split() without an argument collapses runs of whitespace. Splitting on
    # a single space would produce empty terms for doubled or trailing
    # spaces, and an empty term is a substring of every value.
    search_terms = search_term.lower().split() if search_term else []
    if not search_terms:
        return languages_without_lead, languages_with_lead

    filtered_with_lead = _filter_by_terms(languages_with_lead, search_terms)
    filtered_without_lead = _filter_by_terms(languages_without_lead, search_terms)

    return filtered_without_lead, filtered_with_lead


def load_demo():
    """Reload the data and return the initial contents of the dashboard.

    Returns:
        A ``(languages_without_lead, languages_with_lead, piechart)`` tuple
        matching the outputs wired to ``demo.load``.
    """
    # Rebind the module-level frames instead of creating locals. Otherwise
    # create_piechart() and filter_dataframes() would keep using the data
    # loaded at import time and could disagree with the tables returned here.
    global languages_with_lead, languages_without_lead
    languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())

    piechart = create_piechart()

    return languages_without_lead, languages_with_lead, piechart


# Initial data load; the callbacks above read these module-level frames.
languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())

with gr.Blocks() as demo:
    gr.Markdown("## Language Leads Dashboard")

    gr_piechart = gr.Plot(label="Language Leads")

    search_box = gr.Textbox(type="text", label="Search your language:")
    with gr.Row():
        search_button = gr.Button("Search 🔎")
        reset_button = gr.Button("Reset 🔁")

    with gr.Tab("Looking for leads!"):
        gr.Markdown("These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9).")
        gr_languages_without_lead = gr.DataFrame()

    with gr.Tab("Languages with leads"):
        gr.Markdown("We found at least one lead for these languages:")
        gr_languages_with_lead = gr.DataFrame()

    # Populate the tables and the chart when the page is opened.
    demo.load(
        load_demo,
        outputs=[gr_languages_without_lead, gr_languages_with_lead, gr_piechart],
    )

    search_button.click(
        fn=filter_dataframes,
        inputs=search_box,
        outputs=[gr_languages_without_lead, gr_languages_with_lead],
    )

    # No inputs: filter_dataframes() runs with its default and returns the
    # unfiltered tables.
    reset_button.click(
        fn=filter_dataframes,
        inputs=None,
        outputs=[gr_languages_without_lead, gr_languages_with_lead],
    )

demo.launch()