File size: 3,686 Bytes
1f70be8
 
 
 
 
 
 
 
6c8936b
 
1f70be8
 
 
 
 
 
 
 
 
6c8936b
 
 
 
 
 
 
1f70be8
6c8936b
 
 
 
 
 
 
1f70be8
 
 
6c8936b
 
1f70be8
 
 
 
 
6c8936b
 
 
 
 
1f70be8
 
 
6c8936b
 
 
 
 
1f70be8
 
6c8936b
 
 
 
 
1f70be8
 
6c8936b
 
 
 
 
 
 
 
 
1f70be8
6c8936b
 
 
 
1f70be8
6c8936b
 
1f70be8
7cb31c4
6c8936b
 
 
 
 
 
 
7cb31c4
 
1f70be8
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
import papermill as pm


def run_notebook():
    """Execute ``hub_datasets_by_language.ipynb`` with papermill and report the result.

    Returns:
        A status message: a success note when execution finishes without
        raising, otherwise a failure note that includes the error text.
    """
    source_notebook = "hub_datasets_by_language.ipynb"
    # The executed notebook is saved under a different file name.
    executed_notebook = "hub_datasets_by_language_output.ipynb"

    try:
        pm.execute_notebook(source_notebook, executed_notebook)
    except Exception as error:
        # Any failure is handed back to the caller as text instead of being raised.
        return f"Failed to execute notebook: {error!s}"
    else:
        return "Notebook executed successfully!"


def create_app():
    """Build the Gradio Blocks interface for the language-gap Space.

    The page contains, top to bottom: an introduction, a two-column grid with
    the four plot images loaded from ``plots/``, a button that calls
    ``run_notebook`` and shows its status message in a label, and a collapsed
    citation section.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here; the caller
        decides when to call ``launch()``.
    """
    # One inner tuple per layout column; each entry is (image path, label).
    plot_columns = (
        (
            ("plots/bar_plot_horizontal.png", "Bar Plot Horizontal"),
            ("plots/bar_plot_vertical.png", "Bar Plot Vertical"),
        ),
        (
            ("plots/stack_area.png", "Stack Area"),
            ("plots/time_series.png", "Time Series"),
        ),
    )

    with gr.Blocks() as app:

        gr.Markdown(
            """
            # Visualizing The Language Gap In The Hugging Face Hub
                    
            The open-source community is creating more and more resources in languages other than English but there is still a huge gap. This Space showcases plots that can help visualize this gap in the case of Spanish and can easily be adapted to other languages.
        """
        )

        gr.Markdown(
            """
            ## English vs Spanish Monolingual Datasets
                    
            Note: We consider only **monolingual** datasets in these plots, i.e. datasets that only contain data in one language. This is because *most* of the multilingual datasets are usually machine-translated and we want to focus on original data.        
        """
        )
        with gr.Row():
            for column_plots in plot_columns:
                with gr.Column():
                    for image_path, image_label in column_plots:
                        # Components register themselves with the enclosing
                        # layout context, so no reference needs to be kept.
                        gr.Image(
                            value=image_path,
                            label=image_label,
                            show_label=True,
                            show_download_button=True,
                            show_share_button=True,
                        )

        gr.Markdown(
            """
            ## Adapt to other languages

            This Space is WIP and more languages and visuals will be included shortly. Meanwhile, you can clone the Space, adapt the code in the notebook and run it to generate plots for other languages.
            """
        )
        run_button = gr.Button("Run Notebook")
        output_label = gr.Label()  # Display the result of running the notebook

        # No inputs: the handler takes no arguments and returns one status string.
        run_button.click(run_notebook, outputs=output_label)

        gr.Markdown("## Citation")
        with gr.Accordion("Citation information", open=False):
            # Raw string so the backslash in ``\url`` is kept literally.
            gr.Markdown(
                r"""
                If you use these plots or the code please cite:

                ```
                @misc{grandury2024gaphf,
                    author = {María Grandury},
                    title = {Visualizing The Language Gap In The Hugging Face Hub},
                    year = {2024},
                    publisher = {Hugging Face},
                    howpublished = {\url{https://huggingface.co/spaces/mariagrandury/language-gap-in-hf-hub}},
                }
                ```
                """
            )

    return app


# Build the interface at module level and keep it bound to the name `app`.
app = create_app()
# Launch the interface. There is no `__main__` guard, so this runs whenever
# the module is executed or imported.
app.launch()