File size: 1,812 Bytes
a5be261
 
 
 
3e130f5
a5be261
3a71c6f
a5be261
 
 
 
a8d50b5
 
 
 
 
 
 
 
45cea57
 
 
 
 
a8d50b5
 
 
a5be261
0cae08c
9f151d6
a5be261
 
 
 
 
3a71c6f
a5be261
616fbdf
a5be261
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
import pandas as pd
from collections import defaultdict

# Per-(language, extension) statistics table; `compute` below also reads
# example file contents from its "content" column.
df = pd.read_csv("./stackv2_languages_freq.csv")

# Distinct values: `langs` feeds the dropdown, both feed the header counts.
langs = sorted(df["language"].unique())
exts = [*df["extension"].unique()]

def compute(lang):
    """Build the Markdown report for one programming language.

    The report has two parts: a summary listing each extension of ``lang``
    with its share of the language's files (plus the autogenerated and
    vendor shares when either exceeds 20%), followed by one fenced code
    example per remaining row of the module-level ``df``.

    Args:
        lang: Value to match against the ``language`` column of ``df``.

    Returns:
        The report as a Markdown string with surrounding whitespace stripped.
    """
    df_lang = df[df["language"] == lang]
    # clean up weird exts
    df_lang = df_lang[df_lang["ext_fraction_per_lang"] > 0.0001].reset_index()

    # The summary needs one row per extension; the examples below still
    # iterate over every row of df_lang.
    df_lang_uniq = df_lang.groupby("extension").first().reset_index()

    parts = [
        f"## Summary:\n\n The `{lang}` language has {df_lang_uniq.shape[0]} extensions: \n\n"
    ]

    summary_rows = zip(
        df_lang_uniq["extension"],
        df_lang_uniq["ext_fraction_per_lang"],
        df_lang_uniq["generated_fraction"],
        df_lang_uniq["vendor_fraction"],
    )
    for ext, ext_fraction, gen_fraction, vend_fraction in summary_rows:
        # Clamp to at most 100% with min(); the previous max(fraction, 1)
        # turned every fraction <= 1 into exactly 100.00%.
        fractions_string = f"{min(ext_fraction, 1)*100:.2f}%"
        if gen_fraction > 0.2:
            fractions_string += f", autogenerated: {min(gen_fraction, 1)*100:.2f}%"
        if vend_fraction > 0.2:
            fractions_string += f", vendor files: {min(vend_fraction, 1)*100:.2f}%"

        parts.append(f"`{ext}` ({fractions_string}), \n\n")

    # Extra blank lines between the summary and the examples.
    parts.append("\n\n")

    for i, (ext, example) in enumerate(zip(df_lang["extension"], df_lang["content"])):
        parts.append(
            f"**Example {i+1} (extension=`{ext}`):**\n\n ```\n{example}\n```\n\n"
        )

    return "".join(parts).strip()

# Page layout: a title with dataset totals, a language picker, and the
# Markdown area that displays the output of `compute`.
with gr.Blocks() as demo:
    gr.Markdown(
        "# Programming Languages for The Stack v2\n\n"
        f"In total there are **{len(langs)} languages** "
        f"and **{len(exts)} extensions.**"
    )

    lang_select = gr.Dropdown(choices=langs, value="Python")
    md = gr.Markdown("")

    # Render the report whenever the selection changes, and once via the
    # load event so the default selection is shown.
    lang_select.change(fn=compute, inputs=[lang_select], outputs=[md])
    demo.load(fn=compute, inputs=[lang_select], outputs=[md])

demo.launch()