File size: 2,703 Bytes
cb97e31
 
 
 
 
 
 
 
 
e6fd0e8
 
cb97e31
e6fd0e8
cb97e31
 
 
 
 
 
 
820797e
 
 
 
 
cb97e31
 
 
 
 
e6fd0e8
 
 
 
 
cb97e31
 
 
 
e6fd0e8
 
cb97e31
 
 
 
e6fd0e8
cb97e31
 
 
e6fd0e8
 
 
 
cb97e31
 
e6fd0e8
 
cb97e31
 
 
 
 
 
e6fd0e8
cb97e31
 
820797e
 
 
 
cb97e31
 
 
 
e6fd0e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb97e31
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
https://huggingface.co/spaces/sayakpaul/demo-docker-gradio
"""
import argparse
import json
import platform

import fasttext
from fasttext.FastText import load_model, _FastText
import gradio as gr
from gradio import inputs, outputs
from langid.langid import LanguageIdentifier, model

from project_settings import project_path, temp_directory


def get_args():
    """
    Parse the command-line options that locate the demo's resource files.

    Every option is a string path whose default is resolved against
    ``project_path`` (presumably a ``pathlib.Path``, since it is combined with
    ``/`` and ``as_posix()`` -- confirm in ``project_settings``).

    :return: the ``argparse.Namespace`` with ``language_identification_md_file``,
        ``lang_id_examples_file`` and ``fasttext_model`` attributes.
    """
    # (option flag, default location relative to the project root)
    path_options = (
        ("--language_identification_md_file", "language_identification.md"),
        ("--lang_id_examples_file", "lang_id_examples.json"),
        ("--fasttext_model", "pretrained_models/lid.176.bin"),
    )

    parser = argparse.ArgumentParser()
    for flag, relative_location in path_options:
        default_path = (project_path / relative_location).as_posix()
        parser.add_argument(flag, default=default_path, type=str)

    return parser.parse_args()


# Module-level model handles. Both start as None, are assigned inside main(),
# and are read by click_lang_id_button() when it classifies text.
lang_id_identifier: LanguageIdentifier = None  # langid classifier instance
fasttext_model: _FastText = None  # fastText model loaded from the --fasttext_model path


def click_lang_id_button(text: str, ground_true: str, model_name: str):
    """
    Identify the language of ``text`` with the selected backend.

    :param text: text to classify; may contain several lines.
    :param ground_true: expected label supplied by the UI; it is accepted so the
        signature matches the interface inputs but is not used for prediction.
    :param model_name: ``"langid"`` or ``"fasttext"``.
    :return: ``(label, prob)`` where ``prob`` is the score rounded to 4 decimals
        and rendered as a string. An unrecognised ``model_name`` yields
        ``("model_name not available.", "-1")``.
    """
    if model_name == "langid":
        label, prob = lang_id_identifier.classify(text)
    elif model_name == "fasttext":
        # fastText's predict() handles one line at a time and raises ValueError
        # when the input contains "\n"; the UI textbox is multi-line, so fold
        # the input onto a single line before predicting.
        single_line = text.replace("\n", " ")
        labels, probs = fasttext_model.predict(single_line, k=1)

        # Labels come back as "__label__<code>"; strip the prefix explicitly
        # instead of slicing off a hard-coded number of characters.
        label = labels[0]
        label_prefix = "__label__"
        if label.startswith(label_prefix):
            label = label[len(label_prefix):]
        prob = float(probs[0])
    else:
        label = "model_name not available."
        prob = -1
    return label, str(round(prob, 4))


def main():
    """
    Build and launch the Gradio language-identification demo.

    Reads the markdown file and the JSON example list named by the command-line
    arguments, loads the langid and fastText models into the module-level
    handles, then serves ``click_lang_id_button`` through a ``gr.Interface``.
    """
    global lang_id_identifier
    global fasttext_model

    cli_args = get_args()

    brief_description = """
    Language Identification
    """

    # Long-form description.
    # NOTE(review): this text is loaded but only ``brief_description`` is passed
    # to the interface below -- confirm whether it was meant to be displayed.
    with open(cli_args.language_identification_md_file, "r", encoding="utf-8") as md_file:
        description = md_file.read()

    # Example rows offered in the UI.
    with open(cli_args.lang_id_examples_file, "r", encoding="utf-8") as examples_file:
        lang_id_examples = json.load(examples_file)

    # Populate the shared model handles read by click_lang_id_button().
    lang_id_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
    fasttext_model = fasttext.load_model(cli_args.fasttext_model)

    # Input widgets, in the same order as click_lang_id_button's parameters.
    input_components = [
        inputs.Textbox(lines=3, label="text"),
        inputs.Textbox(label="ground_true"),
        inputs.Dropdown(choices=["langid", "fasttext"], default="langid", label="model_name"),
    ]
    # Output widgets, in the same order as the returned (label, prob) pair.
    output_components = [
        outputs.Textbox(label="label"),
        outputs.Textbox(label="prob"),
    ]

    demo = gr.Interface(
        fn=click_lang_id_button,
        inputs=input_components,
        outputs=output_components,
        examples=lang_id_examples,
        description=brief_description,
    )

    # Both platform branches evaluate to False, so sharing is never requested.
    share_link = False if platform.system() == "Windows" else False
    demo.launch(share=share_link)


if __name__ == "__main__":
    main()