File size: 3,457 Bytes
cb97e31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
https://huggingface.co/spaces/sayakpaul/demo-docker-gradio
"""
import argparse
import json
import platform
from typing import Tuple

import gradio as gr
import langid
from langid.langid import LanguageIdentifier, model
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

from project_settings import project_path, temp_directory


def get_args():
    """Parse the command-line options of the demo.

    Returns:
        The ``argparse.Namespace`` produced by the parser. Its only option,
        ``lang_id_examples_file``, is a string path that defaults to
        ``lang_id_examples.json`` under ``project_path``.
    """
    default_examples_file = (project_path / "lang_id_examples.json").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--lang_id_examples_file",
        type=str,
        default=default_examples_file,
    )
    return arg_parser.parse_args()


# Shared identifier, built once at import time from the model string imported
# from langid.langid and reused by click_lang_id_button on every request.
# NOTE(review): norm_probs=True presumably makes classify() return a
# normalized probability rather than a raw score - confirm against langid docs.
lang_id_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)


def click_lang_id_button(text: str, ground_true: str, model_name: str):
    """Classify the language of ``text`` with the selected model.

    Args:
        text: The text to classify.
        ground_true: Expected label supplied by the UI; not used here.
        model_name: Name of the model to run. Only ``"langid"`` is handled.

    Returns:
        A ``(label, prob)`` tuple. For ``"langid"`` this is the result of the
        module-level identifier's ``classify`` call with the probability
        rounded to 4 decimals; for any other name it is an explanatory
        message paired with ``0.0``.
    """
    if model_name != "langid":
        # Unknown model: surface the problem through the label field.
        return "model_name not available.", 0.0

    predicted_label, confidence = lang_id_identifier.classify(text)
    return predicted_label, round(confidence, 4)


def main():
    """Build the language-identification Gradio demo and serve it.

    Loads the example rows from the JSON file named by
    ``--lang_id_examples_file``, lays out a single "lang_id" tab (text,
    ground truth and model selector on the left; label and probability on
    the right), wires both the example rows and the "run" button to
    ``click_lang_id_button``, and finally launches the queued app on port
    7860. Returns ``None``.
    """
    args = get_args()

    brief_description = """
    ## Language Identification

    langid 识别 97 种语言。
    https://github.com/saffsd/langid.py
    """

    # examples
    with open(args.lang_id_examples_file, "r", encoding="utf-8") as f:
        lang_id_examples = json.load(f)

    # ui
    with gr.Blocks() as blocks:
        gr.Markdown(value=brief_description)

        with gr.Row():
            with gr.Column(scale=5):
                with gr.Tabs():
                    with gr.TabItem("lang_id"):
                        gr.Markdown(value="")

                        with gr.Row():
                            with gr.Column(scale=1):
                                lang_id_text = gr.Textbox(lines=2, max_lines=50, label="text")
                                lang_id_ground_true = gr.Textbox(label="ground_true")

                                lang_id_model_name = gr.Dropdown(choices=["langid"], value="langid", label="model_name")
                                lang_id_button = gr.Button("run", variant="primary")

                            with gr.Column(scale=1):
                                lang_id_label = gr.Textbox(label="label")
                                lang_id_prob = gr.Number(label="prob")

                        # The example rows and the button call the same
                        # handler, so they share one definition of its
                        # input/output components to keep them in sync.
                        lang_id_inputs = [
                            lang_id_text,
                            lang_id_ground_true,
                            lang_id_model_name,
                        ]
                        lang_id_outputs = [lang_id_label, lang_id_prob]

                        gr.Examples(
                            examples=lang_id_examples,
                            inputs=lang_id_inputs,
                            outputs=lang_id_outputs,
                            fn=click_lang_id_button
                        )

                        # click event
                        lang_id_button.click(
                            click_lang_id_button,
                            inputs=lang_id_inputs,
                            outputs=lang_id_outputs,
                        )

    # Bind to loopback on Windows and to all interfaces elsewhere. Sharing
    # was disabled on every platform in the original conditional, so it is
    # spelled as a plain False here.
    on_windows = platform.system() == "Windows"
    blocks.queue().launch(
        share=False,
        server_name="127.0.0.1" if on_windows else "0.0.0.0",
        server_port=7860
    )
    return


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()