HoneyTian committed on
Commit
cb97e31
1 Parent(s): f3f6402
Files changed (7) hide show
  1. .gitattributes +4 -0
  2. .gitignore +15 -0
  3. Dockerfile +30 -0
  4. lang_id_examples.json +12 -0
  5. main.py +113 -0
  6. project_settings.py +16 -0
  7. requirements.txt +2 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.xlsx filter=lfs diff=lfs merge=lfs -text
37
+ *.pkl filter=lfs diff=lfs merge=lfs -text
38
+ *.wav filter=lfs diff=lfs merge=lfs -text
39
+ *.jit filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ .git/
3
+ .idea/
4
+
5
+ #data/
6
+ #pretrained_models/
7
+ temp/
8
+
9
+ **/cache/
10
+ **/__pycache__/
11
+
12
+ **/*.env
13
+ **/*.mp3
14
+ **/*.png
15
+ **/*.xlsx
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.8

WORKDIR /code

# Copy only the requirements file first so the dependency layers below are
# reused from cache when just the application code changes.
COPY ./requirements.txt /code/requirements.txt

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# main.py launches the Gradio server on port 7860; EXPOSE documents that port.
EXPOSE 7860

CMD ["python", "main.py"]
lang_id_examples.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ [
3
+ "你好。",
4
+ "zh",
5
+ "langid"
6
+ ],
7
+ [
8
+ "普段使いとバイクに乗るときのブーツ兼用として購入しました。",
9
+ "ja",
10
+ "langid"
11
+ ]
12
+ ]
main.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ https://huggingface.co/spaces/sayakpaul/demo-docker-gradio
5
+ """
6
+ import argparse
7
+ import json
8
+ import platform
9
+ from typing import Tuple
10
+
11
+ import gradio as gr
12
+ import langid
13
+ from langid.langid import LanguageIdentifier, model
14
+ import matplotlib.pyplot as plt
15
+ import numpy as np
16
+ from PIL import Image
17
+
18
+ from project_settings import project_path, temp_directory
19
+
20
+
21
def get_args():
    """Parse the command-line options of the demo.

    The single option, ``--lang_id_examples_file``, is the path (a string) of
    the JSON file with the example rows. It defaults to
    ``lang_id_examples.json`` located under ``project_path``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    default_examples_file = (project_path / "lang_id_examples.json").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--lang_id_examples_file",
        type=str,
        default=default_examples_file,
    )
    return arg_parser.parse_args()
30
+
31
+
32
# Module-level classifier, built once at import time and used by click_lang_id_button.
# NOTE(review): norm_probs=True presumably makes classify() return a normalized probability - confirm against the langid docs.
+ lang_id_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
33
+
34
+
35
def click_lang_id_button(text: str, ground_true: str, model_name: str) -> Tuple[str, float]:
    """Identify the language of ``text`` with the selected model.

    Args:
        text: The text to classify.
        ground_true: Expected label coming from the UI / example rows. It is
            accepted so the signature lines up with the Gradio inputs, but it
            is not used in the computation.
        model_name: Name of the model to use; only ``"langid"`` is supported.

    Returns:
        A ``(label, prob)`` pair, with ``prob`` rounded to 4 decimal places.
        For an unsupported ``model_name`` or a blank ``text`` the label is an
        explanatory message and ``prob`` is ``0.0``.
    """
    if model_name != "langid":
        return "model_name not available.", 0.0

    # Blank input gives the classifier nothing to work with; report that
    # explicitly instead of returning a language label for it.
    if not text or not text.strip():
        return "text is empty.", 0.0

    label, prob = lang_id_identifier.classify(text)
    return label, round(prob, 4)
44
+
45
+
46
def main():
    """Build the Gradio language-identification demo and serve it.

    Reads the example rows from the JSON file given by
    ``--lang_id_examples_file``, lays out a single "lang_id" tab (text,
    ground-truth and model inputs; label and probability outputs), wires both
    the example table and the "run" button to ``click_lang_id_button``, then
    launches the queued app on port 7860.
    """
    args = get_args()

    brief_description = """
    ## Language Identification

    langid 识别 97 种语言。
    https://github.com/saffsd/langid.py
    """

    # examples
    with open(args.lang_id_examples_file, "r", encoding="utf-8") as f:
        lang_id_examples = json.load(f)

    # Loopback only on Windows; on every other platform listen on all interfaces.
    server_name = "127.0.0.1" if platform.system() == "Windows" else "0.0.0.0"

    # ui
    with gr.Blocks() as blocks:
        gr.Markdown(value=brief_description)

        with gr.Row():
            with gr.Column(scale=5):
                with gr.Tabs():
                    with gr.TabItem("lang_id"):
                        gr.Markdown(value="")

                        with gr.Row():
                            with gr.Column(scale=1):
                                lang_id_text = gr.Textbox(lines=2, max_lines=50, label="text")
                                lang_id_ground_true = gr.Textbox(label="ground_true")

                                lang_id_model_name = gr.Dropdown(choices=["langid"], value="langid", label="model_name")
                                lang_id_button = gr.Button("run", variant="primary")

                            with gr.Column(scale=1):
                                lang_id_label = gr.Textbox(label="label")
                                lang_id_prob = gr.Number(label="prob")

                        # The example table and the button drive the same
                        # callback, so they share one input/output wiring.
                        lang_id_inputs = [
                            lang_id_text,
                            lang_id_ground_true,
                            lang_id_model_name,
                        ]
                        lang_id_outputs = [lang_id_label, lang_id_prob]

                        gr.Examples(
                            examples=lang_id_examples,
                            inputs=lang_id_inputs,
                            outputs=lang_id_outputs,
                            fn=click_lang_id_button,
                        )

                        # click event
                        lang_id_button.click(
                            click_lang_id_button,
                            inputs=lang_id_inputs,
                            outputs=lang_id_outputs,
                        )

    # share was a conditional whose two branches were both False; it is simply off.
    blocks.queue().launch(
        share=False,
        server_name=server_name,
        server_port=7860,
    )
110
+
111
+
112
+ if __name__ == "__main__":
113
+ main()
project_settings.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import os
4
+ from pathlib import Path
5
+
6
+
7
# Absolute path of the directory that contains this settings module.
project_path = Path(os.path.abspath(os.path.dirname(__file__)))

# Scratch directory directly under the project root; created at import time
# when it does not exist yet.
temp_directory = project_path.joinpath("temp")
temp_directory.mkdir(exist_ok=True)


if __name__ == '__main__':
    pass
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio==4.28.3
2
+ langid==1.1.6