HoneyTian committed on
Commit
1959c8d
·
1 Parent(s): ff566e2
main.py CHANGED
@@ -6,11 +6,7 @@ https://huggingface.co/spaces/sayakpaul/demo-docker-gradio
6
  import argparse
7
  import json
8
  import platform
9
- from collections import OrderedDict
10
- # from typing import OrderedDict
11
 
12
- from allennlp.models.archival import archive_model, load_archive
13
- from allennlp.predictors.text_classifier import TextClassifierPredictor
14
  import fasttext
15
  from fasttext.FastText import load_model, _FastText
16
  import gradio as gr
@@ -18,7 +14,6 @@ from gradio import inputs, outputs
18
  from langid.langid import LanguageIdentifier, model
19
 
20
  from project_settings import project_path, temp_directory
21
- from toolbox.os.command import Command
22
 
23
 
24
  def get_args():
@@ -44,34 +39,11 @@ def get_args():
44
 
45
  lang_id_identifier: LanguageIdentifier = None
46
  fasttext_model: _FastText = None
47
- qgyd_lang_id_predictor: TextClassifierPredictor = None
48
-
49
-
50
- trained_model_dir = project_path / "trained_models/huggingface"
51
- trained_model_dir.mkdir(parents=True, exist_ok=True)
52
-
53
-
54
- def init_qgyd_lang_id_predictor() -> TextClassifierPredictor:
55
- model_name = "qgyd2021/language_identification"
56
- model_path = trained_model_dir / model_name
57
- if not model_path.exists():
58
- model_path.parent.mkdir(exist_ok=True)
59
- Command.cd(model_path.parent.as_posix())
60
- Command.popen("git clone https://huggingface.co/{}".format(model_name))
61
-
62
- archive = load_archive(archive_file=model_path.as_posix())
63
-
64
- predictor = TextClassifierPredictor(
65
- model=archive.model,
66
- dataset_reader=archive.dataset_reader,
67
- )
68
- return predictor
69
 
70
 
71
  def click_lang_id_button(text: str, ground_true: str, model_name: str):
72
  global lang_id_identifier
73
  global fasttext_model
74
- global qgyd_lang_id_predictor
75
 
76
  text = str(text).strip()
77
 
@@ -81,16 +53,6 @@ def click_lang_id_button(text: str, ground_true: str, model_name: str):
81
  label, prob = fasttext_model.predict(text, k=1)
82
  label = label[0][9:]
83
  prob = prob[0]
84
- elif model_name == "qgyd_lang_id_1":
85
- json_dict = {
86
- "sentence": text
87
- }
88
- outputs = qgyd_lang_id_predictor.predict_json(
89
- json_dict
90
- )
91
- label = outputs["label"]
92
- probs = outputs["probs"]
93
- prob = max(probs)
94
  else:
95
  label = "model_name not available."
96
  prob = -1
@@ -114,10 +76,8 @@ def main():
114
 
115
  global lang_id_identifier
116
  global fasttext_model
117
- global qgyd_lang_id_predictor
118
  lang_id_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
119
  fasttext_model = fasttext.load_model(args.fasttext_model)
120
- qgyd_lang_id_predictor = init_qgyd_lang_id_predictor()
121
 
122
  blocks = gr.Interface(
123
  click_lang_id_button,
 
6
  import argparse
7
  import json
8
  import platform
 
 
9
 
 
 
10
  import fasttext
11
  from fasttext.FastText import load_model, _FastText
12
  import gradio as gr
 
14
  from langid.langid import LanguageIdentifier, model
15
 
16
  from project_settings import project_path, temp_directory
 
17
 
18
 
19
  def get_args():
 
39
 
40
  lang_id_identifier: LanguageIdentifier = None
41
  fasttext_model: _FastText = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
 
44
  def click_lang_id_button(text: str, ground_true: str, model_name: str):
45
  global lang_id_identifier
46
  global fasttext_model
 
47
 
48
  text = str(text).strip()
49
 
 
53
  label, prob = fasttext_model.predict(text, k=1)
54
  label = label[0][9:]
55
  prob = prob[0]
 
 
 
 
 
 
 
 
 
 
56
  else:
57
  label = "model_name not available."
58
  prob = -1
 
76
 
77
  global lang_id_identifier
78
  global fasttext_model
 
79
  lang_id_identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
80
  fasttext_model = fasttext.load_model(args.fasttext_model)
 
81
 
82
  blocks = gr.Interface(
83
  click_lang_id_button,
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
  fasttext==0.9.2
2
  langid==1.1.6
3
  gradio==2.3.0
4
- allennlp==0.9.0
5
- overrides==1.9.0
 
1
  fasttext==0.9.2
2
  langid==1.1.6
3
  gradio==2.3.0
 
 
toolbox/__init__.py DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
-
4
- if __name__ == "__main__":
5
- pass
 
 
 
 
 
 
toolbox/os/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
-
4
-
5
- if __name__ == '__main__':
6
- pass
 
 
 
 
 
 
 
toolbox/os/command.py DELETED
@@ -1,53 +0,0 @@
1
- import os
2
-
3
-
4
- class Command(object):
5
- custom_command = [
6
- 'cd'
7
- ]
8
-
9
- @staticmethod
10
- def _get_cmd(command):
11
- command = str(command).strip()
12
- if command == '':
13
- return None
14
- cmd_and_args = command.split(sep=' ')
15
- cmd = cmd_and_args[0]
16
- args = ' '.join(cmd_and_args[1:])
17
- return cmd, args
18
-
19
- @classmethod
20
- def popen(cls, command):
21
- cmd, args = cls._get_cmd(command)
22
- if cmd in cls.custom_command:
23
- method = getattr(cls, cmd)
24
- return method(args)
25
- else:
26
- resp = os.popen(command)
27
- result = resp.read()
28
- resp.close()
29
- return result
30
-
31
- @classmethod
32
- def cd(cls, args):
33
- if args.startswith('/'):
34
- os.chdir(args)
35
- else:
36
- pwd = os.getcwd()
37
- path = os.path.join(pwd, args)
38
- os.chdir(path)
39
-
40
- @classmethod
41
- def system(cls, command):
42
- return os.system(command)
43
-
44
- def __init__(self):
45
- pass
46
-
47
-
48
- def ps_ef_grep(keyword: str):
49
- cmd = 'ps -ef | grep {}'.format(keyword)
50
- rows = Command.popen(cmd)
51
- rows = str(rows).split('\n')
52
- rows = [row for row in rows if row.__contains__(keyword) and not row.__contains__('grep')]
53
- return rows
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
toolbox/os/environment.py DELETED
@@ -1,114 +0,0 @@
1
- #!/usr/bin/python3
2
- # -*- coding: utf-8 -*-
3
- import json
4
- import os
5
-
6
- from dotenv import load_dotenv
7
- from dotenv.main import DotEnv
8
-
9
- from smart.json.misc import traverse
10
-
11
-
12
- class EnvironmentManager(object):
13
- def __init__(self, path, env, override=False):
14
- filename = os.path.join(path, '{}.env'.format(env))
15
- self.filename = filename
16
-
17
- load_dotenv(
18
- dotenv_path=filename,
19
- override=override
20
- )
21
-
22
- self._environ = dict()
23
-
24
- def open_dotenv(self, filename: str = None):
25
- filename = filename or self.filename
26
- dotenv = DotEnv(
27
- dotenv_path=filename,
28
- stream=None,
29
- verbose=False,
30
- interpolate=False,
31
- override=False,
32
- encoding="utf-8",
33
- )
34
- result = dotenv.dict()
35
- return result
36
-
37
- def get(self, key, default=None, dtype=str):
38
- result = os.environ.get(key)
39
- if result is None:
40
- if default is None:
41
- result = None
42
- else:
43
- result = default
44
- else:
45
- result = dtype(result)
46
- self._environ[key] = result
47
- return result
48
-
49
-
50
- _DEFAULT_DTYPE_MAP = {
51
- 'int': int,
52
- 'float': float,
53
- 'str': str,
54
- 'json.loads': json.loads
55
- }
56
-
57
-
58
- class JsonConfig(object):
59
- """
60
- 将 json 中, 形如 `$float:threshold` 的值, 处理为:
61
- 从环境变量中查到 threshold, 再将其转换为 float 类型.
62
- """
63
- def __init__(self, dtype_map: dict = None, environment: EnvironmentManager = None):
64
- self.dtype_map = dtype_map or _DEFAULT_DTYPE_MAP
65
- self.environment = environment or os.environ
66
-
67
- def sanitize_by_filename(self, filename: str):
68
- with open(filename, 'r', encoding='utf-8') as f:
69
- js = json.load(f)
70
-
71
- return self.sanitize_by_json(js)
72
-
73
- def sanitize_by_json(self, js):
74
- js = traverse(
75
- js,
76
- callback=self.sanitize,
77
- environment=self.environment
78
- )
79
- return js
80
-
81
- def sanitize(self, string, environment):
82
- """支持 $ 符开始的, 环境变量配置"""
83
- if isinstance(string, str) and string.startswith('$'):
84
- dtype, key = string[1:].split(':')
85
- dtype = self.dtype_map[dtype]
86
-
87
- value = environment.get(key)
88
- if value is None:
89
- raise AssertionError('environment not exist. key: {}'.format(key))
90
-
91
- value = dtype(value)
92
- result = value
93
- else:
94
- result = string
95
- return result
96
-
97
-
98
- def demo1():
99
- import json
100
-
101
- from project_settings import project_path
102
-
103
- environment = EnvironmentManager(
104
- path=os.path.join(project_path, 'server/callbot_server/dotenv'),
105
- env='dev',
106
- )
107
- init_scenes = environment.get(key='init_scenes', dtype=json.loads)
108
- print(init_scenes)
109
- print(environment._environ)
110
- return
111
-
112
-
113
- if __name__ == '__main__':
114
- demo1()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
toolbox/os/other.py DELETED
@@ -1,9 +0,0 @@
1
- import os
2
- import inspect
3
-
4
-
5
- def pwd():
6
- """你在哪个文件调用此函数, 它就会返回那个文件所在的 dir 目标"""
7
- frame = inspect.stack()[1]
8
- module = inspect.getmodule(frame[0])
9
- return os.path.dirname(os.path.abspath(module.__file__))