Spaces:
Runtime error
Runtime error
use run inference from Space
Browse files- app.py +16 -12
- lid.176.ftz +3 -0
- lid218e.bin +3 -0
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import requests
|
2 |
import os
|
3 |
|
|
|
4 |
import gradio as gr
|
5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
6 |
import torch
|
@@ -11,11 +12,8 @@ When comments are created in the community tab, detect the language of the conte
|
|
11 |
Then, if the detected language is different from the user's language, display an option to translate it.
|
12 |
"""
|
13 |
|
14 |
-
|
15 |
-
TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/t5-base"
|
16 |
-
LANG_ID_API_URL = "https://noe30ht5sav83xm1.us-east-1.aws.endpoints.huggingface.cloud"
|
17 |
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")
|
18 |
-
ACCESS_TOKEN = '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'
|
19 |
headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
|
20 |
|
21 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
@@ -32,18 +30,22 @@ language_code_map = {
|
|
32 |
"Japanese": "jpn_Jpan"
|
33 |
}
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
|
42 |
|
43 |
def translate(text, src_lang, tgt_lang):
|
44 |
src_lang_code = language_code_map[src_lang]
|
45 |
tgt_lang_code = language_code_map[tgt_lang]
|
46 |
-
|
47 |
translation_pipeline = pipeline(
|
48 |
"translation", model=model, tokenizer=tokenizer, src_lang=src_lang_code, tgt_lang=tgt_lang_code, device=device)
|
49 |
result = translation_pipeline(text)
|
@@ -55,8 +57,10 @@ def query(text, src_lang, tgt_lang):
|
|
55 |
lang_id_response = requests.post(LANG_ID_API_URL, headers=headers, json={
|
56 |
"inputs": text, "wait_for_model": True, "use_cache": True})
|
57 |
lang_id = lang_id_response.json()[0]
|
|
|
|
|
58 |
|
59 |
-
return [
|
60 |
|
61 |
|
62 |
examples = [
|
|
|
1 |
import requests
|
2 |
import os
|
3 |
|
4 |
+
import fasttext
|
5 |
import gradio as gr
|
6 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
7 |
import torch
|
|
|
12 |
Then, if the detected language is different from the user's language, display an option to translate it.
|
13 |
"""
|
14 |
|
15 |
+
LANG_ID_API_URL = "https://q5esh83u7boq5qwd.us-east-1.aws.endpoints.huggingface.cloud"
|
|
|
|
|
16 |
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")
|
|
|
17 |
headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
|
18 |
|
19 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
|
|
30 |
"Japanese": "jpn_Jpan"
|
31 |
}
|
32 |
|
33 |
+
# fastText prefixes every predicted class with this marker,
# e.g. '__label__eng_Latn'. The marker is 9 characters long.
_LABEL_PREFIX = "__label__"

# Lazily-populated cache so the language-ID model file is read from disk
# once instead of on every call.
_lid_model = None


def identify_language(text):
    """Return the language code fastText predicts for ``text``.

    The fastText language-identification model stored next to this file
    as ``lid218e.bin`` is loaded on first use and reused afterwards.

    Args:
        text: The string whose language should be detected. Newlines are
            replaced by spaces before prediction because fastText's
            ``predict`` only accepts a single line of input.

    Returns:
        The top-1 predicted label with the ``__label__`` prefix removed,
        e.g. ``'eng_Latn'`` for the raw label ``'__label__eng_Latn'``.
    """
    global _lid_model
    if _lid_model is None:
        model_file = "lid218e.bin"
        model_full_path = os.path.join(os.path.dirname(__file__), model_file)
        # Named distinctly from the module-level translation ``model`` so
        # the two are never confused.
        _lid_model = fasttext.load_model(model_full_path)

    # fastText raises ValueError when the input contains '\n'.
    single_line = text.replace("\n", " ")
    predictions = _lid_model.predict(single_line, k=1)  # e.g., (('__label__eng_Latn',), array([0.81148803]))

    label = predictions[0][0]
    # Strip the full prefix; the previous hard-coded length of 7 left two
    # stray underscores at the start of the returned code.
    if label.startswith(_LABEL_PREFIX):
        return label[len(_LABEL_PREFIX):]
    return label
|
42 |
+
|
43 |
|
44 |
|
45 |
def translate(text, src_lang, tgt_lang):
|
46 |
src_lang_code = language_code_map[src_lang]
|
47 |
tgt_lang_code = language_code_map[tgt_lang]
|
48 |
+
|
49 |
translation_pipeline = pipeline(
|
50 |
"translation", model=model, tokenizer=tokenizer, src_lang=src_lang_code, tgt_lang=tgt_lang_code, device=device)
|
51 |
result = translation_pipeline(text)
|
|
|
57 |
lang_id_response = requests.post(LANG_ID_API_URL, headers=headers, json={
|
58 |
"inputs": text, "wait_for_model": True, "use_cache": True})
|
59 |
lang_id = lang_id_response.json()[0]
|
60 |
+
|
61 |
+
language_code = identify_language(text)
|
62 |
|
63 |
+
return [language_code, translation]
|
64 |
|
65 |
|
66 |
examples = [
|
lid.176.ftz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
|
3 |
+
size 938013
|
lid218e.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a
|
3 |
+
size 1176355829
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
|
|
1 |
torch
|
2 |
transformers
|
|
|
1 |
+
fasttext
|
2 |
torch
|
3 |
transformers
|