Spaces:
Sleeping
Sleeping
franco-bach
committed on
Commit
•
5f102c2
1
Parent(s):
351494a
feat: language detection
Browse files
app.py
CHANGED
@@ -92,7 +92,7 @@ def check_errors(text):
|
|
92 |
gr.Warning(f'La longitud del texto ({len(text)} caracteres) sobrepasa el máximo permitido.')
|
93 |
error = True
|
94 |
if not CheckLanguageIsSpanish().detect_spanish(text):
|
95 |
-
gr.Warning('El texto
|
96 |
error = True
|
97 |
return error
|
98 |
|
|
|
92 |
gr.Warning(f'La longitud del texto ({len(text)} caracteres) sobrepasa el máximo permitido.')
|
93 |
error = True
|
94 |
if not CheckLanguageIsSpanish().detect_spanish(text):
|
95 |
+
gr.Warning('El texto está en Ingles o posee oraciones en dicho idioma.')
|
96 |
error = True
|
97 |
return error
|
98 |
|
utils.py
CHANGED
@@ -1,37 +1,25 @@
|
|
1 |
-
|
2 |
-
# from spacy.language import Language
|
3 |
-
# from spacy_langdetect import LanguageDetector
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
# Language.factory("language_detector", func=self.get_lang_detector)
|
10 |
-
# self.nlp_es.add_pipe('language_detector', last=True)
|
11 |
-
|
12 |
-
# def get_lang_detector(self, nlp, name):
|
13 |
-
# return LanguageDetector()
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
# lang_prediction = doc._.language
|
18 |
-
# confidence = lang_prediction['score'] * 100
|
19 |
-
# if confidence > 95:
|
20 |
-
# print(f"espanio perri {confidence}")
|
21 |
-
# return True
|
22 |
-
# else:
|
23 |
-
# print(f"no espanio perri {confidence}")
|
24 |
-
# return False
|
25 |
-
|
26 |
-
from langdetect import detect_langs
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
|
34 |
-
if
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from lingua import Language, LanguageDetectorBuilder
|
|
|
|
|
2 |
|
3 |
+
class CheckLanguageIsSpanish:
    """Decide whether a text counts as Spanish using a lingua detector.

    The detector is built for Spanish and English only, so every decision is
    a comparison between those two languages.
    """

    def __init__(self):
        self.languages = [Language.SPANISH, Language.ENGLISH]
        self.detector = LanguageDetectorBuilder.from_languages(*self.languages).build()

    def detect_spanish(self, text, *, min_english_words=5):
        """Return True when ``text`` is considered Spanish, False otherwise.

        The text is rejected (False) when either:

        * the detector's confidence for English is higher than for Spanish, or
        * at least one detected English segment has ``min_english_words``
          words or more.

        Args:
            text: The text to classify.
            min_english_words: Minimum word count of a single English segment
                that makes the whole text be rejected. Defaults to 5.

        Returns:
            bool: True if the text passes both checks, False otherwise.
        """
        # Compare the overall confidence values of the two languages.
        confidence_values = self.detector.compute_language_confidence_values(text)
        confidence_dict = {
            confidence.language.name: confidence.value
            for confidence in confidence_values
        }
        if confidence_dict.get("ENGLISH", 0.0) > confidence_dict.get("SPANISH", 0.0):
            # Already rejected; the per-segment scan cannot change the answer.
            return False

        # Reject texts that are mostly Spanish but embed a long enough
        # English segment.
        return not any(
            result.language.name == "ENGLISH"
            and result.word_count >= min_english_words
            for result in self.detector.detect_multiple_languages_of(text)
        )

    def detect_english(self, text):
        """Backward-compatible alias of :meth:`detect_spanish`.

        Despite its name, this returns True when the text is Spanish and
        False when English is detected. It is kept so callers using the old
        method name keep working.
        """
        return self.detect_spanish(text)
|