Create get_voices.py
Browse files- get_voices.py +115 -0
get_voices.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import subprocess
|
2 |
+
import json
|
3 |
+
import re
|
4 |
+
from collections import defaultdict
|
5 |
+
|
6 |
+
# Dictionary mapping language codes (the part of a voice name before the
# first "-") to full language names. The values become the top-level keys of
# voices.json, so renaming one changes the generated file.
language_mapping = {
    "af": "Afrikaans",
    "am": "Amharic",
    "ar": "Arabic",
    "az": "Azerbaijani",
    "bg": "Bulgarian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "ca": "Catalan",
    "cs": "Czech",
    "cy": "Welsh",
    "da": "Danish",
    # NOTE(review): native name, unlike the English names used for every
    # other entry; kept as-is because consumers of voices.json may rely on it.
    "de": "Deutsch",
    # NOTE(review): "al" is not an ISO 639-1 language code (Albanian is "sq",
    # listed below); kept for backward compatibility.
    "al": "Albanian",
    "el": "Greek",
    # NOTE(review): "in" is the withdrawn legacy code for Indonesian; the
    # current code "id" is listed below. Kept for backward compatibility.
    "in": "Indonesian",
    "en": "English",
    "es": "Spanish",
    "et": "Estonian",
    "fa": "Persian",
    "fi": "Finnish",
    "fil": "Filipino",
    "fr": "French",
    "ga": "Irish",
    "gl": "Galician",
    "gu": "Gujarati",
    "he": "Hebrew",
    "hi": "Hindi",
    "hr": "Croatian",
    "hu": "Hungarian",
    "id": "Indonesian",
    "is": "Icelandic",
    "it": "Italian",
    "ja": "Japanese",
    "jv": "Javanese",
    "ka": "Georgian",
    "kk": "Kazakh",
    "km": "Khmer",
    "kn": "Kannada",
    "ko": "Korean",
    "lo": "Lao",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "mk": "Macedonian",
    "ml": "Malayalam",
    "mn": "Mongolian",
    "mr": "Marathi",
    "ms": "Malay",
    "mt": "Maltese",
    "my": "Burmese",
    "nb": "Norwegian Bokmål",
    "ne": "Nepali",
    "nl": "Dutch",
    "pl": "Polish",
    "ps": "Pashto",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "si": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "sq": "Albanian",
    "sr": "Serbian",
    "su": "Sundanese",
    "sv": "Swedish",
    "sw": "Swahili",
    "ta": "Tamil",
    "te": "Telugu",
    "th": "Thai",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "uz": "Uzbek",
    "vi": "Vietnamese",
    "zh": "Chinese",
    "zu": "Zulu",
}
|
85 |
+
|
86 |
+
# One "Name: ... Gender: ..." entry, as printed in the blank-line-separated
# key/value listing format.
_BLOCK_ENTRY = re.compile(r'Name:\s*(.*?)\s*Gender:\s*(\w+)')

# One data row of a tabular listing: a hyphenated voice name followed by its
# gender column. The header row ("Name  Gender ...") and the dashed separator
# row do not match because neither has Male/Female in the second column.
_TABLE_ROW = re.compile(r'^[ \t]*(\S+-\S+)[ \t]+(Male|Female)\b', re.MULTILINE)


def _parse_voice_listing(listing):
    """Return the (name, gender) pairs found in ``edge-tts --list-voices`` output."""
    pairs = []
    for block in listing.strip().split("\n\n"):
        found = _BLOCK_ENTRY.search(block)
        if found:
            pairs.append(found.groups())
    if pairs:
        return pairs
    # NOTE(review): newer edge-tts releases appear to print a table instead of
    # "Name:"/"Gender:" blocks; fall back to reading table rows so the result
    # is not silently empty. Confirm against the installed edge-tts version.
    return _TABLE_ROW.findall(listing)


def get_voices():
    """Collect the edge-tts voice list, grouped by language, into voices.json.

    Runs ``edge-tts --list-voices``, extracts each voice's name and gender,
    derives the language from the part of the name before the first "-"
    (looked up in ``language_mapping``, falling back to the raw code), and
    writes a JSON object of the form
    ``{language_name: [{"name": ..., "gender": ...}, ...]}`` to ``voices.json``
    in the current working directory.

    Returns:
        None. If the command cannot be started or exits with a non-zero
        status, an error message is printed and no file is written.
    """
    # Run the edge-tts --list-voices command.
    try:
        result = subprocess.run(
            ['edge-tts', '--list-voices'], capture_output=True, text=True
        )
    except OSError:
        # subprocess.run raises FileNotFoundError (an OSError) when the
        # executable is not on PATH; report it like any other failure.
        print("Erro ao executar o comando edge-tts.")
        return

    if result.returncode != 0:
        print("Erro ao executar o comando edge-tts.")
        return

    # Group the parsed voices by language name.
    voices_data = defaultdict(list)
    for name, gender in _parse_voice_listing(result.stdout):
        language_code = name.split('-')[0]  # e.g. "en" from "en-US-..."
        # Unknown codes are kept as their own group name.
        language_name = language_mapping.get(language_code, language_code)
        voices_data[language_name].append({
            'name': name,
            'gender': gender,
        })

    # Save to a JSON file; ensure_ascii=False keeps non-ASCII language names
    # readable in the output.
    with open('voices.json', 'w', encoding='utf-8') as json_file:
        json.dump(voices_data, json_file, ensure_ascii=False, indent=4)
|
113 |
+
|
114 |
+
# Script entry point: build voices.json only when run directly, not on import.
if __name__ == "__main__":
    get_voices()
|