fantiti committed on
Commit
b3ce554
verified
1 Parent(s): 61f0605

Create get_voices.py

Browse files
Files changed (1) hide show
  1. get_voices.py +115 -0
get_voices.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import json
3
+ import re
4
+ from collections import defaultdict
5
+
6
# Maps the language prefix of a voice short name (the part before the first
# "-", e.g. "pt" in "pt-BR-AntonioNeural") to the language's English name.
# Codes that are missing here are used verbatim as the language name by
# get_voices(), so an incomplete table degrades gracefully.
language_mapping = {
    "af": "Afrikaans",
    "am": "Amharic",
    "ar": "Arabic",
    "az": "Azerbaijani",
    "bg": "Bulgarian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "ca": "Catalan",
    "cs": "Czech",
    "cy": "Welsh",
    "da": "Danish",
    "de": "German",  # was "Deutsch"; every other value is an English name
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "et": "Estonian",
    "fa": "Persian",
    "fi": "Finnish",
    "fil": "Filipino",
    "fr": "French",
    "ga": "Irish",
    "gl": "Galician",
    "gu": "Gujarati",
    "he": "Hebrew",
    "hi": "Hindi",
    "hr": "Croatian",
    "hu": "Hungarian",
    "id": "Indonesian",
    "is": "Icelandic",
    "it": "Italian",
    "iu": "Inuktitut",
    "ja": "Japanese",
    "jv": "Javanese",
    "ka": "Georgian",
    "kk": "Kazakh",
    "km": "Khmer",
    "kn": "Kannada",
    "ko": "Korean",
    "lo": "Lao",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "mk": "Macedonian",
    "ml": "Malayalam",
    "mn": "Mongolian",
    "mr": "Marathi",
    "ms": "Malay",
    "mt": "Maltese",
    "my": "Burmese",
    "nb": "Norwegian Bokmål",
    "ne": "Nepali",
    "nl": "Dutch",
    "pl": "Polish",
    "ps": "Pashto",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "si": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "sq": "Albanian",
    "sr": "Serbian",
    "su": "Sundanese",
    "sv": "Swedish",
    "sw": "Swahili",
    "ta": "Tamil",
    "te": "Telugu",
    "th": "Thai",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "uz": "Uzbek",
    "vi": "Vietnamese",
    "zh": "Chinese",
    "zu": "Zulu",
    # Non-standard aliases kept only so existing lookups keep working:
    # "al" is not an ISO 639-1 language code (Albanian is "sq") and "in" is
    # the withdrawn ISO 639 code for Indonesian (now "id").
    "al": "Albanian",
    "in": "Indonesian",
}
85
+
86
# Key/value listing, one record per voice, e.g.:
#     Name: af-ZA-AdriNeural
#     Gender: Female
_VOICE_RECORD_RE = re.compile(r'Name:\s*(.*?)\s*Gender:\s*(\w+)')

# Table row ("<short name>  <gender>  ..."), which newer edge-tts releases
# appear to print instead of the key/value listing -- NOTE(review): confirm
# against the installed edge-tts version. Requiring a "<lang>-" prefix keeps
# the header row and the dashed separator row from matching.
_VOICE_TABLE_ROW_RE = re.compile(r'^([A-Za-z]{2,3}-\S+)[ \t]+(\w+)', re.MULTILINE)


def _parse_voice_listing(listing):
    """Return the (name, gender) pairs found in ``edge-tts --list-voices`` output."""
    pairs = _VOICE_RECORD_RE.findall(listing)
    if not pairs:
        # Nothing in key/value form: fall back to the tabular layout.
        pairs = _VOICE_TABLE_ROW_RE.findall(listing)
    return pairs


def get_voices():
    """Write the voices reported by ``edge-tts`` to ``voices.json``.

    Runs ``edge-tts --list-voices``, groups the voices by language and dumps
    the result as JSON of the form
    ``{language name: [{"name": ..., "gender": ...}, ...]}``.
    The language name comes from ``language_mapping``; unknown language codes
    are used as-is.

    Returns:
        None. If the command cannot be started or exits with a non-zero
        status, an error message is printed and no file is written.
    """
    try:
        result = subprocess.run(
            ['edge-tts', '--list-voices'], capture_output=True, text=True
        )
    except OSError:
        # Raised (as FileNotFoundError) when edge-tts is not installed or not
        # on PATH; report it the same way as a failed run instead of crashing.
        print("Erro ao executar o comando edge-tts.")
        return

    if result.returncode != 0:
        print("Erro ao executar o comando edge-tts.")
        if result.stderr:
            # Surface the tool's own diagnostics so the failure is actionable.
            print(result.stderr.strip())
        return

    voices_data = defaultdict(list)
    for name, gender in _parse_voice_listing(result.stdout):
        # The language code is the part before the first "-" of the name.
        language_code = name.split('-')[0]
        language_name = language_mapping.get(language_code, language_code)
        voices_data[language_name].append({
            'name': name,
            'gender': gender,
        })

    # ensure_ascii=False keeps non-ASCII language names readable in the file.
    with open('voices.json', 'w', encoding='utf-8') as json_file:
        json.dump(voices_data, json_file, ensure_ascii=False, indent=4)
113
+
114
# Script entry point: build voices.json only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    get_voices()