normalizing entries
Browse files- prepare_model.py +86 -0
- vctk_speakers.json → speakers-log.json +0 -0
- speakers.md +18 -24
prepare_model.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ast
import json
import os
import subprocess
|
4 |
+
|
5 |
+
def generate_html_output(data, repository_path):
    """Append one HTML ``<p>`` entry per speaker to ``speakers.md``.

    Each entry names the speaker (prefixed with ``VCTK_``), gives age,
    gender, accent and region, and embeds an ``<audio>`` player whose source
    is ``{repository_path}/samples/{speaker_id}.wav``.

    Args:
        data: Mapping of speaker id (e.g. ``"p225"``) to a dict providing
            the keys ``age``, ``gender``, ``accents`` and ``region``.
        repository_path: Base URL or path that the ``samples/`` directory
            lives under; used verbatim as the prefix of the audio source.

    Raises:
        KeyError: If a speaker entry lacks one of the required keys.
    """
    # Single-letter codes are spelled out; any other value is written as-is.
    gender_names = {"F": "female", "M": "male"}

    # Append mode: this function never truncates the file, so callers can
    # invoke it repeatedly (one speaker at a time) to build up the page.
    with open("speakers.md", "a", encoding="utf-8") as file:
        for speaker_id, speaker_info in data.items():
            out_path = f"{repository_path}/samples/{speaker_id}.wav"
            age = speaker_info["age"]
            gender = speaker_info["gender"]
            gender = gender_names.get(gender, gender)
            accents = speaker_info["accents"]
            region = speaker_info["region"]

            file.write(
                f"<p>VCTK_{speaker_id}: {age} year old {gender}, "
                f"{accents} accent ({region})"
                f"<audio controls><source src=\"{out_path}\" "
                f"type=\"audio/wav\"></audio> </p>\n"
            )
|
19 |
+
|
20 |
+
|
21 |
+
# Script body: records the speaker metadata as JSON, asks the `tts` CLI which
# speakers the checkpoint knows, synthesizes one sample per known speaker and
# rebuilds speakers.md with an audio player entry for each of them.

# Speaker metadata keyed by VCTK speaker id.
data = {
    "p225": {"age": 23, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p228": {"age": 22, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p229": {"age": 23, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p230": {"age": 22, "gender": "F", "accents": "English", "region": "Stockton-on-tees", "comments": ""},
    "p231": {"age": 23, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p233": {"age": 23, "gender": "F", "accents": "English", "region": "Staffordshire", "comments": ""},
    "p236": {"age": 23, "gender": "F", "accents": "English", "region": "Manchester", "comments": ""},
    "p239": {"age": 22, "gender": "F", "accents": "English", "region": "Southwest England", "comments": ""},
    "p240": {"age": 21, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p244": {"age": 22, "gender": "F", "accents": "English", "region": "Manchester", "comments": ""},
    "p250": {"age": 22, "gender": "F", "accents": "English", "region": "Southeast England", "comments": ""},
    "p257": {"age": 24, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p267": {"age": 23, "gender": "F", "accents": "English", "region": "Yorkshire", "comments": ""},
    "p268": {"age": 23, "gender": "F", "accents": "English", "region": "Southern England", "comments": ""},
    "p269": {"age": 20, "gender": "F", "accents": "English", "region": "Newcastle", "comments": ""},
    "p276": {"age": 24, "gender": "F", "accents": "English", "region": "Oxford", "comments": ""},
    "p277": {"age": 23, "gender": "F", "accents": "English", "region": "Northeast England", "comments": ""},
    "p282": {"age": 23, "gender": "F", "accents": "English", "region": "Newcastle", "comments": ""},
}

# Model files shared by every `tts` invocation below.
MODEL_PATH = "checkpoint_85000.pth"
CONFIG_PATH = "config.json"

# Save the speaker metadata as pretty-printed JSON.
with open("speakers-log.json", "w", encoding="utf-8") as file:
    json.dump(data, file, indent=2)

# Ask the TTS CLI for the speaker indices. The command is passed as an
# argument list (no shell), so nothing in it is subject to shell parsing.
listing = subprocess.check_output(
    [
        "tts",
        "--model_path", MODEL_PATH,
        "--config_path", CONFIG_PATH,
        "--list_speaker_idxs",
    ],
    text=True,
)

# Drop blank lines and lines whose first non-blank character is "|" or ">"
# (the same lines the former `grep -vE` filter removed), keeping the payload.
payload = "\n".join(
    line
    for line in listing.splitlines()
    if line.strip() and not line.lstrip().startswith(("|", ">"))
)

# The remaining output was previously passed to eval(), so it is presumably a
# Python literal (a mapping keyed by speaker name). literal_eval parses such a
# literal without executing arbitrary code.
speaker_indices = ast.literal_eval(payload)

# The generated audio files are written below this directory.
os.makedirs("samples", exist_ok=True)

# Start speakers.md empty; generate_html_output() appends one entry per call.
with open("speakers.md", "w", encoding="utf-8"):
    pass

for speaker_idx in speaker_indices:
    # Remove the 'VCTK_' prefix to get the key used in the metadata table.
    speaker_id = speaker_idx.replace("VCTK_", "")

    # Speakers without metadata are skipped entirely.
    speaker_info = data.get(speaker_id)
    if speaker_info is None:
        continue

    # Sentence spoken in the sample; it must be defined before the command
    # below is built.
    text = f"Hello, I am from {speaker_info['region']}. I hope that you will select my voice for your project. Thank you."

    out_path = f"samples/{speaker_id}.wav"
    tts_command = [
        "tts",
        "--text", text,
        "--model_path", MODEL_PATH,
        "--language_idx", "en",
        "--config_path", CONFIG_PATH,
        "--speaker_idx", f"VCTK_{speaker_id}",
        "--out_path", out_path,
    ]

    # Execute the TTS command. A failure is reported but does not abort the
    # run, matching the previous os.system() call that ignored the status.
    completed = subprocess.run(tts_command)
    if completed.returncode != 0:
        print(f"warning: tts exited with status {completed.returncode} for {speaker_id}")

    # Write the speaker information to the speakers.md file.
    generate_html_output(
        {speaker_id: speaker_info},
        "https://huggingface.co/voices/VCTK_European_English_Females/resolve/main",
    )
|
vctk_speakers.json → speakers-log.json
RENAMED
File without changes
|
speakers.md
CHANGED
@@ -1,24 +1,18 @@
|
|
1 |
-
<audio controls>
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
[p267](samples/p267.wav) Age 23 Gender F Accents English Region Yorkshire Comments
|
20 |
-
[p268](samples/p268.wav) Age 23 Gender F Accents English Region Southern England Comments
|
21 |
-
[p269](samples/p269.wav) Age 20 Gender F Accents English Region Newcastle Comments
|
22 |
-
[p276](samples/p276.wav) Age 24 Gender F Accents English Region Oxford Comments
|
23 |
-
[p277](samples/p277.wav) Age 23 Gender F Accents English Region Northeast England Comments
|
24 |
-
[p282](samples/p282.wav) Age 23 Gender F Accents English Region Newcastle Comments
|
|
|
1 |
+
<p>VCTK_p225: 23 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p225.wav" type="audio/wav"></audio> </p>
|
2 |
+
<p>VCTK_p228: 22 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p228.wav" type="audio/wav"></audio> </p>
|
3 |
+
<p>VCTK_p229: 23 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p229.wav" type="audio/wav"></audio> </p>
|
4 |
+
<p>VCTK_p230: 22 year old female, English accent (Stockton-on-tees)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p230.wav" type="audio/wav"></audio> </p>
|
5 |
+
<p>VCTK_p231: 23 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p231.wav" type="audio/wav"></audio> </p>
|
6 |
+
<p>VCTK_p233: 23 year old female, English accent (Staffordshire)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p233.wav" type="audio/wav"></audio> </p>
|
7 |
+
<p>VCTK_p236: 23 year old female, English accent (Manchester)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p236.wav" type="audio/wav"></audio> </p>
|
8 |
+
<p>VCTK_p239: 22 year old female, English accent (Southwest England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p239.wav" type="audio/wav"></audio> </p>
|
9 |
+
<p>VCTK_p240: 21 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p240.wav" type="audio/wav"></audio> </p>
|
10 |
+
<p>VCTK_p244: 22 year old female, English accent (Manchester)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p244.wav" type="audio/wav"></audio> </p>
|
11 |
+
<p>VCTK_p250: 22 year old female, English accent (Southeast England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p250.wav" type="audio/wav"></audio> </p>
|
12 |
+
<p>VCTK_p257: 24 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p257.wav" type="audio/wav"></audio> </p>
|
13 |
+
<p>VCTK_p267: 23 year old female, English accent (Yorkshire)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p267.wav" type="audio/wav"></audio> </p>
|
14 |
+
<p>VCTK_p268: 23 year old female, English accent (Southern England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p268.wav" type="audio/wav"></audio> </p>
|
15 |
+
<p>VCTK_p269: 20 year old female, English accent (Newcastle)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p269.wav" type="audio/wav"></audio> </p>
|
16 |
+
<p>VCTK_p276: 24 year old female, English accent (Oxford)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p276.wav" type="audio/wav"></audio> </p>
|
17 |
+
<p>VCTK_p277: 23 year old female, English accent (Northeast England)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p277.wav" type="audio/wav"></audio> </p>
|
18 |
+
<p>VCTK_p282: 23 year old female, English accent (Newcastle)<audio controls><source src="https://huggingface.co/voices/VCTK_European_English_Females/resolve/main/samples/p282.wav" type="audio/wav"></audio> </p>
|
|
|
|
|
|
|
|
|
|
|
|