fffiloni committed on
Commit
0cdadc9
1 Parent(s): 0bd14d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -2
app.py CHANGED
@@ -71,8 +71,48 @@ def infer(prompt, input_wav_file):
71
  for item in contents:
72
  print(item)
73
 
74
- return "output.wav", f"bark_voices/{file_name}/{contents[1]}"
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  css = """
77
  #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
78
  """
@@ -95,6 +135,7 @@ with gr.Blocks(css=css) as demo:
95
  )
96
 
97
  submit_btn = gr.Button("Submit")
 
98
 
99
  cloned_out = gr.Audio(
100
  label="Text to speech output"
@@ -112,7 +153,29 @@ with gr.Blocks(css=css) as demo:
112
  ],
113
  outputs = [
114
  cloned_out,
115
- npz_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  ]
117
  )
118
 
 
71
  for item in contents:
72
  print(item)
73
 
74
+ return "output.wav", f"bark_voices/{file_name}/{contents[1]}", gr.update(visible=False), gr.update(visible=True)
75
 
76
def infer_with_npz(prompt, input_wav_file):
    """Synthesize `prompt` using the voice folder that matches `input_wav_file`.

    The voice folder is `bark_voices/<name>`, where `<name>` is the base
    name of `input_wav_file` without its extension. The first entry listed
    in that folder is deleted before synthesis, then the generated audio is
    written to `output.wav` in the working directory.

    Args:
        prompt: Text to synthesize.
        input_wav_file: Path to the WAV file whose base name identifies the
            voice folder.

    Returns:
        The string `'output.wav'`, the path of the written audio file.

    Raises:
        FileNotFoundError: If `bark_voices/<name>` does not exist.
    """
    # Extract the file name without the extension; it doubles as speaker id.
    file_name = os.path.splitext(os.path.basename(input_wav_file))[0]
    voice_dir = f"bark_voices/{file_name}"

    # List all the files and subdirectories in the voice folder.
    contents = os.listdir(voice_dir)
    for item in contents:
        print(item)

    # os.listdir() yields bare names, so the entry must be joined with its
    # folder; passing the bare name to os.remove() would resolve it against
    # the current working directory instead.
    # NOTE(review): listdir order is arbitrary; contents[0] is presumably the
    # uploaded WAV (so only the .npz remains) -- confirm against `infer`.
    if contents:
        os.remove(os.path.join(voice_dir, contents[0]))

    # It assumes that you have a speaker file in
    # `bark_voices/speaker_n/speaker.npz`
    output_dict = model.synthesize(
        prompt,
        config,
        speaker_id=f"{file_name}",
        voice_dirs="bark_voices/",
    )
    print(output_dict)

    sample_rate = 24000  # Replace with the actual sample rate

    wavfile.write(
        'output.wav',
        sample_rate,
        output_dict['wav'],
    )

    # Re-read the folder so the second listing reflects its current state
    # rather than repeating the list captured before the deletion.
    for item in os.listdir(voice_dir):
        print(item)

    return 'output.wav'
113
+
114
def uploaded_audio():
    """Build the pair of visibility updates emitted when audio is uploaded.

    Returns:
        A 2-tuple of `gr.update` results: the first sets `visible=True`,
        the second sets `visible=False`. They are applied, in order, to the
        components listed as outputs of the event that calls this function.
    """
    show_first = gr.update(visible=True)
    hide_second = gr.update(visible=False)
    return show_first, hide_second
116
  css = """
117
  #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
118
  """
 
135
  )
136
 
137
  submit_btn = gr.Button("Submit")
138
+ submit_with_npz_btn = gr.Button("Submit 2", visible=False)
139
 
140
  cloned_out = gr.Audio(
141
  label="Text to speech output"
 
153
  ],
154
  outputs = [
155
  cloned_out,
156
+ npz_file,
157
+ submit_btn,
158
+ submit_with_npz_btn
159
+ ]
160
+ )
161
+
162
+ submit_with_npz_btn.click(
163
+ fn = infer_with_npz,
164
+ inputs = [
165
+ prompt,
166
+ audio_in
167
+ ],
168
+ outputs = [
169
+ cloned_out
170
+ ]
171
+ )
172
+
173
+ audio_in.upload(
174
+ fn=uploaded_audio,
175
+ inputs=[],
176
+ outputs=[
177
+ submit_btn,
178
+ submit_with_npz_btn
179
  ]
180
  )
181