nakas's picture
Update app.py
1f1c84d
raw
history blame
1.63 kB
import tempfile

import gradio as gr
import librosa

from audio_style_transfer.models import timedomain
def audioStyleTransfer(content, style):
    """Run time-domain neural audio style transfer.

    Renders the "content" clip in the style of the "style" clip using
    pkmital's time-domain neural style transfer and returns the path of
    the synthesized WAV file (consumed by the Gradio Audio output).

    Args:
        content: Filesystem path to the content audio file.
        style: Filesystem path to the style audio file.

    Returns:
        Path to the generated WAV file.
    """
    print(style, content)
    # A fresh temp file per call: the previous hard-coded /tmp path was
    # clobbered by concurrent requests and is not portable off POSIX.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output = tmp.name
    # Match the synthesis sample rate to the content clip's native rate.
    sr = librosa.get_samplerate(content)
    timedomain.run(
        style_fname=style,
        content_fname=content,
        output_fname=output,
        norm=False,
        n_fft=8192,      # 512 to sr / 2. Higher is better quality but is slower.
        n_layers=2,      # 1 to 3. Higher is better quality but is slower.
        n_filters=4096,  # 512 - 4096. Higher is better quality but is slower.
        hop_length=256,  # 256 to n_fft / 2. Lower gives better temporal resolution.
        alpha=0.0005,    # 0.0001 to 0.01. Higher bleeds more of the original "content".
        k_w=4,           # 3 to 5. Higher synthesizes more complex patterns.
        iterations=300,  # 100 to 1000. Higher is better quality but is slower.
        stride=1,        # 1 to 3. Lower is better quality but is slower.
        sr=sr,
    )
    print("output is ", output)
    return output
# Assemble the Gradio UI: two uploaded audio clips in, one synthesized clip out.
content_input = gr.Audio(source="upload", type="filepath", label="Content")
style_input = gr.Audio(source="upload", type="filepath", label="Style")
result_output = gr.Audio(label="Output")

iface = gr.Interface(
    fn=audioStyleTransfer,
    title="Time Domain Audio Style transfer",
    description="Forked from https://github.com/pkmital/time-domain-neural-audio-style-transfer Built to style transfer to audio using style audio.\
it seems to work best for shorter clips",
    inputs=[content_input, style_input],
    outputs=result_output,
)
iface.launch()