|
|
|
from diffusers import DiffusionPipeline |
|
from riffusion.spectrogram_image_converter import SpectrogramImageConverter |
|
from riffusion.spectrogram_params import SpectrogramParams |
|
from io import BytesIO |
|
|
|
|
|
|
|
|
|
# Spectrogram settings, constructed with no arguments (library defaults).
params = SpectrogramParams()

# Single module-level converter built from those settings; predict_function
# uses it to turn a generated spectrogram image into audio.
converter = SpectrogramImageConverter(params)
|
|
|
|
|
def preprocess_function(text):
    """Read a prompt file and split it into a prompt and a negative prompt.

    The file content is split on ``;``. The first field is the prompt and the
    second field, if present, is the negative prompt. Any further fields are
    ignored.

    Args:
        text: Path of a UTF-8 encoded text file holding the prompt text.

    Returns:
        A ``(prompt, negative_prompt)`` tuple. ``prompt`` is returned exactly
        as it appears in the file (no stripping). ``negative_prompt`` has
        surrounding whitespace removed and is ``""`` when the file contains
        no ``;`` separator.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(text, "r", encoding="utf-8") as f:
        data = f.read()

    fields = data.split(";")
    prompt = fields[0]
    # The negative prompt is optional: a file without ";" used to fail with
    # IndexError on fields[1]; fall back to an empty negative prompt instead.
    negative_prompt = fields[1].strip() if len(fields) > 1 else ""
    return (prompt, negative_prompt)
|
|
|
|
|
def predict_function(params, pipe, output_path="output.wav", width=768):
    """Generate a spectrogram image from a prompt and write it out as WAV audio.

    Args:
        params: ``(prompt, negative_prompt)`` pair, e.g. the value returned
            by ``preprocess_function``.
        pipe: Callable pipeline. It is invoked with the prompt, the negative
            prompt and ``width``, and its result must expose an ``images``
            sequence whose first element is used.
        output_path: Where the WAV file is written. Defaults to
            ``"output.wav"`` in the current working directory, so concurrent
            callers should pass distinct paths to avoid overwriting each
            other's output.
        width: Image width forwarded to ``pipe``. Defaults to 768.

    Returns:
        A ``(output_path, spec)`` tuple: the path of the written WAV file and
        the generated spectrogram image.
    """
    prompt, negative_prompt = params
    result = pipe(
        prompt,
        negative_prompt=negative_prompt,
        width=width,
    )
    spec = result.images[0]

    # Uses the module-level converter to turn the image into an audio segment.
    wav = converter.audio_from_spectrogram_image(image=spec)

    # NOTE(review): assumes `wav` is a pydub AudioSegment, whose export()
    # returns the still-open file object it wrote to -- confirm. Close it so
    # the data is flushed and the handle is not leaked before the file is
    # read again downstream.
    exported = wav.export(output_path, format="wav")
    if exported is not None:
        exported.close()

    return (output_path, spec)
|
|
|
|
|
def model_load_function(model_path, device="cuda"):
    """Load a diffusion pipeline and move it to the requested device.

    Args:
        model_path: Location of the pretrained pipeline, passed unchanged to
            ``DiffusionPipeline.from_pretrained``.
        device: Device the pipeline is moved to via ``.to(device)``. Defaults
            to ``"cuda"``; pass another device (e.g. ``"cpu"``) on hosts
            without a CUDA GPU.

    Returns:
        The loaded pipeline, as returned by ``.to(device)``.
    """
    pipe = DiffusionPipeline.from_pretrained(model_path)
    return pipe.to(device)
|
|
|
|
|
def postprocess_function(audio_file, content_type=None):
    """Load an audio file into memory and wrap it in the response payload.

    Args:
        audio_file: Path of the audio file to read (opened in binary mode).
        content_type: Accepted for interface compatibility; not used.

    Returns:
        ``{"output": {"data": <BytesIO>, "ext": "wav"}}`` where the
        ``BytesIO`` holds the full file content and is positioned at offset 0.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read inside a context manager so the file handle is always closed.
    with open(audio_file, "rb") as f:
        audio_bytes = BytesIO(f.read())

    # A freshly constructed BytesIO already starts at position 0, so the
    # buffer is ready to be read by the caller without an explicit seek.
    return {"output": {"data": audio_bytes, "ext": "wav"}}
|
|
|
|
|
|
|
""" |
|
if __name__ == '__main__':
    text = ""  # set to the path of a prompt file: "<prompt>;<negative prompt>"
    data = preprocess_function(text)
    model_path = "./model_files"
    pipe = model_load_function(model_path)
    audio_file, spec = predict_function(data, pipe)
    out = postprocess_function(audio_file)
    print(out)
|
""" |
|
|