# Aud2Stm2Mdi / app.py
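"""Gradio app that separates an uploaded audio file into stems with Demucs
and optionally converts the selected stem to MIDI with Basic Pitch."""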
import logging
import os

# Suppress TF logging. The environment variable must be set before anything
# imports TensorFlow, so it goes ahead of the handler imports below.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
logging.getLogger('tensorflow').setLevel(logging.ERROR)

from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import List, Optional, Tuple

import gradio as gr
import numpy as np
import soundfile as sf

from validators import AudioValidator
from demucs_handler import DemucsProcessor
from basic_pitch_handler import BasicPitchConverter

logger = logging.getLogger(__name__)

# Create a persistent directory for outputs: Gradio serves result files by
# path, so they must outlive the request rather than live in a
# TemporaryDirectory.
OUTPUT_DIR = Path("/tmp/audio_processor")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
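# NOTE: output files accumulate here across requests. /tmp is ephemeral on
# Hugging Face Spaces, but a long-running host would eventually want a
# cleanup pass over stale subdirectories.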

def process_single_audio(audio_path: str, stem_type: str, convert_midi: bool) -> Tuple[Tuple[int, np.ndarray], Optional[str]]:
    try:
        # Create a unique subdirectory for this file (hash() is stable within
        # one process, which is enough to keep concurrent requests apart)
        process_dir = OUTPUT_DIR / str(hash(audio_path))
        process_dir.mkdir(parents=True, exist_ok=True)

        processor = DemucsProcessor()
        converter = BasicPitchConverter()
        print(f"Starting processing of file: {audio_path}")

        # Separate the mix into stems
        sources, sample_rate = processor.separate_stems(audio_path)
        print(f"Shape of sources returned: {sources.shape}")
        print(f"Stem type requested: {stem_type}")

        # Select the requested stem; the index order matches Demucs'
        # standard 4-stem output: drums, bass, other, vocals
        stem_index = ["drums", "bass", "other", "vocals"].index(stem_type)
        selected_stem = sources[0, stem_index]

        # Save the stem as a WAV file
        stem_path = process_dir / f"{stem_type}.wav"
        processor.save_stem(selected_stem, stem_type, str(process_dir), sample_rate)
        print(f"Saved stem to: {stem_path}")

        # Reload the saved stem for Gradio's numpy audio output
        audio_data, sr = sf.read(str(stem_path))
        if len(audio_data.shape) > 1:
            audio_data = audio_data.mean(axis=1)  # Downmix stereo to mono

        # Convert float samples to int16, clipping first to avoid overflow
        audio_data = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

        # Convert to MIDI if requested
        midi_path = None
        if convert_midi:
            midi_path = process_dir / f"{stem_type}.mid"
            converter.convert_to_midi(str(stem_path), str(midi_path))
            print(f"Saved MIDI to: {midi_path}")

        return (sr, audio_data), str(midi_path) if midi_path else None
    except Exception as e:
        print(f"Error in process_single_audio: {str(e)}")
        raise
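
# The module imports ThreadPoolExecutor, but the UI below only processes the
# first uploaded file. A minimal batch sketch (an assumption about the
# intended use, not wired into the interface) could look like this:
def process_batch(
    audio_paths: List[str],
    stem_type: str,
    convert_midi: bool,
    max_workers: int = 2,
) -> List[Tuple[Tuple[int, np.ndarray], Optional[str]]]:
    # Demucs inference is compute-bound, so keep the worker count small
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [
            pool.submit(process_single_audio, path, stem_type, convert_midi)
            for path in audio_paths
        ]
        return [f.result() for f in futures]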

def create_interface():
    # Instantiated eagerly, presumably so model weights are downloaded at
    # startup rather than on the first request; per-request handlers are
    # still created inside process_single_audio.
    processor = DemucsProcessor()
    converter = BasicPitchConverter()

    def process_audio(
        audio_files: List[str],
        stem_type: str,
        convert_midi: bool = True,
        progress=gr.Progress()
    ) -> Tuple[Tuple[int, np.ndarray], Optional[str]]:
        try:
            print(f"Starting processing of {len(audio_files)} files")
            print(f"Selected stem type: {stem_type}")
            # Only the first file is processed for now, even though the
            # uploader accepts multiple files
            if len(audio_files) > 0:
                audio_path = audio_files[0]
                print(f"Processing file: {audio_path}")
                return process_single_audio(audio_path, stem_type, convert_midi)
            else:
                raise ValueError("No audio files provided")
        except Exception as e:
            print(f"Error in audio processing: {str(e)}")
            raise gr.Error(str(e))
    interface = gr.Interface(
        fn=process_audio,
        inputs=[
            gr.File(
                file_count="multiple",
                file_types=AudioValidator.SUPPORTED_FORMATS,
                label="Upload Audio Files"
            ),
            gr.Dropdown(
                choices=["vocals", "drums", "bass", "other"],
                label="Select Stem",
                value="vocals"
            ),
            gr.Checkbox(label="Convert to MIDI", value=True)
        ],
        outputs=[
            gr.Audio(label="Separated Stem", type="numpy"),
            gr.File(label="MIDI File")
        ],
        title="Audio Stem Separator & MIDI Converter",
        description="Upload audio files to separate stems and convert to MIDI\n\n"
                    "Created by Ever Olivares - Looking for Summer 2025 Internship Opportunities\n"
                    "Connect with me: [LinkedIn](https://www.linkedin.com/in/everolivares/)",
        allow_flagging="never"
    )

    return interface

if __name__ == "__main__":
    interface = create_interface()
    interface.launch(
        share=False,
        server_name="0.0.0.0",  # bind all interfaces, required for container/Spaces hosting
        server_port=7860,
        auth=None,
        ssl_keyfile=None,
        ssl_certfile=None
    )
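
# To run locally (assuming the demucs, basic-pitch, gradio, soundfile, and
# numpy dependencies are installed):
#   python app.py    # then open http://localhost:7860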