Spaces:

eyov
/

Aud2Stm2Mdi

Running

App Files Files Community

Aud2Stm2Mdi / app.py

eyov

selfish plug

e1cd101 verified 19 days ago

raw

history blame contribute delete

4.76 kB

	import gradio as gr
	import os
	import tempfile
	from pathlib import Path
	from typing import List, Tuple, Optional
	from concurrent.futures import ThreadPoolExecutor
	import logging
	import soundfile as sf
	import numpy as np
	import shutil
	from validators import AudioValidator
	from demucs_handler import DemucsProcessor
	from basic_pitch_handler import BasicPitchConverter

	# Quiet TensorFlow: set the TF_CPP_MIN_LOG_LEVEL environment variable and
	# raise the threshold of the Python-side 'tensorflow' logger to ERROR.
	# NOTE(review): this assignment runs after the imports above; if one of
	# those modules already imported TensorFlow, the variable may be read too
	# late to have any effect -- confirm, and move it above the imports if so.
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
	logging.getLogger('tensorflow').setLevel(logging.ERROR)

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)

	# Root directory under which per-file working directories (stems and MIDI
	# output) are created. It is created at import time, including missing
	# parents; an already existing directory is reused.
	# NOTE(review): the path lives under /tmp, so "persistent" presumably means
	# "outlives a single request", not "survives a reboot" -- confirm.
	OUTPUT_DIR = Path("/tmp/audio_processor")
	OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

	def process_single_audio(audio_path: str, stem_type: str, convert_midi: bool) -> Tuple[Tuple[int, np.ndarray], Optional[str]]:
	    """Separate one stem from an audio file and optionally convert it to MIDI.

	    Args:
	        audio_path: Path of the audio file to process.
	        stem_type: One of "drums", "bass", "other" or "vocals".
	        convert_midi: When true, also write a ``.mid`` file next to the stem.

	    Returns:
	        ``((sample_rate, samples), midi_path)`` where ``samples`` is a mono
	        int16 array read back from the saved stem, and ``midi_path`` is the
	        MIDI file path as a string or ``None`` when no MIDI was requested.

	    Raises:
	        ValueError: If ``stem_type`` is not one of the known stem names.
	        Exception: Anything raised by separation, file I/O or MIDI
	            conversion is printed and re-raised unchanged.
	    """
	    # Local import: only needed here, for the working-directory name.
	    import hashlib

	    # Order matters: the position of a name is used as the index into the
	    # second axis of the separated sources.
	    stem_names = ["drums", "bass", "other", "vocals"]

	    try:
	        # Fail fast, before the expensive separation, with a readable message.
	        if stem_type not in stem_names:
	            raise ValueError(
	                f"Unknown stem type {stem_type!r}; expected one of {stem_names}"
	            )
	        stem_index = stem_names.index(stem_type)

	        # Name the working directory after a digest of the path. The built-in
	        # hash() of a str is salted per interpreter run and can be negative,
	        # so it does not give reproducible directory names.
	        path_digest = hashlib.sha256(os.fsencode(audio_path)).hexdigest()[:16]
	        process_dir = OUTPUT_DIR / path_digest
	        process_dir.mkdir(parents=True, exist_ok=True)

	        processor = DemucsProcessor()

	        print(f"Starting processing of file: {audio_path}")

	        # Process stems
	        sources, sample_rate = processor.separate_stems(audio_path)
	        print(f"Number of sources returned: {sources.shape}")
	        print(f"Stem type requested: {stem_type}")

	        # Get the requested stem
	        selected_stem = sources[0, stem_index]

	        # Save stem.
	        # NOTE(review): save_stem is expected to write "<stem_type>.wav"
	        # inside process_dir; the file is read back from that path below.
	        stem_path = process_dir / f"{stem_type}.wav"
	        processor.save_stem(selected_stem, stem_type, str(process_dir), sample_rate)
	        print(f"Saved stem to: {stem_path}")

	        # Load the saved audio file for Gradio
	        audio_data, sr = sf.read(str(stem_path))
	        if audio_data.ndim > 1:
	            audio_data = audio_data.mean(axis=1)  # Convert to mono if stereo

	        # Convert to int16 format. Clip first: a sample outside [-1, 1]
	        # would otherwise wrap around when cast to int16.
	        audio_data = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

	        # Convert to MIDI if requested; the converter is only built when
	        # it is actually needed.
	        midi_path = None
	        if convert_midi:
	            converter = BasicPitchConverter()
	            midi_path = process_dir / f"{stem_type}.mid"
	            converter.convert_to_midi(str(stem_path), str(midi_path))
	            print(f"Saved MIDI to: {midi_path}")

	        return (sr, audio_data), (str(midi_path) if midi_path else None)
	    except Exception as e:
	        print(f"Error in process_single_audio: {str(e)}")
	        raise

	def create_interface():
	    """Build the Gradio interface for stem separation and MIDI conversion.

	    Returns:
	        A ``gr.Interface`` whose inputs are the uploaded audio files, the
	        stem to extract and a "Convert to MIDI" checkbox, and whose outputs
	        are the separated stem as audio plus an optional MIDI file.
	    """

	    def process_audio(
	        audio_files: List[str],
	        stem_type: str,
	        convert_midi: bool = True,
	        progress=gr.Progress()
	    ) -> Tuple[Tuple[int, np.ndarray], Optional[str]]:
	        """Process the first uploaded file and return its stem and MIDI path.

	        Only the first entry of ``audio_files`` is processed. Any failure,
	        including a missing upload, is printed and re-raised as ``gr.Error``
	        so the message is shown in the UI.
	        """
	        # NOTE(review): `progress` is not used in this body; it is kept in
	        # the signature unchanged -- presumably for Gradio's progress
	        # tracking, confirm before removing.
	        try:
	            # The value may be None (nothing uploaded) as well as an empty
	            # list, so do not call len() on it unconditionally.
	            file_count = len(audio_files) if audio_files else 0
	            print(f"Starting processing of {file_count} files")
	            print(f"Selected stem type: {stem_type}")

	            if not audio_files:
	                raise ValueError("No audio files provided")

	            # Process single file for now
	            audio_path = audio_files[0]  # Take first file
	            print(f"Processing file: {audio_path}")
	            return process_single_audio(audio_path, stem_type, convert_midi)

	        except Exception as e:
	            print(f"Error in audio processing: {str(e)}")
	            # Chain the cause so the original traceback stays available.
	            raise gr.Error(str(e)) from e

	    interface = gr.Interface(
	        fn=process_audio,
	        inputs=[
	            gr.File(
	                file_count="multiple",
	                file_types=AudioValidator.SUPPORTED_FORMATS,
	                label="Upload Audio Files"
	            ),
	            gr.Dropdown(
	                choices=["vocals", "drums", "bass", "other"],
	                label="Select Stem",
	                value="vocals"
	            ),
	            gr.Checkbox(label="Convert to MIDI", value=True)
	        ],
	        outputs=[
	            gr.Audio(label="Separated Stems", type="numpy"),
	            gr.File(label="MIDI Files")
	        ],
	        title="Audio Stem Separator & MIDI Converter",
	        description="Upload audio files to separate stems and convert to MIDI\n\n" +
	        "Created by Ever Olivares - Looking for Summer 2025 Internship Opportunities\n" +
	        "Connect with me: [LinkedIn](https://www.linkedin.com/in/everolivares/)",
	        cache_examples=True,
	        allow_flagging="never"
	    )

	    return interface

	if __name__ == "__main__":
	    # Script entry point: build the UI and serve it on 0.0.0.0:7860 with
	    # the share link disabled and no authentication or TLS key/certificate.
	    launch_settings = {
	        "share": False,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "auth": None,
	        "ssl_keyfile": None,
	        "ssl_certfile": None,
	    }
	    interface = create_interface()
	    interface.launch(**launch_settings)