roszcz's picture
Add vgmidi dataset
bce64c5
raw
history blame
1.7 kB
import streamlit as st
import streamlit_pianoroll
from fortepyan import MidiPiece
from datasets import load_dataset, Dataset
def main():
    """Render the app: a dataset picker followed by a preview of the choice."""
    # Dataset names as they appear under the "epr-labs" namespace.
    dataset_choices = [
        "maestro-sustain-v2",
        "pijamia-midi-v1",
        "vgmidi",
        "piano-midi-de",
        "lakh-lmd-full",
        "giant-midi-sustain-v2",
        "imslp-midi-v1",
        "atepp-1.1-sustain-v2",
    ]

    selected_name = st.selectbox("Select dataset", dataset_choices)
    preview_dataset(selected_name)
@st.cache_data(persist="disk")
def get_dataset(dataset_name: str) -> Dataset:
    """Load the ``train`` split of ``epr-labs/<dataset_name>``.

    The result is memoized through ``st.cache_data`` with ``persist="disk"``,
    so repeated calls with the same name reuse the cached value.

    Args:
        dataset_name: Name of the dataset inside the ``epr-labs`` namespace.

    Returns:
        The loaded ``train`` split.
    """
    repo_id = f"epr-labs/{dataset_name}"
    return load_dataset(repo_id, split="train")
def preview_dataset(dataset_name: str):
    """Show a summary of a dataset and an interactive preview of one record.

    Writes the dataset title and record count, displays a usage snippet,
    lets the user pick a record index, then renders that record as a piano
    roll together with its source metadata.

    Args:
        dataset_name: Name of the dataset inside the ``epr-labs`` namespace.
    """
    st.write(f"### Dataset: {dataset_name}")

    with st.spinner("Loading the dataset, hang tight!"):
        dataset = get_dataset(dataset_name)

    n_records = len(dataset)
    st.write(f"""
Number of records: {n_records}
""")

    code = f"""
dataset = load_dataset("epr-labs/{dataset_name}", split="train")
record = dataset[321]
piece = MidiPiece.from_huggingface(record)
# Playback in streamlit
streamlit_pianoroll.from_fortepyan(piece)
"""
    st.code(code, language="python")

    # Nothing can be indexed in an empty dataset, and number_input would be
    # given max_value < min_value, so stop here with a visible message.
    if n_records == 0:
        st.warning("This dataset has no records to preview.")
        return

    last_idx = n_records - 1
    record_idx = st.number_input(
        label="record id",
        min_value=0,
        max_value=last_idx,
        # Streamlit rejects a default above max_value; clamp it so datasets
        # with fewer than 51 records still render.
        value=min(50, last_idx),
    )

    record = dataset[record_idx]
    piece = MidiPiece.from_huggingface(record)

    # TODO Improve fortepyan to make this cleaner
    # Shift by the negated minimum start time, i.e. move the earliest note
    # start to zero (presumably so playback begins without a leading gap).
    piece.time_shift(-piece.df.start.min())
    streamlit_pianoroll.from_fortepyan(piece)

    st.write("#### Piece metadata")
    st.json(piece.source)
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()