roszcz's picture
show more datasets
b31ba68
raw
history blame
1.4 kB
import streamlit as st
import streamlit_pianoroll
from fortepyan import MidiPiece
from datasets import load_dataset
def main():
    """Render the dataset picker and preview whichever dataset is chosen."""
    # Dataset names offered in the dropdown; preview_dataset receives the
    # selected one unchanged.
    dataset_choices = [
        "pijamia-midi-v1",
        "lakh-lmd-full",
        "giant-midi-sustain-v2",
        "maestro-sustain-v2",
    ]

    selected_name = st.selectbox("Select dataset", dataset_choices)
    preview_dataset(selected_name)
def preview_dataset(dataset_name: str):
    """Show a browsable preview of one dataset in the Streamlit page.

    Loads the ``train[100:200]`` slice of ``epr-labs/<dataset_name>``, prints
    the record count and a usage snippet, then lets the user pick a record
    from that slice to display as a piano roll together with its metadata.

    Args:
        dataset_name: Name of the dataset inside the ``epr-labs`` namespace.
    """
    dataset = load_dataset(f"epr-labs/{dataset_name}", split="train[100:200]")
    n_records = len(dataset)

    st.write(f"### Dataset: {dataset_name}")
    st.write(f"\nNumber of records: {n_records}\n")

    # Usage snippet displayed to the user; it is only rendered, never executed.
    code = (
        "\n"
        f'dataset = load_dataset("epr-labs/{dataset_name}", split="train")\n'
        "record = dataset[321]\n"
        "piece = MidiPiece.from_huggingface(record)\n"
        "# Playback in streamlit\n"
        "streamlit_pianoroll.from_fortepyan(piece)\n"
    )
    st.code(code, language="python")

    if n_records == 0:
        # With no records there is no valid index: max_value would be -1,
        # below min_value, and the number input could not be built.
        st.warning("No records available to preview in this dataset slice.")
        return

    last_idx = n_records - 1
    record_idx = st.number_input(
        label="record id",
        min_value=0,
        max_value=last_idx,
        # Keep the default inside [0, last_idx] when the slice holds fewer
        # than 51 records; otherwise the default stays at 50.
        value=min(50, last_idx),
    )

    record = dataset[record_idx]
    piece = MidiPiece.from_huggingface(record)

    # TODO Improve fortepyan to make this cleaner
    # Shift by minus the earliest note start, presumably so playback begins at
    # time zero. The return value is unused, so this relies on time_shift
    # modifying the piece in place (confirm against fortepyan).
    piece.time_shift(-piece.df.start.min())
    streamlit_pianoroll.from_fortepyan(piece)

    st.write("#### Piece metadata")
    st.json(piece.source)
# Script entry point: build the page only when this file is run directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()