File size: 1,721 Bytes
fabcff4
 
 
 
2354dd6
fabcff4
 
 
b31ba68
bce64c5
b31ba68
bce64c5
a856ad2
f320495
b31ba68
 
f930d36
2354dd6
b31ba68
 
 
 
 
 
 
 
2354dd6
 
e0987f0
b31ba68
2354dd6
 
 
 
b31ba68
2354dd6
 
b31ba68
 
 
 
 
 
 
 
 
 
 
 
 
 
fabcff4
 
 
 
 
b31ba68
fabcff4
 
 
 
ba969e8
 
 
fabcff4
 
ba969e8
 
 
fabcff4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import streamlit_pianoroll
from fortepyan import MidiPiece

from datasets import load_dataset, Dataset


def main():
    """Render the dataset picker and preview whichever dataset is selected."""
    # Dataset names offered in the selector, in display order.
    dataset_choices = [
        "maestro-sustain-v2",
        "pijamia-midi-v1",
        "vgmidi",
        "music-net",
        "piano-midi-de",
        "lakh-lmd-full",
        "giant-midi-sustain-v2",
        "imslp-midi-v1",
        "atepp-1.1-sustain-v2",
    ]
    selected = st.selectbox(label="Select dataset", options=dataset_choices)
    preview_dataset(selected)


@st.cache_data(persist="disk")
def get_dataset(dataset_name: str) -> Dataset:
    """Load the ``train`` split of the ``epr-labs/<dataset_name>`` dataset.

    The call is wrapped in ``st.cache_data(persist="disk")``, so the result
    is cached per ``dataset_name``.
    """
    repo_id = f"epr-labs/{dataset_name}"
    return load_dataset(repo_id, split="train")


def preview_dataset(dataset_name: str):
    """Render an interactive preview of one dataset.

    Shows a header, loads the dataset through ``get_dataset``, then displays
    the record count, a usage snippet, a record selector, a piano roll of the
    selected record and the piece's ``source`` metadata as JSON.

    Args:
        dataset_name: Name of the dataset, passed on to ``get_dataset``.
    """
    st.write(f"### Dataset: {dataset_name}")
    with st.spinner("Loading the dataset, hang tight!"):
        dataset = get_dataset(dataset_name)

    n_records = len(dataset)
    st.write(f"""
    Number of records: {n_records}
    """)

    # With no records there is no valid index to offer: the number input
    # below would get max_value < min_value and raise.
    if n_records == 0:
        st.warning("This dataset has no records to preview.")
        return

    last_idx = n_records - 1

    # Keep the example index within range so the snippet runs as shown.
    example_idx = min(321, last_idx)
    code = f"""
    dataset = load_dataset("epr-labs/{dataset_name}", split="train")

    record = dataset[{example_idx}]
    piece = MidiPiece.from_huggingface(record)

    # Playback in streamlit
    streamlit_pianoroll.from_fortepyan(piece)
    """
    st.code(code, language="python")

    record_idx = st.number_input(
        label="record id",
        min_value=0,
        max_value=last_idx,
        # The default must not exceed max_value, otherwise the widget raises
        # for datasets with 50 or fewer records.
        value=min(50, last_idx),
    )

    record = dataset[record_idx]
    piece = MidiPiece.from_huggingface(record)
    # TODO Improve fortepyan to make this cleaner
    # Shift by the negative of the smallest `start` value in the piece.
    piece.time_shift(-piece.df.start.min())

    streamlit_pianoroll.from_fortepyan(piece)

    st.write("#### Piece metadata")
    st.json(piece.source)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()