Dragunflie-420a
/

musicgen-small

Model card Files and versions Community

Dragunflie-420 committed on Sep 22, 2024

Commit

ddc68fa

·

verified ·

1 Parent(s): 38a830c

Create datasets

Files changed (1) hide show

datasets +89 -0

datasets ADDED Viewed

	@@ -0,0 +1,89 @@

+# Tutorial: Using the FMAHiphop Dataset
+# Installation Instructions:
+# Before running this script, make sure you have the required libraries installed.
+# You can install them using pip:
+#
+# pip install datasets pandas matplotlib
+#
+# If you're using a Jupyter notebook, you can run this in a cell:
+# !pip install datasets pandas matplotlib
+# First, let's import the necessary libraries
+try:
+    from datasets import load_dataset
+except ImportError:
+    print("The 'datasets' library is not installed. Please install it using: pip install datasets")
+    exit(1)
+try:
+    import pandas as pd
+except ImportError:
+    print("The 'pandas' library is not installed. Please install it using: pip install pandas")
+    exit(1)
+try:
+    import matplotlib.pyplot as plt
+except ImportError:
+    print("The 'matplotlib' library is not installed. Please install it using: pip install matplotlib")
+    exit(1)
+# Load the dataset
+try:
+    ds = load_dataset("Nkumar5/FMAHiphop")
+except Exception as e:
+    print(f"Error loading the dataset: {e}")
+    print("Please check your internet connection and ensure the dataset name is correct.")
+    exit(1)
+# Let's explore the dataset structure
+print("Dataset structure:")
+print(ds)
+# Look at the features of the training set
+print("\nFeatures in the training set:")
+print(ds['train'].features)
+# Get the first example from the training set
+first_example = ds['train'][0]
+print("\nFirst example:")
+print(first_example)
+# Convert the dataset to a pandas DataFrame for easier manipulation
+df = pd.DataFrame(ds['train'])
+# Basic statistics of the dataset
+print("\nDataset statistics:")
+print(df.describe())
+# If there's an 'artist' column, let's see the most common artists
+if 'artist' in df.columns:
+    print("\nTop 10 artists by track count:")
+    print(df['artist'].value_counts().head(10))
+else:
+    print("\nNo 'artist' column found in the dataset.")
+# If there's a 'tempo' column, let's visualize the distribution of tempos
+if 'tempo' in df.columns:
+    plt.figure(figsize=(10, 6))
+    df['tempo'].hist(bins=30)
+    plt.title('Distribution of Tempos in FMAHiphop Dataset')
+    plt.xlabel('Tempo (BPM)')
+    plt.ylabel('Count')
+    plt.show()
+else:
+    print("\nNo 'tempo' column found in the dataset.")
+# Example of how to access audio data (if available)
+if 'audio' in first_example:
+    print("\nAudio data shape:", first_example['audio']['array'].shape)
+    print("Audio sampling rate:", first_example['audio']['sampling_rate'])
+else:
+    print("\nNo 'audio' data found in the dataset examples.")
+# Note: To play audio in a Jupyter notebook, you can use:
+# from IPython.display import Audio
+# Audio(first_example['audio']['array'], rate=first_example['audio']['sampling_rate'])
+print("\nThis tutorial provides a basic exploration of the FMAHiphop dataset.")
+print("You can expand on this to perform more advanced analyses or machine learning tasks.")