Dragunflie-420 committed on
Commit
ddc68fa
·
verified ·
1 Parent(s): 38a830c

Create datasets

Browse files
Files changed (1) hide show
  1. datasets +89 -0
datasets ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Tutorial: Using the FMAHiphop Dataset
#
# Loads the "Nkumar5/FMAHiphop" dataset with the Hugging Face `datasets`
# library, prints its structure and first training example, and then runs a
# few optional explorations (artist counts, tempo histogram, audio metadata)
# for whichever of those columns the dataset actually contains.
#
# This is deliberately a flat script: `ds`, `df` and `first_example` stay
# available as top-level names so they can be reused interactively (see the
# Jupyter audio snippet near the end).

# Installation Instructions:
# Before running this script, make sure you have the required libraries installed.
# You can install them using pip:
#
#     pip install datasets pandas matplotlib
#
# If you're using a Jupyter notebook, you can run this in a cell:
#     !pip install datasets pandas matplotlib

# `sys.exit` is used instead of the bare `exit` helper: `exit` is added by the
# `site` module for interactive sessions and is not guaranteed to exist when a
# script is run (for example with `python -S`).
import sys

# First, let's import the necessary libraries.
# Each import is guarded separately so the user is told exactly which
# package is missing and how to install it.
try:
    from datasets import load_dataset
except ImportError:
    print("The 'datasets' library is not installed. Please install it using: pip install datasets")
    sys.exit(1)

try:
    # Not referenced by name below, but Dataset.to_pandas() needs pandas, so
    # fail early with a clear message if it is absent.
    import pandas as pd  # noqa: F401
except ImportError:
    print("The 'pandas' library is not installed. Please install it using: pip install pandas")
    sys.exit(1)

try:
    import matplotlib.pyplot as plt
except ImportError:
    print("The 'matplotlib' library is not installed. Please install it using: pip install matplotlib")
    sys.exit(1)

# Load the dataset.
# The broad `except Exception` is intentional: this is the top-level boundary
# of a tutorial script, and any failure here (network, bad name, ...) is
# reported to the user before exiting with a non-zero status.
try:
    ds = load_dataset("Nkumar5/FMAHiphop")
except Exception as e:
    print(f"Error loading the dataset: {e}")
    print("Please check your internet connection and ensure the dataset name is correct.")
    sys.exit(1)

# Let's explore the dataset structure
print("Dataset structure:")
print(ds)

# Everything below works on the 'train' split; stop with a readable message
# instead of a KeyError traceback if the dataset does not provide one.
if 'train' not in ds:
    print(f"\nNo 'train' split found in the dataset. Available splits: {list(ds)}")
    sys.exit(1)

train_split = ds['train']

# Look at the features of the training set
print("\nFeatures in the training set:")
print(train_split.features)

# Get the first example from the training set
first_example = train_split[0]
print("\nFirst example:")
print(first_example)

# Convert the dataset to a pandas DataFrame for easier manipulation.
# Dataset.to_pandas() is the library's own conversion and avoids building the
# frame by iterating over every example in Python.
# NOTE(review): columns with special feature types (e.g. audio) are presumably
# returned in their stored form rather than decoded - confirm if the DataFrame
# is later used for anything beyond the column checks below.
df = train_split.to_pandas()

# Basic statistics of the dataset
print("\nDataset statistics:")
print(df.describe())

# If there's an 'artist' column, let's see the most common artists
if 'artist' in df.columns:
    print("\nTop 10 artists by track count:")
    print(df['artist'].value_counts().head(10))
else:
    print("\nNo 'artist' column found in the dataset.")

# If there's a 'tempo' column, let's visualize the distribution of tempos
if 'tempo' in df.columns:
    plt.figure(figsize=(10, 6))
    df['tempo'].hist(bins=30)
    plt.title('Distribution of Tempos in FMAHiphop Dataset')
    plt.xlabel('Tempo (BPM)')
    plt.ylabel('Count')
    plt.show()
else:
    print("\nNo 'tempo' column found in the dataset.")

# Example of how to access audio data (if available)
if 'audio' in first_example:
    audio = first_example['audio']
    print("\nAudio data shape:", audio['array'].shape)
    print("Audio sampling rate:", audio['sampling_rate'])
else:
    print("\nNo 'audio' data found in the dataset examples.")

# Note: To play audio in a Jupyter notebook, you can use:
# from IPython.display import Audio
# Audio(first_example['audio']['array'], rate=first_example['audio']['sampling_rate'])

print("\nThis tutorial provides a basic exploration of the FMAHiphop dataset.")
print("You can expand on this to perform more advanced analyses or machine learning tasks.")