import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
import math
Exploratory Data Analysis of Song Data
Song data comes from the top singles list for each year from 1958 to 2022.
Data features are gathered from the Spotify API (here)[https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features].
; plt.plot()
"datafantic-right.mplstyle") plt.style.use(
= pd.read_csv("songs.csv") df
df
entry_date | title | artist | peak | peak_date | weeks_top_ten | year | spotify_uri | danceability | energy | ... | liveness | valence | tempo | type | id | uri | track_href | analysis_url | duration_ms | time_signature | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | August 4 | "Poor Little Fool" | Ricky Nelson | 1 | August 4 | 6 | 1958 | spotify:track:5ayybTSXNwcarDtxQKqvWX | 0.474 | 0.338 | ... | 0.1300 | 0.8100 | 154.596 | audio_features | 5ayybTSXNwcarDtxQKqvWX | spotify:track:5ayybTSXNwcarDtxQKqvWX | https://api.spotify.com/v1/tracks/5ayybTSXNwca... | https://api.spotify.com/v1/audio-analysis/5ayy... | 153933.0 | 4.0 |
1 | August 4 | "Patricia" | Pérez Prado | 2 | August 4 | 6 | 1958 | spotify:track:2bwhOdCOLgQ8v6xStAqnju | 0.699 | 0.715 | ... | 0.0704 | 0.8100 | 137.373 | audio_features | 2bwhOdCOLgQ8v6xStAqnju | spotify:track:2bwhOdCOLgQ8v6xStAqnju | https://api.spotify.com/v1/tracks/2bwhOdCOLgQ8... | https://api.spotify.com/v1/audio-analysis/2bwh... | 140000.0 | 4.0 |
2 | August 4 | "Splish Splash" | Bobby Darin | 3 | August 4 | 3 | 1958 | spotify:track:40fD7ct05FvQHLdQTgJelG | 0.645 | 0.943 | ... | 0.3700 | 0.9650 | 147.768 | audio_features | 40fD7ct05FvQHLdQTgJelG | spotify:track:40fD7ct05FvQHLdQTgJelG | https://api.spotify.com/v1/tracks/40fD7ct05FvQ... | https://api.spotify.com/v1/audio-analysis/40fD... | 131720.0 | 4.0 |
3 | August 4 | "Hard Headed Woman" | Elvis Presley | 4 | August 4 | 2 | 1958 | spotify:track:3SU1TXJtAsf8jCKdUeYy53 | 0.616 | 0.877 | ... | 0.1840 | 0.9190 | 97.757 | audio_features | 3SU1TXJtAsf8jCKdUeYy53 | spotify:track:3SU1TXJtAsf8jCKdUeYy53 | https://api.spotify.com/v1/tracks/3SU1TXJtAsf8... | https://api.spotify.com/v1/audio-analysis/3SU1... | 114240.0 | 4.0 |
4 | August 4 | "When" | Kalin Twins | 5 | August 4 | 5 | 1958 | spotify:track:3HZJ9BLBpDya4p71VfXSWp | 0.666 | 0.468 | ... | 0.1190 | 0.9460 | 93.018 | audio_features | 3HZJ9BLBpDya4p71VfXSWp | spotify:track:3HZJ9BLBpDya4p71VfXSWp | https://api.spotify.com/v1/tracks/3HZJ9BLBpDya... | https://api.spotify.com/v1/audio-analysis/3HZJ... | 146573.0 | 4.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4246 | June 4 | "Matilda" | Harry Styles | 9 | June 4 | 1 | 2022 | spotify:track:6uvh0In7u1Xn4HgxOfAn8O | 0.507 | 0.294 | ... | 0.0966 | 0.3860 | 114.199 | audio_features | 6uvh0In7u1Xn4HgxOfAn8O | spotify:track:6uvh0In7u1Xn4HgxOfAn8O | https://api.spotify.com/v1/tracks/6uvh0In7u1Xn... | https://api.spotify.com/v1/audio-analysis/6uvh... | 245964.0 | 4.0 |
4247 | June 11 | "Running Up That Hill A Deal with God" | Kate Bush | 4 | June 18 | 4* | 2022 | spotify:track:29d0nY7TzCoi22XBqDQkiP | 0.625 | 0.533 | ... | 0.0546 | 0.1390 | 108.296 | audio_features | 29d0nY7TzCoi22XBqDQkiP | spotify:track:29d0nY7TzCoi22XBqDQkiP | https://api.spotify.com/v1/tracks/29d0nY7TzCoi... | https://api.spotify.com/v1/audio-analysis/29d0... | 300840.0 | 4.0 |
4248 | June 25 | "Glimpse of Us" | Joji | 8 | July 2 | 2* | 2022 | spotify:track:6xGruZOHLs39ZbVccQTuPZ | 0.440 | 0.317 | ... | 0.1410 | 0.2680 | 169.914 | audio_features | 6xGruZOHLs39ZbVccQTuPZ | spotify:track:6xGruZOHLs39ZbVccQTuPZ | https://api.spotify.com/v1/tracks/6xGruZOHLs39... | https://api.spotify.com/v1/audio-analysis/6xGr... | 233456.0 | 3.0 |
4249 | July 2 | "Sticky" | Drake | 6 | July 2 | 1* | 2022 | spotify:track:4rmVZajAF7PkrCagGPHbqa | 0.853 | 0.495 | ... | 0.0844 | 0.0774 | 137.027 | audio_features | 4rmVZajAF7PkrCagGPHbqa | spotify:track:4rmVZajAF7PkrCagGPHbqa | https://api.spotify.com/v1/tracks/4rmVZajAF7Pk... | https://api.spotify.com/v1/audio-analysis/4rmV... | 243228.0 | 4.0 |
4250 | July 2 | "Falling Back" | Drake | 7 | July 2 | 1* | 2022 | spotify:track:1vbn9fEyw1IYhqgZJdu9ZB | 0.718 | 0.758 | ... | 0.1100 | 0.3490 | 118.989 | audio_features | 1vbn9fEyw1IYhqgZJdu9ZB | spotify:track:1vbn9fEyw1IYhqgZJdu9ZB | https://api.spotify.com/v1/tracks/1vbn9fEyw1IY... | https://api.spotify.com/v1/audio-analysis/1vbn... | 266179.0 | 4.0 |
4251 rows × 26 columns
= _deepnote_execute_sql("""
df_1 """, 'SQL_DEEPNOTE_DATAFRAME_SQL')
df_1
= df.groupby(by='year').mean().drop(columns='peak').reset_index() year
year.head()
year | danceability | energy | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | duration_ms | time_signature | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1958 | 0.577275 | 0.502250 | 5.625000 | -10.385025 | 0.875000 | 0.059357 | 0.642948 | 0.116704 | 0.185743 | 0.708625 | 122.238650 | 149064.300000 | 3.825000 |
1 | 1959 | 0.537851 | 0.528930 | 5.103448 | -10.256540 | 0.816092 | 0.059131 | 0.681779 | 0.091981 | 0.200289 | 0.683483 | 121.896690 | 150422.011494 | 3.804598 |
2 | 1960 | 0.537817 | 0.471824 | 4.827957 | -10.180505 | 0.870968 | 0.041285 | 0.650999 | 0.032680 | 0.199411 | 0.679237 | 120.076290 | 155792.612903 | 3.784946 |
3 | 1961 | 0.539490 | 0.470644 | 4.590000 | -10.746760 | 0.910000 | 0.049491 | 0.648535 | 0.096826 | 0.208194 | 0.679980 | 121.267490 | 151452.270000 | 3.760000 |
4 | 1962 | 0.561260 | 0.506891 | 5.538462 | -9.799865 | 0.798077 | 0.056326 | 0.608963 | 0.059317 | 0.226709 | 0.693452 | 117.217144 | 156609.653846 | 3.826923 |
Feature Plotting
= len(year.columns[1:])
N = 4
cols = int(math.ceil(N / cols))
rows
= gridspec.GridSpec(rows, cols)
gs = plt.figure()
fig
for n, column in zip(range(N), year.columns[1:]):
= fig.add_subplot(gs[n])
ax 'year'], year[column])
ax.plot(year[
ax.set_title(column)
fig.tight_layout()
There are some clear features that have changed over time:
Danceability (0-1): Danceability describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity.
Energy (0-1): Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy.
Loudness: The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track and are useful for comparing relative loudness of tracks.
Mode: Mode indicates the modality (major or minor) of a track, the type of scale from which its melodic content is derived.
Speechiness (0-1): Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value.
Acousticness: A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic.
Duration: The duration of the track in milliseconds.
Valence: A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry).
Duration
= plt.subplots()
fig, ax 'year'], (year['duration_ms'] / 1_000) / 60)
ax.plot(year[1958, 2026)
ax.set_xlim(0,6.5)
ax.set_ylim(
# Add in title and subtitle
"""Ain't nobody got time for that""")
ax.set_title(=.08, y=.86,
ax.text(x="Average duration in minutes. Features from top ten singles, 1958-2022.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.22)
imagebox = AnnotationBbox(imagebox, xy=(1,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/music_features_duration.png') plt.savefig(
Loudness
= plt.subplots()
fig, ax 'year'], year['loudness'])
ax.plot(year[1958, 2026)
ax.set_xlim(-15, -3)
ax.set_ylim(
# Add in title and subtitle
"""Hearing protection recommended""")
ax.set_title(=.08, y=.86,
ax.text(x="Average loudness in decibels (-60-0). Features from top ten singles, 1958-2022.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.22)
imagebox = AnnotationBbox(imagebox, xy=(1,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/music_features_loudness.png') plt.savefig(
Features Trending Down
The features that are trending down:
Valence
Instrumentalness (shown separately)
Acousticness
= plt.subplots()
fig, ax 'year'], year['valence'])
ax.plot(year['year'], year['acousticness'])
ax.plot(year[1958, 2025)
ax.set_xlim(0,.85)
ax.set_ylim(
# Add in title and subtitle
"""I've got a negative feeling""")
ax.set_title(=.08, y=.86,
ax.text(x="Average features from top ten singles, 1958-2022, Scale 0-1",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Label the lines directly
=.78, y=.66, s="""Valence""",
ax.text(x=fig.transFigure, ha='left', fontsize=20, alpha=.7)
transform=.73, y=.30, s="""Acousticness""",
ax.text(x=fig.transFigure, ha='left', fontsize=20, alpha=.7)
transform
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.22)
imagebox = AnnotationBbox(imagebox, xy=(1,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/music_features_trending_down.png') plt.savefig(
Features Trending Up
Danceability
Energy
Speechiness (shown below)
= plt.subplots()
fig, ax 'year'], year['danceability'])
ax.plot(year['year'], year['energy'])
ax.plot(year[1958, 2025)
ax.set_xlim(0,.85)
ax.set_ylim(
# Add in title and subtitle
'>10% more danceability')
ax.set_title(=.08, y=.86,
ax.text(x="Average features from top ten singles, 1958-2022, Scale 0-1",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Label the lines directly
=.7, y=.795, s="""Energy""",
ax.text(x=fig.transFigure, ha='left', fontsize=20, alpha=.7)
transform=.685, y=.61, s="""Danceability""",
ax.text(x=fig.transFigure, ha='left', fontsize=20, alpha=.7)
transform
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.22)
imagebox = AnnotationBbox(imagebox, xy=(1,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/music_features_trending_up.png') plt.savefig(
Speechiness vs Instrumentalness
= plt.subplots()
fig, ax 'year'], year['instrumentalness'])
ax.plot(year['year'], year['speechiness'])
ax.plot(year[1958, 2025)
ax.set_xlim(0,.32)
ax.set_ylim(
0, .35, 0.05))
ax.set_yticks(np.arange(
# Add in title and subtitle
"They don't sing like they used to..")
ax.set_title(=.08, y=.86,
ax.text(x="Average features from top ten singles, 1958-2022, Scale 0-1",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Label the lines directly
=.57, y=.17, s="""Instrumentalness""",
ax.text(x=fig.transFigure, ha='left', fontsize=20, alpha=.7)
transform=.72, y=.52, s="""Speechiness""",
ax.text(x=fig.transFigure, ha='left', fontsize=20, alpha=.7)
transform
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.22)
imagebox = AnnotationBbox(imagebox, xy=(1,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/music_features_instrumentalness_speechiness.png') plt.savefig(