TyT2020W04 - Visualize Index

By Johanie Fournier, agr. in rstats tidyverse tidytuesday

January 22, 2020

Get the data

spotify_songs <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-21/spotify_songs.csv')
## Rows: 32833 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): track_id, track_name, track_artist, track_album_id, track_album_na...
## dbl (13): track_popularity, danceability, energy, key, loudness, mode, speec...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore the data

summary(spotify_songs)
##    track_id          track_name        track_artist       track_popularity
##  Length:32833       Length:32833       Length:32833       Min.   :  0.00  
##  Class :character   Class :character   Class :character   1st Qu.: 24.00  
##  Mode  :character   Mode  :character   Mode  :character   Median : 45.00  
##                                                           Mean   : 42.48  
##                                                           3rd Qu.: 62.00  
##                                                           Max.   :100.00  
##  track_album_id     track_album_name   track_album_release_date
##  Length:32833       Length:32833       Length:32833            
##  Class :character   Class :character   Class :character        
##  Mode  :character   Mode  :character   Mode  :character        
##                                                                
##                                                                
##                                                                
##  playlist_name      playlist_id        playlist_genre     playlist_subgenre 
##  Length:32833       Length:32833       Length:32833       Length:32833      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   danceability        energy              key            loudness      
##  Min.   :0.0000   Min.   :0.000175   Min.   : 0.000   Min.   :-46.448  
##  1st Qu.:0.5630   1st Qu.:0.581000   1st Qu.: 2.000   1st Qu.: -8.171  
##  Median :0.6720   Median :0.721000   Median : 6.000   Median : -6.166  
##  Mean   :0.6548   Mean   :0.698619   Mean   : 5.374   Mean   : -6.720  
##  3rd Qu.:0.7610   3rd Qu.:0.840000   3rd Qu.: 9.000   3rd Qu.: -4.645  
##  Max.   :0.9830   Max.   :1.000000   Max.   :11.000   Max.   :  1.275  
##       mode         speechiness      acousticness    instrumentalness   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000000  
##  1st Qu.:0.0000   1st Qu.:0.0410   1st Qu.:0.0151   1st Qu.:0.0000000  
##  Median :1.0000   Median :0.0625   Median :0.0804   Median :0.0000161  
##  Mean   :0.5657   Mean   :0.1071   Mean   :0.1753   Mean   :0.0847472  
##  3rd Qu.:1.0000   3rd Qu.:0.1320   3rd Qu.:0.2550   3rd Qu.:0.0048300  
##  Max.   :1.0000   Max.   :0.9180   Max.   :0.9940   Max.   :0.9940000  
##     liveness         valence           tempo         duration_ms    
##  Min.   :0.0000   Min.   :0.0000   Min.   :  0.00   Min.   :  4000  
##  1st Qu.:0.0927   1st Qu.:0.3310   1st Qu.: 99.96   1st Qu.:187819  
##  Median :0.1270   Median :0.5120   Median :121.98   Median :216000  
##  Mean   :0.1902   Mean   :0.5106   Mean   :120.88   Mean   :225800  
##  3rd Qu.:0.2480   3rd Qu.:0.6930   3rd Qu.:133.92   3rd Qu.:253585  
##  Max.   :0.9960   Max.   :0.9910   Max.   :239.44   Max.   :517810
glimpse(spotify_songs)
## Rows: 32,833
## Columns: 23
## $ track_id                 <chr> "6f807x0ima9a1j3VPbc7VN", "0r7CVbZTWZgbTCYdfa…
## $ track_name               <chr> "I Don't Care (with Justin Bieber) - Loud Lux…
## $ track_artist             <chr> "Ed Sheeran", "Maroon 5", "Zara Larsson", "Th…
## $ track_popularity         <dbl> 66, 67, 70, 60, 69, 67, 62, 69, 68, 67, 58, 6…
## $ track_album_id           <chr> "2oCs0DGTsRO98Gh5ZSl2Cx", "63rPSO264uRjW1X5E6…
## $ track_album_name         <chr> "I Don't Care (with Justin Bieber) [Loud Luxu…
## $ track_album_release_date <chr> "2019-06-14", "2019-12-13", "2019-07-05", "20…
## $ playlist_name            <chr> "Pop Remix", "Pop Remix", "Pop Remix", "Pop R…
## $ playlist_id              <chr> "37i9dQZF1DXcZDD7cfEKhW", "37i9dQZF1DXcZDD7cf…
## $ playlist_genre           <chr> "pop", "pop", "pop", "pop", "pop", "pop", "po…
## $ playlist_subgenre        <chr> "dance pop", "dance pop", "dance pop", "dance…
## $ danceability             <dbl> 0.748, 0.726, 0.675, 0.718, 0.650, 0.675, 0.4…
## $ energy                   <dbl> 0.916, 0.815, 0.931, 0.930, 0.833, 0.919, 0.8…
## $ key                      <dbl> 6, 11, 1, 7, 1, 8, 5, 4, 8, 2, 6, 8, 1, 5, 5,…
## $ loudness                 <dbl> -2.634, -4.969, -3.432, -3.778, -4.672, -5.38…
## $ mode                     <dbl> 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, …
## $ speechiness              <dbl> 0.0583, 0.0373, 0.0742, 0.1020, 0.0359, 0.127…
## $ acousticness             <dbl> 0.10200, 0.07240, 0.07940, 0.02870, 0.08030, …
## $ instrumentalness         <dbl> 0.00e+00, 4.21e-03, 2.33e-05, 9.43e-06, 0.00e…
## $ liveness                 <dbl> 0.0653, 0.3570, 0.1100, 0.2040, 0.0833, 0.143…
## $ valence                  <dbl> 0.518, 0.693, 0.613, 0.277, 0.725, 0.585, 0.1…
## $ tempo                    <dbl> 122.036, 99.972, 124.008, 121.956, 123.976, 1…
## $ duration_ms              <dbl> 194754, 162600, 176616, 169093, 189052, 16304…

Prepare the data

# convertir le tempo en un indicateur qui varie entre 0 et 1
tempo<-data.frame(tempo=c(0,99.96,120.88,133.92,239.44),tempo_0_1=c(0,0.25,0.50,0.75,1))
formula <- y ~ x
gg<- ggplot(data=tempo, aes(x=tempo, y=tempo_0_1))
gg<- gg + geom_point()
gg<- gg + geom_smooth(method='lm', se=FALSE, formula=formula)
gg<- gg +  stat_poly_eq(aes(label =  paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
                   formula = formula, parse = TRUE)
gg<- gg + theme_classic()
gg<- gg + scale_y_continuous(limits=c(0,1))
gg
## Warning: Removed 5 rows containing missing values (geom_smooth).
data<-spotify_songs %>% 
  select(playlist_genre, danceability, energy,valence,tempo) %>% 
  mutate(tempo_0_1=0.00438*tempo-0.0208) %>% 
  select(-tempo) %>% 
  gather(type, valeur,-playlist_genre) %>% 
  group_by(playlist_genre, type) %>% 
  summarise_if(is.numeric, median, na.rm = TRUE) %>%
  ungroup() 

Visualize the data

#Graphique
gg<- ggplot(data=data,aes(x = valeur, y=playlist_genre))
gg <- gg + geom_segment( aes(x=0.3, xend=0.9,y=playlist_genre, yend=playlist_genre),
                           color="#838A90", alpha=0.6, size=4)
gg <- gg + geom_segment( aes(x=0.3, xend=0.9,y=playlist_genre, yend=playlist_genre),
                           color="#ffffff", alpha=1, size=0.5)
gg <- gg +  geom_point(aes(fill=type, group=playlist_genre), color="#FFFFFF", size=20, pch=21, alpha=0.5)
gg <- gg +  geom_point(aes(fill=type, group=playlist_genre), color="#FFFFFF", size=12, pch=21)
gg <- gg + scale_fill_brewer(palette = "Set1",labels=c("danceability","Energy", "Tempo", "Valance"))
#ajouter les étiquettes de points
gg<-gg + geom_text(data=data, aes(x=valeur, y=playlist_genre, label=(round(data$valeur,1)), group=NULL), color="#ffffff", size=4.5, vjust=0.5, hjust=0.5, fontface="bold")
#pour l'axe des y à l'intérieur du graphique
gg<-gg + annotate(geom="text",x=0.3, y=1.3, label="Edm", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=2.3, label="Latin", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=3.3, label="Pop", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=4.3, label="R&B", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=5.3, label="Rap", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
gg<-gg + annotate(geom="text",x=0.3, y=6.3, label="Rock", color="#ffffff", size=5.5, vjust=0.5, hjust=0, fontface="bold")
#ajuster l'axe des x
gg <- gg + scale_x_continuous(expand = c(0,0))
#modifier le thème
gg <- gg +  theme(plot.background = element_rect(fill = "#171717"),
                  panel.background = element_rect(fill = "#171717"),
                  panel.grid.major.y= element_blank(),
                  panel.grid.major.x= element_blank(),
                  panel.grid.minor = element_blank(),
                  axis.line.x = element_blank(),
                  axis.line.y =element_blank(),
                  axis.ticks.x = element_blank(), 
                  axis.ticks.y = element_blank())
gg <- gg +  theme(legend.position="top", 
                  legend.title = element_blank(), 
                  legend.background = element_blank(),  
                  legend.key=element_blank(),
                  legend.spacing.x = unit(0.2, 'cm'), 
                  legend.text= element_text(hjust =0.5,size= 8, colour = "#FFFFFF"))
gg <- gg +    guides(fill = guide_legend(nrow = 1, reverse = TRUE))

#ajouter les titres
gg<-gg + labs(title="What kind of music do you prefer?",
            subtitle = "\nSpotify classifies music genres according to their characteristics to help users make their choice.\n",
              x="Index", 
              y=" ", 
              caption="\nSOURCE: Spotifyr   |  DESIGN: Johanie Fournier, agr.")
gg<-gg + theme(  plot.title    =  element_text(size=30, hjust=0,vjust=0.5, color="#FFFFFF", family="Tw Cen MT", face="bold"),
                 plot.subtitle = element_text(size=15, hjust=0,vjust=0.5, color="#FFFFFF", family="Tw Cen MT"),
                 plot.caption  = element_text(size=10, hjust=1,vjust=0.5, color="#FFFFFF", family="Tw Cen MT"),
                 axis.title.x  = element_text(size=15, hjust=0.05,vjust=0.5, color="#FFFFFF", family="Tw Cen MT", angle =0),
                 axis.title.y  = element_blank(),
                 axis.text.y   = element_blank(), 
                 axis.text.x   = element_blank())
Posted on:
January 22, 2020
Length:
5 minute read, 1022 words
Categories:
rstats tidyverse tidytuesday
Tags:
rstats tidyverse tidytuesday
See Also:
Predicting MO with H2O Models from IRDA data
IRDA soil data
This is the begining of a cheat sheet!