dplyr examples: group_by and mutate

Heike Hofmann

FBI data

group_by and mutate

Working with fbi

fbi data included in the classdata package

upgrade the package: devtools::install_github("heike/classdata")

library(tidyverse)
library(classdata)
# Load the fbi data set that ships with the classdata package into the
# workspace, then print its first rows for a quick look at the columns.
data(list = "fbi", package = "classdata")
fbi %>% head()
##     State Abb Year Population                                 Type Count
## 1 Alabama  AL 1961    3302000 Murder.and.nonnegligent.Manslaughter   427
## 2 Alabama  AL 1962    3358000 Murder.and.nonnegligent.Manslaughter   316
## 3 Alabama  AL 1963    3347000 Murder.and.nonnegligent.Manslaughter   340
## 4 Alabama  AL 1964    3407000 Murder.and.nonnegligent.Manslaughter   316
## 5 Alabama  AL 1965    3462000 Murder.and.nonnegligent.Manslaughter   395
## 6 Alabama  AL 1966    3517000 Murder.and.nonnegligent.Manslaughter   384
##   Violent.crime
## 1          TRUE
## 2          TRUE
## 3          TRUE
## 4          TRUE
## 5          TRUE
## 6          TRUE

mutate

# Add a Rate column: Count expressed per 70,000 of Population.
fbi <- mutate(fbi, Rate = Count / Population * 70000)

Your turn

For this "Your turn", use the fbi data from the classdata package.

# Drop any grouping, then reorder the levels of Type by the median Rate
# within each type (missing rates are ignored when taking the median).
fbi <- fbi %>%
  ungroup() %>%
  mutate(Type = reorder(Type, Rate, FUN = median, na.rm = TRUE))

# One boxplot of Rate per Type; coord_flip() puts the types on the
# vertical axis.
ggplot(fbi, aes(x = Type, y = Rate)) +
  geom_boxplot() +
  coord_flip()

group_by and mutate

# Rank the rates within each crime type, from lowest (rank 1) to highest.
# ties.method = "min" gives every row tied for the lowest rate the rank 1;
# with the default ("average") tied rows get a fractional rank such as 1.5
# and the `best == 1` filter below would silently drop that crime type.
# na.last = "keep" (also the rank() default) leaves rows with a missing
# Rate at rank NA, so the filter removes them.
# Note: fbi stays grouped by Type after this assignment.
fbi <- fbi %>%
  group_by(Type) %>%
  mutate(
    best = rank(Rate, na.last = "keep", ties.method = "min")
  )

# Row(s) with the lowest rate for every crime type.
fbi %>%
  filter(best == 1) %>%
  select(Type, State, Year, Rate)
## # A tibble: 8 x 4
## # Groups:   Type [8]
##   Type                                 State          Year    Rate
##   <fct>                                <chr>         <int>   <dbl>
## 1 Murder.and.nonnegligent.Manslaughter North Dakota   1967   0.110
## 2 Legacy.rape                          Hawaii         1965   0.591
## 3 Rape                                 New Jersey     2013   9.34 
## 4 Robbery                              Vermont        1967   1.34 
## 5 Aggravated.assault                   Vermont        1962   2.51 
## 6 Burglary                             Mississippi    1963 130    
## 7 Larceny.theft                        West Virginia  1961 205    
## 8 Motor.vehicle.theft                  Vermont        2014  27.3

Your turn: group_by and mutate

For this "Your turn", use the fbi data from the classdata package.

# Rank the rates within every crime type and year, lowest rate = rank 1.
# na.last = "keep" leaves rows with a missing Rate unranked (NA), so they
# never pass the filter below. ties.method = "min" keeps ranks as whole
# numbers: with the default ("average"), tied rows get fractional ranks
# such as 2.5, which adds stray levels to the fill legend and drops rows
# tied for third place (rank 3.5) from the `best <= 3` filter.
# Note: fbi stays grouped by Type and Year after this assignment.
fbi <- fbi %>%
  group_by(Type, Year) %>%
  mutate(
    best = rank(Rate, na.last = "keep", ties.method = "min")
  )

# For each crime type, count how often each state abbreviation appears
# among the three lowest ranks of a year, coloured by the rank achieved.
fbi %>%
  filter(best <= 3) %>%
  ggplot(aes(x = Abb)) +
  geom_bar(aes(fill = factor(best))) +
  facet_wrap(~Type, scales = "free") +
  coord_flip()