The United States Geological Survey continuously monitors earthquakes and makes the corresponding data available to the public. A dataset containing all worldwide earthquakes for a time frame of 30 days is available at http://www.hofroe.net/data/earthquakes.csv.
You can find the accompanying codebook at the US Geological Survey (you should be able to answer all questions in this exam without the codebook).
Update: the link to the CSV file above is no longer valid. Please use the following code to load the data:
# Load the 30-day earthquake data set from the course's GitHub mirror.
# The file is read exactly once: a second identical read.csv() call would only
# repeat the download and overwrite `eq` with the same contents.
eq <- read.csv("https://raw.githubusercontent.com/DS202-at-ISU/DS202-at-ISU.github.io/master/exams/earthquakes.csv")
# Time frame: convert the Date column with lubridate::ymd() so that the
# summaries below operate on calendar dates.
eq[["Date"]] <- lubridate::ymd(eq[["Date"]])
summary(eq[["Date"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2012-09-05" "2012-09-13" "2012-09-21" "2012-09-20" "2012-09-27" "2012-10-05"
# The observations start on
min(eq[["Date"]]) # Sep 5 2012
## [1] "2012-09-05"
# and end on
max(eq[["Date"]]) # Oct 5 2012
## [1] "2012-10-05"
# Number of earthquakes = number of rows in the data set:
dim(eq)[1]
## [1] 7162
# Strongest earthquake(s) in the data set: when and where did they occur?
#
# which.max() would return only the FIRST row attaining the maximum, silently
# dropping any ties, and max() without na.rm returns NA as soon as a single
# magnitude is missing. Compute the maximum once (ignoring missing values) and
# list every row that attains it.
max_magnitude <- max(eq$Magnitude, na.rm = TRUE)
# Exact comparison is safe here: max_magnitude is one of the stored values.
# which() drops the NA comparisons produced by missing magnitudes.
eq[which(eq$Magnitude == max_magnitude), c("Date", "Location")]
## Date Location
## 952 2012-09-30 9km WNW of San Agustin
max_magnitude
## [1] 7.3
# Treat Country as a categorical variable so its levels can be edited.
eq[["Country"]] <- factor(eq[["Country"]])
# Relabel the 11th level as "California".
# NOTE(review): the level is addressed by position, which depends on the sorted
# set of labels in the data. Presumably level 11 is an alternative spelling of
# California that gets merged into the existing level -- confirm with
# levels(eq$Country)[11] before reusing this on a different extract.
levels(eq[["Country"]])[11] <- "California"
# Five regions with the most recorded earthquakes, most frequent first.
sort(table(eq[["Country"]]), decreasing = TRUE)[seq_len(5)]
##
## California Alaska British Virgin Islands
## 2957 1907 479
## Nevada Washington
## 242 207
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Horizontal bar chart of the number of earthquakes per region. The factor
# levels are reordered by their frequency so the bars appear sorted by count.
ggplot(
  data = mutate(eq, Country = reorder(Country, Country, FUN = length)),
  mapping = aes(x = Country)
) +
  geom_bar() +
  coord_flip()
# Names of the ten regions with the most recorded earthquakes.
top10 <- names(sort(table(eq[["Country"]]), decreasing = TRUE))[seq_len(10)]
# Country10 keeps the region name for the top ten and lumps every other
# region (including missing ones) into "Other".
eq$Country10 <- ifelse(
  eq$Country %in% top10,
  as.character(eq$Country),
  "Other"
)
# Side-by-side boxplots of magnitude for the top-ten regions plus "Other",
# ordered by the median magnitude within each group.
ggplot(
  data = mutate(
    eq,
    Country10 = reorder(Country10, Magnitude, FUN = median, na.rm = TRUE)
  ),
  mapping = aes(x = Country10, y = Magnitude)
) +
  geom_boxplot() +
  coord_flip()
library(ggplot2)
# Distribution of magnitudes, using bins that are 0.1 magnitude units wide.
ggplot(eq, aes(x = Magnitude)) +
  geom_histogram(binwidth = 0.1)
# Use a magnitude of 4 as the cutoff between 'small' and 'large' earthquakes:
# 4 and above is "large", anything below is "small".
eq$size <- ifelse(eq$Magnitude >= 4, "large", "small")
Load the maps package and extract a world map (hint: think of map_data). Plot the world map using a polygon layer. Set the fill color to grey50. Add a layer of points to the map showing the locations of earthquakes; use color to distinguish between small and large earthquakes. Describe what you see.
library(maps)
# Outline data for the world map as a data frame of polygon vertices.
world <- map_data("world")
# Base layer: every map polygon (one per `group`) filled in grey50.
worldmap <- ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "grey50")
# Overlay one point per earthquake, coloured by size class. group = 1
# overrides the inherited `group` mapping, which does not exist in `eq`.
worldmap +
  geom_point(
    data = eq,
    mapping = aes(x = Longitude, y = Latitude, colour = size, group = 1)
  )
# Observation: most of the small earthquakes are located in the US.
Based on the summary data, draw a single chart that incorporates all of the above information.
# Daily summary: for every date, the number of earthquakes, their average
# magnitude, and the region that recorded the most earthquakes on that day.
eq.stats <- summarise(
  group_by(eq, Date),
  n = n(),
  Magnitude = mean(Magnitude),
  Country = names(sort(table(Country), decreasing = TRUE))[1]
)
# Single chart combining all three summaries: daily count on the y axis,
# point colour for the most active region, point size for the mean magnitude.
ggplot(
  eq.stats,
  aes(x = Date, y = n, colour = Country, size = Magnitude)
) +
  geom_point()