SeriesPost table
SeriesPost.Rd
Post season series information
Usage
data(SeriesPost)
Format
A data frame with 389 observations on the following 9 variables.
yearID
Year
round
Level of playoffs (e.g. "WS", "ALCS", "NLDS1")
teamIDwinner
Team ID of the team that won the series; a factor
lgIDwinner
League ID of the team that won the series; a factor with levels
AL
NL
teamIDloser
Team ID of the team that lost the series; a factor
lgIDloser
League ID of the team that lost the series; a factor with levels
AL
NL
wins
Wins by team that won the series
losses
Losses by team that won the series
ties
Tie games
Source
Lahman, S. (2024) Lahman's Baseball Database, 1871-2023, 2024 version, http://www.seanlahman.com/
Examples
data(SeriesPost)
# How many times has each team won the World Series?
# Notes:
# - the SeriesPost table includes an identifier for the
# team (teamID), but not the franchise (e.g. the Brooklyn Dodgers
# [BRO] and Los Angeles Dodgers [LAN] are counted separately)
#
# - the World Series was first played in 1903, but the
# Lahman data tables also label the final round of the earlier
# playoffs "WS", so it is necessary to
# filter the SeriesPost table to exclude years prior to 1903.
#
# The examples below use the dplyr data manipulation package.
library("dplyr")
library("tidyr")
library("ggplot2")
## WS winners, arranged in descending order of titles won.
## yearID is an integer column, so it is compared against a number
## (1903 = first modern World Series) rather than a string.
ws_winner_table <- SeriesPost %>%
  filter(yearID >= 1903, round == "WS") %>%
  group_by(teamIDwinner) %>%
  summarise(wincount = n()) %>%
  arrange(desc(wincount))
ws_winner_table
#> # A tibble: 31 × 2
#> teamIDwinner wincount
#> <fct> <int>
#> 1 NYA 27
#> 2 SLN 11
#> 3 BOS 9
#> 4 LAN 6
#> 5 CIN 5
#> 6 NY1 5
#> 7 PHA 5
#> 8 PIT 5
#> 9 DET 4
#> 10 OAK 4
#> # ℹ 21 more rows
## World Series rows only (1903 onwards), without the `ties` and `round`
## columns and with unused league levels dropped from both league factors
ws <- SeriesPost %>%
  filter(round == "WS", yearID >= 1903) %>%
  select(!c(ties, round)) %>%
  mutate(across(c(lgIDwinner, lgIDloser), droplevels))
# Distribution of series length, measured as wins + losses
# (the 1903, 1919 and 1921 series each ran to eight games)
ggplot(ws) +
  geom_bar(aes(x = wins + losses), fill = "dodgerblue") +
  xlab("Number of games") +
  ylab("Frequency")
# Most recent World Series in which the Cubs (CHN) took part,
# whether as the winning or the losing team
ws %>%
  filter(if_any(c(teamIDwinner, teamIDloser), ~ .x == "CHN")) %>%
  summarise(max(yearID))
#> max(yearID)
#> 1 2016
# Dot chart of number of WS appearances by teamID
ws %>%
  # Stack winners and losers into a single `team` column. The two teamID
  # factors do not share identical attributes, so they are converted to
  # character while pivoting.
  pivot_longer(
    c(teamIDwinner, teamIDloser),
    names_to = "wl",
    values_to = "team",
    values_transform = list(team = as.character)
  ) %>%
  count(team, sort = TRUE) %>%
  ggplot(aes(x = reorder(team, n), y = n)) +
  theme_bw() +
  geom_point(size = 3, color = "dodgerblue") +
  # line thickness is controlled by `linewidth` (`size` is for points/text)
  geom_segment(aes(xend = reorder(team, n), yend = 0),
               linetype = "dotted", color = "dodgerblue",
               linewidth = 1) +
  labs(x = NULL, y = "Number of WS appearances") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 42)) +
  coord_flip() +
  theme(axis.text.y = element_text(size = rel(0.8)),
        axis.ticks.y = element_blank())
# First season in which each playoff round appears in the modern era
# (1903 onwards, i.e. starting with the first modern World Series)
SeriesPost %>%
  filter(yearID > 1902) %>%
  group_by(round) %>%
  summarise(first_year = min(yearID)) %>%
  arrange(first_year)
#> # A tibble: 21 × 2
#> round first_year
#> <chr> <int>
#> 1 WS 1903
#> 2 ALCS 1969
#> 3 NLCS 1969
#> 4 AEDIV 1981
#> 5 AWDIV 1981
#> 6 NEDIV 1981
#> 7 NWDIV 1981
#> 8 ALDS1 1995
#> 9 ALDS2 1995
#> 10 NLDS1 1995
#> # ℹ 11 more rows
# Ditto, but with more information about each series played:
# keep the earliest series of every round (first row wins if a round
# has several series in its first year), then order chronologically
SeriesPost %>%
  filter(yearID >= 1903) %>%
  group_by(round) %>%
  slice_min(yearID, n = 1, with_ties = FALSE) %>%
  select(-lgIDwinner, -lgIDloser) %>%
  arrange(yearID, round)
#> # A tibble: 21 × 7
#> # Groups: round [21]
#> yearID round teamIDwinner teamIDloser wins losses ties
#> <int> <chr> <fct> <fct> <int> <int> <int>
#> 1 1903 WS BOS PIT 5 3 0
#> 2 1969 ALCS BAL MIN 3 0 0
#> 3 1969 NLCS NYN ATL 3 0 0
#> 4 1981 AEDIV NYA ML4 3 2 0
#> 5 1981 AWDIV OAK KCA 3 0 0
#> 6 1981 NEDIV MON PHI 3 2 0
#> 7 1981 NWDIV LAN HOU 3 2 0
#> 8 1995 ALDS1 CLE BOS 3 0 0
#> 9 1995 ALDS2 SEA NYA 3 2 0
#> 10 1995 NLDS1 ATL COL 3 1 0
#> # ℹ 11 more rows