Pitching table
Pitching.Rd
Pitching table
Usage
data(Pitching)
Format
A data frame with 50402 observations on the following 30 variables.
playerID
Player ID code
yearID
Year
stint
player's stint (order of appearances within a season)
teamID
Team; a factor
lgID
League; a factor with levels
AA
AL
FL
NL
PL
UA
W
Wins
L
Losses
G
Games
GS
Games Started
CG
Complete Games
SHO
Shutouts
SV
Saves
IPouts
Outs Pitched (innings pitched x 3)
H
Hits
ER
Earned Runs
HR
Home Runs
BB
Walks
SO
Strikeouts
BAOpp
Opponent's Batting Average
ERA
Earned Run Average
IBB
Intentional Walks
WP
Wild Pitches
HBP
Batters Hit By Pitch
BK
Balks
BFP
Batters faced by Pitcher
GF
Games Finished
R
Runs Allowed
SH
Sacrifices by opposing batters
SF
Sacrifice flies by opposing batters
GIDP
Grounded into double plays by opposing batters
Source
Lahman, S. (2024) Lahman's Baseball Database, 1871-2023, 2024 version, http://www.seanlahman.com
Examples
# Pitching data
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the pipeline below fail with a less helpful message.
library(dplyr)

###################################
# cleanup, and add some other stats
###################################

# Restrict to AL and NL data, 1901+
# All data re SH, SF and GIDP are missing, so remove
# Intentional walks (IBB) not recorded until 1955
pitching <- Pitching %>%
  filter(yearID >= 1901 & lgID %in% c("AL", "NL")) %>%
  # drop by name rather than by position (columns 28:30) so the step
  # keeps working if the column order of Pitching ever changes
  select(-c(SH, SF, GIDP)) %>%
  mutate(
    BAOpp = round(H / (H + IPouts), 3), # loose def'n
    # IPouts is outs pitched (innings x 3), hence the factor of 3
    WHIP = round((H + BB) * 3 / IPouts, 2),
    # from 1955 on, intentional walks are excluded from the denominator
    KperBB = round(
      ifelse(yearID >= 1955, SO / (BB - IBB), SO / BB),
      2
    )
  )
#####################
# some simple queries
#####################

# Team pitching statistics, Toronto Blue Jays, 1993,
# sorted from lowest to highest ERA
tor93 <- pitching %>%
  filter(yearID == 1993, teamID == "TOR") %>%
  arrange(ERA)
# Career pitching statistics, Greg Maddux
# (base indexing keeps the row names of `pitching` in the printed result)
pitching[which(pitching$playerID == "maddugr01"), ]
#> playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER
#> 26164 maddugr01 1986 1 CHN NL 2 4 6 5 1 0 0 93 44 19
#> 26165 maddugr01 1987 1 CHN NL 6 14 30 27 1 1 0 467 181 97
#> 26166 maddugr01 1988 1 CHN NL 18 8 34 34 9 3 0 747 230 88
#> 26167 maddugr01 1989 1 CHN NL 19 12 35 35 7 1 0 715 222 78
#> 26168 maddugr01 1990 1 CHN NL 15 15 35 35 8 2 0 711 242 91
#> 26169 maddugr01 1991 1 CHN NL 15 11 37 37 7 2 0 789 232 98
#> 26170 maddugr01 1992 1 CHN NL 20 11 35 35 9 4 0 804 201 65
#> 26171 maddugr01 1993 1 ATL NL 20 10 36 36 8 1 0 801 228 70
#> 26172 maddugr01 1994 1 ATL NL 16 6 25 25 10 3 0 606 150 35
#> 26173 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38
#> 26174 maddugr01 1996 1 ATL NL 15 11 35 35 5 1 0 735 225 74
#> 26175 maddugr01 1997 1 ATL NL 19 4 33 33 5 2 0 698 200 57
#> 26176 maddugr01 1998 1 ATL NL 18 9 34 34 9 5 0 753 201 62
#> 26177 maddugr01 1999 1 ATL NL 19 9 33 33 4 0 0 658 258 87
#> 26178 maddugr01 2000 1 ATL NL 19 9 35 35 6 3 0 748 225 83
#> 26179 maddugr01 2001 1 ATL NL 17 11 34 34 3 3 0 699 220 79
#> 26180 maddugr01 2002 1 ATL NL 16 6 34 34 0 0 0 598 194 58
#> 26181 maddugr01 2003 1 ATL NL 16 11 36 36 1 0 0 655 225 96
#> 26182 maddugr01 2004 1 CHN NL 16 11 33 33 2 1 0 638 218 95
#> 26183 maddugr01 2005 1 CHN NL 13 15 35 35 3 0 0 675 239 106
#> 26184 maddugr01 2006 1 CHN NL 9 11 22 22 0 0 0 409 153 71
#> 26185 maddugr01 2006 2 LAN NL 6 3 12 12 0 0 0 221 66 27
#> 26186 maddugr01 2007 1 SDN NL 14 11 34 34 1 0 0 594 221 91
#> 26187 maddugr01 2008 1 SDN NL 6 9 26 26 0 0 0 460 161 68
#> 26188 maddugr01 2008 2 LAN NL 2 4 7 7 0 0 0 122 43 23
#> HR BB SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
#> 26164 3 11 20 0.321 5.52 2 2 1 0 144 1 20 1.77 2.22
#> 26165 17 74 101 0.279 5.61 13 4 4 7 701 2 111 1.64 1.66
#> 26166 13 81 140 0.235 3.18 16 3 9 6 1047 0 97 1.25 2.15
#> 26167 13 82 135 0.237 2.95 13 5 6 3 1002 0 90 1.28 1.96
#> 26168 11 71 144 0.254 3.46 10 3 4 3 1011 0 116 1.32 2.36
#> 26169 18 66 198 0.227 3.35 9 6 6 3 1070 0 113 1.13 3.47
#> 26170 7 70 199 0.200 2.18 7 5 14 0 1061 0 68 1.01 3.16
#> 26171 14 52 197 0.222 2.36 7 5 6 1 1064 0 85 1.05 4.38
#> 26172 4 31 156 0.198 1.56 3 3 6 1 774 0 44 0.90 5.57
#> 26173 8 23 181 0.189 1.63 3 1 4 0 785 0 39 0.81 9.05
#> 26174 11 28 172 0.234 2.72 11 4 3 0 978 0 85 1.03 10.12
#> 26175 9 20 177 0.223 2.20 6 0 6 0 893 0 58 0.95 12.64
#> 26176 13 45 204 0.211 2.22 10 4 7 0 987 0 75 0.98 5.83
#> 26177 16 37 136 0.282 3.57 8 1 4 0 940 0 103 1.34 4.69
#> 26178 19 42 190 0.231 3.00 12 1 10 2 1012 0 91 1.07 6.33
#> 26179 20 27 173 0.239 3.05 10 2 7 0 927 0 86 1.06 10.18
#> 26180 14 45 118 0.245 2.62 7 1 4 0 820 0 67 1.20 3.11
#> 26181 24 33 124 0.256 3.96 7 3 8 0 901 0 112 1.18 4.77
#> 26182 35 33 151 0.255 4.02 4 2 9 0 872 0 103 1.18 5.21
#> 26183 29 36 136 0.261 4.24 4 8 7 0 936 0 112 1.22 4.25
#> 26184 14 23 81 0.272 4.69 3 0 0 0 572 0 78 1.29 4.05
#> 26185 6 14 36 0.230 3.30 4 0 0 0 290 0 31 1.09 3.60
#> 26186 14 25 104 0.271 4.14 3 5 6 0 830 0 92 1.24 4.73
#> 26187 16 26 80 0.259 3.99 4 2 5 2 638 0 80 1.22 3.64
#> 26188 5 4 18 0.261 5.09 1 0 1 0 166 0 25 1.16 6.00
# Best ERAs for starting pitchers post WWII:
# the five lowest ERAs in each league among pitchers with at least
# 600 outs (200 innings) in a season, 1946 onward
pitching %>%
  filter(yearID >= 1946 & IPouts >= 600) %>%
  group_by(lgID) %>%
  # sort within each league so the first rows of every group are its best
  arrange(ERA, .by_group = TRUE) %>%
  # slice_head() replaces the superseded do(head(., 5)) idiom
  slice_head(n = 5)
#> # A tibble: 10 × 29
#> # Groups: lgID [2]
#> playerID yearID stint teamID lgID W L G GS CG SHO SV
#> <chr> <int> <int> <fct> <fct> <int> <int> <int> <int> <int> <int> <int>
#> 1 tiantlu01 1968 1 CLE AL 21 9 34 32 19 9 0
#> 2 chancde01 1964 1 LAA AL 20 9 46 35 15 11 4
#> 3 guidrro01 1978 1 NYA AL 25 3 35 35 16 9 0
#> 4 martipe02 2000 1 BOS AL 18 6 29 29 7 4 0
#> 5 mcdowsa01 1968 1 CLE AL 15 14 38 37 11 3 0
#> 6 gibsobo01 1968 1 SLN NL 22 9 34 34 28 13 0
#> 7 goodedw01 1985 1 NYN NL 24 4 35 35 16 8 0
#> 8 maddugr01 1994 1 ATL NL 16 6 25 25 10 3 0
#> 9 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0
#> 10 greinza01 2015 1 LAN NL 19 3 32 32 1 0 0
#> # ℹ 17 more variables: IPouts <int>, H <int>, ER <int>, HR <int>, BB <int>,
#> # SO <int>, BAOpp <dbl>, ERA <dbl>, IBB <int>, WP <int>, HBP <int>, BK <int>,
#> # BFP <int>, GF <int>, R <int>, WHIP <dbl>, KperBB <dbl>
# Best K/BB ratios among starters, 1955 onward (excludes intentional walks)
# KperBB is recomputed here without rounding, overwriting the stored column
pitching %>%
  filter(yearID >= 1955, IPouts >= 600) %>%
  mutate(KperBB = SO / (BB - IBB)) %>%
  arrange(desc(KperBB)) %>%
  head(n = 10)
#> playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR BB
#> 1 maddugr01 1997 1 ATL NL 19 4 33 33 5 2 0 698 200 57 9 20
#> 2 hugheph01 2014 1 MIN AL 16 10 32 32 1 0 0 629 221 82 16 16
#> 3 maddugr01 2001 1 ATL NL 17 11 34 34 3 3 0 699 220 79 20 27
#> 4 maddugr01 1996 1 ATL NL 15 11 35 35 5 1 0 735 225 74 11 28
#> 5 schilcu01 2002 1 ARI NL 23 7 36 35 5 1 0 778 218 93 29 33
#> 6 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38 8 23
#> 7 martipe02 2000 1 BOS AL 18 6 29 29 7 4 0 651 128 42 17 32
#> 8 martipe02 1999 1 BOS AL 23 4 31 29 5 1 0 640 160 49 9 37
#> 9 scherma01 2015 1 WAS NL 14 12 33 33 4 3 0 686 176 71 27 34
#> 10 sheetbe01 2004 1 MIL NL 12 14 34 34 5 0 0 711 201 71 25 32
#> SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
#> 1 177 0.223 2.20 6 0 6 0 893 0 58 0.95 12.642857
#> 2 186 0.260 3.52 1 1 5 0 855 0 88 1.13 12.400000
#> 3 173 0.239 3.05 10 2 7 0 927 0 86 1.06 10.176471
#> 4 172 0.234 2.72 11 4 3 0 978 0 85 1.03 10.117647
#> 5 316 0.219 3.23 1 6 3 0 1017 0 95 0.97 9.875000
#> 6 181 0.189 1.63 3 1 4 0 785 0 39 0.81 9.050000
#> 7 284 0.164 1.74 0 1 14 0 817 0 44 0.74 8.875000
#> 8 313 0.200 2.07 1 6 9 0 835 1 56 0.92 8.694444
#> 9 276 0.204 2.79 2 10 5 1 899 0 74 0.92 8.625000
#> 10 264 0.220 2.70 1 8 4 1 937 0 85 0.98 8.516129
# Best K/BB ratios among relievers post-1950 (min. 20 saves)
# Ranks on the rounded KperBB column already present in `pitching`
pitching %>%
  filter(yearID >= 1950, SV >= 20) %>%
  arrange(desc(KperBB)) %>%
  head(n = 10)
#> playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR BB
#> 1 eckerde01 1990 1 OAK AL 4 2 63 0 0 0 48 220 41 5 2 4
#> 2 hendrli01 2021 1 CHA AL 8 3 69 0 0 0 38 213 45 20 11 7
#> 3 eckerde01 1992 1 OAK AL 7 1 69 0 0 0 51 240 62 17 5 11
#> 4 eckerde01 1989 1 OAK AL 4 0 51 0 0 0 33 173 32 10 5 3
#> 5 janseke01 2017 1 LAN NL 5 0 65 0 0 0 41 205 44 10 5 7
#> 6 eckerde01 1991 1 OAK AL 5 4 67 0 0 0 43 228 60 25 11 9
#> 7 ueharko01 2013 1 BOS AL 4 1 73 0 0 0 21 223 33 9 5 9
#> 8 riverma01 2008 1 NYA AL 6 5 64 0 0 0 39 212 41 11 4 6
#> 9 doolise01 2014 1 OAK AL 2 4 61 0 0 0 22 188 38 19 5 8
#> 10 eckerde01 1996 1 SLN NL 0 6 63 0 0 0 30 180 65 22 8 6
#> SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
#> 1 73 0.157 0.61 1 0 0 0 262 61 9 0.61 24.33
#> 2 113 0.174 2.54 1 6 1 0 267 58 23 0.73 18.83
#> 3 93 0.205 1.91 6 0 1 0 309 65 17 0.91 18.60
#> 4 55 0.156 1.56 0 0 1 0 206 46 10 0.61 18.33
#> 5 109 0.177 1.32 0 2 2 1 258 57 11 0.75 15.57
#> 6 87 0.208 2.96 3 1 1 0 299 59 26 0.91 14.50
#> 7 101 0.129 1.09 2 1 1 0 265 40 10 0.57 14.43
#> 8 77 0.162 1.40 0 1 2 0 259 60 11 0.67 12.83
#> 9 89 0.168 2.73 1 0 0 0 236 40 19 0.73 12.71
#> 10 49 0.265 3.30 2 0 4 0 251 53 26 1.18 12.25
###############################################
# Winningest pitchers in each league each year:
###############################################

# Add name & throws information:
# keep only the ID, name and throwing-hand columns of People
peopleInfo <- select(People, playerID, nameLast, nameFirst, throws)

# Merge peopleInfo into the pitching data
# (right join: every row of `pitching` is kept)
pitching1 <- peopleInfo %>%
  right_join(pitching, by = "playerID")
# Extract the pitcher with the maximum number of wins
# each year, by league (ties keep every pitcher who shares the maximum)
winp <- pitching1 %>%
  group_by(yearID, lgID) %>%
  filter(W == max(W)) %>%
  # drop the grouping once the per-group filter is done, so later steps
  # do not silently operate per year/league
  ungroup() %>%
  # name yearID and lgID explicitly: they are needed by the model and
  # plots below, and listing them avoids dplyr's
  # "Adding missing grouping variables" message
  select(yearID, lgID, nameLast, nameFirst, teamID, W, throws)
# A simple ANCOVA model of wins vs. year, league and hand (L/R):
# fit a linear model with a quadratic term in year, then print its ANOVA table
lm(W ~ yearID + I(yearID^2) + lgID + throws, data = winp) %>%
  anova()
#> Analysis of Variance Table
#>
#> Response: W
#> Df Sum Sq Mean Sq F value Pr(>F)
#> yearID 1 2449.00 2449.00 255.1550 < 2.2e-16 ***
#> I(yearID^2) 1 117.50 117.50 12.2419 0.0005337 ***
#> lgID 1 22.56 22.56 2.3504 0.1262433
#> throws 1 39.55 39.55 4.1201 0.0432051 *
#> Residuals 319 3061.80 9.60
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Nature of managing pitching staffs has altered importance of
# wins over time
if (FALSE) { # \dontrun{
  # library() fails loudly if ggplot2 is not installed
  library(ggplot2)

  # compare loess smooth with quadratic fit
  ggplot(winp, aes(x = yearID, y = W)) +
    geom_point(aes(colour = throws, shape = lgID), size = 2) +
    # line thickness is set with `linewidth`; `size` is deprecated for
    # line geoms in current ggplot2
    geom_smooth(method = "loess", linewidth = 1.5, color = "blue") +
    geom_smooth(
      method = "lm", se = FALSE, color = "black",
      formula = y ~ poly(x, 2)
    ) +
    ylab("League maximum Wins") + xlab("Year") +
    ggtitle("Maximum pitcher wins by year")

  ## To reinforce this, plot the mean IPouts by year and league,
  ## which gives some idea of pitcher usage. Restrict pitcher
  ## pool to those who pitched at least 100 innings in a year.
  pitching %>%
    filter(IPouts >= 300) %>% # >= 100 IP
    ggplot(aes(x = yearID, y = IPouts, color = lgID)) +
    geom_smooth(method = "loess") +
    labs(x = "Year", y = "IPouts")

  ## Another indicator: total number of complete games pitched
  ## (Mirrors the trend from the preceding plot.)
  pitching %>%
    group_by(yearID, lgID) %>%
    # .groups = "drop" returns an ungrouped result and silences the
    # regrouping message summarise() otherwise prints
    summarise(totalCG = sum(CG, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = yearID, y = totalCG, color = lgID)) +
    geom_point() +
    geom_path() +
    labs(x = "Year", y = "Number of complete games")
} # }