Skip to contents

Pitching table

Usage

data(Pitching)

Format

A data frame with 50402 observations on the following 30 variables.

playerID

Player ID code

yearID

Year

stint

Player's stint (order of appearances within a season)

teamID

Team; a factor

lgID

League; a factor with levels AA AL FL NL PL UA

W

Wins

L

Losses

G

Games

GS

Games Started

CG

Complete Games

SHO

Shutouts

SV

Saves

IPouts

Outs Pitched (innings pitched x 3)

H

Hits

ER

Earned Runs

HR

Home Runs

BB

Walks

SO

Strikeouts

BAOpp

Opponent's Batting Average

ERA

Earned Run Average

IBB

Intentional Walks

WP

Wild Pitches

HBP

Batters Hit By Pitch

BK

Balks

BFP

Batters faced by Pitcher

GF

Games Finished

R

Runs Allowed

SH

Sacrifices by opposing batters

SF

Sacrifice flies by opposing batters

GIDP

Grounded into double plays by opposing batters

Source

Lahman, S. (2024) Lahman's Baseball Database, 1871-2023, 2024 version, http://www.seanlahman.com

Examples

# Pitching data

require("dplyr")

###################################
# cleanup, and add some other stats
###################################

# Build the working data set used by the queries below.
# Restrict to AL and NL data, 1901+
# All data re SH, SF and GIDP are missing, so remove
# Intentional walks (IBB) not recorded until 1955, so KperBB only
# subtracts them from the walk total for 1955 onwards.
pitching <- Pitching %>%
               filter(yearID >= 1901 & lgID %in% c("AL", "NL")) %>%
               # drop by name rather than by position (28:30), so the right
               # columns are removed even if the table layout changes
               select(-c(SH, SF, GIDP)) %>%
               mutate(BAOpp = round(H/(H + IPouts), 3),  # loose def'n
                      # (hits + walks) per inning; IPouts/3 = innings pitched
                      WHIP = round((H + BB) * 3/IPouts, 2),
                      # strikeouts per (unintentional, where known) walk
                      KperBB = round(ifelse(yearID >= 1955, 
                                            SO/(BB - IBB), SO/BB), 2))
                                            

#####################
# some simple queries
#####################

# 1993 Toronto Blue Jays pitching staff, ordered from lowest to highest ERA
tor93 <- arrange(
  filter(pitching, teamID == "TOR", yearID == 1993),
  ERA
)

# Every season (and stint) on record for Greg Maddux.
# which() drops any NA comparisons, matching what subset() does.
pitching[which(pitching$playerID == "maddugr01"), ]
#>        playerID yearID stint teamID lgID  W  L  G GS CG SHO SV IPouts   H  ER
#> 26164 maddugr01   1986     1    CHN   NL  2  4  6  5  1   0  0     93  44  19
#> 26165 maddugr01   1987     1    CHN   NL  6 14 30 27  1   1  0    467 181  97
#> 26166 maddugr01   1988     1    CHN   NL 18  8 34 34  9   3  0    747 230  88
#> 26167 maddugr01   1989     1    CHN   NL 19 12 35 35  7   1  0    715 222  78
#> 26168 maddugr01   1990     1    CHN   NL 15 15 35 35  8   2  0    711 242  91
#> 26169 maddugr01   1991     1    CHN   NL 15 11 37 37  7   2  0    789 232  98
#> 26170 maddugr01   1992     1    CHN   NL 20 11 35 35  9   4  0    804 201  65
#> 26171 maddugr01   1993     1    ATL   NL 20 10 36 36  8   1  0    801 228  70
#> 26172 maddugr01   1994     1    ATL   NL 16  6 25 25 10   3  0    606 150  35
#> 26173 maddugr01   1995     1    ATL   NL 19  2 28 28 10   3  0    629 147  38
#> 26174 maddugr01   1996     1    ATL   NL 15 11 35 35  5   1  0    735 225  74
#> 26175 maddugr01   1997     1    ATL   NL 19  4 33 33  5   2  0    698 200  57
#> 26176 maddugr01   1998     1    ATL   NL 18  9 34 34  9   5  0    753 201  62
#> 26177 maddugr01   1999     1    ATL   NL 19  9 33 33  4   0  0    658 258  87
#> 26178 maddugr01   2000     1    ATL   NL 19  9 35 35  6   3  0    748 225  83
#> 26179 maddugr01   2001     1    ATL   NL 17 11 34 34  3   3  0    699 220  79
#> 26180 maddugr01   2002     1    ATL   NL 16  6 34 34  0   0  0    598 194  58
#> 26181 maddugr01   2003     1    ATL   NL 16 11 36 36  1   0  0    655 225  96
#> 26182 maddugr01   2004     1    CHN   NL 16 11 33 33  2   1  0    638 218  95
#> 26183 maddugr01   2005     1    CHN   NL 13 15 35 35  3   0  0    675 239 106
#> 26184 maddugr01   2006     1    CHN   NL  9 11 22 22  0   0  0    409 153  71
#> 26185 maddugr01   2006     2    LAN   NL  6  3 12 12  0   0  0    221  66  27
#> 26186 maddugr01   2007     1    SDN   NL 14 11 34 34  1   0  0    594 221  91
#> 26187 maddugr01   2008     1    SDN   NL  6  9 26 26  0   0  0    460 161  68
#> 26188 maddugr01   2008     2    LAN   NL  2  4  7  7  0   0  0    122  43  23
#>       HR BB  SO BAOpp  ERA IBB WP HBP BK  BFP GF   R WHIP KperBB
#> 26164  3 11  20 0.321 5.52   2  2   1  0  144  1  20 1.77   2.22
#> 26165 17 74 101 0.279 5.61  13  4   4  7  701  2 111 1.64   1.66
#> 26166 13 81 140 0.235 3.18  16  3   9  6 1047  0  97 1.25   2.15
#> 26167 13 82 135 0.237 2.95  13  5   6  3 1002  0  90 1.28   1.96
#> 26168 11 71 144 0.254 3.46  10  3   4  3 1011  0 116 1.32   2.36
#> 26169 18 66 198 0.227 3.35   9  6   6  3 1070  0 113 1.13   3.47
#> 26170  7 70 199 0.200 2.18   7  5  14  0 1061  0  68 1.01   3.16
#> 26171 14 52 197 0.222 2.36   7  5   6  1 1064  0  85 1.05   4.38
#> 26172  4 31 156 0.198 1.56   3  3   6  1  774  0  44 0.90   5.57
#> 26173  8 23 181 0.189 1.63   3  1   4  0  785  0  39 0.81   9.05
#> 26174 11 28 172 0.234 2.72  11  4   3  0  978  0  85 1.03  10.12
#> 26175  9 20 177 0.223 2.20   6  0   6  0  893  0  58 0.95  12.64
#> 26176 13 45 204 0.211 2.22  10  4   7  0  987  0  75 0.98   5.83
#> 26177 16 37 136 0.282 3.57   8  1   4  0  940  0 103 1.34   4.69
#> 26178 19 42 190 0.231 3.00  12  1  10  2 1012  0  91 1.07   6.33
#> 26179 20 27 173 0.239 3.05  10  2   7  0  927  0  86 1.06  10.18
#> 26180 14 45 118 0.245 2.62   7  1   4  0  820  0  67 1.20   3.11
#> 26181 24 33 124 0.256 3.96   7  3   8  0  901  0 112 1.18   4.77
#> 26182 35 33 151 0.255 4.02   4  2   9  0  872  0 103 1.18   5.21
#> 26183 29 36 136 0.261 4.24   4  8   7  0  936  0 112 1.22   4.25
#> 26184 14 23  81 0.272 4.69   3  0   0  0  572  0  78 1.29   4.05
#> 26185  6 14  36 0.230 3.30   4  0   0  0  290  0  31 1.09   3.60
#> 26186 14 25 104 0.271 4.14   3  5   6  0  830  0  92 1.24   4.73
#> 26187 16 26  80 0.259 3.99   4  2   5  2  638  0  80 1.22   3.64
#> 26188  5  4  18 0.261 5.09   1  0   1  0  166  0  25 1.16   6.00

# Best ERAs for starting pitchers post WWII:
# the five lowest single-season ERAs in each league since 1946, among
# pitchers with at least 600 outs (200 innings) pitched
pitching %>% 
    filter(yearID >= 1946 & IPouts >= 600) %>%
    group_by(lgID) %>%
    # sort by ERA within each league, then keep each league's first five
    # rows; slice_head() replaces the superseded do(head(., 5)) idiom
    arrange(ERA, .by_group = TRUE) %>%
    slice_head(n = 5)
#> # A tibble: 10 × 29
#> # Groups:   lgID [2]
#>    playerID  yearID stint teamID lgID      W     L     G    GS    CG   SHO    SV
#>    <chr>      <int> <int> <fct>  <fct> <int> <int> <int> <int> <int> <int> <int>
#>  1 tiantlu01   1968     1 CLE    AL       21     9    34    32    19     9     0
#>  2 chancde01   1964     1 LAA    AL       20     9    46    35    15    11     4
#>  3 guidrro01   1978     1 NYA    AL       25     3    35    35    16     9     0
#>  4 martipe02   2000     1 BOS    AL       18     6    29    29     7     4     0
#>  5 mcdowsa01   1968     1 CLE    AL       15    14    38    37    11     3     0
#>  6 gibsobo01   1968     1 SLN    NL       22     9    34    34    28    13     0
#>  7 goodedw01   1985     1 NYN    NL       24     4    35    35    16     8     0
#>  8 maddugr01   1994     1 ATL    NL       16     6    25    25    10     3     0
#>  9 maddugr01   1995     1 ATL    NL       19     2    28    28    10     3     0
#> 10 greinza01   2015     1 LAN    NL       19     3    32    32     1     0     0
#> # ℹ 17 more variables: IPouts <int>, H <int>, ER <int>, HR <int>, BB <int>,
#> #   SO <int>, BAOpp <dbl>, ERA <dbl>, IBB <int>, WP <int>, HBP <int>, BK <int>,
#> #   BFP <int>, GF <int>, R <int>, WHIP <dbl>, KperBB <dbl>


# Ten highest strikeout-to-walk ratios from 1955 on, among pitchers with
# at least 600 outs pitched. Intentional walks are excluded from the walk
# count, and KperBB is recomputed here without rounding.
pitching %>%
    filter(yearID >= 1955, IPouts >= 600) %>%
    mutate(KperBB = SO / (BB - IBB)) %>%
    arrange(desc(KperBB)) %>%
    head(10)
#>     playerID yearID stint teamID lgID  W  L  G GS CG SHO SV IPouts   H ER HR BB
#> 1  maddugr01   1997     1    ATL   NL 19  4 33 33  5   2  0    698 200 57  9 20
#> 2  hugheph01   2014     1    MIN   AL 16 10 32 32  1   0  0    629 221 82 16 16
#> 3  maddugr01   2001     1    ATL   NL 17 11 34 34  3   3  0    699 220 79 20 27
#> 4  maddugr01   1996     1    ATL   NL 15 11 35 35  5   1  0    735 225 74 11 28
#> 5  schilcu01   2002     1    ARI   NL 23  7 36 35  5   1  0    778 218 93 29 33
#> 6  maddugr01   1995     1    ATL   NL 19  2 28 28 10   3  0    629 147 38  8 23
#> 7  martipe02   2000     1    BOS   AL 18  6 29 29  7   4  0    651 128 42 17 32
#> 8  martipe02   1999     1    BOS   AL 23  4 31 29  5   1  0    640 160 49  9 37
#> 9  scherma01   2015     1    WAS   NL 14 12 33 33  4   3  0    686 176 71 27 34
#> 10 sheetbe01   2004     1    MIL   NL 12 14 34 34  5   0  0    711 201 71 25 32
#>     SO BAOpp  ERA IBB WP HBP BK  BFP GF  R WHIP    KperBB
#> 1  177 0.223 2.20   6  0   6  0  893  0 58 0.95 12.642857
#> 2  186 0.260 3.52   1  1   5  0  855  0 88 1.13 12.400000
#> 3  173 0.239 3.05  10  2   7  0  927  0 86 1.06 10.176471
#> 4  172 0.234 2.72  11  4   3  0  978  0 85 1.03 10.117647
#> 5  316 0.219 3.23   1  6   3  0 1017  0 95 0.97  9.875000
#> 6  181 0.189 1.63   3  1   4  0  785  0 39 0.81  9.050000
#> 7  284 0.164 1.74   0  1  14  0  817  0 44 0.74  8.875000
#> 8  313 0.200 2.07   1  6   9  0  835  1 56 0.92  8.694444
#> 9  276 0.204 2.79   2 10   5  1  899  0 74 0.92  8.625000
#> 10 264 0.220 2.70   1  8   4  1  937  0 85 0.98  8.516129
    
# Ten highest strikeout-to-walk ratios from 1950 on, among pitchers with
# at least 20 saves in the season (uses the rounded KperBB column)
pitching %>%
    filter(yearID >= 1950, SV >= 20) %>%
    arrange(desc(KperBB)) %>%
    head(10)
#>     playerID yearID stint teamID lgID W L  G GS CG SHO SV IPouts  H ER HR BB
#> 1  eckerde01   1990     1    OAK   AL 4 2 63  0  0   0 48    220 41  5  2  4
#> 2  hendrli01   2021     1    CHA   AL 8 3 69  0  0   0 38    213 45 20 11  7
#> 3  eckerde01   1992     1    OAK   AL 7 1 69  0  0   0 51    240 62 17  5 11
#> 4  eckerde01   1989     1    OAK   AL 4 0 51  0  0   0 33    173 32 10  5  3
#> 5  janseke01   2017     1    LAN   NL 5 0 65  0  0   0 41    205 44 10  5  7
#> 6  eckerde01   1991     1    OAK   AL 5 4 67  0  0   0 43    228 60 25 11  9
#> 7  ueharko01   2013     1    BOS   AL 4 1 73  0  0   0 21    223 33  9  5  9
#> 8  riverma01   2008     1    NYA   AL 6 5 64  0  0   0 39    212 41 11  4  6
#> 9  doolise01   2014     1    OAK   AL 2 4 61  0  0   0 22    188 38 19  5  8
#> 10 eckerde01   1996     1    SLN   NL 0 6 63  0  0   0 30    180 65 22  8  6
#>     SO BAOpp  ERA IBB WP HBP BK BFP GF  R WHIP KperBB
#> 1   73 0.157 0.61   1  0   0  0 262 61  9 0.61  24.33
#> 2  113 0.174 2.54   1  6   1  0 267 58 23 0.73  18.83
#> 3   93 0.205 1.91   6  0   1  0 309 65 17 0.91  18.60
#> 4   55 0.156 1.56   0  0   1  0 206 46 10 0.61  18.33
#> 5  109 0.177 1.32   0  2   2  1 258 57 11 0.75  15.57
#> 6   87 0.208 2.96   3  1   1  0 299 59 26 0.91  14.50
#> 7  101 0.129 1.09   2  1   1  0 265 40 10 0.57  14.43
#> 8   77 0.162 1.40   0  1   2  0 259 60 11 0.67  12.83
#> 9   89 0.168 2.73   1  0   0  0 236 40 19 0.73  12.71
#> 10  49 0.265 3.30   2  0   4  0 251 53 26 1.18  12.25

###############################################
# Winningest pitchers in each league each year:
###############################################

# Player lookup table: ID, last name, first name and throwing hand
peopleInfo <- select(People, playerID, nameLast, nameFirst, throws)
                
# Attach the player details to every pitching record; the right join keeps
# all rows of `pitching` and puts the peopleInfo columns first
pitching1 <- peopleInfo %>%
               right_join(pitching, by = "playerID")

# Extract the pitcher(s) with the maximum number of wins 
# each year, by league (ties for the lead are all kept).
# yearID and lgID are selected explicitly: selecting on grouped data
# without them makes dplyr re-add them with an "Adding missing grouping
# variables" message. The grouping is dropped once the per-group filter is
# done, so later steps receive a plain (ungrouped) table.
winp <- pitching1 %>%
         group_by(yearID, lgID) %>%
         filter(W == max(W)) %>% 
         ungroup() %>%
         select(yearID, lgID, nameLast, nameFirst, teamID, W, throws)

# ANOVA table for a simple ANCOVA-style linear model of league-leading wins:
# quadratic trend in year, plus league and throwing hand (L/R)
winp %>%
    lm(W ~ yearID + I(yearID^2) + lgID + throws, data = .) %>%
    anova()
#> Analysis of Variance Table
#> 
#> Response: W
#>              Df  Sum Sq Mean Sq  F value    Pr(>F)    
#> yearID        1 2449.00 2449.00 255.1550 < 2.2e-16 ***
#> I(yearID^2)   1  117.50  117.50  12.2419 0.0005337 ***
#> lgID          1   22.56   22.56   2.3504 0.1262433    
#> throws        1   39.55   39.55   4.1201 0.0432051 *  
#> Residuals   319 3061.80    9.60                       
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The way pitching staffs are managed has changed the importance of
# wins over time
if (FALSE) { # \dontrun{
require("ggplot2") 

# compare loess smooth with quadratic fit
ggplot(winp, aes(x = yearID, y = W)) +
    geom_point(aes(colour = throws, shape = lgID), size = 2) +
    # line thickness is set with `linewidth`; using `size` for lines is
    # deprecated as of ggplot2 3.4.0
    geom_smooth(method = "loess", linewidth = 1.5, color = "blue") +
    geom_smooth(method = "lm", se = FALSE, color = "black", 
                 formula = y ~ poly(x, 2)) +
    ylab("League maximum Wins") + xlab("Year") +
    ggtitle("Maximum pitcher wins by year")
    
## To reinforce this, plot the mean IPouts by year and league,
## which gives some idea of pitcher usage. Restrict pitcher
## pool to those who pitched at least 100 innings in a year.

pitching %>%
  filter(IPouts >= 300) %>%  # >= 100 IP
  ggplot(., aes(x = yearID, y = IPouts, color = lgID)) +
  geom_smooth(method = "loess") +
  labs(x = "Year", y = "IPouts")

## Another indicator: total number of complete games pitched
## (Mirrors the trend from the preceding plot.)
pitching %>% 
   group_by(yearID, lgID) %>%
   # one row per year and league; drop the grouping explicitly so that
   # summarise() does not emit its message about the remaining groups
   summarise(totalCG = sum(CG, na.rm = TRUE), .groups = "drop") %>%
   ggplot(., aes(x = yearID, y = totalCG, color = lgID)) +
      geom_point() +
      geom_path() +
      labs(x = "Year", y = "Number of complete games")
} # }