Skip to contents

Hall of Fame table. This is composed of the voting results for all candidates nominated for the Baseball Hall of Fame.

Usage

data(HallOfFame)

Format

A data frame with 6382 observations on the following 9 variables.

playerID

Player ID code

yearID

Year of ballot

votedBy

Method by which player was voted upon. See Details

ballots

Total ballots cast in that year

needed

Number of votes needed for selection in that year

votes

Total votes received

inducted

Whether player was inducted by that vote or not (Y or N)

category

Category of candidate; a factor with levels Manager Pioneer/Executive Player Umpire

needed_note

Explanation of qualifiers for special elections

Details

This table links to the People table via the playerID.

votedBy: Most Hall of Fame inductees have been elected by the Baseball Writers Association of America (BBWAA). Rules for election are described in https://en.wikipedia.org/wiki/National_Baseball_Hall_of_Fame_and_Museum#Selection_process.

Source

Lahman, S. (2024) Lahman's Baseball Database, 1871-2023, 2024 version, http://www.seanlahman.com/

Examples

## Some examples using the Hall of Fame induction data

require("dplyr")
require("ggplot2")

############################################################
## Some simple queries

# What are the different types of HOF voters?
table(HallOfFame$votedBy)
#> 
#>                                 BBWAA                            Centennial 
#>                                  3932                                     6 
#> Contemporary Baseball Era Non-Players                      Contemporary Era 
#>                                     8                                     8 
#>                          Final Ballot                          Negro League 
#>                                    21                                    48 
#>                       Nominating Vote                            Old Timers 
#>                                    76                                    30 
#>                               Run Off                      Special Election 
#>                                    81                                     2 
#>                              Veterans             Veterans - 1943 and Later 
#>                                  1255                                    21 
#>               Veterans - Contributors         Veterans - Early Baseball Era 
#>                                   120                                    10 
#>                 Veterans - Executives              Veterans - Expansion Era 
#>                                    20                                    24 
#>             Veterans - Golden Age Era                 Veterans - Golden Era 
#>                                    10                                    20 
#>       Veterans - Managers and Umpires        Veterans - Modern Baseball Era 
#>                                    20                                    20 
#>                    Veterans - Players                   Veterans - Pre-1943 
#>                                   600                                    10 
#>        Veterans - Pre-Integration Era           Veterans - Today's Game Era 
#>                                    20                                    20 

# What was the first year of Hall of Fame elections?
# min() reads the earliest ballot year directly; no need to sort
# every distinct year just to take the first element.
min(HallOfFame$yearID, na.rm = TRUE)
#> [1] 1936
# Who comprised the original class?
subset(HallOfFame, yearID == 1936 & inducted == "Y")
#>       playerID yearID votedBy ballots needed votes inducted category
#> 876   cobbty01   1936   BBWAA     226    170   222        Y   Player
#> 2845 mathech01   1936   BBWAA     226    170   205        Y   Player
#> 3045 johnswa01   1936   BBWAA     226    170   189        Y   Player
#> 4257  ruthba01   1936   BBWAA     226    170   215        Y   Player
#> 5888 wagneho01   1936   BBWAA     226    170   215        Y   Player
#>      needed_note
#> 876         <NA>
#> 2845        <NA>
#> 3045        <NA>
#> 4257        <NA>
#> 5888        <NA>

# Result of a player's last year on the BBWAA ballot
# Restrict to players voted by BBWAA:
HOFplayers <- subset(HallOfFame,
                     votedBy == "BBWAA" & category == "Player")


# Number of years as HOF candidate, last pct vote, etc.
# for a given player. Built from HOFplayers so the BBWAA/Player
# restriction is stated in one place only.
# The result stays grouped by playerID, one row per player.
playerOutcomes <- HOFplayers %>%
    group_by(playerID) %>%
    mutate(nyears = n()) %>%            # number of BBWAA ballots per player
    arrange(yearID) %>%
    slice_tail(n = 1) %>%               # keep each player's most recent ballot
    mutate(lastPct = 100 * round(votes/ballots, 3)) %>%
    select(playerID, nyears, inducted, lastPct, yearID) %>%
    rename(lastYear = yearID)


############################################################
# How many voting years until election?
inducted <- subset(playerOutcomes, inducted == "Y")
table(inducted$nyears)
#> 
#>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 
#> 59 11 10  8  8  7  3  4  6  6  3  1  4  1  2 

# Bar chart: how many inductees needed 1, 2, 3, ... ballots
barplot(
  table(inducted$nyears),
  xlab = "Years",
  ylab = "Number of players",
  main = "Number of voting years until election"
)
box()  # draw a frame around the plot region


# What is the form of this distribution?
require("vcd")
#> Loading required package: vcd
#> Loading required package: grid
goodfit(inducted$nyears)
#> 
#> Observed and fitted values for poisson distribution
#> with parameters estimated by `ML' 
#> 
#>  count observed       fitted pearson residual
#>      0        0  2.417733017     -1.554906112
#>      1       59  9.689110511     15.841667070
#>      2       11 19.414646250     -1.909726270
#>      3       10 25.934853261     -3.129004379
#>      4        8 25.983602985     -3.527987359
#>      5        8 20.825955475     -2.810523754
#>      6        7 13.910068005     -1.852753724
#>      7        3  7.963551286     -1.758891795
#>      8        4  3.989260183      0.005377132
#>      9        6  1.776337241      3.169031265
#>     10        6  0.711870488      6.267604568
#>     11        3  0.259348578      5.381605592
#>     12        1  0.086612025      3.103603212
#>     13        4  0.026699948     24.316225271
#>     14        1  0.007642896     11.351127767
#>     15        2  0.002041937     38.382154172
plot(goodfit(inducted$nyears), xlab="Number of years",
  main="Poissonness plot of number of years voting until election")

Ord_plot(table(inducted$nyears), xlab="Number of years")




# Players inducted on their first (and only) BBWAA ballot,
# highest final vote percentage first:
playerOutcomes %>%
  filter(inducted == "Y", nyears == 1L) %>%
  arrange(desc(lastPct))
#> # A tibble: 59 × 5
#> # Groups:   playerID [59]
#>    playerID  nyears inducted lastPct lastYear
#>    <chr>      <int> <fct>      <dbl>    <int>
#>  1 riverma01      1 Y          100       2019
#>  2 jeterde01      1 Y           99.7     2020
#>  3 griffke02      1 Y           99.3     2016
#>  4 ryanno01       1 Y           98.8     1999
#>  5 seaveto01      1 Y           98.8     1992
#>  6 ripkeca01      1 Y           98.5     2007
#>  7 brettge01      1 Y           98.2     1999
#>  8 cobbty01       1 Y           98.2     1936
#>  9 aaronha01      1 Y           97.8     1982
#> 10 gwynnto01      1 Y           97.6     2007
#> # ℹ 49 more rows

# Inductees who spent ten or more years on the ballot first
playerOutcomes %>%
  filter(inducted == "Y", nyears >= 10L)
#> # A tibble: 17 × 5
#> # Groups:   playerID [17]
#>    playerID  nyears inducted lastPct lastYear
#>    <chr>      <int> <fct>      <dbl>    <int>
#>  1 blylebe01     14 Y           79.7     2011
#>  2 boudrlo01     10 Y           77.3     1970
#>  3 cronijo01     10 Y           78.8     1956
#>  4 drysddo01     10 Y           78.4     1984
#>  5 hartnga01     11 Y           77.7     1955
#>  6 heilmha01     11 Y           86.8     1952
#>  7 kinerra01     13 Y           75.4     1975
#>  8 lemonbo01     12 Y           78.6     1976
#>  9 maranra01     13 Y           82.9     1954
#> 10 martied01     10 Y           85.4     2019
#> 11 raineti01     10 Y           86       2017
#> 12 riceji01      15 Y           76.4     2009
#> 13 snidedu01     11 Y           86.5     1980
#> 14 suttebr01     13 Y           76.9     2006
#> 15 terrybi01     13 Y           77.4     1954
#> 16 vanceda01     15 Y           81.7     1955
#> 17 walkela01     10 Y           76.6     2020

############################################################
## Plots of voting percentages over time for the borderline
## HOF candidates, according to the BBWAA:

# Players with at least 10 appearances on the BBWAA ballot,
# as a plain character vector of playerIDs
longTimers <- playerOutcomes %>%
    filter(nyears >= 10) %>%
    pull(playerID) %>%
    as.character()

# All BBWAA ballot rows for the long-time candidates. Each player is
# labelled "Elected" if any of those rows is an induction, and every
# row gets its vote percentage. The result is left grouped by playerID.
HOFlt <- HallOfFame %>%
    filter(votedBy == "BBWAA", playerID %in% longTimers) %>%
    group_by(playerID) %>%
    mutate(
        elected = ifelse(any(inducted == "Y"), "Elected", "Not elected"),
        pct = 100 * round(votes/ballots, 3)
    )

# One line per player, split into elected / not elected panels;
# the red rule marks the 75% line.
ggplot(HOFlt, aes(x = yearID, y = pct, group = playerID)) +
    geom_line() +
    geom_hline(yintercept = 75, colour = "red") +
    facet_wrap(~ elected, ncol = 1) +
    labs(title = "Profiles of BBWAA voting percentage, long-time HOF candidates",
         x = "Year",
         y = "Percentage of votes")

    
## Eventual inductees tend to have increasing support over time.
## Fit simple linear regression models to each player's voting
## percentage profile and extract the slopes. Then compare the
## distributions of the slopes in each group.

# data frame for playerID and induction status among
# long term candidates: one row per player, taken from the
# last row of that player's block in HOFlt
HOFstatus <- HOFlt %>%
               group_by(playerID) %>%
               select(playerID, elected, inducted) %>%
               slice_tail(n = 1)

# data frame of regression slopes, which represent average
# increase in percentage support by BBWAA members over a
# player's candidacy. playerID is kept next to each slope so the
# result can be matched to HOFstatus by key.
HOFslope <- HOFlt %>%
              group_by(playerID) %>%
              summarise(slope = coef(lm(pct ~ yearID))[[2]])

## Boxplots of regression slopes by induction group.
## The two tables are joined on playerID instead of being bound
## side by side, so each slope is paired with its own player's
## status regardless of row order.
ggplot(inner_join(HOFstatus, HOFslope, by = "playerID"),
       aes(x = elected, y = slope)) +
    geom_boxplot(width = 0.5) +
    geom_point(position = position_jitter(width = 0.2))


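# Note 1: Among these long-time candidates, only two players whose
# maximum BBWAA voting percentage was over 60% were not eventually
# inducted into the HOF: Gil Hodges and Jack Morris.
# Of those who fell short on the BBWAA ballot but were inducted later,
# Red Ruffing was elected in a 1967 runoff election, while
# the others were voted in by the Veterans Committee.

# Note 2: Among the non-inductees whose slope was >= 2.5,
# only Jack Morris has not (yet) been subsequently inducted
# into the HOF; however, his last year of BBWAA
# eligibility was 2014, so he could still be inducted by a future
# Veterans Committee.

# NOTE(review): Notes 1 and 2 appear to predate later era-committee
# elections (e.g. Jack Morris, Gil Hodges) -- verify both notes
# against the current HallOfFame data before relying on them.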