Skip to contents

Information on the schools that players attended, listed by school

Usage

data(Schools)

Format

A data frame with 1207 observations on the following 5 variables.

schoolID

school ID code

name_full

school name

city

city where school is located

state

state where school's city is located

country

country where school is located

Source

Lahman, S. (2025) Lahman's Baseball Database, 1871-2024, 2025 version, https://sabr.org/lahman-database/

Examples


# Attach dplyr for the pipeline examples below. library() stops with a clear
# error if the package is not installed, whereas require() only returns FALSE
# and lets the failure surface later at the first dplyr call.
library("dplyr")

# Number of schools recorded under each state code
# ([[ ]] extracts the column by its exact name)
table(Schools[["state"]])
#> 
#>  AL  AR  AZ  CA  CO  CT  DC  DE  FL  GA  HI  IA  ID  IL  IN  KS  KY  LA  MA  MD 
#>  33  17  14 136  11  15   7   4  58  25   3  26   8  61  24  27  17  17  28  25 
#>  ME  MI  MN  MO  MS  NC  ND  NE  NH  NJ  NM  NV  NY  OH  OK  OR  PA  RI  SC  SD 
#>   8  31  15  32  22  39   2  12   5  24   7   3  55  42  23  17  72   7  19   1 
#>  TN  TX  UT  VA  VT  WA  WI  WV  WY 
#>  39  80  10  27   4  25  16  13   1 
 
# Number of schools recorded under each country
table(Schools[["country"]])
#> 
#>  USA 
#> 1207 

# Rank schools by their number of CollegePlaying records; show the top 20.
# NOTE(review): n() counts rows of CollegePlaying per school. If that table
# holds more than one row per player and school (e.g. one per season), then
# "players" is a record count rather than a count of distinct players - confirm.

# School lookup without the country column (every school above is in the USA)
schoolInfo <- select(Schools, -country)

schoolCount <- CollegePlaying %>%
  group_by(schoolID) %>%
  summarise(players = n()) %>%
  left_join(schoolInfo, by = "schoolID") %>%
  arrange(desc(players))
head(schoolCount, n = 20)
#> # A tibble: 20 × 5
#>    schoolID   players name_full                                   city     state
#>    <chr>        <int> <chr>                                       <chr>    <chr>
#>  1 texas          265 University of Texas at Austin               Austin   TX   
#>  2 usc            250 University of Southern California           Los Ang… CA   
#>  3 stanford       248 Stanford University                         Palo Al… CA   
#>  4 arizonast      236 Arizona State University                    Tempe    AZ   
#>  5 michigan       191 University of Michigan                      Ann Arb… MI   
#>  6 ucla           180 University of California - Los Angeles      Los Ang… CA   
#>  7 holycross      167 College of the Holy Cross                   Worcest… MA   
#>  8 california     162 University of California - Berkeley         Berkeley CA   
#>  9 arizona        161 University of Arizona                       Tucson   AZ   
#> 10 alabama        155 University of Alabama                       Tuscalo… AL   
#> 11 unc            154 University of North Carolina at Chapel Hill Chapel … NC   
#> 12 floridast      152 Florida State University                    Tallaha… FL   
#> 13 lsu            149 Louisiana State University                  Baton R… LA   
#> 14 illinois       141 University of Illinois at Urbana-Champaign  Champai… IL   
#> 15 clemson        138 Clemson University                          Clemson  SC   
#> 16 florida        138 University of Florida                       Gainesv… FL   
#> 17 gatech         137 Georgia Institute of Technology             Atlanta  GA   
#> 18 oklahoma       135 University of Oklahoma                      Norman   OK   
#> 19 notredame      134 University of Notre Dame                    South B… IN   
#> 20 okstate        132 Oklahoma State University                   Stillwa… OK   

# Roll the per-school counts up to state level: total of the "players" counts
# and the number of schoolCount rows (one per schoolID) in each state.
# NOTE(review): schoolIDs with no match in Schools get state = NA from the
# left_join and are grouped together. str() below reports 50 groups while the
# state table earlier in these examples lists 49 codes, so an NA group is
# presumably present - confirm.
schoolStates <- schoolCount %>%
  group_by(state) %>%
  summarise(
    players = sum(players),
    schools = n()
  )
str(schoolStates)
#> tibble [50 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ state  : chr [1:50] "AL" "AR" "AZ" "CA" ...
#>  $ players: int [1:50] 459 165 524 2948 80 174 98 28 1056 352 ...
#>  $ schools: int [1:50] 27 15 13 123 7 15 5 4 55 11 ...
summary(schoolStates)
#>     state              players           schools      
#>  Length:50          Min.   :   3.00   Min.   :  1.00  
#>  Class :character   1st Qu.:  92.75   1st Qu.:  6.25  
#>  Mode  :character   Median : 214.00   Median : 15.00  
#>                     Mean   : 347.00   Mean   : 20.76  
#>                     3rd Qu.: 452.50   3rd Qu.: 25.00  
#>                     Max.   :2948.00   Max.   :123.00