Skip to contents

Information on the schools that players attended, listed by school

Usage

data(Schools)

Format

A data frame with 1241 observations on the following 5 variables.

schoolID

school ID code

name_full

school name

city

city where school is located

state

state where school's city is located

country

country where school is located

Source

Lahman, S. (2024) Lahman's Baseball Database, 1871-2023, 2024 version, http://www.seanlahman.com/

Examples


require("dplyr")

# Tabulate how many schools are recorded under each value of `state`.
# NOTE(review): the printed table also contains entries that look like city
# names (e.g. "Baltimore", "Berkeley"), so some rows of the data may have
# misaligned fields -- confirm against the source data.
table(Schools[["state"]])
#> 
#>              AL              AR              AZ       Baltimore        Berkeley 
#>              33              17              14               1               1 
#>       Bradenton        Brooklyn      Brookville         Buffalo              CA 
#>               1               2               1               1             127 
#>              CO              CT     Catonsville              DC              DE 
#>              11              15               1               7               4 
#>           Davis            Erie           Essex              FL              GA 
#>               1               1               1              54              25 
#>              HI         Houston              IA              ID              IL 
#>               3               1              25               8              61 
#>              IN          Irvine              KS              KY          Keokuk 
#>              24               1              27              17               1 
#>        Kirkwood              LA        La Jolla     Los Angeles              MA 
#>               1              17               1               1              28 
#>              MD              ME              MI              MN              MO 
#>              21               8              31              12              30 
#>              MS         Mankato           Miami     Monroeville        Moorhead 
#>              22               1               3               1               1 
#>          Morris              NC              ND              NE              NH 
#>               1              39               2              12               5 
#>              NJ              NM              NV              NY   New Brunswick 
#>              22               7               3              50               1 
#>        New York              OH              OK              OR              PA 
#>               1              42              23              17              69 
#>          Pomona              RI       Riverside       Rockville              SC 
#>               1               7               1               1              19 
#>              SD San Luis Obispo   Santa Barbara       St. Louis              TN 
#>               1               1               1               1              39 
#>              TX         Teaneck              UT              VA              VT 
#>              79               1              10              27               4 
#>              WA              WI              WV              WY    West Mifflin 
#>              25              16              13               1               1 
 
# Tabulate how many schools are recorded under each value of `country`.
# NOTE(review): besides "USA", the printed table lists values that look like
# US state codes (e.g. "CA", "FL"); presumably these come from the same
# misaligned rows seen in `state` -- confirm against the source data.
table(Schools[["country"]])
#> 
#>   CA   FL   IA   MD   MN   MO   NJ   NY   PA   TX  USA 
#>    9    4    1    4    3    2    2    5    3    1 1173 

# Top 20 schools, ranked by the number of CollegePlaying rows per school.
# NOTE(review): `players` is a row count per schoolID; if CollegePlaying holds
# more than one row for the same player and school, this is not a count of
# distinct players -- confirm against the CollegePlaying documentation.

# School details to attach to the counts; `country` is dropped here.
schoolInfo <- select(Schools, -country)

schoolCount <- CollegePlaying %>%
  group_by(schoolID) %>%
  summarise(players = n()) %>%
  left_join(schoolInfo, by = "schoolID") %>%
  arrange(desc(players))
head(schoolCount, n = 20)
#> # A tibble: 20 × 5
#>    schoolID   players name_full                                   city     state
#>    <chr>        <int> <chr>                                       <chr>    <chr>
#>  1 texas          265 University of Texas at Austin               "Austin" TX   
#>  2 usc            250 University of Southern California           "Los An… CA   
#>  3 stanford       248 Stanford University                         "Palo A… CA   
#>  4 arizonast      236 Arizona State University                    "Tempe"  AZ   
#>  5 michigan       191 University of Michigan                      "Ann Ar… MI   
#>  6 ucla           180 University of California                    " Los A… Los …
#>  7 holycross      167 College of the Holy Cross                   "Worces… MA   
#>  8 california     162 University of California                    " Berke… Berk…
#>  9 arizona        161 University of Arizona                       "Tucson" AZ   
#> 10 alabama        155 University of Alabama                       "Tuscal… AL   
#> 11 unc            154 University of North Carolina at Chapel Hill "Chapel… NC   
#> 12 floridast      152 Florida State University                    "Tallah… FL   
#> 13 lsu            149 Louisiana State University                  "Baton … LA   
#> 14 illinois       141 University of Illinois at Urbana-Champaign  "Champa… IL   
#> 15 clemson        138 Clemson University                          "Clemso… SC   
#> 16 florida        138 University of Florida                       "Gaines… FL   
#> 17 gatech         137 Georgia Institute of Technology             "Atlant… GA   
#> 18 oklahoma       135 University of Oklahoma                      "Norman" OK   
#> 19 notredame      134 University of Notre Dame                    "South … IN   
#> 20 okstate        132 Oklahoma State University                   "Stillw… OK   

# Roll the per-school counts up by `state`: total `players` and the number of
# schools (rows of schoolCount) for each distinct `state` value.
schoolStates <- summarise(
  group_by(schoolCount, state),
  players = sum(players),
  schools = n()
)
str(schoolStates)
#> tibble [76 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ state  : chr [1:76] "AL" "AR" "AZ" "Baltimore" ...
#>  $ players: int [1:76] 459 165 524 3 162 39 13 3 2412 80 ...
#>  $ schools: int [1:76] 27 15 13 1 1 1 2 1 114 7 ...
# Column-wise summary of the per-state totals.
summary(object = schoolStates)
#>     state              players          schools      
#>  Length:76          Min.   :   1.0   Min.   :  1.00  
#>  Class :character   1st Qu.:  16.0   1st Qu.:  1.00  
#>  Mode  :character   Median :  98.0   Median :  6.00  
#>                     Mean   : 228.3   Mean   : 13.66  
#>                     3rd Qu.: 358.0   3rd Qu.: 19.00  
#>                     Max.   :2412.0   Max.   :114.00