• During the regular season, most games are played between teams within the same conference.
  • Most tournament games, however, are played between teams from different conferences, so models should be able to generalize from within-conference results to cross-conference matchups.
  • Ideas
    • Estimate the level of the conferences using multilevel models
    • Use matrix completion to find a low-rank approximation to the pairwise matchup data

1 Setup

1.1 Load Packages

import os

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from tabulate import tabulate

from src import utils  # see src/ folder in project repo
from src.data import make_dataset
# Season analysed in this notebook; used below to load the box-score data
# and to filter the team/conference table.
SEASON = 2015

1.2 Helper Functions

# Printer for DataFrames, built by the project helper with the 'orgtbl' table format.
print_df = utils.create_print_df_fcn(tablefmt='orgtbl')
# Figure helper built with this notebook's image directory.
# NOTE(review): the original called .format(SEASON) on this path, but the
# string has no '{}' placeholder, so the call was a no-op. If figures are
# meant to be stored per season, add a placeholder to the path explicitly.
show_fig = utils.create_show_fig_fcn(img_dir='eda/pairwise_matchups/')

1.3 Load Data

# Box-score dataset for the selected season.
data = make_dataset.get_boxscore_dataset_v1(SEASON)

# Score margin: team1's score minus team2's score.
data['scorediff'] = data['score1'] - data['score2']

# Winner's and loser's scores, selected via the team1win flag.
data['score_w'] = np.where(data['team1win'] == 1, data['score1'], data['score2'])
data['score_l'] = np.where(data['team1win'] == 0, data['score1'], data['score2'])

print('Data size = {}'.format(data.shape))
print_df(data.head())
Data size = (5421, 141)
|    |   season |   daynum |   numot |   tourney |   team1 |   team2 |   score1 |   score2 |   loc |   team1win | seed1   |   seednum1 |   seed2 |   seednum2 | confabbrev1   | conf_descr1              | confabbrev2   | conf_descr2                   | teamname1   |   firstd1season1 |   lastd1season1 | teamname2    |   firstd1season2 |   lastd1season2 |   seeddiff |             ID |   score_team_mean1 |   score_team_std1 |   fgm_team_mean1 |   fgm_team_std1 |   fga_team_mean1 |   fga_team_std1 |   fgm3_team_mean1 |   fgm3_team_std1 |   fga3_team_mean1 |   fga3_team_std1 |   ftm_team_mean1 |   ftm_team_std1 |   fta_team_mean1 |   fta_team_std1 |   or_team_mean1 |   or_team_std1 |   dr_team_mean1 |   dr_team_std1 |   ast_team_mean1 |   ast_team_std1 |   to_team_mean1 |   to_team_std1 |   stl_team_mean1 |   stl_team_std1 |   blk_team_mean1 |   blk_team_std1 |   pf_team_mean1 |   pf_team_std1 |   score_opp_mean1 |   score_opp_std1 |   fgm_opp_mean1 |   fgm_opp_std1 |   fga_opp_mean1 |   fga_opp_std1 |   fgm3_opp_mean1 |   fgm3_opp_std1 |   fga3_opp_mean1 |   fga3_opp_std1 |   ftm_opp_mean1 |   ftm_opp_std1 |   fta_opp_mean1 |   fta_opp_std1 |   or_opp_mean1 |   or_opp_std1 |   dr_opp_mean1 |   dr_opp_std1 |   ast_opp_mean1 |   ast_opp_std1 |   to_opp_mean1 |   to_opp_std1 |   stl_opp_mean1 |   stl_opp_std1 |   blk_opp_mean1 |   blk_opp_std1 |   pf_opp_mean1 |   pf_opp_std1 |   score_team_mean2 |   score_team_std2 |   fgm_team_mean2 |   fgm_team_std2 |   fga_team_mean2 |   fga_team_std2 |   fgm3_team_mean2 |   fgm3_team_std2 |   fga3_team_mean2 |   fga3_team_std2 |   ftm_team_mean2 |   ftm_team_std2 |   fta_team_mean2 |   fta_team_std2 |   or_team_mean2 |   or_team_std2 |   dr_team_mean2 |   dr_team_std2 |   ast_team_mean2 |   ast_team_std2 |   to_team_mean2 |   to_team_std2 |   stl_team_mean2 |   stl_team_std2 |   blk_team_mean2 |   blk_team_std2 |   pf_team_mean2 |   pf_team_std2 |   score_opp_mean2 |   score_opp_std2 |   fgm_opp_mean2 |   fgm_opp_std2 |   fga_opp_mean2 
|   fga_opp_std2 |   fgm3_opp_mean2 |   fgm3_opp_std2 |   fga3_opp_mean2 |   fga3_opp_std2 |   ftm_opp_mean2 |   ftm_opp_std2 |   fta_opp_mean2 |   fta_opp_std2 |   or_opp_mean2 |   or_opp_std2 |   dr_opp_mean2 |   dr_opp_std2 |   ast_opp_mean2 |   ast_opp_std2 |   to_opp_mean2 |   to_opp_std2 |   stl_opp_mean2 |   stl_opp_std2 |   blk_opp_mean2 |   blk_opp_std2 |   pf_opp_mean2 |   pf_opp_std2 |   scorediff |   score_w |   score_l |
|----+----------+----------+---------+-----------+---------+---------+----------+----------+-------+------------+---------+------------+---------+------------+---------------+--------------------------+---------------+-------------------------------+-------------+------------------+-----------------+--------------+------------------+-----------------+------------+----------------+--------------------+-------------------+------------------+-----------------+------------------+-----------------+-------------------+------------------+-------------------+------------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-------------------+------------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+----------------+---------------+----------------+---------------+-----------------+----------------+----------------+---------------+-----------------+----------------+-----------------+----------------+----------------+---------------+--------------------+-------------------+------------------+-----------------+------------------+-----------------+-------------------+------------------+-------------------+------------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-------------------+------------------+-----------------+----------------+-----------------
+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+----------------+---------------+----------------+---------------+-----------------+----------------+----------------+---------------+-----------------+----------------+-----------------+----------------+----------------+---------------+-------------+-----------+-----------|
|  0 |     2015 |       11 |       0 |         0 |    1103 |    1420 |       74 |       57 |  1103 |          1 | nan     |        nan |     nan |        nan | mac           | Mid-American Conference  | aec           | America East Conference       | Akron       |             1985 |            2019 | UMBC         |             1987 |            2019 |        nan | 2015_1103_1420 |            67.3529 |           9.16476 |          23.1471 |         3.27648 |          56.3529 |         6.67824 |           9.41176 |          3.11514 |           26.9412 |          5.03287 |          11.6471 |         4.74745 |          17.6471 |         6.51284 |        11.9706  |        4.50915 |         23.9412 |        4.40345 |          12.4118 |         3.35855 |         12.1765 |        3.67193 |          6.26471 |         3.19383 |          4.47059 |         1.82964 |         18.8529 |        4.1643  |           63.2353 |         11.2143  |         21.5294 |        4.0169  |         53.1765 |        5.61098 |          5.38235 |         2.88165 |          16.2941 |         4.78969 |         14.7941 |        6.0542  |         21.7941 |        7.38048 |       11       |       4.19235 |        24.0294 |       3.84144 |        10.4412  |        3.60295 |        12.2059 |       4.11044 |         6.97059 |        2.99985 |         2.82353 |        2.56398 |        16.8529 |       3.30411 |            56.0667 |           8.64604 |          19.7    |         3.13105 |          48.0333 |         4.40598 |           5.1     |          2.42615 |           15.6333 |          4.49124 |         11.5667  |         4.39971 |          17.9667 |         5.37865 |          7.5    |        3.31922 |         22.5667 |        3.93642 |         11.2667  |         3.00498 |         15.5667 |        4.39187 |          6.56667 |         2.69972 |          3       |         2.06782 |         15.8333 |        3.84244 |           67.1    |          8.53936 |         24      |        3.25894 |         53.4667 
|        6.94179 |          6.76667 |         2.54183 |          18.7333 |         5.29758 |         12.3333 |        4.97118 |         17.6667 |        6.7022  |         9.9    |       3.75408 |        24.7    |       3.68735 |         13.6    |        4.40689 |       12.7667  |       3.58813 |         8.7     |        3.01891 |         3.06667 |        2.25806 |        18.1667 |       3.02955 |          17 |        74 |        57 |
|  1 |     2015 |       11 |       0 |         0 |    1104 |    1406 |       82 |       54 |  1104 |          1 | nan     |        nan |     nan |        nan | sec           | Southeastern Conference  | caa           | Colonial Athletic Association | Alabama     |             1985 |            2019 | Towson       |             1985 |            2019 |        nan | 2015_1104_1406 |            66.6452 |          11.3418  |          22.0323 |         4.05367 |          50.2581 |         8.09925 |           6.58065 |          2.26236 |           20.4839 |          3.84596 |          16      |         6.96659 |          22.1613 |         8.72199 |         9.51613 |        3.38498 |         23.0968 |        4.36161 |          10.3548 |         3.96273 |         12.0968 |        2.92523 |          6.29032 |         2.35504 |          3.6129  |         2.21626 |         19.2903 |        4.5985  |           64.5161 |         13.7982  |         21.7097 |        4.03479 |         52.0323 |        5.98043 |          6.29032 |         2.36916 |          19.3871 |         5.63133 |         14.8065 |        6.43646 |         20.8387 |        8.84721 |       10.6452  |       3.81705 |        22.1613 |       4.74761 |        11.8065  |        3.35081 |        12.0968 |       4.01958 |         5.03226 |        2.4696  |         2.32258 |        1.64088 |        19.5484 |       4.50806 |            62.4839 |          12.3824  |          21.0323 |         3.64677 |          51.5161 |         6.60238 |           3.90323 |          2.49473 |           12.9677 |          4.88865 |         16.5161  |         7.75401 |          24.9032 |         9.69314 |         14.3871 |        4.58023 |         24.0323 |        5.30713 |          8.48387 |         3.65031 |         13.3871 |        4.59476 |          3.48387 |         1.85959 |          3.87097 |         1.83924 |         21.1613 |        3.83055 |           65.7097 |         12.2316  |         21.6774 |        4.24568 |         51.7742 
|        8.69371 |          5.93548 |         3.31598 |          17.3548 |         5.95846 |         16.4194 |        7.19154 |         23.2903 |        8.14532 |        10.0645 |       3.50177 |        20.5806 |       5.14959 |         11.2581 |        3.78565 |        9.29032 |       3.77    |         6.12903 |        3.28372 |         4.51613 |        2.59321 |        20.0645 |       5.08551 |          28 |        82 |        54 |
|  2 |     2015 |       11 |       0 |         0 |    1112 |    1291 |       78 |       55 |  1112 |          1 | Z02     |          2 |     nan |        nan | pac_twelve    | Pacific-12 Conference    | nec           | Northeast Conference          | Arizona     |             1985 |            2019 | Mt St Mary's |             1989 |            2019 |        nan | 2015_1112_1291 |            76.4412 |          12.0383  |          26.7059 |         5.84408 |          54.5588 |         6.12591 |           5.05882 |          1.96856 |           14.0588 |          3.86861 |          17.9706 |         5.42433 |          25.7059 |         6.71285 |        10.8235  |        3.81759 |         26.4118 |        4.00846 |          14.2059 |         4.29785 |         11.2059 |        3.19829 |          7.17647 |         2.30244 |          3.58824 |         1.8442  |         17.9118 |        3.51936 |           58.6176 |          9.32255 |         20.1765 |        3.07946 |         51.5294 |        5.81135 |          5.32353 |         1.98052 |          16.2059 |         5.12143 |         12.9412 |        5.74161 |         18.7059 |        6.9958  |        7.58824 |       3.38551 |        20.8824 |       4.05092 |         9.85294 |        2.92463 |        14.1471 |       2.85118 |         4.67647 |        2.29255 |         2.55882 |        2.16293 |        21.1176 |       3.89844 |            63.1    |          12.7829  |          22.0667 |         4.55566 |          54.8333 |         5.83144 |           7.6     |          2.51341 |           23.0333 |          4.95137 |         11.3667  |         6.78479 |          16.6667 |         8.28931 |         10.8333 |        3.93992 |         21.8    |        4.80947 |         11.4     |         3.61606 |         12.2333 |        4.13299 |          6.73333 |         3.06182 |          3.73333 |         2.1645  |         18.3    |        4.76445 |           64.8333 |         12.069   |         23.4    |        4.91023 |         52.6    
|        6.69328 |          5.4     |         2.41547 |          15.5    |         3.71158 |         12.6333 |        6.31082 |         18.7    |        8.4574  |        10.4667 |       3.61733 |        24.5333 |       4.27288 |         11.4    |        3.99655 |       13.7     |       4.02706 |         6.33333 |        2.74595 |         2.73333 |        1.55216 |        16.8    |       5.17554 |          23 |        78 |        55 |
|  3 |     2015 |       11 |       0 |         0 |    1113 |    1152 |       86 |       50 |  1113 |          1 | nan     |        nan |     nan |        nan | pac_twelve    | Pacific-12 Conference    | wac           | Western Athletic Conference   | Arizona St  |             1985 |            2019 | Chicago St   |             1985 |            2019 |        nan | 2015_1113_1152 |            69.4375 |          12.024   |          23.625  |         4.44863 |          53.1562 |         7.37961 |           6.8125  |          2.84477 |           19.5625 |          4.73789 |          15.375  |         6.10526 |          23      |         8.94788 |        10.7188  |        3.61212 |         23.875  |        5.33249 |          12.9375 |         3.77545 |         13.875  |        3.09787 |          5.90625 |         2.45422 |          2.28125 |         1.61114 |         18.9375 |        3.40718 |           66.4375 |         13.2979  |         24      |        5.14938 |         53.3438 |        7.40797 |          5.5     |         2.38273 |          15.1562 |         4.71859 |         12.9375 |        5.73578 |         18.625  |        6.72381 |        8.3125  |       3.89737 |        22.9062 |       4.9797  |        12.3438  |        4.44761 |        12.9688 |       3.9471  |         6.125   |        2.47243 |         4.1875  |        2.87859 |        21.0938 |       4.69289 |            55.2414 |          11.3975  |          19.6207 |         4.27963 |          53.5517 |         5.78536 |           6.06897 |          2.34416 |           20.1034 |          4.82068 |          9.93103 |         5.35811 |          14.5172 |         6.73806 |         12.2759 |        3.84426 |         19.8966 |        4.4669  |          8.82759 |         2.86692 |         14.931  |        4.75793 |          7.68966 |         3.36052 |          2.86207 |         2.341   |         22.1724 |        4.44063 |           67.5172 |         11.5561  |         21.5517 |        4.24757 |         47.4138 
|        6.8218  |          6.96552 |         3.1451  |          18.2759 |         6.13478 |         17.4483 |        6.23118 |         25.6207 |        8.92994 |        10      |       4.14901 |        23.9655 |       4.19594 |         12.8966 |        3.94013 |       14.1379  |       5.132   |         7.31034 |        3.14079 |         3.51724 |        1.93872 |        15.7586 |       4.48533 |          36 |        86 |        50 |
|  4 |     2015 |       11 |       0 |         0 |    1102 |    1119 |       78 |       84 |  1119 |          0 | nan     |        nan |     nan |        nan | mwc           | Mountain West Conference | patriot       | Patriot League                | Air Force   |             1985 |            2019 | Army         |             1985 |            2019 |        nan | 2015_1102_1119 |            64.7241 |          11.3983  |          23.6207 |         4.32942 |          50.7586 |         6.67434 |           7.17241 |          3.0714  |           20.2414 |          4.9183  |          10.3103 |         4.97927 |          15.6552 |         6.2751  |         8.7931  |        4.03006 |         20.7931 |        3.99445 |          14.8966 |         4.76853 |         11.7241 |        3.72153 |          6.31034 |         2.46553 |          1.96552 |         1.88002 |         17.8276 |        3.28491 |           65.8621 |         12.9965  |         22.6897 |        4.08921 |         50.4828 |        6.2199  |          8.13793 |         2.94866 |          22.2414 |         4.61097 |         12.3448 |        6.0254  |         17.8966 |        7.85741 |       10.0345  |       4.37919 |        21.069  |       5.86108 |        14.4483  |        4.07594 |        12.3793 |       3.34252 |         5.7931  |        2.56876 |         3.62069 |        2.04265 |        17.069  |       4.52715 |            71.4138 |           8.55869 |          25.6207 |         3.78355 |          58.5862 |         6.52204 |           7.34483 |          2.70285 |           22.7931 |          4.86518 |         12.8276  |         4.9213  |          18.7586 |         6.96243 |         10.5517 |        3.68962 |         23.6207 |        3.94076 |         14.1379  |         2.46003 |         13.3448 |        3.53832 |          6.13793 |         2.70877 |          3.55172 |         1.91956 |         20.6897 |        3.15214 |           72.931  |         11.6372  |         25.8621 |        4.82349 |         56.4828 
|        6.8171  |          5.82759 |         2.81665 |          16.3448 |         4.38566 |         15.3793 |        5.62782 |         23      |        7.31437 |        10.8621 |       3.51247 |        24.9655 |       4.57854 |         14.069  |        4.81019 |       13.2069  |       4.00339 |         7       |        2.952   |         3.62069 |        2.41149 |        18.7931 |       3.73573 |          -6 |        84 |        78 |

1.4 Process Data

# Team-to-conference table, restricted to the season under study.
# Requires `import os` at the top of the file for os.path.join.
TeamConferences = pd.read_csv(
    os.path.join(utils.get_project_root(), 'input/datafiles/TeamConferences.csv')
)
TeamConferences = TeamConferences[TeamConferences['Season'] == SEASON]

# Team IDs sorted by conference abbreviation, then by team ID, so that teams
# from the same conference are adjacent in the list.
teams_ordered = list(
    TeamConferences.sort_values(['ConfAbbrev', 'TeamID'])['TeamID']
)

# Every ordered (team1, team2) combination, including a team paired with itself.
teams_pairwise = [(t1, t2) for t1 in teams_ordered for t2 in teams_ordered]

1.5 Basic Description

# Number of missing values per column, labelled so it shows up as an
# 'n_missing' row beneath the summary statistics.
n_missing = data.isna().sum().rename('n_missing')
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with the Series turned into a one-row frame gives the same extra row.
print_df(pd.concat([data.describe(), n_missing.to_frame().T]))
|           |   season |    daynum |        numot |      tourney |     team1 |     team2 |    score1 |    score2 |    team1win |   seednum1 |   seednum2 |   firstd1season1 |   lastd1season1 |   firstd1season2 |   lastd1season2 |    seeddiff |   score_team_mean1 |   score_team_std1 |   fgm_team_mean1 |   fgm_team_std1 |   fga_team_mean1 |   fga_team_std1 |   fgm3_team_mean1 |   fgm3_team_std1 |   fga3_team_mean1 |   fga3_team_std1 |   ftm_team_mean1 |   ftm_team_std1 |   fta_team_mean1 |   fta_team_std1 |   or_team_mean1 |   or_team_std1 |   dr_team_mean1 |   dr_team_std1 |   ast_team_mean1 |   ast_team_std1 |   to_team_mean1 |   to_team_std1 |   stl_team_mean1 |   stl_team_std1 |   blk_team_mean1 |   blk_team_std1 |   pf_team_mean1 |   pf_team_std1 |   score_opp_mean1 |   score_opp_std1 |   fgm_opp_mean1 |   fgm_opp_std1 |   fga_opp_mean1 |   fga_opp_std1 |   fgm3_opp_mean1 |   fgm3_opp_std1 |   fga3_opp_mean1 |   fga3_opp_std1 |   ftm_opp_mean1 |   ftm_opp_std1 |   fta_opp_mean1 |   fta_opp_std1 |   or_opp_mean1 |   or_opp_std1 |   dr_opp_mean1 |   dr_opp_std1 |   ast_opp_mean1 |   ast_opp_std1 |   to_opp_mean1 |   to_opp_std1 |   stl_opp_mean1 |   stl_opp_std1 |   blk_opp_mean1 |   blk_opp_std1 |   pf_opp_mean1 |   pf_opp_std1 |   score_team_mean2 |   score_team_std2 |   fgm_team_mean2 |   fgm_team_std2 |   fga_team_mean2 |   fga_team_std2 |   fgm3_team_mean2 |   fgm3_team_std2 |   fga3_team_mean2 |   fga3_team_std2 |   ftm_team_mean2 |   ftm_team_std2 |   fta_team_mean2 |   fta_team_std2 |   or_team_mean2 |   or_team_std2 |   dr_team_mean2 |   dr_team_std2 |   ast_team_mean2 |   ast_team_std2 |   to_team_mean2 |   to_team_std2 |   stl_team_mean2 |   stl_team_std2 |   blk_team_mean2 |   blk_team_std2 |   pf_team_mean2 |   pf_team_std2 |   score_opp_mean2 |   score_opp_std2 |   fgm_opp_mean2 |   fgm_opp_std2 |   fga_opp_mean2 |   fga_opp_std2 |   fgm3_opp_mean2 |   fgm3_opp_std2 |   fga3_opp_mean2 |   fga3_opp_std2 |   ftm_opp_mean2 |   ftm_opp_std2 |   
fta_opp_mean2 |   fta_opp_std2 |   or_opp_mean2 |   or_opp_std2 |   dr_opp_mean2 |   dr_opp_std2 |   ast_opp_mean2 |   ast_opp_std2 |   to_opp_mean2 |   to_opp_std2 |   stl_opp_mean2 |   stl_opp_std2 |   blk_opp_mean2 |   blk_opp_std2 |   pf_opp_mean2 |   pf_opp_std2 |   scorediff |   score_w |   score_l |   ID |   conf_descr1 |   conf_descr2 |   confabbrev1 |   confabbrev2 |   loc |   seed1 |   seed2 |   teamname1 |   teamname2 |
|-----------+----------+-----------+--------------+--------------+-----------+-----------+-----------+-----------+-------------+------------+------------+------------------+-----------------+------------------+-----------------+-------------+--------------------+-------------------+------------------+-----------------+------------------+-----------------+-------------------+------------------+-------------------+------------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-------------------+------------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+----------------+---------------+----------------+---------------+-----------------+----------------+----------------+---------------+-----------------+----------------+-----------------+----------------+----------------+---------------+--------------------+-------------------+------------------+-----------------+------------------+-----------------+-------------------+------------------+-------------------+------------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+-------------------+------------------+-----------------+----------------+-----------------+----------------+------------------+-----------------+------------------+-----------------+-----------------+----------------+--------------
---+----------------+----------------+---------------+----------------+---------------+-----------------+----------------+----------------+---------------+-----------------+----------------+-----------------+----------------+----------------+---------------+-------------+-----------+-----------+------+---------------+---------------+---------------+---------------+-------+---------+---------+-------------+-------------|
| count     |     5421 | 5421      | 5421         | 5421         | 5421      | 5421      | 5421      | 5421      | 5421        | 1166       | 1161       |       5421       |            5421 |       5421       |            5421 |  376        |         5421       |        5421       |       5421       |     5421        |       5421       |     5421        |        5421       |      5421        |        5421       |      5421        |       5421       |     5421        |       5421       |      5421       |      5421       |    5421        |      5421       |    5421        |       5421       |     5421        |      5421       |    5421        |       5421       |     5421        |      5421        |     5421        |      5421       |    5421        |        5421       |       5421       |      5421       |    5421        |      5421       |    5421        |      5421        |     5421        |       5421       |      5421       |      5421       |    5421        |      5421       |     5421       |     5421       |   5421        |     5421       |   5421        |      5421       |    5421        |     5421       |   5421        |     5421        |    5421        |      5421       |    5421        |      5421      |   5421        |         5421       |        5421       |       5421       |     5421        |       5421       |      5421       |        5421       |      5421        |        5421       |      5421        |       5421       |     5421        |       5421       |      5421       |      5421       |    5421        |      5421       |    5421        |       5421       |     5421        |      5421       |    5421        |       5421       |     5421        |      5421        |     5421        |      5421       |    5421        |        5421       |       5421       |      5421       |    5421        |      5421       |    5421        |      5421        |     5421        |       5421       |     5421        |      5421       |    5421        |      5421    
   |     5421       |     5421       |   5421        |     5421       |   5421        |      5421       |    5421        |     5421       |   5421        |     5421        |    5421        |     5421        |    5421        |     5421       |   5421        | 5421        | 5421      | 5421      |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| mean      |     2015 |   71.5853 |    0.0791367 |    0.0123593 | 1224.32   | 1343.95   |   66.3785 |   67.3331 |    0.472607 |    8.57719 |    8.62532 |       1988.08    |            2019 |       1987.84    |            2019 |    0.361702 |           66.8737  |          10.8762  |         23.2751  |        4.26806  |         54.0455  |        6.57     |           6.4382  |         2.57861  |          18.7178  |         4.73917  |         13.8853  |        5.70753  |         19.9815  |         7.43975 |        10.5241  |       3.68     |        23.2814  |       4.65676  |         12.3241  |        3.78532  |        12.5499  |       3.71573  |          6.17826 |        2.60557  |         3.35053  |        2.00059  |        18.3489  |       4.04711  |          67.0515  |         11.0074  |        23.4055  |       4.2562   |        53.9935  |       6.60589  |         6.35928  |        2.7126   |         18.4858  |         5.10264 |        13.8812  |       5.62295  |        20.0755  |        7.34583 |       10.5276  |      3.89007  |       23.4072  |      4.64446  |        12.4925  |       3.93822  |       12.5597  |      3.67837  |        6.13468  |       2.67936  |         3.42944 |       2.22683  |        18.279  |      4.19815  |           66.9445  |          10.9498  |         23.4001  |        4.29494  |         54.0131  |         6.55353 |           6.24919 |         2.55941  |          18.2306  |         4.69213  |         13.895   |        5.76368  |         20.147   |         7.5511  |        10.564   |       3.68122  |        23.5098  |       4.75349  |         12.4947  |        3.83918  |        12.4979  |       3.71057  |          6.14745 |        2.5967   |         3.52914  |        2.04024  |        18.2497  |       4.06368  |          66.5369  |         10.9999  |        23.196   |       4.26425  |        54.0559  |       6.579    |         6.31115  |        2.71259  |         18.458   |        5.11074  |        13.8338  |       5.64778  |        
19.9731  |        7.28873 |       10.5441  |      3.87367  |       23.3122  |      4.67257  |        12.263   |       3.93063  |       12.5203  |      3.66215  |        6.16594  |       2.70921  |        3.42612  |       2.22973  |       18.3635  |      4.24335  |   -0.954621 |   72.6838 |   61.0279 |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| std       |        0 |   36.0027 |    0.326858  |    0.110494  |   85.1581 |   84.8386 |   12.0862 |   11.9573 |    0.499295 |    4.77696 |    4.60865 |          7.07249 |               0 |          6.70103 |               0 |    5.70773  |            5.52464 |           1.62225 |          2.04792 |        0.670938 |          3.16697 |        0.989161 |           1.3369  |         0.421129 |           3.16423 |         0.724162 |          1.93089 |        0.794005 |          2.58713 |         1.01603 |         1.67694 |       0.616066 |         1.75966 |       0.661675 |          1.8555  |        0.656846 |         1.43128 |       0.555918 |          1.07231 |        0.443117 |         1.06216  |        0.472462 |         1.71815 |       0.584008 |           4.97773 |          1.56059 |         1.95446 |       0.653012 |         3.12226 |       0.983382 |         0.945072 |        0.444129 |          2.25458 |         0.83015 |         2.03363 |       0.816337 |         2.83404 |        1.08191 |        1.23991 |      0.571926 |        1.80676 |      0.677706 |         1.50078 |       0.655381 |        1.47795 |      0.587964 |        0.879407 |       0.431049 |         0.57748 |       0.382304 |         1.3953 |      0.590252 |            4.99212 |           1.60498 |          1.84444 |        0.674099 |          3.14916 |         1.03724 |           1.20583 |         0.404513 |           2.83496 |         0.763442 |          1.78459 |        0.792893 |          2.37795 |         1.02731 |         1.80513 |       0.619388 |         1.63033 |       0.615972 |          1.77211 |        0.595488 |         1.43694 |       0.573845 |          1.15997 |        0.464638 |         1.07918  |        0.453357 |         1.88618 |       0.600851 |           5.16136 |          1.60697 |         2.00014 |       0.655063 |         3.06382 |       0.986753 |         0.917004 |        0.442707 |          2.31773 |        0.849613 |         2.1975  |       0.851969 |         
3.01884 |        1.10911 |        1.24964 |      0.636928 |        1.79646 |      0.679314 |         1.52973 |       0.621986 |        1.59932 |      0.588011 |        0.870405 |       0.431609 |        0.598063 |       0.383861 |        1.37365 |      0.586272 |   14.6983   |   10.541  |   10.5097 |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| min       |     2015 |   11      |    0         |    0         | 1101      | 1106      |   26      |   26      |    0        |    1       |    1       |       1985       |            2019 |       1985       |            2019 |  -15        |           51.1111  |           7.07562 |         17.4815  |        2.74469  |         44.8182  |        3.36139  |           3.29032 |         1.55216  |           9.96774 |         2.08001  |          9.5     |        3.72607  |         13.6562  |         4.92913 |         4.93333 |       2.17237  |        18.7812  |       3.059    |          7.51852 |        2.42301  |         8.85185 |       2.38891  |          3.48387 |        1.5433   |         0.964286 |        0.827682 |        13.8387  |       2.58602  |          50.75    |          6.80608 |        18.0938  |       2.8364   |        44.2667  |       4.11773  |         3.40741  |        1.74991  |         11.4074  |         3.10376 |         8.65625 |       3.28372  |        13.2727  |        4.64434 |        7.46875 |      2.53459  |       18.4062  |      2.53102  |         7.67647 |       2.53011  |        9.0303  |      2.34423  |        3.88889  |       1.4704   |         2.16667 |       1.24291  |        14.2414 |      2.60872  |           51.1111  |           7.07562 |         17.4815  |        2.74469  |         44.9667  |         3.36139 |           3.29032 |         1.55216  |           9.96774 |         2.08001  |          9.5     |        3.72607  |         13.6562  |         4.92913 |         4.93333 |       2.17237  |        18.7812  |       3.059    |          7.51852 |        2.42301  |         7.41176 |       2.38891  |          3.48387 |        1.5433   |         0.964286 |        0.827682 |        12.0294  |       2.58602  |          50.75    |          6.80608 |        18.0938  |       2.8364   |        44.2667  |       4.11773  |         3.40741  |        1.74991  |         11.4074  |        3.10376  |         7.47059 |       3.28372  |        
10.9706  |        4.64434 |        7.46875 |      2.53459  |       18.7419  |      2.88835  |         7.67647 |       2.53011  |        9.0303  |      2.24914  |        3.96774  |       1.4704   |        1.97059  |       1.24291  |       14.2414  |      2.60872  |  -62        |   38      |   26      |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| 25%       |     2015 |   40      |    0         |    0         | 1154      | 1285      |   58      |   59      |    0        |    4       |    5       |       1985       |            2019 |       1985       |            2019 |   -3        |           63.2069  |           9.67593 |         22.0312  |        3.74539  |         51.8571  |        5.84766  |           5.51613 |         2.28578  |          16.6786  |         4.23301  |         12.5938  |        5.16129  |         18.2069  |         6.71285 |         9.51613 |       3.30005  |        22.1071  |       4.21929  |         11.125   |        3.302    |        11.5625  |       3.32546  |          5.43333 |        2.30685  |         2.54839  |        1.63763  |        17.2258  |       3.62558  |          63.2424  |         10.0064  |        21.931   |       3.74795  |        52.0645  |       5.88524  |         5.71875  |        2.40212  |         16.7647  |         4.47934 |        12.3     |       5.11481  |        17.963   |        6.64267 |        9.71875 |      3.47829  |       22.1613  |      4.20468  |        11.5625  |       3.47402  |       11.5333  |      3.26624  |        5.48148  |       2.35478  |         3       |       1.97464  |        17.3235 |      3.82971  |           63.3793  |           9.8103  |         22.129   |        3.76997  |         51.7576  |         5.80137 |           5.34483 |         2.31104  |          16.2857  |         4.22577  |         12.5517  |        5.23066  |         18.5667  |         6.90459 |         9.41935 |       3.2767   |        22.4286  |       4.33699  |         11.3667  |        3.40059  |        11.6667  |       3.30627  |          5.37931 |        2.301    |         2.8      |        1.71421  |        17.0938  |       3.65511  |          63.3871  |          9.9187  |        21.9062  |       3.77013  |        52.0667  |       5.89184  |         5.69697  |        2.4199   |         16.8333  |        4.53177  |        12.3333  |       5.02232  |        
17.8929  |        6.57757 |        9.75758 |      3.45041  |       22.0625  |      4.19814  |        11.3333  |       3.46873  |       11.4138  |      3.25411  |        5.64706  |       2.44291  |        3.03571  |       1.98502  |       17.5     |      3.83316  |  -10        |   65      |   54      |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| 50%       |     2015 |   74      |    0         |    0         | 1210      | 1359      |   66      |   67      |    0        |    9       |    9       |       1985       |            2019 |       1985       |            2019 |    0        |           66.7333  |          10.841   |         23       |        4.25163  |         54.1562  |        6.52204  |           6.45455 |         2.52812  |          18.5938  |         4.73789  |         13.8438  |        5.65947  |         19.8788  |         7.41902 |        10.4688  |       3.65119  |        23.25    |       4.60575  |         12.2424  |        3.70527  |        12.4     |       3.65539  |          6.13793 |        2.55014  |         3.31034  |        1.97368  |        18.3333  |       3.95977  |          67.129   |         10.896   |        23.4062  |       4.18889  |        53.931   |       6.50062  |         6.28125  |        2.67436  |         18.4194  |         5.05816 |        14       |       5.64597  |        20.2258  |        7.367   |       10.697   |      3.84875  |       23.4062  |      4.57854  |        12.3438  |       3.88298  |       12.5172  |      3.65205  |        6.09375  |       2.6435   |         3.4375  |       2.20064  |        18.1613 |      4.17107  |           67.2581  |          10.8508  |         23.5357  |        4.25163  |         54.1333  |         6.56678 |           6.3     |         2.54931  |          18.3125  |         4.6966   |         13.9375  |        5.71021  |         20.1724  |         7.51745 |        10.5455  |       3.61181  |        23.5667  |       4.71278  |         12.4615  |        3.78825  |        12.4375  |       3.71252  |          6.08824 |        2.57606  |         3.5      |        2.01942  |        18.1786  |       3.93868  |          66.9032  |         10.896   |        23.1667  |       4.24451  |        54       |       6.53025  |         6.34375  |        2.67335  |         18.4194  |        5.06899  |        13.8485  |       5.63762  |        19.9  
   |        7.21271 |       10.5     |      3.79919  |       23.3571  |      4.67492  |        12.2     |       3.88298  |       12.4667  |      3.60482  |        6.14815  |       2.70137  |        3.42857  |       2.20788  |       18.2333  |      4.188    |   -2        |   72      |   61      |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| 75%       |     2015 |  103      |    0         |    0         | 1281      | 1414      |   74      |   75      |    1        |   13       |   12       |       1985       |            2019 |       1985       |            2019 |    4        |           69.8276  |          11.9543  |         24.3548  |        4.63669  |         56.2     |        7.23792  |           7.22222 |         2.83071  |          20.3438  |         5.15789  |         15.2424  |        6.21709  |         21.8333  |         8.13569 |        11.8235  |       4.07193  |        24.4242  |       5.12824  |         13.4194  |        4.2016   |        13.5     |       4.08305  |          6.76667 |        2.85226  |         3.93939  |        2.32993  |        19.4688  |       4.46429  |          70.3793  |         11.8988  |        24.75    |       4.68399  |        55.9355  |       7.16762  |         7.03846  |        2.95836  |         20       |         5.52916 |        15.1212  |       6.17171  |        22       |        7.99489 |       11.3103  |      4.25424  |       24.5312  |      5.13143  |        13.4483  |       4.29228  |       13.5312  |      4.02706  |        6.67647  |       2.9314   |         3.8125  |       2.50079  |        19.2581 |      4.56218  |           69.7667  |          12.0067  |         24.5     |        4.70899  |         56       |         7.23792 |           6.96667 |         2.75291  |          19.6562  |         5.10578  |         15.0625  |        6.31264  |         21.7812  |         8.21307 |        11.6875  |       4.10469  |        24.5625  |       5.17167  |         13.6667  |        4.20673  |        13.3793  |       4.10621  |          6.78125 |        2.81509  |         4.13793  |        2.32737  |        19.3125  |       4.4248   |          69.5926  |         12.0801  |        24.4815  |       4.68816  |        55.8929  |       7.22451  |         6.93548  |        2.96316  |         20.1212  |        5.546    |        15.129   |       6.22527  |        
21.6897  |        7.99877 |       11.4483  |      4.23616  |       24.4194  |      5.12416  |        13.4     |       4.26956  |       13.3793  |      4.01654  |        6.67742  |       2.98366  |        3.79412  |       2.49383  |       19.2414  |      4.7179   |    9        |   79      |   68      |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| max       |     2015 |  154      |    4         |    1         | 1460      | 1464      |  116      |  126      |    1        |   16       |   16       |       2014       |            2019 |       2014       |            2019 |   15        |           83.8148  |          16.5944  |         29.2963  |        6.31     |         67.3929  |        9.84401  |          10.9286  |         4.56175  |          34.8214  |         8.86965  |         19.25    |        8.39282  |         26.0588  |        10.3776  |        16.8438  |       6.33148  |        29       |       6.90314  |         17.7429  |        5.62746  |        18.2414  |       5.73719  |         10.9375  |        4.24917  |         7.87879  |        3.39295  |        23.3438  |       6.16956  |          83.8889  |         16.0707  |        30       |       6.12221  |        62.7941  |      10.0667   |         9.42857  |        4.22486  |         24.5     |         8.95718 |        20       |       7.9761   |        27.4333  |       10.671   |       14.5185  |      6.22707  |       29.8929  |      6.8251   |        16.9     |       6.56917  |       19.625   |      5.49477  |        9.37931  |       4.58128  |         5.44828 |       3.09987  |        22.4688 |      5.58301  |           83.8148  |          16.5944  |         29.2963  |        6.31     |         67.3929  |         9.84401 |          10.9286  |         4.56175  |          34.8214  |         8.86965  |         19.25    |        8.39282  |         26.0588  |        10.3776  |        16.8438  |       6.33148  |        29       |       6.90314  |         17.7429  |        5.57457  |        18.2414  |       5.73719  |         10.9375  |        4.24917  |         7.87879  |        3.39295  |        23.3438  |       6.16956  |          83.8889  |         16.0707  |        30       |       6.12221  |        62.7941  |      10.0667   |         9.42857  |        4.22486  |         24.5     |        8.95718  |        20       |       7.9761   |        
27.5862  |       10.671   |       14.5185  |      6.22707  |       29.8929  |      6.8251   |        16.9     |       6.56917  |       19.625   |      5.49477  |        9.37931  |       4.58128  |        5.44828  |       3.09987  |       22.4688  |      5.58301  |   69        |  126      |  111      |  nan |           nan |           nan |           nan |           nan |   nan |     nan |     nan |         nan |         nan |
| n_missing |        0 |    0      |    0         |    0         |    0      |    0      |    0      |    0      |    0        | 4255       | 4260       |          0       |               0 |          0       |               0 | 5045        |            0       |           0       |          0       |        0        |          0       |        0        |           0       |         0        |           0       |         0        |          0       |        0        |          0       |         0       |         0       |       0        |         0       |       0        |          0       |        0        |         0       |       0        |          0       |        0        |         0        |        0        |         0       |       0        |           0       |          0       |         0       |       0        |         0       |       0        |         0        |        0        |          0       |         0       |         0       |       0        |         0       |        0       |        0       |      0        |        0       |      0        |         0       |       0        |        0       |      0        |        0        |       0        |         0       |       0        |         0      |      0        |            0       |           0       |          0       |        0        |          0       |         0       |           0       |         0        |           0       |         0        |          0       |        0        |          0       |         0       |         0       |       0        |         0       |       0        |          0       |        0        |         0       |       0        |          0       |        0        |         0        |        0        |         0       |       0        |           0       |          0       |         0       |       0        |         0       |       0        |         0        |        0        |          0       |        0        |         0       |       0        |         0    
   |        0       |        0       |      0        |        0       |      0        |         0       |       0        |        0       |      0        |        0        |       0        |        0        |       0        |        0       |      0        |    0        |    0      |    0      |    0 |             0 |             0 |             0 |             0 |     0 |    4255 |    4260 |           0 |           0 |

2 Visualizations

2.1 Heatmap of pairwise matchups - Regular

# Boolean indicator of which (team1, team2) pairs have at least one
# regular-season game (tourney == 0), laid out on the `teams_pairwise` index.
# reindex(fill_value=False) keeps the Series boolean; reindexing to NaN and
# then calling fillna(False) would pass through object dtype.
games_per_pair = data[data['tourney'] == 0].groupby(['team1', 'team2'])['scorediff'].size()
pairwise_matchups = (games_per_pair > 0).reindex(teams_pairwise, fill_value=False)
# Mark the swapped (team2, team1) entry of every observed pair as well, so the
# unstacked matrix is symmetric.
reversed_pairs = pairwise_matchups[pairwise_matchups].swaplevel().index.values
pairwise_matchups.loc[reversed_pairs] = True
# Pivot the second index level into columns: rows are team1, columns are team2.
pairwise_matchups = pairwise_matchups.unstack()

fig, ax = plt.subplots(figsize=(10, 10))
# Rows and columns are both put in `teams_ordered` order before plotting.
sns.heatmap(pairwise_matchups.loc[teams_ordered, teams_ordered], ax=ax, cbar=False)
ax.set_title('Regular Season Pairwise Matchups')
show_fig('regular_pairwise_matchup_heatmap.png')

regular_pairwise_matchup_heatmap.png

The figure above shows the pairwise matchups between teams, with the teams ordered by conference. The diagonal blocks indicate that teams play [almost] every other team within their conference. Games across conferences are much sparser.

2.2 Heatmap of pairwise matchups - Tournament

# Boolean indicator of which (team1, team2) pairs have at least one
# tournament game (tourney == 1), laid out on the `teams_pairwise` index.
# reindex(fill_value=False) keeps the Series boolean; reindexing to NaN and
# then calling fillna(False) would pass through object dtype.
tourney_games_per_pair = data[data['tourney'] == 1].groupby(['team1', 'team2'])['scorediff'].size()
tourney_pairwise_matchups = (tourney_games_per_pair > 0).reindex(teams_pairwise, fill_value=False)
# Mark the swapped (team2, team1) entry of every observed pair as well, so the
# unstacked matrix is symmetric.
reversed_tourney_pairs = tourney_pairwise_matchups[tourney_pairwise_matchups].swaplevel().index.values
tourney_pairwise_matchups.loc[reversed_tourney_pairs] = True
# Pivot the second index level into columns: rows are team1, columns are team2.
tourney_pairwise_matchups = tourney_pairwise_matchups.unstack()

fig, ax = plt.subplots(figsize=(10, 10))
# Rows and columns are both put in `teams_ordered` order before plotting.
sns.heatmap(tourney_pairwise_matchups.loc[teams_ordered, teams_ordered], ax=ax, cbar=False)
ax.set_title('Tourney Season Pairwise Matchups')
show_fig('tourney_pairwise_matchup_heatmap.png')

tourney_pairwise_matchup_heatmap.png

As shown above, most games in the tournament are played outside of the conference. A good model must be able to generalize to games played across conferences.

3 Matrix completion by nuclear norm minimization

from fancyimpute import NuclearNormMinimization

# Mean regular-season score difference (score1 - score2) for every observed
# (team1, team2) pair; pairs absent from the data become NaN after reindexing.
pairwise_scorediff = (
    data[data['tourney'] == 0]
    .groupby(['team1', 'team2'])['scorediff']
    .mean()
    .reindex(teams_pairwise)
)
has_values = pairwise_scorediff.notna()
observed_scorediff = pairwise_scorediff[has_values]
# Fill the swapped (team2, team1) entry with the negated margin.
pairwise_scorediff.loc[observed_scorediff.swaplevel().index.values] = (-observed_scorediff).values
# Rows are team1, columns are team2.
pairwise_scorediff = pairwise_scorediff.unstack()

# Fill the missing entries by nuclear norm minimization: a convex optimization
# that looks for a low-rank matrix agreeing with the observed values. Slow!
X_filled_nnm = NuclearNormMinimization().fit_transform(pairwise_scorediff)

3.1 Accuracy

# Re-attach the team labels of the input matrix to the completed matrix so a
# predicted margin can be looked up by (team1, team2).
df_pred = pd.DataFrame(
    X_filled_nnm,
    index=pairwise_scorediff.index,
    columns=pairwise_scorediff.columns,
)
tourney_matchups = data.loc[data['tourney'] == 1, ['team1', 'team2']]
y_true = data.loc[data['tourney'] == 1, 'scorediff']
y_pred = np.array([df_pred.loc[team_a, team_b] for team_a, team_b in tourney_matchups.values])
# A prediction counts as correct when "margin > 0" agrees between predicted and actual.
sign_matches = (y_pred > 0) == (y_true > 0)
print('accuracy = {}'.format(np.mean(sign_matches)))
accuracy = 0.6716417910447762

3.2 Prediction vs Actual

# Scatter of actual (x) against predicted (y) tournament score differences.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(y_true, y_pred)
# y = x reference line drawn over the integers -20..19.
identity_line = np.arange(-20, 20)
ax.plot(identity_line, identity_line)
ax.grid(True)
ax.set_ylabel('pred')
ax.set_xlabel('actual')
show_fig('pred_vs_actual_nnm.png')

pred_vs_actual_nnm.png

Points in the second and fourth quadrants are incorrect predictions: there the predicted score difference has the opposite sign from the actual one.