# coding: utf-8
# In[20]:
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook. `run_line_magic` is the
# supported API; the older `magic()` helper is deprecated in IPython.
get_ipython().run_line_magic('matplotlib', 'inline')
# Let's download some data from the links below (the data is a bit dated at the moment, but that doesn't matter here):
#
# http://blog.smellthedata.com/2012/03/data-for-2012.html
#
# http://blog.smellthedata.com/2011/03/selection-sunday-today.html
# Open up the `teams.tsv` file to get a list of NCAA basketball teams and their abbreviations:
# In[21]:
# Build the team lookup tables from `teams.tsv`.
# Each non-blank line is split on its first run of whitespace into
# "<abbreviation> <full team name>".
teams = {}      # abbreviation -> full team name
teamsID = {}    # abbreviation -> integer index (order of appearance in the file)
teamsName = {}  # integer index -> full team name
k = 0           # index assigned to the next team read
# `with` closes the file even if a line fails to parse.
with open('teams.tsv') as teamfile:
    for line in teamfile:
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line) rather than
            # failing on the two-value unpack below.
            continue
        # Split only once so names containing spaces stay in one piece.
        (teamlabel, teamname) = line.split(None, 1)
        fullname = teamname.strip()
        teams[teamlabel] = fullname
        teamsID[teamlabel] = k
        teamsName[k] = fullname
        k += 1
nteams = len(teams)  # number of distinct abbreviations read
# We're going to look at up to `maxgames` basketball game scores
#
# We will construct `maxgames` edges called `winlossIJ` and a weight for these edges called `winlossData`
# In[22]:
# Upper bound on the number of games; both arrays below are preallocated
# to this length and start out filled with zeros.
maxgames = 100000
# One row per game, two integer columns: (winner, loser).
winlossIJ = np.zeros(shape=(maxgames, 2), dtype=int)
# One floating-point weight per game (edge weight).
winlossData = np.zeros(shape=maxgames, dtype=float)
# Now read the basketball scores.
#
# For each line read as
# - game date
# - home team
# - away team
# - home score
# - away score
# - winner
#
# We have to decide: how "much" of a win is something? We'll mark the winner as a 1 and the loser as a 0. If the score is close (less than `breakscore`) then give the winner a weight of 0.5 and the loser a weight of 0.4. Otherwise weight the score closer to 1.0.
#
# In[23]:
k=0
ngames=0
breakscore = 6
gamesfile = open('GameResults_20110311.tsv')
for line in gamesfile:
(gamedate,hteam,ateam,hscore,ascore,winner)=line.split()
if hteam=='UNK' or ateam=='UNK':
continue
hscore=int(hscore)
ascore=int(ascore)
# winner = int(winner)
gamedate=int(gamedate)
if gamedate>=20111107 and gamedate<=20120311:
if winner==1:
wteam = hteam
lteam = ateam
else:
lteam = hteam
wteam = ateam
scorediff = abs(hscore-ascore)
if scorediff