import json
import pickle
from datetime import date, datetime, timedelta
from urllib.request import urlopen

import matplotlib.font_manager as font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import seaborn as sns
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from matplotlib.pyplot import figure
from matplotlib.ticker import MaxNLocator
from scipy import stats

# ---------------------------------------------------------------------------
# Team-abbreviation lookup tables.
# ---------------------------------------------------------------------------
# team_games_df = pd.read_csv('data/team_games_all.csv',index_col=[0])
# player_games_df = pd.read_csv('data/player_games_cards.csv',index_col=[0]).sort_values(by='date').reset_index(drop=True)
team_abv_nst = pd.read_csv('data/team_abv_nst.csv')
#player_games_df = player_games_df.loc[:, ~player_games_df.columns.str.contains('^Unnamed')]
#team_abv = pd.read_csv('team_abv.csv')
#team_games_df = team_games_df.merge(right=team_abv,left_on='team',right_on='team_name',how='left').drop(columns='team_name')
team_abv = pd.read_csv('data/team_abv.csv')

# ---------------------------------------------------------------------------
# Legacy schedule loader (NHL stats API), kept for reference only.  It is the
# origin of the column names game_home / game_away / game_date / status that
# the rest of the script still expects.
# ---------------------------------------------------------------------------
# # Loop over the counter and format the API call
# r = requests.get('https://statsapi.web.nhl.com/api/v1/schedule?startDate=2022-10-01&endDate=2023-06-01')
# schedule = r.json()
# schedule = json.loads(urlopen('https://statsapi.web.nhl.com/api/v1/schedule?startDate=2023-10-07&endDate=2024-04-19').read())
# def flatten(t):
#     return [item for sublist in t for item in sublist]
# game_id = flatten([[x['gamePk'] for x in schedule['dates'][y]['games']] for y in range(0,len(schedule['dates']))])
# game_type = flatten([[x['gameType'] for x in schedule['dates'][y]['games']] for y in range(0,len(schedule['dates']))])
# game_date = flatten([[(pd.to_datetime(x['gameDate']) - timedelta(hours=8)) for x in schedule['dates'][y]['games']] for y in range(0,len(schedule['dates']))])
# game_final = flatten([[x['status']['detailedState'] for x in schedule['dates'][y]['games']] for y in range(0,len(schedule['dates']))])
# game_home = flatten([[x['teams']['home']['team']['name'] for x in schedule['dates'][y]['games']] for y in range(0,len(schedule['dates']))])
# game_away = flatten([[x['teams']['away']['team']['name'] for x in schedule['dates'][y]['games']] for y in range(0,len(schedule['dates']))])
# schedule_df = pd.DataFrame(data={'game_id': game_id, 'game_type':game_type,'game_date' : game_date, 'game_home' : game_home, 'game_away' : game_away,'status' : game_final})
# schedule_df = schedule_df[schedule_df.game_type == 'R'].reset_index(drop=True)
# schedule_df = schedule_df[schedule_df.status != 'Postponed']
# schedule_df = schedule_df.replace('Montréal Canadiens','Montreal Canadiens')

# ---------------------------------------------------------------------------
# Current schedule loader: first HTML table on the Hockey-Reference page.
# ---------------------------------------------------------------------------
schedule = pd.read_html('https://www.hockey-reference.com/leagues/NHL_2024_games.html')[0]
#schedule.to_csv('schedule/schedule_'+str(date.today())+'.csv')
#schedule = pd.read_csv('schedule/schedule_'+str(date.today())+'.csv')

# Align the site's spelling with the spelling used as a merge key below.
schedule = schedule.replace('St Louis Blues', 'St. Louis Blues')

# Attach abbreviations for the visiting team (unsuffixed columns) and then the
# home team ('_home' suffix).  The inner joins drop rows whose team name has
# no match in team_abv.
schedule_df = schedule.merge(
    right=team_abv,
    left_on='Visitor',
    right_on='team_name',
    how='inner',
    suffixes=['', '_away'],
)
schedule_df = schedule_df.merge(
    right=team_abv,
    left_on='Home',
    right_on='team_name',
    how='inner',
    suffixes=['', '_home'],
)

# The previous version merged team_abv in two more times, keyed on
# 'game_home' / 'game_away'.  Those columns were only produced by the legacy
# loader above; the scraped frame is keyed on 'Home' / 'Visitor', and the two
# merges above already supply both abbreviations.  Rename instead of merging
# again so the frame ends up with the expected column names.
schedule_df_merge = schedule_df.rename(
    columns={
        'Home': 'game_home',
        'Visitor': 'game_away',
        'team_abv': 'team_abv_away',
    }
)
schedule_df_merge = schedule_df_merge.drop(
    columns=['team_name', 'team_name_home'], errors='ignore'
)

# NOTE(review): the two fallbacks below assume the scraped table names its
# date column 'Date' and leaves the visitor goals column 'G' empty for games
# that have not been played -- confirm against the live page.
if 'game_date' not in schedule_df_merge.columns:
    schedule_df_merge['game_date'] = schedule_df_merge['Date']
if 'status' not in schedule_df_merge.columns:
    schedule_df_merge['status'] = np.where(
        schedule_df_merge['G'].notna(), 'Final', 'Scheduled'
    )

#schedule_df_merge.game_date = pd.to_datetime(schedule_df_merge['game_date']).dt.tz_convert(tz='US/Eastern').dt.date
# schedule_df_merge = schedule_df_merge.set_index(pd.DatetimeIndex(schedule_df_merge.game_date).strftime('%Y-%m-%d'))

# Index the schedule by game date; the column itself is no longer needed.
schedule_df_merge.index = pd.to_datetime(schedule_df_merge.game_date)
schedule_df_merge = schedule_df_merge.drop(columns='game_date')
# ---------------------------------------------------------------------------
# Schedule: reduce the index to plain dates and keep games up to 2024-05-01.
# ---------------------------------------------------------------------------
#schedule_df_merge.index = schedule_df_merge.index.tz_convert('US/Pacific')
schedule_df_merge.index = schedule_df_merge.index.date
schedule_df_merge = schedule_df_merge.sort_index()
schedule_df_merge = schedule_df_merge[schedule_df_merge.index <= date(2024, 5, 1)]

# Completed games only.  Every game contributes two rows to the counter (one
# per team) so that each team gets a running game number.  This was computed
# twice with identical inputs; it is now computed once.
schedule_df_merge_final = schedule_df_merge[schedule_df_merge['status'] == 'Final']
schedule_ccount_df = (
    pd.DataFrame(
        data={
            'date': list(schedule_df_merge_final.index) * 2,
            'team': list(schedule_df_merge_final.team_abv_away)
            + list(schedule_df_merge_final.team_abv_home),
        }
    )
    .sort_values(by='date')
    .reset_index(drop=True)
)
schedule_ccount_df['team_game'] = schedule_ccount_df.groupby('team').cumcount() + 1
schedule_ccount_df.date = pd.to_datetime(schedule_ccount_df.date)

# Today's date in US/Pacific, as a midnight Timestamp.
today = pd.to_datetime(datetime.now(pytz.timezone('US/Pacific')).strftime('%Y-%m-%d'))

# Edmonton's schedule, and the part of it dated today or earlier.
team_schdule = schedule_df_merge[
    (schedule_df_merge['team_abv_home'] == 'EDM')
    | (schedule_df_merge['team_abv_away'] == 'EDM')
]
# The index holds datetime.date objects; ordering them against a Timestamp
# raises TypeError on pandas 2.x, so compare date with date.
team_schdule_live = team_schdule[team_schdule.index <= today.date()]

# ---------------------------------------------------------------------------
# Team and player game logs.
# ---------------------------------------------------------------------------
team_games_df = pd.read_csv('data/team_games_all.csv', index_col=[0])
player_games_df = (
    pd.read_csv('data/player_games_cards.csv', index_col=[0])
    .sort_values(by='date')
    .reset_index(drop=True)
)
team_abv_df = pd.read_csv('data/team_abv.csv')

# Drop columns whose name starts with 'Unnamed'.
player_games_df = player_games_df.loc[:, ~player_games_df.columns.str.contains('^Unnamed')]
team_games_df = team_games_df.merge(
    right=team_abv_df, left_on='team', right_on='team_name', how='left'
).drop(columns='team_name')

# Keep the last record for each (player, date) pair.
player_games_df = player_games_df.drop_duplicates(
    subset=['player_id', 'date'], keep='last'
).reset_index(drop=True)
player_games_df.date = pd.to_datetime(player_games_df.date)

# NOTE(review): this compares the *date* column with the string 'Final',
# which looks like a status filter applied to the wrong column.  The intended
# column is not visible here, so the condition is left unchanged -- confirm.
# .copy() makes the column assignment below act on an independent frame.
team_games_df = team_games_df[team_games_df['date'] == 'Final'].copy()

# Running game numbers per team and per player.
team_games_df['team_game'] = team_games_df.groupby('team').cumcount() + 1
player_games_df = player_games_df.merge(
    right=schedule_ccount_df,
    left_on=['Team', 'date'],
    right_on=['team', 'date'],
    how='left',
)
player_games_df['player_game'] = player_games_df.groupby('player_id').cumcount() + 1

# Daily range starting six days after the first player game.
date_range_list = pd.date_range(
    start=player_games_df.date.min() + timedelta(days=6),
    end=player_games_df.date.max(),
)

# Lookup tables; the 'All' entry maps to an empty string.
team_abv_nst_dict = {'All': ''} | team_abv_nst.set_index('team_abv')['team_name'].to_dict()
position_dict = {'All': '', 'F': 'Forwards', 'D': 'Defense'}

player_games_df = player_games_df.rename(columns={'Total Points_pp': 'PP Points'})
stat_input_list = [
    'TOI',
    'Goals',
    'Total Assists',
    'First Assists',
    'Total Points',
    'PP Points',
    'Shots',
    'Hits',
    'Shots Blocked',
]

# ---------------------------------------------------------------------------
# Season totals per player and a ranked leaderboard.
# ---------------------------------------------------------------------------
df_cum_stat_total = player_games_df.groupby(['player_id', 'Player', 'Position']).agg(
    GP=('GP', 'count'),
    Total_Points=('Total Points', 'sum'),
).reset_index()

df_all_sort = df_cum_stat_total.copy()
stat_pick = 'Total_Points'
count = 11
not_position = ''
team = ''

# .copy() after each boolean filter so the column assignments that follow
# write to an independent frame rather than to a slice of the previous one.
df_all_sort = df_all_sort[(df_all_sort['Position'] != not_position)].copy()
df_all_sort[stat_pick + ' per game'] = df_all_sort[stat_pick] / df_all_sort['GP']
df_all_sort[stat_pick + ' Rank'] = df_all_sort[stat_pick].rank(ascending=False, method='min')
df_all_sort = df_all_sort[df_all_sort[stat_pick + ' Rank'] <= count].copy()
df_all_sort[stat_pick + ' per game Rank'] = df_all_sort[stat_pick + ' per game'].rank(
    ascending=False, method='min'
)
# #df_all_sort.sort_values(by=[stat_pick,stat_pick+' per game','Total Points'],ascending = (False, False,False))

# NOTE(review): the file text is cut off in the middle of the next statement;
# the comparison operator and everything after it are missing.  The fragment
# is preserved verbatim below -- restore it from version control.
# df_all_sort_list = df_all_sort[df_all_sort[stat_pick+' Rank']=
# NOTE(review): everything from here to the end of the file is whitespace-collapsed, and
# the text immediately before this point is missing: the first line below starts in the
# middle of an expression, and the loop and function headers that own these statements
# are not present.  The code is therefore left exactly as found; restore the line breaks
# and indentation from version control before changing any logic.
#
# What the visible statements do, in order:
#   * For the frame at team_schedule_url_merge[i]: index it by team_game, reindex it over
#     every integer from the smallest to the largest team_game, and store the running
#     total of the `stat` column in a new 'stat' column.
#   * Append the frame to itself, append a copy of its first row, set row 0 to zero, and
#     on every odd row subtract 1 from player_game / team_game and subtract that row's
#     `stat` value from the running total (presumably to draw a stair-step line --
#     confirm).
#   * Record the NaN-ignoring maxima of player_game, team_game and 'stat' in
#     max_games_player, max_games_team and max_stat.
#   * Create a 15x15 matplotlib figure, draw one seaborn line and one matplotlib line per
#     frame, then set titles, axis labels, tick locators, footer text and the legend.
#   * Build the application object with App(app_ui, server).
#
# Review notes for whoever restores this section:
#   * DataFrame.append was removed in pandas 2.0 (assuming these objects are DataFrames,
#     which the .reindex / .iloc calls suggest); pd.concat is the replacement.
#   * Writes such as frame['stat'][0] = 0 are chained assignments; pandas documents that
#     they may modify a temporary copy, so .loc[row, column] is the reliable form.
#   * sns.lineplot is called with x and y positionally -- verify that the installed
#     seaborn version still accepts that.
#   * ax.legend_.remove() runs before any legend is explicitly created in the visible
#     code -- verify that a legend exists at that point.
player_games_df.date)].reset_index(drop=True)) #print('touble',i, player_lookup_list[i],len(player_games_df[(player_games_df.player_id == player_lookup_list[i])])) team_schedule_url_merge[i].index = team_schedule_url_merge[i].team_game team_schedule_url_merge[i] = team_schedule_url_merge[i].reindex(np.arange(team_schedule_url_merge[i].team_game.min(), team_schedule_url_merge[i].team_game.max() + 1)).reset_index(drop=True) #team_schedule_url_merge[0]['team_game'] = team_schedule_url_merge[0]['index'] #team_schedule_url_merge[0]['player_game'] = #schedule_ccount_df[schedule_ccount_df['team'].isin(team_schedule_url_merge[0].Team.unique())].merge(right=team_schedule_url_merge[0],left_on=['date','team'],right_on=['date','Team'],how='left') team_schedule_url_merge[i]['stat'] = team_schedule_url_merge[i][stat].cumsum() #team_schedule_url_merge[i]['stat'] = team_schedule_url_merge[i][stat_pick] team_schedule_url_merge[i] = team_schedule_url_merge[i].append(team_schedule_url_merge[i]).sort_index() team_schedule_url_merge[i] = team_schedule_url_merge[i].append(team_schedule_url_merge[i].iloc[0]).sort_index().reset_index(drop=True) team_schedule_url_merge[i]['team_game'][0] = 0 team_schedule_url_merge[i]['player_game'][0] = 0 team_schedule_url_merge[i]['stat'][0] = 0 for j in range(1,len(team_schedule_url_merge[i]),2): team_schedule_url_merge[i]['player_game'][j] = team_schedule_url_merge[i]['player_game'][j]-1 team_schedule_url_merge[i]['team_game'][j] = team_schedule_url_merge[i]['team_game'][j]-1 team_schedule_url_merge[i]['stat'][j] = team_schedule_url_merge[i]['stat'][j] - team_schedule_url_merge[i][stat][j] if len(team_schedule_url_merge[i]) >3: if pd.isna(team_schedule_url_merge[i].iloc[3]['player_game']) and pd.isna(team_schedule_url_merge[i].iloc[1]['player_game']) == True: team_schedule_url_merge[i]['player_game'][2] = np.nan team_schedule_url_merge[i]['stat'][2] = np.nan if len(team_schedule_url_merge[i]) >3: if 
pd.isna(team_schedule_url_merge[i].iloc[len(team_schedule_url_merge[i])-1]['player_game']) == True: team_schedule_url_merge[i]['stat'][len(team_schedule_url_merge[i])-1] = np.nanmax(team_schedule_url_merge[i]['stat']) if not (team_schedule_url_merge[i]['team_game'].values[1] == team_schedule_url_merge[i]['player_game'].values[0]): team_schedule_url_merge[i].loc[0,'team_game'] = np.nan max_games_player.append(np.around(np.nanmax(team_schedule_url_merge[i]['player_game']))) max_games_team.append(np.around(np.nanmax(team_schedule_url_merge[i]['team_game']))) max_stat.append((np.around(np.nanmax(team_schedule_url_merge[i]['stat'])))) fig, ax = plt.subplots(figsize=(15,15)) cgfont = {'fontname':'Century Gothic'} font = font_manager.FontProperties(family='Century Gothic', style='normal', size=14) ax.axhline(0,color='black',linestyle ="--",linewidth=2,alpha=1.0,label='Missed Games') ax.axhline(0,color='black',linestyle ="-",linewidth=2,alpha=1.0) if 'Total' in stat: stat = stat.replace('Total ',"") colour_scheme = ['#648FFF','#785EF0','#DC267F','#FE6100','#FFB000','#FAEF3B','#861318','#2ED3BC','#341BBF','#B37E2C'] for i in range(len(team_schedule_url_merge)): sns.lineplot(team_schedule_url_merge[i].reset_index()['team_game'],team_schedule_url_merge[i].reset_index()['stat'],linewidth=3-i*.2,color=colour_scheme[i]) plt.plot(team_schedule_url_merge[i]['team_game'],team_schedule_url_merge[i]['stat'],color=ax.lines[i*2+2].get_color(),label=str(i+1)+'. 
'+team_schedule_url_merge[i]['Player'][0]+', '+str(int(max_stat[i]))+' '+stat+' in '+str(int(max(team_schedule_url_merge[i]['player_game'])))+' Games',linewidth=6) ax.lines[i*2+2].set_linestyle("--") fig.set_facecolor('#ffffff') ax.set(xlim=(0,max([team_schedule_url_merge[x].team_game.max() for x in range(len(team_schedule_url_merge))]))) ax.set(ylim=(0,max([team_schedule_url_merge[x].stat.max() for x in range(len(team_schedule_url_merge))]))) ax.legend_.remove() if per_game == False: fig.suptitle(f'{rookie}{team_select_title}{position_select_title}{stat} Race',y=.98,fontsize=32,color='black',**cgfont) ax.set_ylabel(stat,fontsize=20,color='black',**cgfont) # else: # fig.suptitle(stat+' Per Game, All Situations',y=.99,fontsize=48,color='black',**cgfont) # ax.set_ylabel(stat+"/GP",fontsize=20,color='black',**cgfont) ax.set_title(str(current_season)[0:4]+'-'+str(start_season)[-4:]+' Season',y=1.01,fontsize=18,color='black',**cgfont,x=0,ha='left') ax.set_xlabel('Team Game',fontsize=20,color='black',**cgfont) ax.tick_params(axis="x", labelsize=24,colors='black') ax.set_facecolor('#ffffff') ax.xaxis.set_major_locator(MaxNLocator(integer=True)) ax.tick_params(axis="y", labelsize=24,colors='black') ax.yaxis.set_major_locator(MaxNLocator(integer=True)) fig.text(x=0.025,y=0.01,s="Created By: @TJStats",color='black', fontsize=20, horizontalalignment='left',**cgfont) fig.text(x=0.975,y=0.01,s="Data: Natural Stat Trick",color='black', fontsize=20, horizontalalignment='right',**cgfont) fig.text(x=.975,y=0.92,s='Date: '+input.date().strftime('%B %d, %Y'),color='black', fontsize=18, horizontalalignment='right',**cgfont) ax.legend(prop=font,bbox_to_anchor=(0.01, 0.99),loc='upper left',framealpha=1,frameon=True) plt.tight_layout() #fig.savefig('gif_race/'+stat+rookie+str(date_range_list[k].date())+'.png', facecolor=fig.get_facecolor(), edgecolor='none',bbox_inches='tight',dpi=100) #plt.close() #fig.legend(prop=font,loc='best',framealpha=1,frameon=True) app = App(app_ui, server)