import time

import gspread
import numpy as np
import pandas as pd
import pymongo  # not referenced in this script; kept because other code may rely on it
import streamlit as st
from scipy.stats import poisson

# Must remain the first Streamlit command executed by the script.
st.set_page_config(layout="wide")


@st.cache_resource
def init_conn():
    """Build the gspread client used to read the MLB workbook.

    Returns:
        A ``(client, url)`` tuple: the authorised gspread client and the URL
        of the spreadsheet that holds the model data.
    """
    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

    # SECURITY(review): a live service-account private key is committed in
    # source. It should be rotated and loaded from Streamlit secrets (or an
    # environment variable) instead. It is left untouched here because moving
    # it requires a matching deployment/secrets change.
    credentials = {
        "type": "service_account",
        "project_id": "model-sheets-connect",
        "private_key_id": "0e0bc2fdef04e771172fe5807392b9d6639d945e",
        "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDiu1v/e6KBKOcK\ncx0KQ23nZK3ZVvADYy8u/RUn/EDI82QKxTd/DizRLIV81JiNQxDJXSzgkbwKYEDm\n48E8zGvupU8+Nk76xNPakrQKy2Y8+VJlq5psBtGchJTuUSHcXU5Mg2JhQsB376PJ\nsCw552K6Pw8fpeMDJDZuxpKSkaJR6k9G5Dhf5q8HDXnC5Rh/PRFuKJ2GGRpX7n+2\nhT/sCax0J8jfdTy/MDGiDfJqfQrOPrMKELtsGHR9Iv6F4vKiDqXpKfqH+02E9ptz\nBk+MNcbZ3m90M8ShfRu28ebebsASfarNMzc3dk7tb3utHOGXKCf4tF8yYKo7x8BZ\noO9X4gSfAgMBAAECggEAU8ByyMpSKlTCF32TJhXnVJi/kS+IhC/Qn5JUDMuk4LXr\naAEWsWO6kV/ZRVXArjmuSzuUVrXumISapM9Ps5Ytbl95CJmGDiLDwRL815nvv6k3\nUyAS8EGKjz74RpoIoH6E7EWCAzxlnUgTn+5oP9Flije97epYk3H+e2f1f5e1Nn1d\nYNe8U+1HqJgILcxA1TAUsARBfoD7+K3z/8DVPHI8IpzAh6kTHqhqC23Rram4XoQ6\nzj/ZdVBjvnKuazETfsD+Vl3jGLQA8cKQVV70xdz3xwLcNeHsbPbpGBpZUoF73c65\nkAXOrjYl0JD5yAk+hmYhXr6H9c6z5AieuZGDrhmlFQKBgQDzV6LRXmjn4854DP/J\nI82oX2GcI4eioDZPRukhiQLzYerMQBmyqZIRC+/LTCAhYQSjNgMa+ZKyvLqv48M0\n/x398op/+n3xTs+8L49SPI48/iV+mnH7k0WI/ycd4OOKh8rrmhl/0EWb9iitwJYe\nMjTV/QxNEpPBEXfR1/mvrN/lVQKBgQDuhomOxUhWVRVH6x03slmyRBn0Oiw4MW+r\nrt1hlNgtVmTc5Mu+4G0USMZwYuOB7F8xG4Foc7rIlwS7Ic83jMJxemtqAelwOLdV\nXRLrLWJfX8+O1z/UE15l2q3SUEnQ4esPHbQnZowHLm0mdL14qSVMl1mu1XfsoZ3z\nJZTQb48CIwKBgEWbzQRtKD8lKDupJEYqSrseRbK/ax43DDITS77/DWwHl33D3FYC\nMblUm8ygwxQpR4VUfwDpYXBlklWcJovzamXpSnsfcYVkkQH47NuOXPXPkXQsw+w+\nDYcJzeu7F/vZqk9I7oBkWHUrrik9zPNoUzrfPvSRGtkAoTDSwibhoc5dAoGBAMHE\nK0T/ANeZQLNuzQps6S7G4eqjwz5W8qeeYxsdZkvWThOgDd/ewt3ijMnJm5X05hOn\ni4XF1euTuvUl7wbqYx76Wv3/1ZojiNNgy7ie4rYlyB/6vlBS97F4ZxJdxMlabbCW\n6b3EMWa4EVVXKoA1sCY7IVDE+yoQ1JYsZmq45YzPAoGBANWWHuVueFGZRDZlkNlK\nh5OmySmA0NdNug3G1upaTthyaTZ+CxGliwBqMHAwpkIRPwxUJpUwBTSEGztGTAxs\nWsUOVWlD2/1JaKSmHE8JbNg6sxLilcG6WEDzxjC5dLL1OrGOXj9WhC9KX3sq6qb6\nF/j9eUXfXjAlb042MphoF3ZC\n-----END PRIVATE KEY-----\n",
        "client_email": "gspread-connection@model-sheets-connect.iam.gserviceaccount.com",
        "client_id": "100369174533302798535",
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
        "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gspread-connection%40model-sheets-connect.iam.gserviceaccount.com",
    }

    MLB_Data = 'https://docs.google.com/spreadsheets/d/1f42Ergav8K1VsOLOK9MUn7DM_MLMvv4GR2Fy7EfnZTc/edit#gid=340831852'

    gc_con = gspread.service_account_from_dict(credentials, scope)

    return gc_con, MLB_Data


gcservice_account, MLB_Data = init_conn()


def _load_sheet(sh, tab_name, required_col, sort_col, dedupe_col=None):
    """Read one worksheet into a cleaned, sorted DataFrame.

    Blank cells become NaN, rows with no value in ``required_col`` are
    dropped, duplicates on ``dedupe_col`` (when given) keep their first
    occurrence, and the result is sorted by ``sort_col`` in descending order.
    """
    frame = pd.DataFrame(sh.worksheet(tab_name).get_all_records())
    frame.replace('', np.nan, inplace=True)
    frame = frame.dropna(subset=[required_col])
    if dedupe_col is not None:
        frame = frame.drop_duplicates(subset=[dedupe_col], keep='first')
    return frame.sort_values(by=sort_col, ascending=False)


@st.cache_data(ttl = 599)
def init_baselines():
    """Load the five baseline tables from the MLB workbook.

    The result is cached by Streamlit for 599 seconds.

    Returns:
        ``(hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data)``
        as DataFrames, in that order.
    """
    sh = gcservice_account.open_by_url(MLB_Data)

    # (worksheet name, column that must be populated, sort column, dedupe column)
    sheet_specs = [
        ('Hitter_Data (RHP)', 'PA', 'Player', 'Player'),
        ('Hitter_Data (LHP)', 'PA', 'Player', 'Player'),
        ('Pitcher_Data (RHH)', 'True AVG', 'Names', None),
        ('Pitcher_Data (LHH)', 'True AVG', 'Names', None),
        ('Bullpen_xData', 'HWS Ratio', 'Names', None),
    ]

    frames = []
    for position, (tab_name, required_col, sort_col, dedupe_col) in enumerate(sheet_specs):
        if position:
            # Short pause between consecutive worksheet reads
            # (presumably to stay under the Sheets API rate limit).
            time.sleep(.5)
        frames.append(_load_sheet(sh, tab_name, required_col, sort_col, dedupe_col))

    hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = frames

    return hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data


@st.cache_resource
def calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count):
    """Simulate Poisson outcomes against the starter and the bullpen.

    The hitter's rate is averaged with the starter's rate and, separately,
    with the bullpen's rate; each blended rate is multiplied by its number of
    plate appearances to get a Poisson mean, from which 10000 samples are
    drawn.

    Args:
        hitter_val: Per-PA rate for the hitter.
        sp_val: Per-PA rate for the starting pitcher.
        bp_val: Per-PA rate for the bullpen.
        sp_count: Plate appearances against the starting pitcher.
        bp_count: Plate appearances against the bullpen.

    Returns:
        ``(sp_outcome, bp_outcome, mean_outcome)``: the sample mean of each
        simulation and their sum.
    """
    sp_combo_val = (hitter_val + sp_val) / 2
    bp_combo_val = (hitter_val + bp_val) / 2

    sp_mean = sp_combo_val * sp_count
    bp_mean = bp_combo_val * bp_count

    # Generate a large number of samples from the Poisson distribution
    SP_run = poisson.rvs(sp_mean, size=10000)
    BP_run = poisson.rvs(bp_mean, size=10000)

    # Calculate the sample mean
    sp_outcome = np.mean(SP_run)
    bp_outcome = np.mean(BP_run)
    mean_outcome = sp_outcome + bp_outcome

    return sp_outcome, bp_outcome, mean_outcome


hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

col1, col2 = st.columns([1, 7])

with col1:
    if st.button("Load/Reset Data", key='reset1'):
        st.cache_data.clear()
        hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

    pitcher_var1 = st.selectbox("Which pitcher are you looking at?", options = pitcher_rhh['Names'].unique())
    pitcher_check = pitcher_rhh[pitcher_rhh['Names'] == pitcher_var1]
    pitcher_hand = pitcher_check['Hand'].iloc[0]

    # The hitter table is chosen by the handedness of the selected pitcher.
    hitter_pool = hitter_rhp if pitcher_hand == 'RHP' else hitter_lhp
    hitter_var1 = st.selectbox("What hitter are you looking at?", options = hitter_pool['Player'].unique())
    hitter_check = hitter_pool[hitter_pool['Player'] == hitter_var1]

    bullpen_var1 = st.selectbox("Which Bullpen are you looking at?", options = bullpen_data['Names'].unique())
    bullpen_check = bullpen_data[bullpen_data['Names'] == bullpen_var1]

    # min_value=0: a negative PA count would yield a negative Poisson mean,
    # which scipy rejects with a ValueError.
    sp_count = st.number_input("How many PA against the Pitcher?", min_value = 0, step = 1)
    bp_count = st.number_input("How many PA against the Bullpen?", min_value = 0, step = 1)
    stat_var1 = st.selectbox("What Stat are you looking at?", options = ['Projected Walks', 'Projected Strikeouts', 'Projected HRs'])

with col2:
    if st.button('calculate theoretical means'):
        if stat_var1 == 'Projected Walks':
            hitter_val = hitter_check['BB%'].iloc[0]
            sp_val = pitcher_check['BB%'].iloc[0]
            bp_val = bullpen_check['Walkper'].iloc[0] / 100
        elif stat_var1 == 'Projected Strikeouts':
            hitter_val = hitter_check['K%'].iloc[0]
            sp_val = pitcher_check['K%'].iloc[0]
            bp_val = bullpen_check['Strikeoutper'].iloc[0] / 100
        elif stat_var1 == 'Projected HRs':
            hitter_val = hitter_check['xHRs'].iloc[0] / hitter_check['PA'].iloc[0]
            sp_val = pitcher_check['xHR/PA'].iloc[0]
            bp_val = bullpen_check['Homeruns'].iloc[0] / bullpen_check['PA'].iloc[0]

        value = calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count)

        st.table(hitter_check)
        st.write(f"Theoretical mean of the SP instances: {value[0]}")
        st.table(pitcher_check)
        st.write(f"Theoretical mean of the BP instances: {value[1]}")
        st.table(bullpen_check)
        st.write(f"Sample mean from generated data: {value[2]}")