cassiomo committed on
Commit
3520d74
•
1 Parent(s): dd0fd7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +769 -1
app.py CHANGED
@@ -563,7 +563,775 @@ st.plotly_chart(fig_bar)
563
  # # In[ ]:
564
  #
565
 
566
- # qatar.to_csv("/content/drive/MyDrive/data/last_team_scores.csv", index = False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
 
568
  if __name__ == "__main__":
569
  main()
 
563
  # # In[ ]:
564
  #
565
 
566
+ final_df = pd.read_csv('./data/training.csv')
567
+ final_df.tail()
568
+
569
+
570
+ # # GROUP STAGE MODELING
571
+
572
+ # ### Choosing a model
573
+
574
+ # In[4]:
575
+
576
+
577
+ # Keep a reference to the original (un-encoded) data frame so the final pipeline can be trained on it later
578
+ pipe_DF = final_df
579
+ # Dummies for categorical columns
580
+ final_df = pd.get_dummies(final_df)
581
+
582
+
583
+ # I split the dataset into training, testing and validation.
584
+
585
+ # In[5]:
586
+
587
+
588
+ X = final_df.drop('Team1_Result',axis=1)
589
+ y = final_df['Team1_Result']
590
+ from sklearn.model_selection import train_test_split
591
+ X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)
592
+ X_hold_test, X_test, y_hold_test, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=42)
593
+
594
+
595
+ # Scaling
596
+
597
+ # In[6]:
598
+
599
+
600
+ from sklearn.preprocessing import StandardScaler
601
+ scaler = StandardScaler()
602
+ X_train = scaler.fit_transform(X_train)
603
+ X_test = scaler.transform(X_test)
604
+ X_hold_test = scaler.transform(X_hold_test)
605
+
606
+
607
+ # Defining function to display the confusion matrix quickly.
608
+
609
+ # In[7]:
610
+
611
+
612
+ from sklearn.metrics import classification_report,ConfusionMatrixDisplay
613
def metrics_display(model):
    """Fit ``model`` and report how it scores on the test split.

    The function reads the module-level ``X_train``, ``y_train``, ``X_test``
    and ``y_test`` that are current at call time. It prints a classification
    report and draws a confusion matrix for the test predictions.

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    report = classification_report(y_test, predictions)
    print(report)

    ConfusionMatrixDisplay.from_predictions(y_test, predictions)
618
+
619
+
620
+ # * **Random Forest**
621
+
622
+ # In[8]:
623
+
624
+
625
+ from sklearn.ensemble import RandomForestClassifier
626
+ metrics_display(RandomForestClassifier())
627
+
628
+
629
+ # * **Ada Boost Classifier**
630
+
631
+ # In[9]:
632
+
633
+
634
+ from sklearn.ensemble import AdaBoostClassifier
635
+ metrics_display(AdaBoostClassifier())
636
+
637
+
638
+ # * **XGB Boost**
639
+
640
+ # In[10]:
641
+
642
+
643
+ from xgboost import XGBClassifier
644
+ metrics_display(XGBClassifier(use_label_encoder=False))
645
+
646
+
647
+ # * **Neural network**
648
+ #
649
+ #
650
+
651
+ # In[11]:
652
+
653
+
654
+ import keras
655
+ from keras import Sequential
656
+ from keras.layers import Dense,Dropout
657
+ from keras import Input
658
+
659
+ X_train.shape
660
+
661
+
662
+ # In[12]:
663
+
664
+
665
# Feed-forward classifier for the three group-stage outcomes.
# The input width is read from the training matrix rather than hard-coded,
# so the network still builds when the number of one-hot encoded columns
# changes with the training data.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(300, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.3))
# Three softmax units: one per value of Team1_Result (0, 1, 2).
model.add(Dense(3, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, validation_split=0.2)

# Turn the per-class probabilities into a predicted class index.
y_pred1 = model.predict(X_test)
y_pred1 = np.argmax(y_pred1, axis=1)
print(classification_report(y_test, y_pred1))
ConfusionMatrixDisplay.from_predictions(y_test, y_pred1)
681
+
682
+
683
+ # The XGBoost model performs better than the others, so I will tune its hyperparameters and evaluate the performance based on the validation dataset.
684
+
685
+ # ### XGB Boost - Tuning & Hold-out Validation
686
+
687
+ # In[13]:
688
+
689
+
690
+ from sklearn.model_selection import GridSearchCV
691
+ from sklearn.metrics import accuracy_score
692
+
693
+ # Make a dictionary of hyperparameter values to search
694
+ search_space = {
695
+ "n_estimators" : [200,250,300,350,400,450,500],
696
+ "max_depth" : [3,4,5,6,7,8,9],
697
+ "gamma" : [0.001,0.01,0.1],
698
+ "learning_rate" : [0.001,0.01,0.1]
699
+ }
700
+
701
+
702
+ # In[14]:
703
+
704
+
705
+ # make a GridSearchCV object
706
+ GS = GridSearchCV(estimator = XGBClassifier(use_label_encoder=False),
707
+ param_grid = search_space,
708
+ scoring = 'accuracy',
709
+ cv = 5,
710
+ verbose = 4)
711
+
712
+
713
+ # Uncomment the following line to enable the tuning. The best result I found was: gamma = 0.01, learning_rate = 0.01, n_estimators = 300, max_depth = 4
714
+
715
+ # In[15]:
716
+
717
+
718
+ #GS.fit(X_train,y_train)
719
+
720
+
721
+ # To get only the best hyperparameter values
722
+
723
+ # In[16]:
724
+
725
+
726
+ #print(GS.best_params_)
727
+
728
+
729
+ # Initially, I validate the model with its default parameters, and then I will validate it with its tuned parameters.
730
+
731
+ # * **Default Hyperparameters**
732
+
733
+ # In[17]:
734
+
735
+
736
+ model = XGBClassifier()
737
+ model.fit(X_train,y_train)
738
+ y_pred = model.predict(X_hold_test)
739
+ print(classification_report(y_hold_test,y_pred))
740
+ ConfusionMatrixDisplay.from_predictions(y_hold_test,y_pred);
741
+
742
+
743
+ # * **Tuned Hyperparameters**
744
+
745
+ # In[18]:
746
+
747
+
748
+ model = XGBClassifier(use_label_encoder = False, gamma = 0.01, learning_rate = 0.01, n_estimators = 300, max_depth = 4)
749
+ model.fit(X_train,y_train)
750
+ y_pred = model.predict(X_hold_test)
751
+ print(classification_report(y_hold_test,y_pred))
752
+ ConfusionMatrixDisplay.from_predictions(y_hold_test,y_pred);
753
+
754
+
755
+ # The model improves a bit, so I will create a pipe to use the model later easily.
756
+
757
+ # ### Creating a pipeline for the XGB model
758
+
759
+ # In[19]:
760
+
761
+
762
+ from sklearn.preprocessing import OneHotEncoder
763
+ from sklearn.compose import make_column_transformer
764
+ column_trans = make_column_transformer(
765
+ (OneHotEncoder(),['Team1', 'Team2']),remainder='passthrough')
766
+
767
+ pipe_X = pipe_DF.drop('Team1_Result',axis=1)
768
+ pipe_y = pipe_DF['Team1_Result']
769
+
770
+ from sklearn.pipeline import make_pipeline
771
+ pipe_League = make_pipeline(column_trans,StandardScaler(with_mean=False),XGBClassifier(use_label_encoder=False, gamma= 0.01, learning_rate= 0.01, n_estimators= 300, max_depth= 4))
772
+ pipe_League.fit(pipe_X,pipe_y)
773
+
774
+
775
+ # In[20]:
776
+
777
+
778
+ import joblib
779
+ joblib.dump(pipe_League,"./groups_stage_prediction.pkl")
780
+
781
+
782
+ # # KNOCKOUT STAGE MODELING
783
+
784
+ # ### Choosing the model
785
+ #
786
+ # Removing Draw status.
787
+
788
+ # In[21]:
789
+
790
+
791
+ knock_df = pipe_DF[pipe_DF['Team1_Result'] != 2]
792
+
793
+
794
+ # In[22]:
795
+
796
+
797
+ pipe_knock_df = knock_df
798
+ knock_df = pd.get_dummies(knock_df)
799
+ X = knock_df.drop('Team1_Result',axis=1)
800
+ y = knock_df['Team1_Result']
801
+
802
+ X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
803
+ X_hold_test, X_test, y_hold_test, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=42)
804
+
805
+
806
+ # * **Ada Boost Classifier**
807
+
808
+ # In[23]:
809
+
810
+
811
+ metrics_display(AdaBoostClassifier())
812
+
813
+
814
+ # * **Random Forest**
815
+ #
816
+ #
817
+ #
818
+
819
+ # In[26]:
820
+
821
+
822
+ metrics_display(RandomForestClassifier())
823
+
824
+
825
+ # * **XGB Boost**
826
+
827
+ # In[27]:
828
+
829
+
830
+ metrics_display(XGBClassifier(use_label_encoder=False))
831
+
832
+
833
+ # * **Neural network**
834
+
835
+ # In[28]:
836
+
837
+
838
+ X_train.shape
839
+
840
+
841
+ # In[30]:
842
+
843
+
844
# Feed-forward classifier for the two knockout outcomes (draws were removed).
# The input width is read from the training matrix rather than hard-coded,
# so the network still builds when the number of one-hot encoded columns
# changes with the training data.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(300, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.3))
# Two softmax units: Team1_Result is 0 or 1 once draws are filtered out.
model.add(Dense(2, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, validation_split=0.2)

# Turn the per-class probabilities into a predicted class index.
y_pred1 = model.predict(X_test)
y_pred1 = np.argmax(y_pred1, axis=1)
print(classification_report(y_test, y_pred1))
ConfusionMatrixDisplay.from_predictions(y_test, y_pred1)
860
+
861
+
862
+ # All models have very similar performance. Therefore I will tune the Random Forest model and the XGB Boost.
863
+
864
+ # ### Random Forest - Tuning & Hold-out Validation
865
+
866
+ # In[31]:
867
+
868
+
869
+ search_space = {
870
+ "max_depth" : [11,12,13,14,15,16],
871
+ "max_leaf_nodes" : [170,180,190,200,210,220,230],
872
+ "min_samples_leaf" : [3,4,5,6,7,8],
873
+ "n_estimators" : [310,320,330,340,350]
874
+ }
875
+
876
+
877
+ # In[32]:
878
+
879
+
880
+ GS = GridSearchCV(estimator = RandomForestClassifier(),
881
+ param_grid = search_space,
882
+ scoring = 'accuracy',
883
+ cv = 5,
884
+ verbose = 4)
885
+
886
+
887
+ # Uncomment the following lines to enable the tuning. The best result I found was: max_depth = 16, n_estimators = 320, max_leaf_nodes = 190, min_samples_leaf = 5
888
+
889
+ # In[33]:
890
+
891
+
892
+ #GS.fit(X_train,y_train)
893
+
894
+
895
+ # In[34]:
896
+
897
+
898
+ #print(GS.best_params_)
899
+
900
+
901
+ # * **Default Hyperparameters**
902
+
903
+ # In[35]:
904
+
905
+
906
+ model = RandomForestClassifier()
907
+ model.fit(X_train,y_train)
908
+ y_pred = model.predict(X_hold_test)
909
+ print(classification_report(y_hold_test,y_pred))
910
+ ConfusionMatrixDisplay.from_predictions(y_hold_test,y_pred);
911
+
912
+
913
+ # * **Tuned Hyperparameters**
914
+
915
+ # In[36]:
916
+
917
+
918
+ model = RandomForestClassifier(max_depth= 16, n_estimators=320, max_leaf_nodes= 190, min_samples_leaf= 5)
919
+ model.fit(X_train,y_train)
920
+ y_pred = model.predict(X_hold_test)
921
+ print(classification_report(y_hold_test,y_pred))
922
+ ConfusionMatrixDisplay.from_predictions(y_hold_test,y_pred);
923
+
924
+
925
+ # The Random Forest greatly improves performance with the tuned hyperparameters; let's see the XGB Boost model.
926
+
927
+ # ### XGB Boost - Tuning & Hold-out Validation
928
+
929
+ # In[37]:
930
+
931
+
932
+ search_space = {
933
+ "n_estimators" : [300,350,400,450,500,550,600],
934
+ "max_depth" : [3,4,5,6,7,8,9],
935
+ "gamma" : [0.001,0.01,0.1],
936
+ "learning_rate" : [0.001,0.01]
937
+ }
938
+
939
+
940
+ # In[38]:
941
+
942
+
943
+ GS = GridSearchCV(estimator = XGBClassifier(use_label_encoder=False),
944
+ param_grid = search_space,
945
+ scoring = 'accuracy',
946
+ cv = 5,
947
+ verbose = 4)
948
+
949
+
950
+ # In[39]:
951
+
952
+
953
+ #GS.fit(X_train,y_train)
954
+
955
+
956
+ # In[40]:
957
+
958
+
959
+ #print(GS.best_params_) # to get only the best hyperparameter values that we searched for
960
+
961
+
962
+ # Uncomment the GS.fit / GS.best_params_ lines above to enable the tuning. The best result I found was: gamma = 0.01, learning_rate = 0.01, max_depth = 5, n_estimators = 500
963
+
964
+ # * **Default Hyperparameters**
965
+
966
+ # In[41]:
967
+
968
+
969
+ model = XGBClassifier()
970
+ model.fit(X_train,y_train)
971
+ y_pred = model.predict(X_hold_test)
972
+ print(classification_report(y_hold_test,y_pred))
973
+ ConfusionMatrixDisplay.from_predictions(y_hold_test,y_pred);
974
+
975
+
976
+ # * **Tuned Hyperparameters**
977
+
978
+ # In[42]:
979
+
980
+
981
+ model = XGBClassifier(gamma=0.01,learning_rate=0.01, max_depth=5, n_estimators=500)
982
+ model.fit(X_train,y_train)
983
+ y_pred = model.predict(X_hold_test)
984
+ print(classification_report(y_hold_test,y_pred))
985
+ ConfusionMatrixDisplay.from_predictions(y_hold_test,y_pred);
986
+
987
+
988
+ # The model does not improve notably. However, it does improve compared to the Random Forest.
989
+
990
+ # ### Creating a pipeline for the XGB Boost model
991
+
992
+ # In[43]:
993
+
994
+
995
+ pipe_X = pipe_knock_df.drop('Team1_Result',axis=1)
996
+ pipe_y = pipe_knock_df['Team1_Result']
997
+ pipe_knock = make_pipeline(column_trans,StandardScaler(with_mean=False),XGBClassifier(gamma=0.01,learning_rate=0.01, max_depth=5, n_estimators=500))
998
+ pipe_knock.fit(pipe_X,pipe_y)
999
+
1000
+
1001
+ # In[44]:
1002
+
1003
+
1004
+ joblib.dump(pipe_knock,"./knockout_stage_prediction.pkl")
1005
+
1006
# Page header rendered at the top of the Streamlit app.
st.title("FIFA winner prediction")
st.write('This app predicts the 2022 FIFA winner')
1008
+
1009
+ if st.button("Predict FIFA Winner"):
1010
+
1011
+ last_team_scores = pd.read_csv('./data/last_team_scores.csv')
1012
+ last_team_scores.tail()
1013
+
1014
+ squad_stats = pd.read_csv('./data/squad_stats.csv')
1015
+ squad_stats.tail()
1016
+
1017
+ group_matches = pd.read_csv('./data/Qatar_group_stage.csv')
1018
+ round_16 = group_matches.iloc[48:56, :]
1019
+ quarter_finals = group_matches.iloc[56:60, :]
1020
+ semi_finals = group_matches.iloc[60:62, :]
1021
+ final = group_matches.iloc[62:63, :]
1022
+ second_final = group_matches.iloc[63:64, :]
1023
+ group_matches = group_matches.iloc[:48, :]
1024
+ group_matches.tail()
1025
+
1026
+ xgb_gs_model = joblib.load("./groups_stage_prediction.pkl")
1027
+
1028
+ xgb_ks_model = joblib.load("./knockout_stage_prediction.pkl")
1029
+
1030
+ team_group = group_matches.drop(['country2'], axis=1)
1031
+ team_group = team_group.drop_duplicates().reset_index(drop=True)
1032
+ team_group = team_group.rename(columns={"country1": "team"})
1033
+ team_group.head(5)
1034
+
1035
def matches(g_matches):
    """Build the model input frame for a set of fixtures.

    Reads ``squad_stats`` and ``last_team_scores`` from the enclosing scope.

    Side effect: ``g_matches`` is modified in place. Columns ``potential1``,
    ``potential2``, ``rank1`` and ``rank2`` are inserted at positions 2-5,
    so calling this twice on the same frame raises (pandas refuses duplicate
    inserts).

    For every fixture the two sides are ordered into ``Team1``/``Team2``:
      * if the ``potential`` values differ by more than 2, the side with the
        higher potential becomes ``Team1``;
      * otherwise ``country1`` becomes ``Team1`` only when ``rank1 > rank2``.

    Args:
        g_matches: DataFrame with at least ``country1`` and ``country2``.

    Returns:
        DataFrame with ``Team1``, ``Team2`` followed by the rank, goalkeeper,
        defense, offense and midfield columns looked up per team.
    """
    # Build each lookup table once instead of re-indexing for every column.
    potential = squad_stats.set_index('nationality_name')['potential']
    team_scores = last_team_scores.set_index('team')
    rank = team_scores['rank']

    g_matches.insert(2, 'potential1', g_matches['country1'].map(potential))
    g_matches.insert(3, 'potential2', g_matches['country2'].map(potential))
    g_matches.insert(4, 'rank1', g_matches['country1'].map(rank))
    g_matches.insert(5, 'rank2', g_matches['country2'].map(rank))

    pairs = []
    for _, row in g_matches.iterrows():
        gap = row['potential1'] - row['potential2']
        if gap > 2:
            keep_order = True
        elif gap < -2:
            keep_order = False
        else:
            # Potentials are close (or missing): fall back to the rank test.
            keep_order = row['rank1'] > row['rank2']

        if keep_order:
            pairs.append({'Team1': row['country1'], 'Team2': row['country2']})
        else:
            pairs.append({'Team1': row['country2'], 'Team2': row['country1']})

    # Naming the columns keeps Team1/Team2 present even with zero fixtures.
    pred_set = pd.DataFrame(pairs, columns=['Team1', 'Team2'])

    # (output column, side to look up, source column), in the column order
    # the original code produced.
    feature_columns = (
        ('Team1_FIFA_RANK', 'Team1', 'rank'),
        ('Team2_FIFA_RANK', 'Team2', 'rank'),
        ('Team1_Goalkeeper_Score', 'Team1', 'goalkeeper_score'),
        ('Team2_Goalkeeper_Score', 'Team2', 'goalkeeper_score'),
        ('Team1_Defense', 'Team1', 'defense_score'),
        ('Team1_Offense', 'Team1', 'offense_score'),
        ('Team1_Midfield', 'Team1', 'midfield_score'),
        ('Team2_Defense', 'Team2', 'defense_score'),
        ('Team2_Offense', 'Team2', 'offense_score'),
        ('Team2_Midfield', 'Team2', 'midfield_score'),
    )
    for column, side, stat in feature_columns:
        pred_set[column] = pred_set[side].map(team_scores[stat])
    return pred_set
1071
+
1072
def print_results(dataset, y_pred, matches, proba):
    """Print each predicted fixture and collect the outcomes.

    Args:
        dataset: Feature frame whose first two columns are Team1 and Team2.
        y_pred: Predicted class per fixture: 2 is a draw, 1 means Team1 wins,
            anything else means Team2 wins.
        matches: Fixture frame; its first two columns are printed as the
            pairing and its ``group`` column is carried into the result.
        proba: Per-fixture class probabilities, indexed like ``y_pred``
            classes (index 0 for Team2, 1 for Team1, 2 for a draw when the
            model has a draw class).

    Returns:
        DataFrame with the ``group`` column of ``matches`` next to a
        ``result`` column holding the winning team name or ``'Draw'``.
    """
    outcomes = []
    for i in range(dataset.shape[0]):
        team1 = dataset.iloc[i, 0]
        team2 = dataset.iloc[i, 1]
        fixture = matches.iloc[i, 0] + " vs. " + matches.iloc[i, 1]

        print()
        if y_pred[i] == 2:
            print(fixture + " => Draw")
            outcome = 'Draw'
        elif y_pred[i] == 1:
            print(fixture + " => Winner: " + team1)
            outcome = team1
        else:
            print(fixture + " => Winner: " + team2)
            outcome = team2
        outcomes.append({'result': outcome})

        # Knockout models have no draw class, so check how many classes are
        # present instead of relying on a bare ``except`` around the lookup.
        probabilities = proba[i]
        if len(probabilities) > 1:
            print('Probability of ' + team1 + ' winning: ', '%.3f' % (probabilities[1]))
        if len(probabilities) > 2:
            print('Probability of Draw: ', '%.3f' % (probabilities[2]))
        print('Probability of ' + team2 + ' winning: ', '%.3f' % (probabilities[0]))
        print("")

    # Naming the column keeps ``result`` present even when there are no rows.
    results = pd.DataFrame(outcomes, columns=['result'])
    matches = pd.concat([matches['group'], results], axis=1)
    return matches
1095
+
1096
def winner_to_match(round, prev_match):
    """Replace fixture placeholders with the teams that reached them.

    ``country1``/``country2`` in ``round`` hold keys that are looked up in
    ``prev_match`` (``group`` -> ``result``). Keys with no match become NaN.

    The result is built on a copy, so the caller's ``round`` frame is left
    untouched and the function can be called again on the same input.

    Args:
        round: DataFrame with ``country1`` and ``country2`` key columns.
        prev_match: DataFrame with ``group`` and ``result`` columns.

    Returns:
        A new DataFrame with a fresh 0..n-1 index whose first two columns are
        the resolved ``country1`` and ``country2``, followed by the remaining
        columns of ``round``.
    """
    advancing = prev_match.set_index('group')['result']

    # ``drop`` returns a new frame, so the inserts below never reach ``round``.
    resolved = round.drop(['country1', 'country2'], axis=1)
    resolved.insert(0, 'country1', round['country1'].map(advancing))
    resolved.insert(1, 'country2', round['country2'].map(advancing))
    return resolved.reset_index(drop=True)
1102
+
1103
def prediction_knockout(round):
    """Predict every fixture of a knockout round and report the winners.

    Builds the feature frame with ``matches`` (which also adds columns to
    ``round`` in place), scores it with ``xgb_ks_model``, and hands the
    predictions to ``print_results``.

    Args:
        round: Fixture frame for the round being played.

    Returns:
        Whatever ``print_results`` returns for this round.
    """
    features = matches(round)
    predicted = xgb_ks_model.predict(features)
    probabilities = xgb_ks_model.predict_proba(features)
    return print_results(features, predicted, round, probabilities)
1116
+
1117
def center_str(round):
    """Centre the names in the first two columns of ``round`` within 13 chars.

    Every cell in columns 0 and 1 is padded with spaces in place. When the
    padding is odd, the extra space goes on the right. Names longer than 13
    characters are left unchanged.

    The padding is computed rather than read from a fixed lookup table, so
    very short names no longer raise IndexError and over-long names no longer
    wrap to a negative index.

    NOTE(review): the lookup-table literals lost their whitespace in the view
    this was rewritten from; presumably entry ``k`` held ``k`` spaces —
    confirm against the original file.

    Args:
        round: DataFrame whose first two columns hold strings.

    Returns:
        The same ``round`` object, after modification.
    """
    width = 13
    for col in range(2):
        for row in range(round.shape[0]):
            name = round.iloc[row, col]
            padding = max(width - len(name), 0)
            left = padding // 2
            right = padding - left
            round.iloc[row, col] = ' ' * left + name + ' ' * right
    return round
1128
+
1129
def center2(a):
    """Centre the string ``a`` within 29 characters using spaces.

    When the padding is odd, the extra space goes on the right. Strings
    longer than 29 characters are returned unchanged.

    The padding is computed rather than read from a fixed lookup table, so
    over-long strings no longer wrap to a negative index.

    NOTE(review): the lookup-table literals lost their whitespace in the view
    this was rewritten from; presumably entry ``k`` held ``k`` spaces —
    confirm against the original file.

    Args:
        a: The string to centre.

    Returns:
        The padded string.
    """
    padding = max(29 - len(a), 0)
    left = padding // 2
    right = padding - left
    return ' ' * left + a + ' ' * right
1139
+
1140
+ dataset_groups = matches(group_matches)
1141
+ dataset_groups.tail()
1142
+ print(dataset_groups)
1143
+
1144
+ prediction_groups = xgb_gs_model.predict(dataset_groups)
1145
+ proba = xgb_gs_model.predict_proba(dataset_groups)
1146
+
1147
+ # prediction_groups = ada_gs_model.predict(dataset_groups)
1148
+ # proba = ada_gs_model.predict_proba(dataset_groups)
1149
+
1150
+ # prediction_groups = rf_gs_model.predict(dataset_groups)
1151
+ # proba = rf_gs_model.predict_proba(dataset_groups)
1152
+
1153
+ results = print_results(dataset_groups, prediction_groups, group_matches, proba)
1154
+
1155
+ team_group['points'] = 0
1156
+ team_group
1157
+ for i in range(results.shape[0]):
1158
+ for j in range(team_group.shape[0]):
1159
+ if results.iloc[i, 1] == team_group.iloc[j, 0]:
1160
+ team_group.iloc[j, 2] += 3
1161
+
1162
+ print(team_group.groupby(['group', 'team']).mean().astype(int))
1163
+
1164
+ round_of_16 = team_group[team_group['points'] > 5].reset_index(drop=True)
1165
+ round_of_16['group'] = (4 - 1 / 3 * round_of_16.points).astype(int).astype(str) + round_of_16.group
1166
+ round_of_16 = round_of_16.rename(columns={"team": "result"})
1167
+
1168
+ round_16 = winner_to_match(round_16, round_of_16)
1169
+ results_round_16 = prediction_knockout(round_16)
1170
+
1171
+ quarter_finals = winner_to_match(quarter_finals, results_round_16)
1172
+ results_quarter_finals = prediction_knockout(quarter_finals)
1173
+
1174
+ semi_finals = winner_to_match(semi_finals, results_quarter_finals)
1175
+ results_finals = prediction_knockout(semi_finals)
1176
+
1177
+ final = winner_to_match(final, results_finals)
1178
+ winner = prediction_knockout(final)
1179
+
1180
+ second = results_finals[~results_finals.result.isin(winner.result)]
1181
+ results_finals_3 = results_quarter_finals[~results_quarter_finals.result.isin(results_finals.result)]
1182
+ results_finals_3.iloc[0, 0] = 'z1'
1183
+ results_finals_3.iloc[1, 0] = 'z2'
1184
+ second_final = winner_to_match(second_final, results_finals_3)
1185
+ third = prediction_knockout(second_final)
1186
+
1187
+ round_16 = center_str(round_16)
1188
+ quarter_finals = center_str(quarter_finals)
1189
+ semi_finals = center_str(semi_finals)
1190
+ final = center_str(final)
1191
+ group_matches = center_str(group_matches)
1192
+
1193
+ # Function to center align text
1194
def center(text):
    """Wrap ``text`` in a ``<div>`` styled with centred text alignment."""
    opening_tag = "<div style='text-align: center;'>"
    closing_tag = "</div>"
    return opening_tag + f"{text}" + closing_tag
1196
+
1197
+ # Function to generate the formatted text
1198
+ def generate_text(round_16, quarter_finals, semi_finals, final):
1199
+ formatted_text = (
1200
+ round_16.iloc[
1201
+ 0, 0] + '━━━━┓ ┏━━━━' +
1202
+ round_16.iloc[4, 0] + '\n' +
1203
+ ' ┃ ┃\n' +
1204
+ ' ┃━━━━' + quarter_finals.iloc[
1205
+ 0, 0] + '━━━━┓ ┏━━━━' +
1206
+ quarter_finals.iloc[2, 0] + '━━━━┃\n' +
1207
+ ' ┃ ┃ ┃ ┃\n' +
1208
+ round_16.iloc[
1209
+ 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
1210
+ round_16.iloc[4, 1] + '\n' +
1211
+ ' ┃━━━━' + semi_finals.iloc[
1212
+ 0, 0] + '━━━━┓ ┏━━━━' + semi_finals.iloc[1, 0] + '━━━━┃\n' +
1213
+ round_16.iloc[
1214
+ 1, 0] + '━━━━┓ ┃ ┃ ┃ ┃ ┏━━━━' +
1215
+ round_16.iloc[5, 0] + '\n' +
1216
+ ' ┃ ┃ ┃ ┃ ┃ ┃\n' +
1217
+ ' ┃━━━━' + quarter_finals.iloc[
1218
+ 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
1219
+ quarter_finals.iloc[2, 1] + '━━━━┃\n' +
1220
+ ' ┃ ┃ ┃ ┃\n' +
1221
+ round_16.iloc[
1222
+ 1, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
1223
+ round_16.iloc[5, 1] + '\n' +
1224
+ ' ┃━━━━' + final.iloc[0, 0] + 'vs.' +
1225
+ final.iloc[0, 1] + '━━━━┃\n' +
1226
+ round_16.iloc[
1227
+ 2, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
1228
+ round_16.iloc[6, 0] + '\n' +
1229
+ ' ┃ ┃ ┃ ┃\n' +
1230
+ ' ┃━━━━' + quarter_finals.iloc[
1231
+ 1, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
1232
+ quarter_finals.iloc[3, 0] + '━━━━┃\n' +
1233
+ ' ┃ ┃ ┃ ┃ ┃ ┃\n' +
1234
+ round_16.iloc[
1235
+ 2, 1] + '━━━━┛ ┃ ┃ ┃ ┃ ┗━━━━' +
1236
+ round_16.iloc[6, 1] + '\n' +
1237
+ ' ┃━━━━' + semi_finals.iloc[
1238
+ 0, 1] + '━━━━┛ ┗━━━━' + semi_finals.iloc[1, 1] + '━━━━┃\n' +
1239
+ round_16.iloc[
1240
+ 3, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
1241
+ round_16.iloc[7, 0] + '\n' +
1242
+ ' ┃ ┃ ┃ ┃\n' +
1243
+ ' ┃━━━━' + quarter_finals.iloc[
1244
+ 1, 1] + '━━━━┛ ┗━━━━' +
1245
+ quarter_finals.iloc[3, 1] + '━━━━┃\n' +
1246
+ ' ┃ ┃\n' +
1247
+ round_16.iloc[
1248
+ 3, 1] + '━━━━┛ ┗━━━━' +
1249
+ round_16.iloc[7, 1] + '\n' +
1250
+ " " + center(
1251
+ "\U0001F947" + winner.iloc[0, 1]) + '\n' +
1252
+ " " + center(
1253
+ "\U0001F948" + second.iloc[0, 1]) + '\n' +
1254
+ " " + center(
1255
+ "\U0001F949" + third.iloc[0, 1])
1256
+ )
1257
+ return formatted_text
1258
+
1259
+ # Generate the formatted text
1260
+ formatted_text = generate_text(round_16, quarter_finals, semi_finals, final)
1261
+
1262
+ # Define the round_16, quarter_finals, semi_finals, final DataFrames
1263
+ # Replace the DataFrame creation with your actual data
1264
+
1265
+ # Display the formatted text
1266
+ st.text(formatted_text)
1267
+ # st.markdown(formatted_text)
1268
+
1269
+ print(round_16.iloc[
1270
+ 0, 0] + '━━━━┓ ┏━━━━' +
1271
+ round_16.iloc[4, 0])
1272
+ print(
1273
+ ' ┃ ┃')
1274
+ print(' ┃━━━━' + quarter_finals.iloc[
1275
+ 0, 0] + '━━━━┓ ┏━━━━' +
1276
+ quarter_finals.iloc[2, 0] + '━━━━┃')
1277
+ print(
1278
+ ' ┃ ┃ ┃ ┃')
1279
+ print(round_16.iloc[
1280
+ 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
1281
+ round_16.iloc[4, 1])
1282
+ print(' ┃━━━━' + semi_finals.iloc[
1283
+ 0, 0] + '━━━━┓ ┏━━━━' + semi_finals.iloc[1, 0] + '━━━━┃')
1284
+ print(round_16.iloc[
1285
+ 1, 0] + '━━━━┓ ┃ ┃ ┃ ┃ ┏━━━━' +
1286
+ round_16.iloc[5, 0])
1287
+ print(
1288
+ ' ┃ ┃ ┃ ┃ ┃ ┃')
1289
+ print(' ┃━━━━' + quarter_finals.iloc[
1290
+ 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
1291
+ quarter_finals.iloc[2, 1] + '━━━━┃')
1292
+ print(
1293
+ ' ┃ ┃ ┃ ┃')
1294
+ print(round_16.iloc[
1295
+ 1, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
1296
+ round_16.iloc[5, 1])
1297
+ print(' ┃━━━━' + final.iloc[0, 0] + 'vs.' + final.iloc[
1298
+ 0, 1] + '━━━━┃')
1299
+ print(round_16.iloc[
1300
+ 2, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
1301
+ round_16.iloc[6, 0])
1302
+ print(
1303
+ ' ┃ ┃ ┃ ┃')
1304
+ print(' ┃━━━━' + quarter_finals.iloc[
1305
+ 1, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
1306
+ quarter_finals.iloc[3, 0] + '━━━━┃')
1307
+ print(
1308
+ ' ┃ ┃ ┃ ┃ ┃ ┃')
1309
+ print(round_16.iloc[
1310
+ 2, 1] + '━━━━┛ ┃ ┃ ┃ ┃ ┗━━━━' +
1311
+ round_16.iloc[6, 1])
1312
+ print(' ┃━━━━' + semi_finals.iloc[
1313
+ 0, 1] + '━━━━┛ ┗━━━━' + semi_finals.iloc[1, 1] + '━━━━┃')
1314
+ print(round_16.iloc[
1315
+ 3, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
1316
+ round_16.iloc[7, 0])
1317
+ print(
1318
+ ' ┃ ┃ ┃ ┃')
1319
+ print(' ┃━━━━' + quarter_finals.iloc[
1320
+ 1, 1] + '━━━━┛ ┗━━━━' +
1321
+ quarter_finals.iloc[3, 1] + '━━━━┃')
1322
+ print(
1323
+ ' ┃ ┃')
1324
+ print(round_16.iloc[
1325
+ 3, 1] + '━━━━┛ ┗━━━━' +
1326
+ round_16.iloc[7, 1])
1327
+ print(
1328
+ " " + center2("\U0001F947" + winner.iloc[0, 1]))
1329
+ print(
1330
+ " " + center2("\U0001F948" + second.iloc[0, 1]))
1331
+ print(
1332
+ " " + center2("\U0001F949" + third.iloc[0, 1]))
1333
+
1334
+
1335
 
1336
  if __name__ == "__main__":
1337
  main()