cassiomo committed on
Commit
16f95fc
•
1 Parent(s): 39c83c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +325 -887
app.py CHANGED
@@ -1,897 +1,335 @@
1
  import streamlit as st
2
-
3
- import numpy as np
4
  import pandas as pd
5
- import matplotlib.pyplot as plt
6
- import os
7
- import warnings
8
-
9
- warnings.filterwarnings('ignore')
10
-
11
-
12
- # In[90]:
13
-
14
-
15
- # In[91]:
16
 
17
  def main():
18
- # st.title("FIFA Data visualization")
19
-
20
-
21
- # df = pd.read_csv('./data/international_matches.csv', parse_dates=['date'])
22
- # # df.tail()
23
- # #
24
-
25
- # # In[92]:
26
-
27
-
28
- # # df.columns
29
-
30
-
31
- # # In[93]:
32
-
33
-
34
- # # df.isnull().sum()
35
-
36
-
37
- # # # PRE-ANALYSIS
38
- # # The dataset has a lot of blank fields that need to be fixed.
39
- # # However, before modifying any field, I want to analyze the teams' qualifications on the last FIFA date (June 2022). This is important because, from these qualifications, I will create the inference dataset that enters the machine learning algorithm that predicts the World Cup matches.
40
-
41
- # # ### Top 10 FIFA Ranking
42
- # # Top 10 national teams to date FIFA June 2022.
43
- # # **ref:** https://www.fifa.com/fifa-world-ranking/men?dateId=id13603
44
-
45
-
46
- # # In[94]:
47
-
48
-
49
- # fifa_rank = df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
50
- # home = fifa_rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
51
- # columns={"home_team": "team", "home_team_fifa_rank": "rank"})
52
- # away = fifa_rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
53
- # columns={"away_team": "team", "away_team_fifa_rank": "rank"})
54
- # fifa_rank = pd.concat([home, away])
55
- # # Select each country latest match
56
- # fifa_rank = fifa_rank.sort_values(['team', 'date'], ascending=[True, False])
57
- # last_rank = fifa_rank
58
- # fifa_rank_top10 = fifa_rank.groupby('team').first().sort_values('rank', ascending=True)[0:10].reset_index()
59
-
60
-
61
- # # fifa_rank_top10
62
-
63
-
64
- # # ### Top 10 teams with the highest winning percentage at home and away
65
-
66
- # # In[95]:
67
-
68
-
69
- # def home_percentage(team):
70
- # score = len(df[(df['home_team'] == team) & (df['home_team_result'] == "Win")]) / len(
71
- # df[df['home_team'] == team]) * 100
72
- # return round(score)
73
-
74
-
75
- # def away_percentage(team):
76
- # score = len(df[(df['away_team'] == team) & (df['home_team_result'] == "Lose")]) / len(
77
- # df[df['away_team'] == team]) * 100
78
- # return round(score)
79
-
80
-
81
- # # In[96]:
82
-
83
-
84
- # fifa_rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(fifa_rank_top10['team'])
85
- # fifa_rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(fifa_rank_top10['team'])
86
- # fifa_rank_top10['Average_win_Per'] = round((fifa_rank_top10['Home_win_Per'] + fifa_rank_top10['Away_win_Per']) / 2)
87
- # fifa_rank_win = fifa_rank_top10.sort_values('Average_win_Per', ascending=False)
88
- # # fifa_rank_win
89
-
90
-
91
- # # ### Top 10 attacking teams in the last FIFA date
92
-
93
- # # In[97]:
94
-
95
-
96
- # fifa_offense = df[['date', 'home_team', 'away_team', 'home_team_mean_offense_score', 'away_team_mean_offense_score']]
97
- # home = fifa_offense[['date', 'home_team', 'home_team_mean_offense_score']].rename(
98
- # columns={"home_team": "team", "home_team_mean_offense_score": "offense_score"})
99
- # away = fifa_offense[['date', 'away_team', 'away_team_mean_offense_score']].rename(
100
- # columns={"away_team": "team", "away_team_mean_offense_score": "offense_score"})
101
- # fifa_offense = pd.concat([home, away])
102
- # fifa_offense = fifa_offense.sort_values(['date', 'team'], ascending=[False, True])
103
- # last_offense = fifa_offense
104
- # fifa_offense_top10 = fifa_offense.groupby('team').first().sort_values('offense_score', ascending=False)[
105
- # 0:10].reset_index()
106
- # # fifa_offense_top10
107
-
108
- # import plotly.graph_objs as go
109
- # import plotly.figure_factory as ff
110
-
111
- # # In[99]:
112
-
113
- # # Display the data for the bar chart
114
- # st.write("Top 10 Attacking Teams")
115
- # st.write(fifa_offense_top10)
116
-
117
- # # Create a horizontal bar chart
118
- # fig_bar = go.Figure(data=[go.Bar(y=fifa_offense_top10['team'], x=fifa_offense_top10['offense_score'], orientation='h')])
119
- # # Update layout to include title, x-label, and y-label
120
- # fig_bar.update_layout(title='Top 10 Attacking Teams',
121
- # xaxis_title='Offense Score',
122
- # yaxis_title='Team')
123
- # st.plotly_chart(fig_bar)
124
-
125
- # # Display the data for the bar chart
126
- # # st.write("Top 10 Offense Teams")
127
- # # st.write(fifa_offense_top10)
128
-
129
- # # sns.barplot(data=fifa_offense_top10, x='offense_score', y='team', color="#7F1431")
130
- # # plt.xlabel('Offense Score', size = 20)
131
- # # plt.ylabel('Team', size = 20)
132
- # # plt.title("Top 10 Attacking teams");
133
-
134
-
135
- # # ### Top 10 Midfield teams in the last FIFA date
136
-
137
- # # In[100]:
138
-
139
-
140
- # fifa_midfield = df[['date', 'home_team', 'away_team', 'home_team_mean_midfield_score', 'away_team_mean_midfield_score']]
141
- # home = fifa_midfield[['date', 'home_team', 'home_team_mean_midfield_score']].rename(
142
- # columns={"home_team": "team", "home_team_mean_midfield_score": "midfield_score"})
143
- # away = fifa_midfield[['date', 'away_team', 'away_team_mean_midfield_score']].rename(
144
- # columns={"away_team": "team", "away_team_mean_midfield_score": "midfield_score"})
145
- # fifa_midfield = pd.concat([home, away])
146
- # fifa_midfield = fifa_midfield.sort_values(['date', 'team'], ascending=[False, True])
147
- # last_midfield = fifa_midfield
148
- # fifa_midfield_top10 = fifa_midfield.groupby('team').first().sort_values('midfield_score', ascending=False)[
149
- # 0:10].reset_index()
150
- # # fifa_midfield_top10
151
-
152
-
153
- # # In[101]:
154
-
155
- # # Display the data for the bar chart
156
- # st.write("Top 10 Midfield Teams")
157
- # st.write(fifa_midfield_top10)
158
-
159
- # # Create a horizontal bar chart
160
- # fig_bar = go.Figure(
161
- # data=[go.Bar(y=fifa_midfield_top10['team'], x=fifa_midfield_top10['midfield_score'], orientation='h')])
162
- # # Update layout to include title, x-label, and y-label
163
- # fig_bar.update_layout(title='Top 10 Midfield Teams', # Set the title
164
- # xaxis_title='Midfield Score', # Set the x-axis label
165
- # yaxis_title='Team') # Set the y-axis label
166
-
167
- # # Display the bar chart
168
- # st.plotly_chart(fig_bar)
169
-
170
- # # sns.barplot(data=fifa_midfield_top10, x='midfield_score', y='team', color="#7F1431")
171
- # # plt.xlabel('Midfield Score', size = 20)
172
- # # plt.ylabel('Team', size = 20)
173
- # # plt.title("Top 10 Midfield teams");
174
-
175
-
176
- # # ### Top 10 defending teams in the last FIFA date
177
-
178
- # # In[102]:
179
-
180
-
181
- # fifa_defense = df[['date', 'home_team', 'away_team', 'home_team_mean_defense_score', 'away_team_mean_defense_score']]
182
- # home = fifa_defense[['date', 'home_team', 'home_team_mean_defense_score']].rename(
183
- # columns={"home_team": "team", "home_team_mean_defense_score": "defense_score"})
184
- # away = fifa_defense[['date', 'away_team', 'away_team_mean_defense_score']].rename(
185
- # columns={"away_team": "team", "away_team_mean_defense_score": "defense_score"})
186
- # fifa_defense = pd.concat([home, away])
187
- # fifa_defense = fifa_defense.sort_values(['date', 'team'], ascending=[False, True])
188
- # last_defense = fifa_defense
189
- # fifa_defense_top10 = fifa_defense.groupby('team').first().sort_values('defense_score', ascending=False)[
190
- # 0:10].reset_index()
191
- # # fifa_defense_top10
192
-
193
-
194
- # # In[103]:
195
-
196
- # # Display the data for the bar chart
197
- # st.write("Top 10 Defensive Teams")
198
- # st.write(fifa_defense_top10)
199
-
200
- # # Create the horizontal bar chart
201
- # fig_bar = go.Figure(data=[go.Bar(y=fifa_defense_top10['team'], x=fifa_defense_top10['defense_score'], orientation='h')])
202
-
203
- # # Update layout to include title, x-label, and y-label
204
- # fig_bar.update_layout(title='Top 10 Defensive Teams', # Set the title
205
- # xaxis_title='Defense Score', # Set the x-axis label
206
- # yaxis_title='Team') # Set the y-axis label
207
-
208
- # # Display the bar chart
209
- # st.plotly_chart(fig_bar)
210
-
211
- # # sns.barplot(data=fifa_defense_top10, x='defense_score', y='team', color="#7F1431")
212
- # # plt.xlabel('Defense Score', size=20)
213
- # # plt.ylabel('Team', size=20)
214
- # # plt.title("Top 10 Defense Teams")
215
-
216
- # # ### Do Home teams have any advantage?
217
-
218
- # # In[104]:
219
-
220
-
221
- # # Select all matches played at non-neutral locations
222
- # home_team_advantage = df[df['neutral_location'] == False]['home_team_result'].value_counts(normalize=True)
223
-
224
- # # # Plot
225
- # # fig, axes = plt.subplots(1, 1, figsize=(8,8))
226
- # # ax =plt.pie(home_team_advantage ,labels = ['Win', 'Lose', 'Draw'], autopct='%.0f%%')
227
- # # plt.title('Home team match result', fontsize = 15)
228
- # # plt.show()
229
-
230
-
231
- # # As the graph shows, the home team has an advantage over the away team. This is due to factors such as the fans, the weather and the confidence of the players. For this reason, in the World Cup, those teams that sit at home will have an advantage.
232
-
233
- # # # DATA PREPARATION AND FEATURE ENGINEERING
234
- # # In this section, I will fill in the empty fields in the dataset and clean up the data for teams that did not qualify for the World Cup. Then, I will use the correlation matrix to choose the characteristics that will define the training dataset of the Machine Learning model. Finally, I will use the ratings of the teams in their last match to define the "Last Team Scores" dataset (i.e., the dataset that I will use to predict the World Cup matches).
235
-
236
- # # ### Analyze and fill na's
237
-
238
- # # In[105]:
239
-
240
- # #
241
- # # df.isnull().sum()
242
-
243
-
244
- # # In[106]:
245
-
246
-
247
- # # We can fill mean for na's in goal_keeper_score
248
- # df[df['home_team'] == "Brazil"]['home_team_goalkeeper_score'].describe()
249
-
250
- # # In[107]:
251
-
252
-
253
- # df['home_team_goalkeeper_score'] = round(
254
- # df.groupby("home_team")["home_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
255
- # df['away_team_goalkeeper_score'] = round(
256
- # df.groupby("away_team")["away_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
257
-
258
- # # In[108]:
259
-
260
-
261
- # # We can fill mean for na's in defense score
262
- # df[df['away_team'] == "Uruguay"]['home_team_mean_defense_score'].describe()
263
-
264
- # # In[65]:
265
-
266
-
267
- # df['home_team_mean_defense_score'] = round(
268
- # df.groupby('home_team')['home_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
269
- # df['away_team_mean_defense_score'] = round(
270
- # df.groupby('away_team')['away_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
271
-
272
- # # In[109]:
273
-
274
-
275
- # # We can fill mean for na's in offense score
276
- # df[df['away_team'] == "Uruguay"]['home_team_mean_offense_score'].describe()
277
-
278
- # # In[67]:
279
-
280
-
281
- # df['home_team_mean_offense_score'] = round(
282
- # df.groupby('home_team')['home_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
283
- # df['away_team_mean_offense_score'] = round(
284
- # df.groupby('away_team')['away_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
285
-
286
- # # In[110]:
287
-
288
-
289
- # # We can fill mean for na's in midfield score
290
- # df[df['away_team'] == "Uruguay"]['home_team_mean_midfield_score'].describe()
291
-
292
- # # In[111]:
293
-
294
-
295
- # df['home_team_mean_midfield_score'] = round(
296
- # df.groupby('home_team')['home_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
297
- # df['away_team_mean_midfield_score'] = round(
298
- # df.groupby('away_team')['away_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
299
-
300
- # # In[112]:
301
-
302
-
303
- # df.isnull().sum()
304
-
305
- # # In[113]:
306
-
307
-
308
- # # Teams are not available in FIFA game itself, so they are not less than average performing teams, so giving a average score of 50 for all.
309
- # df.fillna(50, inplace=True)
310
-
311
- # # ### Filter the teams participating in QATAR - World cup 2022
312
-
313
- # # In[115]:
314
-
315
-
316
- # list_2022 = ['Qatar', 'Germany', 'Denmark', 'Brazil', 'France', 'Belgium', 'Croatia', 'Spain', 'Serbia', 'England',
317
- # 'Switzerland', 'Netherlands', 'Argentina', 'IR Iran', 'Korea Republic', 'Japan', 'Saudi Arabia', 'Ecuador',
318
- # 'Uruguay', 'Canada', 'Ghana', 'Senegal', 'Portugal', 'Poland', 'Tunisia', 'Morocco', 'Cameroon', 'USA',
319
- # 'Mexico', 'Wales', 'Australia', 'Costa Rica']
320
- # final_df = df[(df["home_team"].apply(lambda x: x in list_2022)) | (df["away_team"].apply(lambda x: x in list_2022))]
321
-
322
- # # **Top 10 teams in QATAR 2022**
323
-
324
- # # In[116]:
325
-
326
-
327
- # rank = final_df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
328
- # home = rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
329
- # columns={"home_team": "team", "home_team_fifa_rank": "rank"})
330
- # away = rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
331
- # columns={"away_team": "team", "away_team_fifa_rank": "rank"})
332
- # rank = pd.concat([home, away])
333
-
334
- # # Select each country latest match
335
- # rank = rank.sort_values(['team', 'date'], ascending=[True, False])
336
- # rank_top10 = rank.groupby('team').first().sort_values('rank', ascending=True).reset_index()
337
- # rank_top10 = rank_top10[(rank_top10["team"].apply(lambda x: x in list_2022))][0:10]
338
-
339
- # st.write("Top 10 Countries by Rank - Latest Match")
340
- # rank_top10
341
-
342
- # # # Create a scatter plot
343
- # # fig_scatter = go.Figure(data=go.Scatter(x=rank_top10['team'], y=rank_top10['rank'], mode='markers', marker=dict(color='lightskyblue', size=12)))
344
- # #
345
- # # # Update layout to include title and labels
346
- # # fig_scatter.update_layout(title='Top 10 Countries by Rank - Latest Match',
347
- # # xaxis_title='Country',
348
- # # yaxis_title='Rank')
349
- # #
350
- # # # Display the scatter plot
351
- # # st.plotly_chart(fig_scatter)
352
-
353
- # # **Top 10 teams with the highest winning percentage in QATAR 2022**
354
-
355
- # # In[117]:
356
-
357
-
358
- # rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(rank_top10['team'])
359
- # rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(rank_top10['team'])
360
- # rank_top10['Average_win_Per'] = round((rank_top10['Home_win_Per'] + rank_top10['Away_win_Per']) / 2)
361
- # rank_top10_Win = rank_top10.sort_values('Average_win_Per', ascending=False)
362
-
363
- # # st.write("Top 10 Countries by Rank - Latest Match")
364
- # # rank_top10_Win
365
-
366
-
367
- # # In[118]:
368
-
369
- # # Display the data for the bar chart
370
- # st.write("Top 10 Average Win Per game Teams")
371
- # st.write(rank_top10_Win)
372
-
373
- # # Create a horizontal bar chart
374
- # # Create a horizontal bar chart
375
- # fig_bar = go.Figure(data=[go.Bar(y=rank_top10_Win['team'], x=rank_top10_Win['Average_win_Per'], orientation='h')])
376
-
377
- # # Update layout to include title and labels
378
- # fig_bar.update_layout(title='Top 10 Countries by Average Win Percentage',
379
- # xaxis_title='Average Win Percentage',
380
- # yaxis_title='Country')
381
-
382
- # # Display the horizontal bar chart
383
- # st.plotly_chart(fig_bar)
384
-
385
- # sns.barplot(data=rank_top10_Win,x='Average_win_Per',y='team',color="#7F1431")
386
- # plt.xticks()
387
- # plt.xlabel('Win Average', size = 20)
388
- # plt.ylabel('Team', size = 20)
389
- # plt.title('Top 10 QATAR 2022 teams with the highest winning percentage')
390
-
391
- #
392
- # # ### Correlation Matrix
393
- #
394
- # # In[124]:
395
- #
396
- #
397
- # final_df['home_team_result'].values
398
- # # for index, value in final_df['home_team_result'].items():
399
- # # print(f"Row {index}: {value}")
400
- #
401
- #
402
- # # In[125]:
403
- #
404
- #
405
- # team_result_df = final_df
406
- # # for index, value in team_result_df['home_team_result'].items():
407
- # # print(f"Row {index}: {value}")
408
- #
409
- #
410
- # # In[151]:
411
- #
412
- #
413
- # # Mapping numeric values for home_team_result to find the correlations
414
- # final_df['home_team_result'] = final_df['home_team_result'].map({'Win':1, 'Draw':2, 'Lose':0})
415
- #
416
- #
417
- # # In[145]:
418
- #
419
- #
420
- #
421
- #
422
- #
423
- # # In[150]:
424
- #
425
- #
426
- # final_df['home_team_result'].head(1)
427
- #
428
- #
429
- # # In[152]:
430
- #
431
- #
432
- # final_df['home_team_result'] = pd.to_numeric(final_df['home_team_result'], errors='coerce')
433
- #
434
- #
435
- # # In[155]:
436
- #
437
- #
438
- # # df.head()
439
- #
440
- #
441
- # # In[156]:
442
- #
443
- #
444
- # # final_df.head()
445
- #
446
- #
447
- # # In[157]:
448
- #
449
- #
450
- # numerical_df = final_df.select_dtypes(include=['number'])
451
- #
452
- #
453
- # # In[158]:
454
- #
455
- #
456
- # numerical_df.corr()['home_team_result'].sort_values(ascending=False)
457
- #
458
- #
459
- # # In[153]:
460
- #
461
- #
462
- # # final_df.corr()['home_team_result'].sort_values(ascending=False)
463
- #
464
- #
465
- # # Dropping unnecessary columns.
466
- #
467
- # # In[ ]:
468
- #
469
- #
470
- # #Dropping unnecessary columns
471
- # final_df = final_df.drop(['date', 'home_team_continent', 'away_team_continent', 'home_team_total_fifa_points', 'away_team_total_fifa_points', 'home_team_score', 'away_team_score', 'tournament', 'city', 'country', 'neutral_location', 'shoot_out'],axis=1)
472
- #
473
- #
474
- # # In[ ]:
475
- #
476
- #
477
- # # final_df.columns
478
- #
479
- #
480
- # # In[ ]:
481
- #
482
- #
483
- # # Change column names
484
- # final_df.rename(columns={"home_team":"Team1", "away_team":"Team2", "home_team_fifa_rank":"Team1_FIFA_RANK",
485
- # "away_team_fifa_rank":"Team2_FIFA_RANK", "home_team_result":"Team1_Result", "home_team_goalkeeper_score":"Team1_Goalkeeper_Score",
486
- # "away_team_goalkeeper_score":"Team2_Goalkeeper_Score", "home_team_mean_defense_score":"Team1_Defense",
487
- # "home_team_mean_offense_score":"Team1_Offense", "home_team_mean_midfield_score":"Team1_Midfield",
488
- # "away_team_mean_defense_score":"Team2_Defense", "away_team_mean_offense_score":"Team2_Offense",
489
- # "away_team_mean_midfield_score":"Team2_Midfield"}, inplace=True)
490
- #
491
- #
492
- # # In[ ]:
493
- #
494
- #
495
- # plt.figure(figsize=(10, 4), dpi=200)
496
- # sns.heatmap(final_df.corr(), annot=True)
497
- #
498
- #
499
- # # In[ ]:
500
- #
501
- #
502
- # # final_df.info()
503
- #
504
- #
505
- # # In[ ]:
506
- #
507
- #
508
- # # final_df
509
- #
510
- #
511
- # # Exporting the training dataset.
512
- #
513
- # # In[ ]:
514
- #
515
- #
516
- # # final_df.to_csv("./data/training.csv", index = False)
517
- #
518
- #
519
- # # ### Creating "Last Team Scores" dataset
520
- # # This dataset contains the qualifications of each team on the previous FIFA date and will be used to predict the World Cup matches.
521
- #
522
- # # In[ ]:
523
- #
524
- #
525
- # last_goalkeeper = df[['date', 'home_team', 'away_team', 'home_team_goalkeeper_score', 'away_team_goalkeeper_score']]
526
- # home = last_goalkeeper[['date', 'home_team', 'home_team_goalkeeper_score']].rename(columns={"home_team":"team", "home_team_goalkeeper_score":"goalkeeper_score"})
527
- # away = last_goalkeeper[['date', 'away_team', 'away_team_goalkeeper_score']].rename(columns={"away_team":"team", "away_team_goalkeeper_score":"goalkeeper_score"})
528
- # last_goalkeeper = pd.concat([home,away])
529
- #
530
- # last_goalkeeper = last_goalkeeper.sort_values(['date', 'team'],ascending=[False, True])
531
- #
532
- # list_2022 = ['Qatar', 'Germany', 'Denmark', 'Brazil', 'France', 'Belgium', 'Croatia', 'Spain', 'Serbia', 'England', 'Switzerland', 'Netherlands', 'Argentina', 'IR Iran', 'Korea Republic', 'Japan', 'Saudi Arabia', 'Ecuador', 'Uruguay', 'Canada', 'Ghana', 'Senegal', 'Portugal', 'Poland', 'Tunisia', 'Morocco', 'Cameroon', 'USA', 'Mexico', 'Wales', 'Australia', 'Costa Rica']
533
- #
534
- # rank_qatar = last_rank[(last_rank["team"].apply(lambda x: x in list_2022))]
535
- # rank_qatar = rank_qatar.groupby('team').first().reset_index()
536
- # goal_qatar = last_goalkeeper[(last_goalkeeper["team"].apply(lambda x: x in list_2022))]
537
- # goal_qatar = goal_qatar.groupby('team').first().reset_index()
538
- # goal_qatar = goal_qatar.drop(['date'], axis = 1)
539
- # off_qatar = last_offense[(last_offense["team"].apply(lambda x: x in list_2022))]
540
- # off_qatar = off_qatar.groupby('team').first().reset_index()
541
- # off_qatar = off_qatar.drop(['date'], axis = 1)
542
- # mid_qatar = last_midfield[(last_midfield["team"].apply(lambda x: x in list_2022))]
543
- # mid_qatar = mid_qatar.groupby('team').first().reset_index()
544
- # mid_qatar = mid_qatar.drop(['date'], axis = 1)
545
- # def_qatar = last_defense[(last_defense["team"].apply(lambda x: x in list_2022))]
546
- # def_qatar = def_qatar.groupby('team').first().reset_index()
547
- # def_qatar = def_qatar.drop(['date'], axis = 1)
548
- #
549
- # qatar = pd.merge(rank_qatar, goal_qatar, on = 'team')
550
- # qatar = pd.merge(qatar, def_qatar, on ='team')
551
- # qatar = pd.merge(qatar, off_qatar, on ='team')
552
- # qatar = pd.merge(qatar, mid_qatar, on ='team')
553
- #
554
- # qatar['goalkeeper_score'] = round(qatar["goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
555
- # qatar['offense_score'] = round(qatar["offense_score"].transform(lambda x: x.fillna(x.mean())))
556
- # qatar['midfield_score'] = round(qatar["midfield_score"].transform(lambda x: x.fillna(x.mean())))
557
- # qatar['defense_score'] = round(qatar["defense_score"].transform(lambda x: x.fillna(x.mean())))
558
- # # qatar.head(5)
559
- #
560
- #
561
- # # Exporting the "Last Team Scores" dataset.
562
- #
563
- # # In[ ]:
564
- #
565
-
566
-
567
- st.title("FIFA winner predication")
568
- st.write('This app predict 2022 FIFA winner')
569
-
570
- if st.button("Predict FIFA Winner"):
571
-
572
- last_team_scores = pd.read_csv('./data/last_team_scores.csv')
573
- last_team_scores.tail()
574
-
575
- squad_stats = pd.read_csv('./data/squad_stats.csv')
576
- squad_stats.tail()
577
-
578
- group_matches = pd.read_csv('./data/Qatar_group_stage.csv')
579
- round_16 = group_matches.iloc[48:56, :]
580
- quarter_finals = group_matches.iloc[56:60, :]
581
- semi_finals = group_matches.iloc[60:62, :]
582
- final = group_matches.iloc[62:63, :]
583
- second_final = group_matches.iloc[63:64, :]
584
- group_matches = group_matches.iloc[:48, :]
585
- group_matches.tail()
586
-
587
- xgb_gs_model = joblib.load("./groups_stage_prediction.pkl")
588
-
589
- xgb_ks_model = joblib.load("./knockout_stage_prediction.pkl")
590
-
591
- team_group = group_matches.drop(['country2'], axis=1)
592
- team_group = team_group.drop_duplicates().reset_index(drop=True)
593
- team_group = team_group.rename(columns={"country1": "team"})
594
- team_group.head(5)
595
-
596
def matches(g_matches):
    """Turn a table of fixtures into the feature table fed to the models.

    ``g_matches`` is modified in place: the columns ``potential1`` and
    ``potential2`` (looked up in ``squad_stats`` by ``nationality_name``) and
    ``rank1`` and ``rank2`` (looked up in ``last_team_scores`` by ``team``)
    are inserted at column positions 2 to 5.

    Every fixture is then ordered into a ``Team1``/``Team2`` pair: when the
    two potentials differ by more than 2 the side with the higher potential
    is ``Team1``; otherwise ``country1`` is ``Team1`` only when ``rank1`` is
    greater than ``rank2``.

    Returns a new DataFrame holding ``Team1``, ``Team2`` and, for both sides,
    the rank, goalkeeper, defense, offense and midfield scores taken from
    ``last_team_scores``.
    """
    potential_by_nation = squad_stats.set_index('nationality_name')['potential']
    for position, side in ((2, '1'), (3, '2')):
        g_matches.insert(position, 'potential' + side,
                         g_matches['country' + side].map(potential_by_nation))

    scores_by_team = last_team_scores.set_index('team')
    for position, side in ((4, '1'), (5, '2')):
        g_matches.insert(position, 'rank' + side,
                         g_matches['country' + side].map(scores_by_team['rank']))

    pairs = []
    for _, fixture in g_matches.iterrows():
        first, second = fixture['country1'], fixture['country2']
        p1, p2 = fixture['potential1'], fixture['potential2']
        if p1 > p2 and abs(p1 - p2) > 2:
            keep_order = True
        elif p2 > p1 and abs(p2 - p1) > 2:
            keep_order = False
        else:
            # Potentials are too close (or missing): fall back to the ranks.
            keep_order = fixture['rank1'] > fixture['rank2']
        team1, team2 = (first, second) if keep_order else (second, first)
        pairs.append({'Team1': team1, 'Team2': team2})

    pred_set = pd.DataFrame(pairs)

    # (new column, side it describes, source column in last_team_scores),
    # listed in the exact column order of the returned frame.
    feature_spec = (
        ('Team1_FIFA_RANK', 'Team1', 'rank'),
        ('Team2_FIFA_RANK', 'Team2', 'rank'),
        ('Team1_Goalkeeper_Score', 'Team1', 'goalkeeper_score'),
        ('Team2_Goalkeeper_Score', 'Team2', 'goalkeeper_score'),
        ('Team1_Defense', 'Team1', 'defense_score'),
        ('Team1_Offense', 'Team1', 'offense_score'),
        ('Team1_Midfield', 'Team1', 'midfield_score'),
        ('Team2_Defense', 'Team2', 'defense_score'),
        ('Team2_Offense', 'Team2', 'offense_score'),
        ('Team2_Midfield', 'Team2', 'midfield_score'),
    )
    for position, (column, side, score) in enumerate(feature_spec, start=2):
        pred_set.insert(position, column,
                        pred_set[side].map(scores_by_team[score]))
    return pred_set
632
-
633
def print_results(dataset, y_pred, matches, proba):
    """Print every predicted fixture and collect the outcomes per group.

    Parameters:
        dataset: DataFrame whose first two columns are the two teams of each
            fixture, in the order used for prediction.
        y_pred: per-fixture predicted class; ``2`` means draw, ``1`` means the
            first team of ``dataset`` wins, anything else the second team.
        matches: DataFrame whose first two columns are the fixture's team
            labels (used for printing) and which has a ``group`` column.
            (The name shadows the sibling ``matches`` helper inside this
            function; it is kept because it is part of the signature.)
        proba: per-fixture class probabilities indexed like ``y_pred``
            classes: ``[second team wins, first team wins, draw]``.  Rows
            with only two entries (no draw class) are supported.

    Returns:
        DataFrame with the ``group`` column of ``matches`` next to a
        ``result`` column holding the winning team name or ``'Draw'``.
    """
    results = []
    for i in range(dataset.shape[0]):
        team1, team2 = dataset.iloc[i, 0], dataset.iloc[i, 1]
        fixture = matches.iloc[i, 0] + " vs. " + matches.iloc[i, 1]

        print()
        if y_pred[i] == 2:
            print(fixture + " => Draw")
            outcome = 'Draw'
        elif y_pred[i] == 1:
            print(fixture + " => Winner: " + team1)
            outcome = team1
        else:
            print(fixture + " => Winner: " + team2)
            outcome = team2
        results.append({'result': outcome})

        # Check the number of classes explicitly instead of relying on a bare
        # ``except`` around the prints: a missing draw class is expected for
        # two-class models, any other failure must surface.
        class_proba = proba[i]
        if len(class_proba) > 1:
            print('Probability of ' + team1 + ' winning: ', '%.3f' % class_proba[1])
        if len(class_proba) > 2:
            print('Probability of Draw: ', '%.3f' % class_proba[2])
        print('Probability of ' + team2 + ' winning: ', '%.3f' % class_proba[0])
        print("")

    results = pd.DataFrame(results)
    # ``results`` is built positionally, so pair it with the groups by
    # position as well; concat would otherwise align on index labels.
    groups = matches['group'].reset_index(drop=True)
    return pd.concat([groups, results], axis=1)
656
-
657
def winner_to_match(round, prev_match):
    """Fill a knockout round's fixtures with the winners of the previous stage.

    Parameters:
        round: DataFrame with ``country1`` and ``country2`` columns holding
            placeholder keys that refer to entries of ``prev_match``.
        prev_match: DataFrame with a ``group`` column (the keys) and a
            ``result`` column (the team that advanced).

    Returns:
        A new DataFrame with a fresh 0..n-1 index whose first two columns,
        ``country1`` and ``country2``, hold the resolved team names, followed
        by the remaining columns of ``round``.  Keys missing from
        ``prev_match`` resolve to NaN.

    ``round`` itself is left untouched, so the function can be called again
    on the same frame.  The parameter name shadows the ``round`` builtin but
    is kept because it is part of the signature.
    """
    winners = prev_match.set_index('group')['result']
    # ``drop`` returns a new frame, so the inserts below never reach the
    # caller's data.
    fixtures = round.drop(['country1', 'country2'], axis=1)
    fixtures.insert(0, 'country1', round['country1'].map(winners))
    fixtures.insert(1, 'country2', round['country2'].map(winners))
    return fixtures.reset_index(drop=True)
663
-
664
def prediction_knockout(round):
    """Predict every fixture of one knockout round.

    Builds the feature table with ``matches`` (which also adds the potential
    and rank columns to ``round`` in place), asks the knockout-stage model
    ``xgb_ks_model`` for classes and class probabilities, and hands all of it
    to ``print_results``.

    Returns the frame produced by ``print_results``.
    """
    features = matches(round)
    # Only the XGBoost knockout model is used here; other classifiers with
    # the same predict/predict_proba interface were tried in its place.
    predicted = xgb_ks_model.predict(features)
    probabilities = xgb_ks_model.predict_proba(features)
    return print_results(features, predicted, round, probabilities)
677
-
678
def center_str(round, width=13):
    """Pad the team names in the first two columns of ``round`` to ``width``.

    Each string in columns 0 and 1 is centered with spaces; when the padding
    cannot be split evenly the extra space goes on the right.  Strings that
    are already ``width`` characters or longer are left unchanged.

    Parameters:
        round: DataFrame whose first two columns hold strings.  It is
            modified in place.  (The name shadows the ``round`` builtin but
            is kept because it is part of the signature.)
        width: target width in characters (default 13).

    Returns:
        The same DataFrame object, for call chaining.
    """
    for j in range(2):
        for i in range(round.shape[0]):
            name = round.iloc[i, j]
            # Compute the padding instead of indexing a fixed table of space
            # strings: a name longer than ``width`` must not wrap around to a
            # negative index and pick up unrelated padding.
            slack = max(width - len(name), 0)
            left = slack // 2
            round.iloc[i, j] = ' ' * left + name + ' ' * (slack - left)
    return round
689
-
690
def center2(a, width=29):
    """Return ``a`` centered with spaces in a field of ``width`` characters.

    When the padding cannot be split evenly the extra space goes on the
    right.  A string that is already ``width`` characters or longer is
    returned unchanged.

    Parameters:
        a: the string to center.
        width: target width in characters (default 29).
    """
    # Compute the padding rather than looking it up in a fixed table of
    # space strings, which breaks for long input (negative index).
    slack = max(width - len(a), 0)
    left = slack // 2
    return ' ' * left + a + ' ' * (slack - left)
700
-
701
- dataset_groups = matches(group_matches)
702
- dataset_groups.tail()
703
- print(dataset_groups)
704
-
705
- prediction_groups = xgb_gs_model.predict(dataset_groups)
706
- proba = xgb_gs_model.predict_proba(dataset_groups)
707
-
708
- # prediction_groups = ada_gs_model.predict(dataset_groups)
709
- # proba = ada_gs_model.predict_proba(dataset_groups)
710
-
711
- # prediction_groups = rf_gs_model.predict(dataset_groups)
712
- # proba = rf_gs_model.predict_proba(dataset_groups)
713
-
714
- results = print_results(dataset_groups, prediction_groups, group_matches, proba)
715
-
716
- team_group['points'] = 0
717
- team_group
718
- for i in range(results.shape[0]):
719
- for j in range(team_group.shape[0]):
720
- if results.iloc[i, 1] == team_group.iloc[j, 0]:
721
- team_group.iloc[j, 2] += 3
722
-
723
- print(team_group.groupby(['group', 'team']).mean().astype(int))
724
-
725
- round_of_16 = team_group[team_group['points'] > 5].reset_index(drop=True)
726
- round_of_16['group'] = (4 - 1 / 3 * round_of_16.points).astype(int).astype(str) + round_of_16.group
727
- round_of_16 = round_of_16.rename(columns={"team": "result"})
728
-
729
- round_16 = winner_to_match(round_16, round_of_16)
730
- results_round_16 = prediction_knockout(round_16)
731
-
732
- quarter_finals = winner_to_match(quarter_finals, results_round_16)
733
- results_quarter_finals = prediction_knockout(quarter_finals)
734
-
735
- semi_finals = winner_to_match(semi_finals, results_quarter_finals)
736
- results_finals = prediction_knockout(semi_finals)
737
-
738
- final = winner_to_match(final, results_finals)
739
- winner = prediction_knockout(final)
740
-
741
- second = results_finals[~results_finals.result.isin(winner.result)]
742
- results_finals_3 = results_quarter_finals[~results_quarter_finals.result.isin(results_finals.result)]
743
- results_finals_3.iloc[0, 0] = 'z1'
744
- results_finals_3.iloc[1, 0] = 'z2'
745
- second_final = winner_to_match(second_final, results_finals_3)
746
- third = prediction_knockout(second_final)
747
-
748
- round_16 = center_str(round_16)
749
- quarter_finals = center_str(quarter_finals)
750
- semi_finals = center_str(semi_finals)
751
- final = center_str(final)
752
- group_matches = center_str(group_matches)
753
-
754
- # Function to center align text
755
def center(text):
    """Wrap ``text`` in a ``<div>`` whose inline style centers its content."""
    opening_tag = "<div style='text-align: center;'>"
    closing_tag = "</div>"
    return "{}{}{}".format(opening_tag, text, closing_tag)
757
-
758
- # Function to generate the formatted text
759
def generate_text(round_16, quarter_finals, semi_finals, final):
    """Render the knockout bracket and the podium as one multi-line string.

    The first two columns of each round frame are read by position: eight
    rows of ``round_16``, four of ``quarter_finals``, two of ``semi_finals``
    and one of ``final``.  The podium lines read ``winner``, ``second`` and
    ``third`` from the enclosing scope and wrap each name with ``center``.

    Returns the bracket rows joined with newlines, followed by the three
    medal lines.
    """
    # NOTE(review): the runs of spaces inside the bracket literals look
    # collapsed; confirm the intended column alignment against the rendered
    # output.
    r16 = round_16.iloc
    qf = quarter_finals.iloc
    sf = semi_finals.iloc
    fin = final.iloc

    # The rows are listed top to bottom, exactly as they are displayed.
    rows = [
        r16[0, 0] + '━━━━┓ ┏━━━━' + r16[4, 0],
        ' ┃ ┃',
        ' ┃━━━━' + qf[0, 0] + '━━━━┓ ┏━━━━' + qf[2, 0] + '━━━━┃',
        ' ┃ ┃ ┃ ┃',
        r16[0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' + r16[4, 1],
        ' ┃━━━━' + sf[0, 0] + '━━━━┓ ┏━━━━' + sf[1, 0] + '━━━━┃',
        r16[1, 0] + '━━━━┓ ┃ ┃ ┃ ┃ ┏━━━━' + r16[5, 0],
        ' ┃ ┃ ┃ ┃ ┃ ┃',
        ' ┃━━━━' + qf[0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' + qf[2, 1] + '━━━━┃',
        ' ┃ ┃ ┃ ┃',
        r16[1, 1] + '━━━━┛ ┃ ┃ ┗━━━━' + r16[5, 1],
        ' ┃━━━━' + fin[0, 0] + 'vs.' + fin[0, 1] + '━━━━┃',
        r16[2, 0] + '━━━━┓ ┃ ┃ ┏━━━━' + r16[6, 0],
        ' ┃ ┃ ┃ ┃',
        ' ┃━━━━' + qf[1, 0] + '━━━━┓ ┃ ┃ ┏━━━━' + qf[3, 0] + '━━━━┃',
        ' ┃ ┃ ┃ ┃ ┃ ┃',
        r16[2, 1] + '━━━━┛ ┃ ┃ ┃ ┃ ┗━━━━' + r16[6, 1],
        ' ┃━━━━' + sf[0, 1] + '━━━━┛ ┗━━━━' + sf[1, 1] + '━━━━┃',
        r16[3, 0] + '━━━━┓ ┃ ┃ ┏━━━━' + r16[7, 0],
        ' ┃ ┃ ┃ ┃',
        ' ┃━━━━' + qf[1, 1] + '━━━━┛ ┗━━━━' + qf[3, 1] + '━━━━┃',
        ' ┃ ┃',
        r16[3, 1] + '━━━━┛ ┗━━━━' + r16[7, 1],
        # Podium: gold, silver and bronze medal emoji before each team name.
        " " + center("\U0001F947" + winner.iloc[0, 1]),
        " " + center("\U0001F948" + second.iloc[0, 1]),
        " " + center("\U0001F949" + third.iloc[0, 1]),
    ]
    return '\n'.join(rows)
819
-
820
- # Generate the formatted text
821
- formatted_text = generate_text(round_16, quarter_finals, semi_finals, final)
822
-
823
- # Define the round_16, quarter_finals, semi_finals, final DataFrames
824
- # Replace the DataFrame creation with your actual data
825
-
826
- # Display the formatted text
827
- st.text(formatted_text)
828
- # st.markdown(formatted_text)
829
-
830
- print(round_16.iloc[
831
- 0, 0] + '━━━━┓ ┏━━━━' +
832
- round_16.iloc[4, 0])
833
- print(
834
- ' ┃ ┃')
835
- print(' ┃━━━━' + quarter_finals.iloc[
836
- 0, 0] + '━━━━┓ ┏━━━━' +
837
- quarter_finals.iloc[2, 0] + '━━━━┃')
838
- print(
839
- ' ┃ ┃ ┃ ┃')
840
- print(round_16.iloc[
841
- 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
842
- round_16.iloc[4, 1])
843
- print(' ┃━━━━' + semi_finals.iloc[
844
- 0, 0] + '━━━━┓ ┏━━━━' + semi_finals.iloc[1, 0] + '━━━━┃')
845
- print(round_16.iloc[
846
- 1, 0] + '━━━━┓ ┃ ┃ ┃ ┃ ┏━━━━' +
847
- round_16.iloc[5, 0])
848
- print(
849
- ' ┃ ┃ ┃ ┃ ┃ ┃')
850
- print(' ┃━━━━' + quarter_finals.iloc[
851
- 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
852
- quarter_finals.iloc[2, 1] + '━━━━┃')
853
- print(
854
- ' ┃ ┃ ┃ ┃')
855
- print(round_16.iloc[
856
- 1, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
857
- round_16.iloc[5, 1])
858
- print(' ┃━━━━' + final.iloc[0, 0] + 'vs.' + final.iloc[
859
- 0, 1] + '━━━━┃')
860
- print(round_16.iloc[
861
- 2, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
862
- round_16.iloc[6, 0])
863
- print(
864
- ' ┃ ┃ ┃ ┃')
865
- print(' ┃━━━━' + quarter_finals.iloc[
866
- 1, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
867
- quarter_finals.iloc[3, 0] + '━━━━┃')
868
- print(
869
- ' ┃ ┃ ┃ ┃ ┃ ┃')
870
- print(round_16.iloc[
871
- 2, 1] + '━━━━┛ ┃ ┃ ┃ ┃ ┗━━━━' +
872
- round_16.iloc[6, 1])
873
- print(' ┃━━━━' + semi_finals.iloc[
874
- 0, 1] + '━━━━┛ ┗━━━━' + semi_finals.iloc[1, 1] + '━━━━┃')
875
- print(round_16.iloc[
876
- 3, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
877
- round_16.iloc[7, 0])
878
- print(
879
- ' ┃ ┃ ┃ ┃')
880
- print(' ┃━━━━' + quarter_finals.iloc[
881
- 1, 1] + '━━━━┛ ┗━━━━' +
882
- quarter_finals.iloc[3, 1] + '━━━━┃')
883
- print(
884
- ' ┃ ┃')
885
- print(round_16.iloc[
886
- 3, 1] + '━━━━┛ ┗━━━━' +
887
- round_16.iloc[7, 1])
888
- print(
889
- " " + center2("\U0001F947" + winner.iloc[0, 1]))
890
- print(
891
- " " + center2("\U0001F948" + second.iloc[0, 1]))
892
- print(
893
- " " + center2("\U0001F949" + third.iloc[0, 1]))
894
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
 
896
 
897
  if __name__ == "__main__":
 
1
  import streamlit as st
 
 
2
  import pandas as pd
3
+ import joblib
 
 
 
 
 
 
 
 
 
 
4
 
5
  def main():
6
+ st.title("FIFA winner predication")
7
+ st.write('This app predict 2022 FIFA winner')
8
+
9
+ if st.button("Predict FIFA Winner"):
10
+
11
+ last_team_scores = pd.read_csv('./data/last_team_scores.csv')
12
+ last_team_scores.tail()
13
+
14
+ squad_stats = pd.read_csv('./data/squad_stats.csv')
15
+ squad_stats.tail()
16
+
17
+ group_matches = pd.read_csv('./data/Qatar_group_stage.csv')
18
+ round_16 = group_matches.iloc[48:56, :]
19
+ quarter_finals = group_matches.iloc[56:60, :]
20
+ semi_finals = group_matches.iloc[60:62, :]
21
+ final = group_matches.iloc[62:63, :]
22
+ second_final = group_matches.iloc[63:64, :]
23
+ group_matches = group_matches.iloc[:48, :]
24
+ group_matches.tail()
25
+
26
+ xgb_gs_model = joblib.load("./groups_stage_prediction.pkl")
27
+
28
+ xgb_ks_model = joblib.load("./knockout_stage_prediction.pkl")
29
+
30
+ team_group = group_matches.drop(['country2'], axis=1)
31
+ team_group = team_group.drop_duplicates().reset_index(drop=True)
32
+ team_group = team_group.rename(columns={"country1": "team"})
33
+ team_group.head(5)
34
+
35
def matches(g_matches):
    """Build the model feature table for a set of fixtures.

    Each row of ``g_matches`` (columns ``country1`` / ``country2``) is turned
    into an ordered ``Team1`` / ``Team2`` pair and enriched with the rank and
    line scores found in ``last_team_scores``.

    Ordering rule, per fixture:
      * a side whose squad ``potential`` exceeds the other's by more than 2
        becomes ``Team1``;
      * otherwise ``country1`` becomes ``Team1`` only when its ``rank`` value
        is strictly greater than ``country2``'s; ties and missing values put
        ``country2`` first.

    ``squad_stats`` and ``last_team_scores`` are read from the enclosing
    scope. ``g_matches`` is not modified, so the function can be called more
    than once on the same frame.

    Returns:
        A new DataFrame with one row per fixture and the columns ``Team1``,
        ``Team2``, ``Team1_FIFA_RANK``, ``Team2_FIFA_RANK``,
        ``Team1_Goalkeeper_Score``, ``Team2_Goalkeeper_Score``,
        ``Team1_Defense``, ``Team1_Offense``, ``Team1_Midfield``,
        ``Team2_Defense``, ``Team2_Offense``, ``Team2_Midfield`` in that order.
    """
    # Build each lookup once instead of once per derived column.
    potential = squad_stats.set_index('nationality_name')['potential']
    scores = last_team_scores.set_index('team')
    rank = scores['rank']

    side1, side2 = g_matches['country1'], g_matches['country2']
    fixtures = zip(
        side1, side2,
        side1.map(potential), side2.map(potential),
        side1.map(rank), side2.map(rank),
    )

    ordered = []
    for c1, c2, pot1, pot2, rank1, rank2 in fixtures:
        # "a > b and abs(a - b) > 2" is the same test as "a - b > 2".
        if pot1 - pot2 > 2:
            first, second = c1, c2
        elif pot2 - pot1 > 2:
            first, second = c2, c1
        # NOTE(review): the numerically larger rank is listed first here;
        # confirm this is the ordering the trained model expects.
        elif rank1 > rank2:
            first, second = c1, c2
        else:
            first, second = c2, c1
        ordered.append({'Team1': first, 'Team2': second})

    # Explicit columns keep the frame usable when there are no fixtures.
    pred_set = pd.DataFrame(ordered, columns=['Team1', 'Team2'])

    # (output column, side to look up, score column) in model input order.
    features = (
        ('Team1_FIFA_RANK', 'Team1', 'rank'),
        ('Team2_FIFA_RANK', 'Team2', 'rank'),
        ('Team1_Goalkeeper_Score', 'Team1', 'goalkeeper_score'),
        ('Team2_Goalkeeper_Score', 'Team2', 'goalkeeper_score'),
        ('Team1_Defense', 'Team1', 'defense_score'),
        ('Team1_Offense', 'Team1', 'offense_score'),
        ('Team1_Midfield', 'Team1', 'midfield_score'),
        ('Team2_Defense', 'Team2', 'defense_score'),
        ('Team2_Offense', 'Team2', 'offense_score'),
        ('Team2_Midfield', 'Team2', 'midfield_score'),
    )
    for column, side, score in features:
        pred_set[column] = pred_set[side].map(scores[score])
    return pred_set
71
+
72
def print_results(dataset, y_pred, matches, proba):
    """Print each predicted fixture and collect the outcomes per group.

    Args:
        dataset: feature frame whose first two columns are the ordered teams
            (position 0 is reported when the prediction is 1, position 1
            otherwise).
        y_pred: one predicted class per row; 2 means draw, 1 means the first
            team of ``dataset`` wins, anything else means the second team wins.
        matches: fixture frame; its first two columns are printed as the
            pairing and its ``group`` column labels the returned rows.
        proba: per-row class probabilities. Index 0 is printed for the second
            team, index 1 for the first team and index 2 (when present) for a
            draw. Rows with fewer than three entries simply skip the lines
            they cannot provide.

    Returns:
        DataFrame with the columns ``group`` and ``result``, one row per
        fixture, on a fresh 0..n-1 index.
    """
    outcomes = []
    for i in range(dataset.shape[0]):
        team1, team2 = dataset.iloc[i, 0], dataset.iloc[i, 1]
        fixture = matches.iloc[i, 0] + " vs. " + matches.iloc[i, 1]

        print()
        if y_pred[i] == 2:
            outcome = 'Draw'
            print(fixture + " => Draw")
        elif y_pred[i] == 1:
            outcome = team1
            print(fixture + " => Winner: " + team1)
        else:
            outcome = team2
            print(fixture + " => Winner: " + team2)
        outcomes.append(outcome)

        # Two-class models give no draw probability; check the length
        # explicitly rather than relying on a catch-all except.
        probs = proba[i]
        if len(probs) > 1:
            print('Probability of ' + team1 + ' winning: ', '%.3f' % (probs[1]))
        if len(probs) > 2:
            print('Probability of Draw: ', '%.3f' % (probs[2]))
        print('Probability of ' + team2 + ' winning: ', '%.3f' % (probs[0]))
        print("")

    results = pd.DataFrame({'result': outcomes})
    # Align by position: the fixture frame may carry a non-default index.
    groups = matches['group'].reset_index(drop=True)
    return pd.concat([groups, results], axis=1)
95
+
96
def winner_to_match(round, prev_match):
    """Replace the placeholder slots of a round with the teams that advanced.

    Args:
        round: fixture frame whose ``country1`` / ``country2`` cells hold keys
            that appear in ``prev_match['group']``. (The parameter name
            shadows the ``round`` builtin; it is kept for existing callers.)
        prev_match: frame with a ``group`` column (the key) and a ``result``
            column (the team to substitute).

    Returns:
        A new frame with ``country1`` and ``country2`` as the first two
        columns, filled from ``prev_match``, followed by the remaining columns
        of ``round``, on a fresh 0..n-1 index. Keys missing from
        ``prev_match`` become NaN. ``round`` itself is not modified.
    """
    advancing = prev_match.set_index('group')['result']

    # drop() returns a new frame, so the inserts below never touch the
    # caller's object and the function can be called on it again.
    resolved = round.drop(['country1', 'country2'], axis=1)
    resolved.insert(0, 'country1', round['country1'].map(advancing))
    resolved.insert(1, 'country2', round['country2'].map(advancing))
    return resolved.reset_index(drop=True)
102
+
103
def prediction_knockout(round):
    """Predict every fixture of one knockout round.

    Builds the feature table for ``round`` with ``matches``, runs the
    knockout-stage model (``xgb_ks_model``) on it and hands predictions and
    probabilities to ``print_results``.

    Returns:
        Whatever ``print_results`` returns for this round.
    """
    features = matches(round)
    model = xgb_ks_model
    predicted = model.predict(features)
    probabilities = model.predict_proba(features)
    return print_results(features, predicted, round, probabilities)
116
+
117
def center_str(round):
    """Pad the first two columns of ``round`` to a fixed width of 13.

    Every cell in column positions 0 and 1 is centred with spaces; when the
    padding is odd the extra space goes on the right. Cells that are already
    13 characters or longer are left unchanged.

    The padding is computed arithmetically, so any name length is accepted
    (a fixed lookup table would fail on very short or very long names).

    The frame is modified in place and also returned.
    """
    width = 13
    for col in range(2):
        for row in range(round.shape[0]):
            name = round.iloc[row, col]
            padding = max(width - len(name), 0)
            left = padding // 2
            round.iloc[row, col] = ' ' * left + name + ' ' * (padding - left)
    return round
128
+
129
def center2(a):
    """Centre the string ``a`` in a field 29 characters wide.

    Spaces are added on both sides; when the padding is odd the extra space
    goes on the right. Strings of 29 characters or more are returned
    unchanged.
    """
    width = 29
    padding = max(width - len(a), 0)
    left = padding // 2
    return ' ' * left + a + ' ' * (padding - left)
139
+
140
+ dataset_groups = matches(group_matches)
141
+ dataset_groups.tail()
142
+ print(dataset_groups)
143
+
144
+ prediction_groups = xgb_gs_model.predict(dataset_groups)
145
+ proba = xgb_gs_model.predict_proba(dataset_groups)
146
+
147
+ # prediction_groups = ada_gs_model.predict(dataset_groups)
148
+ # proba = ada_gs_model.predict_proba(dataset_groups)
149
+
150
+ # prediction_groups = rf_gs_model.predict(dataset_groups)
151
+ # proba = rf_gs_model.predict_proba(dataset_groups)
152
+
153
+ results = print_results(dataset_groups, prediction_groups, group_matches, proba)
154
+
155
+ team_group['points'] = 0
156
+ team_group
157
+ for i in range(results.shape[0]):
158
+ for j in range(team_group.shape[0]):
159
+ if results.iloc[i, 1] == team_group.iloc[j, 0]:
160
+ team_group.iloc[j, 2] += 3
161
+
162
+ print(team_group.groupby(['group', 'team']).mean().astype(int))
163
+
164
+ round_of_16 = team_group[team_group['points'] > 5].reset_index(drop=True)
165
+ round_of_16['group'] = (4 - 1 / 3 * round_of_16.points).astype(int).astype(str) + round_of_16.group
166
+ round_of_16 = round_of_16.rename(columns={"team": "result"})
167
+
168
+ round_16 = winner_to_match(round_16, round_of_16)
169
+ results_round_16 = prediction_knockout(round_16)
170
+
171
+ quarter_finals = winner_to_match(quarter_finals, results_round_16)
172
+ results_quarter_finals = prediction_knockout(quarter_finals)
173
+
174
+ semi_finals = winner_to_match(semi_finals, results_quarter_finals)
175
+ results_finals = prediction_knockout(semi_finals)
176
+
177
+ final = winner_to_match(final, results_finals)
178
+ winner = prediction_knockout(final)
179
+
180
+ second = results_finals[~results_finals.result.isin(winner.result)]
181
+ results_finals_3 = results_quarter_finals[~results_quarter_finals.result.isin(results_finals.result)]
182
+ results_finals_3.iloc[0, 0] = 'z1'
183
+ results_finals_3.iloc[1, 0] = 'z2'
184
+ second_final = winner_to_match(second_final, results_finals_3)
185
+ third = prediction_knockout(second_final)
186
+
187
+ round_16 = center_str(round_16)
188
+ quarter_finals = center_str(quarter_finals)
189
+ semi_finals = center_str(semi_finals)
190
+ final = center_str(final)
191
+ group_matches = center_str(group_matches)
192
+
193
+ # Function to center align text
194
def center(text):
    """Return ``text`` wrapped in a ``<div>`` styled ``text-align: center``."""
    opening = "<div style='text-align: center;'>"
    closing = "</div>"
    return opening + f"{text}" + closing
196
+
197
+ # Function to generate the formatted text
198
def generate_text(round_16, quarter_finals, semi_finals, final):
    """Render the knockout bracket and the podium as one multi-line string.

    Args:
        round_16: frame with 8 rows; columns 0 and 1 hold the two sides.
        quarter_finals: frame with 4 rows, same layout.
        semi_finals: frame with 2 rows, same layout.
        final: frame with 1 row, same layout.

    ``winner``, ``second`` and ``third`` are read from the enclosing scope;
    the value at ``iloc[0, 1]`` of each is used for the medal lines.

    The medal lines are padded with ``center2`` rather than wrapped in HTML,
    because the result is displayed with ``st.text``, which shows plain text.
    This also matches the bracket printed to the console.

    Returns:
        The bracket rows joined by newlines, with no trailing newline.
    """
    # NOTE(review): the spacing inside the connector strings is reproduced
    # exactly as it appears in this copy of the file; confirm it against the
    # intended bracket layout.
    r16, qf, sf = round_16.iloc, quarter_finals.iloc, semi_finals.iloc
    rows = [
        r16[0, 0] + '━━━━┓ ┏━━━━' + r16[4, 0],
        ' ┃ ┃',
        ' ┃━━━━' + qf[0, 0] + '━━━━┓ ┏━━━━' + qf[2, 0] + '━━━━┃',
        ' ┃ ┃ ┃ ┃',
        r16[0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' + r16[4, 1],
        ' ┃━━━━' + sf[0, 0] + '━━━━┓ ┏━━━━' + sf[1, 0] + '━━━━┃',
        r16[1, 0] + '━━━━┓ ┃ ┃ ┃ ┃ ┏━━━━' + r16[5, 0],
        ' ┃ ┃ ┃ ┃ ┃ ┃',
        ' ┃━━━━' + qf[0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' + qf[2, 1] + '━━━━┃',
        ' ┃ ┃ ┃ ┃',
        r16[1, 1] + '━━━━┛ ┃ ┃ ┗━━━━' + r16[5, 1],
        ' ┃━━━━' + final.iloc[0, 0] + 'vs.' + final.iloc[0, 1] + '━━━━┃',
        r16[2, 0] + '━━━━┓ ┃ ┃ ┏━━━━' + r16[6, 0],
        ' ┃ ┃ ┃ ┃',
        ' ┃━━━━' + qf[1, 0] + '━━━━┓ ┃ ┃ ┏━━━━' + qf[3, 0] + '━━━━┃',
        ' ┃ ┃ ┃ ┃ ┃ ┃',
        r16[2, 1] + '━━━━┛ ┃ ┃ ┃ ┃ ┗━━━━' + r16[6, 1],
        ' ┃━━━━' + sf[0, 1] + '━━━━┛ ┗━━━━' + sf[1, 1] + '━━━━┃',
        r16[3, 0] + '━━━━┓ ┃ ┃ ┏━━━━' + r16[7, 0],
        ' ┃ ┃ ┃ ┃',
        ' ┃━━━━' + qf[1, 1] + '━━━━┛ ┗━━━━' + qf[3, 1] + '━━━━┃',
        ' ┃ ┃',
        r16[3, 1] + '━━━━┛ ┗━━━━' + r16[7, 1],
        # Podium: gold, silver, bronze.
        " " + center2("\U0001F947" + winner.iloc[0, 1]),
        " " + center2("\U0001F948" + second.iloc[0, 1]),
        " " + center2("\U0001F949" + third.iloc[0, 1]),
    ]
    return '\n'.join(rows)
258
+
259
+ # Generate the formatted text
260
+ formatted_text = generate_text(round_16, quarter_finals, semi_finals, final)
261
+
262
+ # Define the round_16, quarter_finals, semi_finals, final DataFrames
263
+ # Replace the DataFrame creation with your actual data
264
+
265
+ # Display the formatted text
266
+ st.text(formatted_text)
267
+ # st.markdown(formatted_text)
268
+
269
+ print(round_16.iloc[
270
+ 0, 0] + '━━━━┓ ┏━━━━' +
271
+ round_16.iloc[4, 0])
272
+ print(
273
+ ' ┃ ┃')
274
+ print(' ┃━━━━' + quarter_finals.iloc[
275
+ 0, 0] + '━━━━┓ ┏━━━━' +
276
+ quarter_finals.iloc[2, 0] + '━━━━┃')
277
+ print(
278
+ ' ┃ ┃ ┃ ┃')
279
+ print(round_16.iloc[
280
+ 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
281
+ round_16.iloc[4, 1])
282
+ print(' ┃━━━━' + semi_finals.iloc[
283
+ 0, 0] + '━━━━┓ ┏━━━━' + semi_finals.iloc[1, 0] + '━━━━┃')
284
+ print(round_16.iloc[
285
+ 1, 0] + '━━━━┓ ┃ ┃ ┃ ┃ ┏━━━━' +
286
+ round_16.iloc[5, 0])
287
+ print(
288
+ ' ┃ ┃ ┃ ┃ ┃ ┃')
289
+ print(' ┃━━━━' + quarter_finals.iloc[
290
+ 0, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
291
+ quarter_finals.iloc[2, 1] + '━━━━┃')
292
+ print(
293
+ ' ┃ ┃ ┃ ┃')
294
+ print(round_16.iloc[
295
+ 1, 1] + '━━━━┛ ┃ ┃ ┗━━━━' +
296
+ round_16.iloc[5, 1])
297
+ print(' ┃━━━━' + final.iloc[0, 0] + 'vs.' + final.iloc[
298
+ 0, 1] + '━━━━┃')
299
+ print(round_16.iloc[
300
+ 2, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
301
+ round_16.iloc[6, 0])
302
+ print(
303
+ ' ┃ ┃ ┃ ┃')
304
+ print(' ┃━━━━' + quarter_finals.iloc[
305
+ 1, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
306
+ quarter_finals.iloc[3, 0] + '━━━━┃')
307
+ print(
308
+ ' ┃ ┃ ┃ ┃ ┃ ┃')
309
+ print(round_16.iloc[
310
+ 2, 1] + '━━━━┛ ┃ ┃ ┃ ┃ ┗━━━━' +
311
+ round_16.iloc[6, 1])
312
+ print(' ┃━━━━' + semi_finals.iloc[
313
+ 0, 1] + '━━━━┛ ┗━━━━' + semi_finals.iloc[1, 1] + '━━━━┃')
314
+ print(round_16.iloc[
315
+ 3, 0] + '━━━━┓ ┃ ┃ ┏━━━━' +
316
+ round_16.iloc[7, 0])
317
+ print(
318
+ ' ┃ ┃ ┃ ┃')
319
+ print(' ┃━━━━' + quarter_finals.iloc[
320
+ 1, 1] + '━━━━┛ ┗━━━━' +
321
+ quarter_finals.iloc[3, 1] + '━━━━┃')
322
+ print(
323
+ ' ┃ ┃')
324
+ print(round_16.iloc[
325
+ 3, 1] + '━━━━┛ ┗━━━━' +
326
+ round_16.iloc[7, 1])
327
+ print(
328
+ " " + center2("\U0001F947" + winner.iloc[0, 1]))
329
+ print(
330
+ " " + center2("\U0001F948" + second.iloc[0, 1]))
331
+ print(
332
+ " " + center2("\U0001F949" + third.iloc[0, 1]))
333
 
334
 
335
  if __name__ == "__main__":