Spaces:
Configuration error
Configuration error
Upload Scheduler_Covid.py
Browse files- Scheduler/Scheduler_Covid.py +116 -3
Scheduler/Scheduler_Covid.py
CHANGED
@@ -1,3 +1,116 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
Created on Thu Sep 23 09:27:21 2021
|
5 |
+
|
6 |
+
@author: benjaminull
|
7 |
+
"""
|
8 |
+
|
9 |
+
import pandas as pd
|
10 |
+
from datetime import datetime
|
11 |
+
import numpy as np
|
12 |
+
from datetime import timedelta
|
13 |
+
import requests
|
14 |
+
import io
|
15 |
+
import openpyxl
|
16 |
+
from pandas import ExcelWriter
|
17 |
+
import requests
|
18 |
+
from bs4 import BeautifulSoup
|
19 |
+
from bs4 import BeautifulSoup
|
20 |
+
import pandas as pd
|
21 |
+
from selenium import webdriver
|
22 |
+
import requests
|
23 |
+
|
24 |
+
|
25 |
+
|
26 |
+
def GenerarExcel(ruta_guardado, Pestañas, Data):
    """Write one table per sheet into a single Excel workbook.

    Parameters
    ----------
    ruta_guardado : str or path-like
        Destination of the workbook. It is opened through
        ``pandas.ExcelWriter`` in its default (write) mode.
    Pestañas : sequence of str
        Sheet names, in the order the sheets are written.
    Data : sequence
        Objects exposing ``to_excel(writer, sheet_name=...)`` (DataFrames),
        matched to ``Pestañas`` by position. Entries beyond
        ``len(Pestañas)`` are ignored.

    Raises
    ------
    IndexError
        If ``Data`` is a list/tuple shorter than ``Pestañas``.
    """
    # The context manager saves and closes the workbook even when a
    # ``to_excel`` call fails; the explicit ``writer.save()`` it replaces no
    # longer exists in pandas >= 2.0. No placeholder workbook is created
    # beforehand because the writer produces the whole file on its own.
    with ExcelWriter(ruta_guardado) as writer:
        for i, pestaña in enumerate(Pestañas):
            print(Data[i])
            Data[i].to_excel(writer, sheet_name=pestaña)
|
38 |
+
|
39 |
+
|
40 |
+
def run_data_covid(
        chrome_binary=r'C:/Program Files/Google/Chrome/Application/chrome.exe',
        chromedriver_path=r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'):
    """Download Google and Apple mobility data and export regional averages.

    Steps:

    1. Render ``https://covid19.apple.com/mobility`` with Chrome and take the
       ``href`` of the second ``<a>`` tag as the Apple CSV download link.
    2. Read the Google Global Mobility Report CSV in chunks (at most 65
       chunks of 150000 rows) and average it per (country_region, date).
    3. Download the Apple CSV, keep the ``country/region`` rows and reshape
       it to the same (country, date) index, one column per
       transportation type.
    4. Join both sources, add a ``Mobility Index`` column (row-wise mean of
       the eight mobility columns) and, for every column, build a
       date x country table smoothed with a 7-row rolling mean
       (``min_periods=3``) plus one averaged column per region group.
    5. Save the tables to ``Scheduler/Movilidad_desagrada.xlsx`` (one sheet
       per column) and to ``Scheduler/dict_movilidad.npy``.

    Parameters
    ----------
    chrome_binary : str
        Path to the Chrome executable.
    chromedriver_path : str
        Path to the chromedriver executable.

    Raises
    ------
    IndexError
        If the rendered Apple page has fewer than two ``<a>`` tags.
    ValueError
        If the second ``<a>`` tag carries no ``href``.
    requests.HTTPError
        If the Apple CSV download returns an error status.
    """
    options = webdriver.ChromeOptions()
    options.binary_location = chrome_binary
    # NOTE(review): ``executable_path`` is kept for compatibility with the
    # Selenium version this script was written for; newer Selenium releases
    # expect a ``Service`` object instead -- confirm the installed version.
    browser = webdriver.Chrome(executable_path=chromedriver_path,
                               options=options)
    try:
        # Go to the target page and grab the rendered HTML.
        browser.get('https://covid19.apple.com/mobility')
        html = browser.page_source
    finally:
        # Always release the Chrome process, even if the page load fails.
        browser.quit()

    anchors = BeautifulSoup(html, "html.parser").find_all("a")
    if len(anchors) < 2:
        raise IndexError(
            "expected at least two <a> tags on the Apple mobility page, "
            "found %d" % len(anchors))
    # Read the attribute directly instead of splitting the tag's string on
    # quotes, which depends on the order the attributes are serialised in.
    link = anchors[1].get("href")
    if not link:
        raise ValueError("second <a> tag on the Apple mobility page has no href")

    # ---- Google mobility report -------------------------------------
    max_chunks = 65
    chunks = []
    with pd.read_csv(
            'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
            usecols=['country_region', 'date',
                     'retail_and_recreation_percent_change_from_baseline',
                     'grocery_and_pharmacy_percent_change_from_baseline',
                     'parks_percent_change_from_baseline',
                     'transit_stations_percent_change_from_baseline',
                     'workplaces_percent_change_from_baseline'],
            dtype={"workplaces_percent_change_from_baseline":
                   "float32",
                   "parks_percent_change_from_baseline": "float32",
                   "retail_and_recreation_percent_change_from_baseline":
                   "float32",
                   'transit_stations_percent_change_from_baseline':
                   "float32",
                   },
            chunksize=150000) as reader:
        for n_read, chunk in enumerate(reader):
            if n_read == max_chunks:
                break
            chunks.append(chunk)
    # Concatenate once at the end: growing the frame inside the loop copies
    # all previously read rows on every iteration.
    data_agg = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
    data_agg.info(memory_usage="deep")
    data_agg.set_index(['country_region', 'date'], inplace=True)
    # Collapse every row sharing a (country, date) pair into its mean.
    data_agg = data_agg.groupby(level=[0, 1]).mean()
    data_agg.columns = data_agg.columns.str.replace(
        '_percent_change_from_baseline', '_google', regex=False)

    # ---- Apple mobility report --------------------------------------
    response = requests.get(link, timeout=60)
    response.raise_for_status()
    # ``on_bad_lines='warn'`` skips malformed rows and warns about them; it
    # replaces ``error_bad_lines=False``, which pandas >= 2.0 rejects.
    data_app = pd.read_csv(io.StringIO(response.content.decode('utf-8')),
                           on_bad_lines='warn')
    data_app.info(memory_usage="deep")
    # Keep only country-level data.
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    # Remaining columns (presumably one per date) become an index level, then
    # transportation types are pivoted to columns. Subtracting 100
    # presumably re-expresses Apple's index as a change from baseline so it
    # is comparable with the Google columns -- TODO confirm.
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    data_app = data_app.unstack(level='transportation_type') - 100
    data_app.index.names = data_agg.index.names
    data_agg = data_agg.join(data_app)
    print(data_app.columns)

    mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(axis=1)

    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia',
                  'Mexico', 'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
    }

    data_dict = {}
    for col in data_agg.columns:
        # Rows = dates, columns = countries, smoothed over a 7-row window
        # that needs at least 3 observations.
        df = data_agg[col].unstack().T.rolling(7, min_periods=3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(axis=1)
        data_dict[col] = df

    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx", list(data_dict.keys()),
                 list(data_dict.values()))
    # NOTE: saving a dict makes NumPy pickle it, so readers must call
    # ``np.load(..., allow_pickle=True)``.
    np.save('Scheduler/dict_movilidad.npy', data_dict)
|