# Niv Sardi
# reorder, python in python deno in deno
# 63f9146
import csv
import pathlib
import requests
import shutil
from bs4 import BeautifulSoup
from progress.bar import ChargingBar
from entity import Entity
from common import selectors
# Create the logos directory under Entity._DATA_PATH if it does not exist yet.
pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)

DATA_FILE = './data/entidades.csv'
URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"

# Fetch the index page. The timeout keeps a stalled server from hanging the
# script forever, and raise_for_status() stops an HTTP error page from being
# parsed as if it were the real index.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

# The <option> elements of the "form-control" element are what the scrape
# loop below turns into entities. Fail with a clear message if the page
# layout changed, instead of an AttributeError on None.
entity_select = soup.find(class_="form-control")
if entity_select is None:
    raise RuntimeError(f"no 'form-control' element found at {URL}")
options = entity_select.find_all('option')
# Scrape the detail page of every entity and write one CSV row per entity.
# Rows are written to "<DATA_FILE>.tmp", which replaces DATA_FILE only after
# the loop has completed, so a failed run leaves the previous file untouched.

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the run


def _logo_url(soup, entity_name):
    """Return the logo URL found in *soup*, or None when there is none.

    The ``src`` of the element matched by ``selectors.logosbancos`` has every
    "../" replaced with the site root so the result is an absolute URL.
    """
    try:
        src = soup.select_one(selectors.logosbancos).attrs['src']
    except (AttributeError, KeyError) as err:
        # AttributeError: nothing matched the selector (select_one gave None).
        # KeyError: the matched element has no src attribute.
        print('img', entity_name, err)
        return None
    return src.replace("../", "https://www.bcra.gob.ar/")


def _entity_url(soup, entity_name):
    """Return the entity's website URL, or None when none can be determined.

    The ``href`` of the element matched by ``selectors.entity_http`` is used
    when present.  Otherwise the URL is built from the part after "@" in the
    ``href`` of the element matched by ``selectors.entity_mailto``.
    """
    try:
        return soup.select_one(selectors.entity_http).attrs['href']
    except (AttributeError, KeyError):
        pass  # no usable web link; fall back to the e-mail link below

    try:
        mailto = soup.select_one(selectors.entity_mailto).attrs['href']
        return 'http://' + mailto.split('@')[1]
    except (AttributeError, KeyError, IndexError) as err:
        # AttributeError: no element matched; KeyError: no href attribute;
        # IndexError: href without "@".  Record "no URL" rather than abort the
        # whole scrape or write a Tag object into the CSV.
        print('ERROR', entity_name, err)
        return None


# The first <option> is skipped (presumably a placeholder entry - confirm).
entries = options[1:]

with open(f"{DATA_FILE}.tmp", 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # Size the bar from the entries actually processed so it reaches 100%.
    bar = ChargingBar('Processing', max=len(entries))
    for o in entries:
        e = Entity(
            name=o.text,
            bco=o.attrs['value'],
        )
        page = requests.post(URL, data={'bco': e.bco}, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(page.content, "html.parser")

        e.logo = _logo_url(soup, e.name)
        e.url = _entity_url(soup, e.name)

        writer.writerow(e.to_row())
        bar.next()
    bar.finish()

# The temporary file is closed at this point; move it over the real data file.
shutil.move(f"{DATA_FILE}.tmp", DATA_FILE)
print("scrape finished")