"""Scrape the BCRA financial entities page into ./data/entidades.csv."""
import csv
import pathlib
import shutil

import requests
from bs4 import BeautifulSoup
from progress.bar import ChargingBar

from common import selectors
from entity import Entity

# Make sure the logos directory exists before any entity tries to write into it.
pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)

DATA_FILE = './data/entidades.csv'
URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"

# The landing page holds a <select> with one <option> per financial entity.
page = requests.get(URL)
soup = BeautifulSoup(page.content, "html.parser")
options = soup.find(class_="form-control").find_all('option')

# Write to a temporary file first and move it into place only when the scrape
# finishes, so a failed run never leaves a half-written CSV behind.
with open(f"{DATA_FILE}.tmp", 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    bar = ChargingBar('Processing', max=len(options))
    # The first option is the placeholder entry, not an entity; skip it.
    for o in options[1:]:
        e = Entity(
            name=o.text,
            bco=o.attrs['value'],
        )

        # POSTing the entity code returns that entity's detail page.
        page = requests.post(URL, data={'bco': e.bco})
        soup = BeautifulSoup(page.content, "html.parser")

        # Logo: select_one() returns None when the selector matches nothing,
        # so the .attrs lookup raises AttributeError for entities without a logo.
        try:
            img = soup.select_one(selectors.logosbancos).attrs['src']
            img = img.replace("../", "https://www.bcra.gob.ar/")
        except AttributeError as err:
            print('img', e.name, err)
            img = None
        e.logo = img

        # Website: prefer an http(s) link; fall back to deriving a URL from the
        # domain of a mailto: address when no link is published.
        a = soup.select_one(selectors.entity_http)
        try:
            a = a.attrs['href']
        except AttributeError:
            a = soup.select_one(selectors.entity_mailto)
            try:
                a = 'http://' + a.attrs['href'].split('@')[1]
            except (AttributeError, IndexError):
                # Neither a link nor a usable mail address was found.
                print('ERROR', a)
                a = None
        e.url = a

        writer.writerow(e.to_row())
        bar.next()
    bar.finish()

shutil.move(f"{DATA_FILE}.tmp", DATA_FILE)
print("scrape finished")