Niv Sardi
get logos in main.py
74a29fd
raw
history blame
1.77 kB
import csv
import requests
import shutil
from bs4 import BeautifulSoup
from progress.bar import ChargingBar
import web
from entity import Entity
from common import selectors, defaults, mkdir
# Page on the BCRA site listing financial entities.  A GET returns the page
# holding the entity <select>; a POST with a `bco` form field returns the
# detail page for that entity.
URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'


def _fetch_logo(soup, bco, name):
    """Locate the entity logo in *soup*, hand it to ``web.get_img_logo``.

    The relative ``src`` is rewritten into an absolute URL on
    ``https://www.bcra.gob.ar/`` and passed, together with the target path
    ``<LOGOS_DATA_PATH>/<bco>.0.png``, to ``web.get_img_logo``.

    Returns the absolute logo URL, or ``None`` (after printing a
    diagnostic that includes *name*) when no usable logo element exists.
    """
    try:
        # select_one() yields None when nothing matches -> AttributeError;
        # an element without a `src` attribute -> KeyError.
        img = soup.select_one(selectors.logosbancos).attrs['src']
        img = img.replace('../', 'https://www.bcra.gob.ar/')
        fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
        web.get_img_logo(img, fn)
    except (AttributeError, KeyError) as err:
        print('img', name, err)
        return None
    return img


def _entity_url(soup):
    """Return the entity's website URL found in *soup*, or ``None``.

    The http anchor is preferred.  When it is absent, the URL is derived
    from the mailto anchor as ``http://`` plus whatever follows the ``@``.
    """
    link = soup.select_one(selectors.entity_http)
    if link is not None and 'href' in link.attrs:
        return link.attrs['href']

    mail = soup.select_one(selectors.entity_mailto)
    try:
        return 'http://' + mail.attrs['href'].split('@')[1]
    except (AttributeError, KeyError, IndexError, TypeError):
        # AttributeError: no mailto anchor either (select_one gave None);
        # KeyError: anchor without href; IndexError: href without '@'.
        # Report and carry on so one odd page does not abort the scrape.
        print('ERROR', mail)
        return None


page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
options = soup.find(class_='form-control').find_all('option')
# The first <option> is not processed (presumably the placeholder entry of
# the select -- confirm against the live page).
entries = options[1:]

mkdir.make_dirs([defaults.DATA_PATH])

# Write to a temporary file and move it over the real CSV only once the
# whole scrape has finished, so an aborted run leaves the old CSV intact.
with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # Size the bar by the entries actually processed so it reaches 100%.
    bar = ChargingBar('Processing', max=len(entries))
    for i, o in enumerate(entries):
        name, bco = o.text, o.attrs['value']
        page = requests.post(URL, data={'bco': bco})
        soup = BeautifulSoup(page.content, 'html.parser')

        img = _fetch_logo(soup, bco, name)
        a = _entity_url(soup)

        # Missing values are stored as the string 'None', as before.
        e = Entity(name, id=i, bco=bco, logo=str(img), url=str(a))
        writer.writerow(e.to_row())
        bar.next()
    bar.finish()

shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
print('scrape finished')