File size: 1,693 Bytes
485f76b
f1ab0d5
485f76b
880b04e
 
485f76b
 
 
 
 
 
f1ab0d5
 
880b04e
485f76b
 
 
 
 
880b04e
485f76b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
880b04e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv
import pathlib
import requests
import shutil

from bs4 import BeautifulSoup
from progress.bar import ChargingBar

from entity import Entity
from common import selectors

# Scrape the BCRA financial-entities page: for every entity listed in the
# page's <select>, fetch its detail page, extract logo and website, and write
# one CSV row per entity. The CSV is written to a temporary file and only
# moved over DATA_FILE once the whole run has succeeded.

# Make sure the logo directory exists before any scraping starts.
pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)

DATA_FILE = './data/entidades.csv'
URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"
# Seconds to wait on the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def _extract_logo(soup, entity_name):
    """Return the absolute logo URL found in *soup*, or None when absent.

    A missing logo element (or one without a ``src`` attribute) is reported
    on stdout and treated as "no logo" rather than aborting the run.
    """
    tag = soup.select_one(selectors.logosbancos)
    src = tag.attrs.get('src') if tag is not None else None
    if src is None:
        print('img', entity_name, 'logo not found')
        return None
    # The page uses relative "../" paths; rewrite them against the site root.
    return src.replace("../", "https://www.bcra.gob.ar/")


def _extract_url(soup, entity_name):
    """Return the entity's website URL found in *soup*, or None when absent.

    The http link is preferred. When it is missing, the domain part of the
    contact e-mail address is used to build an ``http://`` URL instead.
    """
    link = soup.select_one(selectors.entity_http)
    href = link.attrs.get('href') if link is not None else None
    if href is not None:
        return href

    mail_link = soup.select_one(selectors.entity_mailto)
    address = mail_link.attrs.get('href') if mail_link is not None else None
    if address is None or '@' not in address:
        # Neither a website nor a usable e-mail address: report and carry on.
        print('ERROR', entity_name, mail_link)
        return None
    return 'http://' + address.split('@')[1]


page = requests.get(URL, timeout=REQUEST_TIMEOUT)
# Fail with a clear HTTP error instead of a confusing parse error further down.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")

options = soup.find(class_="form-control").find_all('option')
# The first <option> is skipped (presumably a placeholder entry -- confirm).
entries = options[1:]

with open(f"{DATA_FILE}.tmp", 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # Size the bar by the entries actually processed so it reaches 100%.
    bar = ChargingBar('Processing', max=len(entries))
    for o in entries:
        e = Entity(
            name=o.text,
            bco=o.attrs['value'],
        )
        # The detail page for one entity is obtained by posting its code.
        page = requests.post(URL, data={'bco': e.bco}, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(page.content, "html.parser")

        e.logo = _extract_logo(soup, e.name)
        e.url = _extract_url(soup, e.name)

        writer.writerow(e.to_row())
        bar.next()
    bar.finish()

# Replace the previous data file only after a complete, successful scrape.
shutil.move(f"{DATA_FILE}.tmp", DATA_FILE)
print("scrape finished")