File size: 2,096 Bytes
e919aa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9f2bb9
e919aa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
import csv
import requests
import shutil

from bs4 import BeautifulSoup
from progress.bar import ChargingBar

import web
from entity import Entity
from common import selectors, defaults, mkdir

URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'
# Seconds to wait on each HTTP call so a stalled connection cannot hang the run.
REQUEST_TIMEOUT = 30


def _fetch_logo(soup, name, bco):
    """Locate the logo in an entity page, hand it to web.get_img_logo, return its URL.

    The target file name is ``<LOGOS_DATA_PATH>/<bco>.0.png``.  Returns None
    (after printing a notice with the entity *name*) when an AttributeError is
    raised, e.g. because the page has no element matching the logo selector.
    """
    try:
        img = soup.select_one(selectors.logosbancos).attrs['src']
        # The page uses a relative '../' path; rewrite it as an absolute URL.
        img = img.replace('../', 'https://www.bcra.gob.ar/')
        fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
        web.get_img_logo(img, fn)
    except AttributeError as err:
        print('img', name, err)
        return None
    return img


def _find_url(soup):
    """Return the entity's website URL, or None when none can be derived.

    The http link is preferred.  Failing that, the domain part of the mailto
    link is turned into ``http://<domain>``.
    """
    link = soup.select_one(selectors.entity_http)
    try:
        return link.attrs['href']
    except AttributeError:
        # select_one() returned None: fall through to the mailto link.
        pass

    mail = soup.select_one(selectors.entity_mailto)
    try:
        return 'http://' + mail.attrs['href'].split('@')[1]
    except (AttributeError, KeyError, IndexError, TypeError):
        # A missing element raises AttributeError (not TypeError) and an href
        # without '@' raises IndexError; report both here so the entity row
        # is still written instead of being dropped by the caller.
        print('ERROR', mail)
        return None


def _write_entity(option, index, writer):
    """Scrape one entity page for *option* and append its row via *writer*.

    *option* is an <option> element: its text is the entity name and its
    ``value`` attribute is the ``bco`` code posted back to URL.  *index* is
    stored as the entity id.  Exceptions (network errors included) propagate
    to the caller.
    """
    name, bco = option.text, option.attrs['value']
    page = requests.post(URL, data={'bco': bco}, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.content, 'html.parser')

    img = _fetch_logo(soup, name, bco)
    url = _find_url(soup)

    # str() keeps the existing CSV convention of writing 'None' for missing
    # values.
    entity = Entity(name, id=index, bco=bco, logo=str(img), url=str(url))
    writer.writerow(entity.to_row())


page = requests.get(URL, timeout=REQUEST_TIMEOUT)
# Fail with a clear HTTP error rather than an AttributeError further down.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

options = soup.find(class_='form-control').find_all('option')
mkdir.make_dirs([defaults.DATA_PATH, defaults.LOGOS_DATA_PATH])

# The first <option> is skipped -- presumably a placeholder entry; TODO confirm.
entries = options[1:]

i = 0
# Write to a temporary file first; it is moved over the real CSV only once
# the whole scrape has completed.
with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # Size the bar by the entries actually iterated so it can reach 100%.
    bar = ChargingBar('get entities', max=len(entries))
    for o in entries:
        try:
            _write_entity(o, i, writer)
        except Exception as err:
            # Best effort: one failing entity must not abort the whole run.
            print(f'Error processing: {err}')

        i += 1
        bar.next()
    bar.finish()

shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
print(f'scrape finished, found {i} entities, dumped to {defaults.MAIN_CSV_PATH}')