File size: 1,960 Bytes
e919aa3
 
 
 
1a24a58
e919aa3
 
 
1a24a58
e919aa3
 
 
 
 
 
 
 
 
 
 
 
1a24a58
 
 
 
 
 
 
e919aa3
 
 
 
c9f2bb9
1a24a58
 
 
 
 
 
 
 
e919aa3
1a24a58
 
 
 
 
 
 
 
e919aa3
 
 
 
1a24a58
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
import csv
import requests
import shutil
import re

from bs4 import BeautifulSoup
from progress.bar import ChargingBar
import concurrent.futures

import web
from entity import Entity
from common import selectors, defaults, mkdir

# Page that is fetched here with GET and, further down, queried once per
# entity with a POST carrying the entity's `bco` value.
URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops early on an HTTP error instead of parsing an
# error page as if it were the listing.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# The <option> elements are read from the first element carrying the
# 'form-control' class (presumably the entity dropdown -- confirm against the
# live page). Fail with a clear message if the page layout changed.
_entity_select = soup.find(class_='form-control')
if _entity_select is None:
    raise RuntimeError(f"no element with class 'form-control' found at {URL}")
options = _entity_select.find_all('option')

# Make sure the output directories exist before anything is written.
mkdir.make_dirs([defaults.DATA_PATH, defaults.LOGOS_DATA_PATH])

def get_links(soup):
    """Return the first absolute link found in the page's content sections.

    Every ``<a>`` nested under an element matching ``.post-pagina-interior``
    is inspected in the order ``select`` yields them; the ``href`` of the
    first anchor whose value starts with ``http`` is returned.

    Returns ``None`` when no anchor qualifies.
    """
    anchor_attrs = (
        anchor.attrs
        for section in soup.select('.post-pagina-interior')
        for anchor in section.select('a')
    )
    absolute_hrefs = (
        attrs['href']
        for attrs in anchor_attrs
        # Anchors without an href, or with a relative one, are skipped.
        if 'href' in attrs and attrs['href'].startswith('http')
    )
    return next(absolute_hrefs, None)


# Scrape every entity concurrently and dump the rows into a temporary CSV;
# the file is only moved to its final path after this block completes.
with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # The first <option> is skipped (presumably the dropdown's placeholder
    # entry -- confirm against the live page). The progress bar is sized to
    # the entries actually processed so it can reach 100%.
    entries = options[1:]
    bar = ChargingBar('get entities', max=len(entries))

    def get_bco(o, i):
        """Fetch one entity's detail page and return its CSV row.

        Args:
            o: the <option> element; its text is the entity name and its
                ``value`` attribute is the ``bco`` code sent in the POST.
            i: numeric id assigned to the entity.

        Returns:
            The row produced by ``Entity.to_row()``.

        Raises:
            Whatever ``requests.post`` raises on network failure or timeout;
            the caller reports it and carries on.
        """
        name, bco = o.text, o.attrs['value']

        # Without a timeout a single stalled connection would block a worker
        # (and therefore the whole script) indefinitely.
        page = requests.post(URL, data={'bco': bco}, stream=False, timeout=30)
        soup = BeautifulSoup(page.content, 'html.parser')
        img = f'https://www.bcra.gob.ar/Imagenes/logosbancos/{bco}.jpg'
        # NOTE: when get_links() finds nothing it returns None, so str()
        # stores the literal text 'None' in the url column.
        e = Entity(name, id=i, bco=bco, logo=str(img), url=str(get_links(soup)))
        return e.to_row()

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        futures = {executor.submit(get_bco, o, i): o for i, o in enumerate(entries)}
        for f in concurrent.futures.as_completed(futures):
            o = futures[f]
            try:
                # Rows are written here, in the main thread, so the shared
                # csv.writer is never used by two threads at once.
                writer.writerow(f.result())
            except Exception as err:
                # Best effort: report the failing entity and keep going.
                print(f'({o}) generated an exception: {err}')
            # Advance once per finished entity, successful or not.
            bar.next()
    bar.finish()

# Promote the finished temporary CSV to its final location, then report.
tmp_csv_path = f'{defaults.MAIN_CSV_PATH}.tmp'
shutil.move(tmp_csv_path, defaults.MAIN_CSV_PATH)

# The count is the number of options that were attempted (all but the first),
# not the number of rows that were written successfully.
entity_count = len(options[1:])
print(f'scrape finished, found {entity_count} entities, dumped to {defaults.MAIN_CSV_PATH}')