Niv Sardi committed on
Commit
880b04e
1 Parent(s): 971aff7

crawler: only move to csv file once fully written

Browse files

Signed-off-by: Niv Sardi <xaiki@evilgiggle.com>

Files changed (1) hide show
  1. crawler/main.py +7 -1
crawler/main.py CHANGED
@@ -1,6 +1,8 @@
1
  import csv
2
  import pathlib
3
  import requests
 
 
4
  from bs4 import BeautifulSoup
5
  from progress.bar import ChargingBar
6
 
@@ -9,12 +11,13 @@ from common import selectors
9
 
10
  pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)
11
 
 
12
  URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"
13
  page = requests.get(URL)
14
  soup = BeautifulSoup(page.content, "html.parser")
15
 
16
  options = soup.find(class_="form-control").find_all('option')
17
- with open('./data/entidades.csv', 'w', newline='') as csvfile:
18
  writer = csv.writer(csvfile)
19
  writer.writerow(Entity.row_names())
20
 
@@ -49,3 +52,6 @@ with open('./data/entidades.csv', 'w', newline='') as csvfile:
49
  writer.writerow(e.to_row())
50
  bar.next()
51
  bar.finish()
 
 
 
 
1
  import csv
2
  import pathlib
3
  import requests
4
+ import shutil
5
+
6
  from bs4 import BeautifulSoup
7
  from progress.bar import ChargingBar
8
 
 
11
 
12
  pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)
13
 
14
+ DATA_FILE = './data/entidades.csv'
15
  URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"
16
  page = requests.get(URL)
17
  soup = BeautifulSoup(page.content, "html.parser")
18
 
19
  options = soup.find(class_="form-control").find_all('option')
20
+ with open(f"{DATA_FILE}.tmp", 'w', newline='') as csvfile:
21
  writer = csv.writer(csvfile)
22
  writer.writerow(Entity.row_names())
23
 
 
52
  writer.writerow(e.to_row())
53
  bar.next()
54
  bar.finish()
55
+
56
+ shutil.move(f"{DATA_FILE}.tmp", DATA_FILE)
57
+ print("scrape finished")