#!/usr/bin/env python3
"""Download an entity's web page, TLS certificate and logo images."""
import ssl
import shutil
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

from entity import Entity
from common import selectors, defaults, mkdir

# Seconds to wait for a server before giving up; without a timeout
# ``requests`` may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30


def get_page(e: Entity):
    """Fetch ``e.url`` and return the ``requests`` response.

    If the request fails and the URL uses plain ``http://``, the request is
    retried once over ``https://``. For any other URL the original exception
    is re-raised unchanged.
    """
    try:
        return requests.get(e.url, timeout=REQUEST_TIMEOUT)
    except Exception:
        # Broad on purpose: any failure of the plain request triggers the
        # HTTPS retry, but only when there is a scheme to upgrade.
        if not e.url.startswith("http://"):
            raise
        # Rewrite only the scheme; a blanket str.replace would also touch
        # "http" occurring later in the URL.
        secure_url = "https://" + e.url[len("http://"):]
        return requests.get(secure_url, timeout=REQUEST_TIMEOUT)


def get_cert(e: Entity):
    """Save the certificate served on port 443 of the host in ``e.url``.

    The certificate is written to ``{defaults.CERTS_PATH}/{e.bco}.cert``.
    On any failure the error text is written to
    ``{defaults.DATA_PATH}/{e.bco}.error.log`` instead.

    Returns:
        The certificate file path. It is returned even when retrieval
        failed, in which case the file may not exist.
    """
    mkdir.make_dirs([defaults.CERTS_PATH])
    fn = f"{defaults.CERTS_PATH}/{e.bco}.cert"
    try:
        # ``hostname`` drops any port or credentials embedded in the URL,
        # which must not be passed along with the fixed port 443.
        host = urlsplit(e.url).hostname
        if not host:
            raise ValueError(f"no host name in URL: {e.url!r}")
        cert = ssl.get_server_certificate((host, 443), ca_certs=None)
        with open(fn, 'w') as f:
            f.write(cert)
    except Exception as err:
        # Best effort: record the failure and let the caller carry on.
        with open(f"{defaults.DATA_PATH}/{e.bco}.error.log", 'w') as f:
            f.write(str(err))
    return fn


def get_img_logo(src: str, fn):
    """Download the resource at ``src`` into the file ``fn`` and return ``fn``.

    The HTTP status is not checked, so an error page is saved as-is.
    """
    with requests.get(src, stream=True, timeout=REQUEST_TIMEOUT) as res:
        # The raw stream is not decoded by default; without this a gzip or
        # deflate encoded response would be stored still compressed.
        res.raw.decode_content = True
        with open(fn, "wb") as f:
            shutil.copyfileobj(res.raw, f)
    return fn


def _logo_extension(src: str) -> str:
    """Return the text after the last dot of the URL path of ``src``.

    Query string and fragment are ignored so they cannot leak into file names.
    """
    return urlsplit(src).path.split('.')[-1].split('/')[-1]


def get_logos(e: Entity):
    """Download every logo image found on the entity's page.

    Elements are collected with the CSS selectors ``selectors.img_logo``,
    ``selectors.id_logo`` and ``selectors.cls_logo``. Each one that has a
    usable ``src`` attribute is saved as
    ``{defaults.LOGOS_DATA_PATH}/{e.bco}.{n}.{ext}``, with ``n`` counting the
    saved files from 1.

    Returns:
        The list of saved file paths, in download order.
    """
    page = get_page(e)
    soup = BeautifulSoup(page.content, "html.parser")
    logos = soup.select(selectors.img_logo)
    logos.extend(soup.select(selectors.id_logo))
    logos.extend(soup.select(selectors.cls_logo))
    mkdir.make_dirs([defaults.LOGOS_DATA_PATH])

    saved = []
    for logo in logos:
        src = logo.attrs.get('src')
        if not src:
            continue
        # urljoin resolves relative and protocol-relative references
        # against the page URL and leaves absolute URLs untouched.
        target = urljoin(e.url, src)
        if urlsplit(target).scheme not in ("http", "https"):
            # e.g. inline "data:" images, which cannot be fetched over HTTP.
            continue
        ext = _logo_extension(src)
        fn = f"{defaults.LOGOS_DATA_PATH}/{e.bco}.{len(saved) + 1}.{ext}"
        saved.append(get_img_logo(target, fn))
    return saved