|
import json |
|
import requests |
|
import re |
|
import urllib.parse |
|
|
|
def _extractBingImages(html): |
|
pattern = r'mediaurl=(.*?)&.*?expw=(\d+).*?exph=(\d+)' |
|
matches = re.findall(pattern, html) |
|
result = [] |
|
|
|
for match in matches: |
|
url, width, height = match |
|
if url.endswith('.jpg') or url.endswith('.png') or url.endswith('.jpeg'): |
|
result.append({'url': urllib.parse.unquote(url), 'width': int(width), 'height': int(height)}) |
|
|
|
return result |
|
|
|
|
|
def _extractGoogleImages(html): |
|
images = [] |
|
regex = re.compile(r"AF_initDataCallback\({key: 'ds:1', hash: '2', data:(.*?), sideChannel: {}}\);") |
|
match = regex.search(html) |
|
if match: |
|
dz = json.loads(match.group(1)) |
|
for c in dz[56][1][0][0][1][0]: |
|
try: |
|
thing = list(c[0][0].values())[0] |
|
images.append(thing[1][3]) |
|
except: |
|
pass |
|
return images |
|
|
|
|
|
def getBingImages(query, retries=5):
    """Search Bing Images for ``query`` and return the images found.

    The results page is requested up to ``retries`` times, stopping as soon
    as one response yields at least one image.

    Args:
        query: Free-text search terms.
        retries: Maximum number of requests to make before giving up.

    Returns:
        The non-empty list produced by ``_extractBingImages`` for the first
        response that contains images.

    Raises:
        Exception: If Bing answers with a non-200 status, or if no image
            was found after ``retries`` attempts.
    """
    # NOTE(review): no timeout is passed to requests.get, so a stalled
    # connection blocks indefinitely -- consider adding one.
    attempts = 0
    while attempts < retries:
        # Count every request so the loop is bounded even when Bing keeps
        # answering 200 with a page that contains no extractable images.
        attempts += 1

        # Let requests build the query string so characters such as "&",
        # "#" or "+" in the query are escaped (spaces still become "+").
        response = requests.get(
            "https://www.bing.com/images/search",
            params={"q": query, "first": 1},
        )
        if response.status_code != 200:
            print("Error While making bing image searches", response.text)
            raise Exception("Error While making bing image searches")

        images = _extractBingImages(response.text)
        if images:
            return images

    raise Exception("Error While making bing image searches")
|
|
|
|