|
import os |
|
import requests |
|
from bs4 import BeautifulSoup |
|
from urllib.parse import urljoin |
|
import urllib.request |
|
import pandas as pd |
|
import warnings |
|
from requests.packages.urllib3.exceptions import InsecureRequestWarning |
|
|
|
# The HTTP calls in this module pass verify=False, which makes urllib3 emit an
# InsecureRequestWarning on every request; suppress that category globally.
warnings.simplefilter('ignore', category=InsecureRequestWarning)
|
|
|
# Request template for the eBird API v2 taxonomy endpoint.
# get_bird_info() reads the "url", "headers" and "params" entries.
REQ_FMT = {
    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
    # get_bird_info() looks up REQ_FMT["headers"]; the key was missing, so
    # every call failed with KeyError. Empty means "no extra headers".
    # NOTE(review): if the endpoint needs an API token header, add it here.
    "headers": {},
    "params": {
        # Placeholder only; the real species code is supplied per request.
        'species': 'CHANGE THIS TO SPECIES CODE',
    },
}
|
# Taxonomy table used by scientific_to_species_code(), which reads its
# 'SCI_NAME' and 'SPECIES_CODE' columns. Loaded once, at import time, from a
# path that is relative to the current working directory.
bird_df = pd.read_csv(filepath_or_buffer="ebird_taxonomy_v2023.csv")
|
|
|
|
|
def scientific_to_species_code(scientific_name: str, taxonomy=None):
    """Look up the species code for a scientific name.

    Args:
        scientific_name: Scientific name to search for. It is matched
            literally (not as a regular expression) against the 'SCI_NAME'
            column.
        taxonomy: Optional DataFrame with 'SCI_NAME' and 'SPECIES_CODE'
            columns. Defaults to the module-level ``bird_df`` table.

    Returns:
        The 'SPECIES_CODE' value of the first matching row, or an empty list
        when nothing matches. The empty list is kept as the "not found"
        sentinel for backward compatibility; it is falsy, so callers can
        test the result with ``if not code``.
    """
    table = bird_df if taxonomy is None else taxonomy
    names = table['SCI_NAME']

    # Prefer an exact match: a plain substring search can hit an earlier row
    # whose name merely contains the requested one and return the wrong taxon.
    hits = table.loc[names == scientific_name, 'SPECIES_CODE']

    if hits.empty:
        # regex=False: names may contain characters such as '.', '(' or '['
        # that would otherwise be interpreted as regex syntax (wrong matches
        # or an re.error). na=False treats missing names as non-matching.
        contains = names.str.contains(scientific_name, regex=False, na=False)
        hits = table.loc[contains, 'SPECIES_CODE']

    if hits.empty:
        return []
    return hits.iloc[0]
|
|
|
|
|
def get_bird_info(species_code: str, timeout=30):
    """Request the taxonomy entry for one species code.

    Sends a GET request to ``REQ_FMT["url"]`` with ``species=<species_code>``
    as the query string.

    Args:
        species_code: Species code to query.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without a timeout the call could block forever.

    Returns:
        The raw response body as bytes. The HTTP status code is not checked,
        so an error response body is returned as-is.
    """
    # Build the query locally rather than overwriting REQ_FMT['params']:
    # mutating the shared template leaks state between calls.
    query = {"species": species_code}

    response = requests.get(
        REQ_FMT["url"],
        # .get(): tolerate a template without a "headers" entry (plain
        # indexing raises KeyError in that case); requests treats None as
        # "no extra headers".
        headers=REQ_FMT.get("headers"),
        params=query,
        # NOTE(security): verify=False disables TLS certificate checking.
        # Kept to preserve existing behaviour; re-enable it if possible.
        verify=False,
        timeout=timeout,
    )
    return response.content
|
|
|
def download_images(url, timeout=30):
    """Collect image URLs referenced by the HTML page at *url*.

    Despite the name, nothing is saved to disk: the page is fetched, every
    ``<img>`` tag with a ``src`` attribute is resolved against *url*, and
    only URLs containing both "api" and "asset" are kept.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without a timeout the call could block forever.

    Returns:
        A list of absolute image URLs, or None (after printing a message)
        when the page does not answer with HTTP 200.
    """
    # NOTE(security): verify=False disables TLS certificate checking. Kept to
    # preserve existing behaviour; re-enable it if possible.
    response = requests.get(url, verify=False, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Resolve relative src values against the page URL, lazily, so the
    # filtering below runs in a single pass over the tags.
    absolute_urls = (
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    )
    return [link for link in absolute_urls if "api" in link and "asset" in link]
|
|
|
if __name__ == '__main__':
    # Demo run: resolve one scientific name, print its taxonomy record and
    # the image URLs found on its species page.
    # (The taxonomy CSV is already loaded into bird_df at import time, so it
    # is not read a second time here.)
    scode = scientific_to_species_code("Melanocharis striativentris")

    # scientific_to_species_code() returns an empty list when nothing
    # matches; stop instead of sending "[]" to the API and into the URL.
    if not scode:
        raise SystemExit("No species code found for 'Melanocharis striativentris'")

    print(get_bird_info(scode))

    # The result used to be discarded, which made the call pointless.
    print(download_images(f"https://ebird.org/species/{scode}"))
|
|