Spaces:

VOJ
/

voj

Sleeping

App Files Files Community

voj / fetch_img.py

amroa

update weights

d73fb39 7 months ago

raw

history blame

1.87 kB

	import os
	import requests
	from bs4 import BeautifulSoup
	from urllib.parse import urljoin
	import urllib.request
	import pandas as pd
	import warnings
	from requests.packages.urllib3.exceptions import InsecureRequestWarning

	# Requests in this module are sent with verify=False, which makes urllib3
	# emit InsecureRequestWarning on every call; silence that warning.
	warnings.simplefilter('ignore', InsecureRequestWarning)

	# Request template for the eBird taxonomy endpoint, consumed by
	# get_bird_info().
	REQ_FMT = {
	    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
	    # Headers sent by get_bird_info() with every request. The entry must be
	    # present because the template is indexed by key.
	    # NOTE(review): if this endpoint needs an API key, eBird presumably
	    # expects it in an "X-eBirdApiToken" header - confirm and add it here.
	    "headers": {},
	    "params": {
	        # Placeholder value; the real species code is supplied per request.
	        'species': 'CHANGE THIS TO SPECIES CODE',
	    },
	}
	# Taxonomy table loaded once at import time. scientific_to_species_code()
	# reads its SCI_NAME and SPECIES_CODE columns.
	bird_df = pd.read_csv("ebird_taxonomy_v2023.csv")


	def scientific_to_species_code(scientific_name: str):
	    """Look up the eBird species code for a scientific name.

	    Searches the ``SCI_NAME`` column of the module-level ``bird_df`` table
	    for rows containing ``scientific_name`` as a literal substring and
	    returns the ``SPECIES_CODE`` of the first matching row.

	    Args:
	        scientific_name: Scientific (Latin) name, or a fragment of one.

	    Returns:
	        The species code of the first match, or an empty list when the name
	        is empty or no row matches (the empty list is kept for backward
	        compatibility with existing callers).
	    """
	    # An empty pattern is contained in every string, which would silently
	    # return the first species in the table.
	    if not scientific_name:
	        return []

	    # regex=False: the name is literal text. With the default regex matching,
	    # characters such as ".", "(" or "[" in a name would be interpreted as
	    # pattern syntax and match the wrong rows (or none).
	    is_match = bird_df['SCI_NAME'].str.contains(
	        scientific_name, na=False, regex=False
	    )
	    codes = bird_df.loc[is_match, 'SPECIES_CODE']
	    if codes.empty:
	        return []
	    return codes.iloc[0]

	def get_bird_info(species_code: str):
	    """Fetch taxonomy information for one species from the eBird API.

	    Sends a GET request to ``REQ_FMT["url"]`` with ``species_code`` as the
	    ``species`` query parameter.

	    Args:
	        species_code: eBird species code to query.

	    Returns:
	        The raw, undecoded response body (``response.content``). The HTTP
	        status code is not checked.
	    """
	    # Build the query per call instead of writing it into the shared
	    # REQ_FMT template, so calls do not leak state into each other.
	    params = {"species": species_code}
	    # Tolerate a template without a "headers" entry: indexing it directly
	    # raises KeyError, while requests accepts headers=None.
	    headers = REQ_FMT.get("headers")
	    # NOTE(review): verify=False disables TLS certificate verification.
	    # Kept as-is to preserve behavior; enable verification if possible.
	    response = requests.get(
	        REQ_FMT["url"], headers=headers, params=params, verify=False
	    )
	    return response.content

	def download_images(url):
	    """Collect asset image URLs from the ``<img>`` tags of a web page.

	    Despite the name, nothing is saved to disk: the page at ``url`` is
	    fetched and the image addresses found in it are returned.

	    Args:
	        url: Address of the page to scan; also the base for resolving
	            relative ``src`` values.

	    Returns:
	        A list of absolute image URLs whose text contains both "api" and
	        "asset", or None (after printing a message) when the page does not
	        answer with HTTP 200.
	    """
	    # Certificate verification is switched off for this request.
	    page = requests.get(url, verify=False)
	    if page.status_code != 200:
	        print(f"Failed to retrieve the page. Status code: {page.status_code}")
	        return None

	    document = BeautifulSoup(page.content, 'html.parser')

	    asset_urls = []
	    for tag in document.find_all('img'):
	        # Skip <img> tags that carry no src attribute.
	        if 'src' not in tag.attrs:
	            continue
	        # Resolve relative sources against the page address.
	        absolute = urljoin(url, tag['src'])
	        # Keep only links that look like asset API endpoints.
	        if "api" in absolute and "asset" in absolute:
	            asset_urls.append(absolute)

	    return asset_urls

	if __name__ == '__main__':
	    # Demo run: resolve one species, print its taxonomy record, then print
	    # the image URLs found on its eBird species page.
	    sci_name = "Melanocharis striativentris"
	    scode = scientific_to_species_code(sci_name)
	    if not scode:
	        # The lookup returns an empty list when nothing matches; do not send
	        # that on to the API or splice it into the species URL.
	        print(f"No species code found for {sci_name!r}")
	    else:
	        print(get_bird_info(scode))
	        image_urls = download_images(f"https://ebird.org/species/{scode}")
	        # download_images returns None when the page could not be fetched
	        # (it prints its own failure message in that case).
	        for image_url in image_urls or []:
	            print(image_url)