Spaces:
Running
Running
File size: 3,646 Bytes
e91ac58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import os
from opencage.geocoder import OpenCageGeocode
import pycountry_convert as pc
import warnings
import unicodedata
import pycountry_convert as pc
import warnings
### TODO 1/24/24
### If I want to use this instead of HERE, update the procedure for picking the best/most granular geolocation
def normalize_country_name(name):
return unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('ASCII')
def get_continent(country_name):
warnings.filterwarnings("ignore", category=UserWarning, module='pycountry')
continent_code_to_name = {
"AF": "Africa",
"NA": "North America",
"OC": "Oceania",
"AN": "Antarctica",
"AS": "Asia",
"EU": "Europe",
"SA": "South America"
}
try:
normalized_country_name = normalize_country_name(country_name)
# Get country alpha2 code
country_code = pc.country_name_to_country_alpha2(normalized_country_name)
# Get continent code from country alpha2 code
continent_code = pc.country_alpha2_to_continent_code(country_code)
# Map the continent code to continent name
return continent_code_to_name.get(continent_code, '')
except Exception as e:
print(str(e))
return ''
def validate_coordinates_opencage(record, replace_if_success_geo=False):
GEO_dict = {
'GEO_method': '',
'GEO_formatted_full_string': '',
'GEO_decimal_lat': '',
'GEO_decimal_long': '',
'GEO_city': '',
'GEO_county': '',
'GEO_state': '',
'GEO_state_code': '',
'GEO_country': '',
'GEO_country_code': '',
'GEO_continent': '',
}
geocoder = OpenCageGeocode(os.environ['open_cage_geocode'])
query_loc = ', '.join(filter(None, [record.get('municipality', '').strip(),
record.get('county', '').strip(),
record.get('stateProvince', '').strip(),
record.get('country', '').strip()])).strip()
query_decimal = ', '.join(filter(None, [record.get('decimalLatitude', '').strip(),
record.get('decimalLongitude', '').strip()])).strip()
query_verbatim = record.get('verbatimCoordinates', '').strip()
# results = geocoder.geocode('Ann Arbor, Michigan', no_annotations='1')
results = geocoder.geocode(query_loc, no_annotations='1')
if results:
GEO_dict['GEO_method'] = 'OpenCageGeocode_forward'
GEO_dict['GEO_formatted_full_string'] = results[0]['formatted']
GEO_dict['GEO_decimal_lat'] = results[0]['geometry']['lat']
GEO_dict['GEO_decimal_long'] = results[0]['geometry']['lng']
GEO_dict['GEO_city'] = results[0]['components']['city']
GEO_dict['GEO_county'] = results[0]['components']['county']
GEO_dict['GEO_state'] = results[0]['components']['state']
GEO_dict['GEO_state_code'] = results[0]['components']['state_code']
GEO_dict['GEO_country'] = results[0]['components']['country']
GEO_dict['GEO_country_code'] = results[0]['components']['country_code']
GEO_dict['GEO_continent'] = results[0]['components']['continent']
if GEO_dict['GEO_formatted_full_string'] and replace_if_success_geo:
GEO_dict['GEO_override_OCR'] = True
record['country'] = GEO_dict.get('GEO_country')
record['stateProvince'] = GEO_dict.get('GEO_state')
record['county'] = GEO_dict.get('GEO_county')
record['municipality'] = GEO_dict.get('GEO_city')
return record, GEO_dict
|