import math
import random
import csv
import logging
from collections import defaultdict

from .shared import point_has_streetview, GeoPointSource, CachedGeoPointSource, GeoPointSourceGroup
from ..scoring import mean_earth_radius_km

logger = logging.getLogger(__name__)

# Country code (exactly as written in the CSV) -> list of (name, lat, lng).
# NOTE: this is a defaultdict, so *indexing* it with an unknown code inserts an
# empty entry. Read it with .get() unless you really mean to add a key.
URBAN_CENTERS = defaultdict(list)
_found_countries = set()
_urban_center_count = 0
# newline="" lets the csv module do its own line-ending handling, as its
# documentation requires for file objects passed to csv.reader.
with open("./data/urban-centers.csv", newline="") as infile:
    for code, name, lat, lng in csv.reader(infile, delimiter=",", quotechar='"'):
        URBAN_CENTERS[code].append((name, float(lat), float(lng)))
        _found_countries.add(code)
        _urban_center_count += 1
logger.info(f"Read {_urban_center_count} urban centers from {len(_found_countries)} countries.")
VALID_COUNTRIES = tuple(_found_countries)


def urban_coord(country_lock, city_retries=10, point_retries=10, max_dist_km=25):
    """
    Find a point with street view coverage near a known urban center.

    The country code ``country_lock`` (lower-cased before lookup) selects the
    list of known urban centers. Up to ``city_retries`` distinct centers are
    drawn at random from that list; around each one, up to ``point_retries``
    random points are generated, each at most ``max_dist_km`` kilometers away
    from the center. The search stops at the first point for which
    ``point_has_streetview`` reports coverage.

    Returns a ``(country_lock, latitude, longitude)`` tuple (angles in
    degrees, ``country_lock`` lower-cased), or ``None`` if no generated point
    had coverage or the country has no known urban centers.

    Per the original author's note, the coverage check calls the streetview
    metadata endpoint, which consumes no quota.
    """
    country_lock = country_lock.lower()
    # Use .get() instead of indexing: URBAN_CENTERS is a defaultdict, and
    # indexing an unknown code would silently add an empty entry that later
    # shows up when the world source samples countries.
    cities = URBAN_CENTERS.get(country_lock, ())
    src = random.sample(cities, k=min(city_retries, len(cities)))
    logger.info(f"Trying {len(src)} centers in {country_lock}")

    for (name, city_lat, city_lng) in src:
        # logic adapted from https://stackoverflow.com/a/7835325
        # start in a city
        logger.info(f"Trying at most {point_retries} points around {name}")
        city_lat_rad = math.radians(city_lat)
        sin_lat = math.sin(city_lat_rad)
        cos_lat = math.cos(city_lat_rad)
        city_lng_rad = math.radians(city_lng)

        for _ in range(point_retries):
            # turn a random direction, and go random distance
            dist_km = random.random() * max_dist_km
            angle_rad = random.random() * 2 * math.pi

            # Angular distance travelled along the sphere.
            d_over_radius = dist_km / mean_earth_radius_km
            sin_dor = math.sin(d_over_radius)
            cos_dor = math.cos(d_over_radius)

            pt_lat_rad = math.asin(sin_lat * cos_dor + cos_lat * sin_dor * math.cos(angle_rad))
            pt_lng_rad = city_lng_rad + math.atan2(
                math.sin(angle_rad) * sin_dor * cos_lat,
                cos_dor - sin_lat * math.sin(pt_lat_rad),
            )

            pt_lat = math.degrees(pt_lat_rad)
            pt_lng = math.degrees(pt_lng_rad)
            # Adding an offset to the city's longitude can step across the
            # antimeridian; wrap back into [-180, 180) only when that happens
            # so ordinary results are left bit-for-bit untouched.
            if not -180.0 <= pt_lng <= 180.0:
                pt_lng = (pt_lng + 180.0) % 360.0 - 180.0

            if point_has_streetview(pt_lat, pt_lng):
                logger.info("Point found!")
                return (country_lock, pt_lat, pt_lng)

    return None


class WorldUrbanPointSource(GeoPointSource):
    """Point source that draws urban points from randomly chosen countries."""

    def __init__(self, country_retries=20, max_attempts=20):
        # country_retries: how many countries to try per pass.
        # max_attempts: bound on the number of passes in get_points.
        self.country_retries = country_retries
        self.max_attempts = max_attempts

    def get_name(self):
        return "Urban-global"

    def get_points(self, n):
        """
        Collect ``n`` points, each a tuple as returned by ``urban_coord``.

        Every pass samples up to ``self.country_retries`` countries and takes
        the first point any of them yields. The pass counter is never reset,
        and the check ``attempts > self.max_attempts`` runs before each pass,
        so at most ``self.max_attempts + 1`` passes are made in total; if
        ``n`` points have not been gathered by then, ExhaustedSourceError is
        raised with the points collected so far.
        """
        attempts = 0
        points = []
        # TODO tweak this to just go point by point, should be simpler
        while len(points) < n:
            if attempts > self.max_attempts:
                # NOTE(review): ExhaustedSourceError is not imported or defined
                # in this module as visible here - reaching this line would
                # raise NameError instead. Import it from its defining module.
                raise ExhaustedSourceError(points)
            # random.sample needs a sequence: passing a dict keys view is
            # deprecated since Python 3.9 and a TypeError from 3.11 on.
            country_codes = list(URBAN_CENTERS)
            countries = random.sample(
                country_codes, k=min(self.country_retries, len(country_codes))
            )
            for c in countries:
                logger.info(f"Selecting urban centers from {c}")
                pt = urban_coord(c)
                if pt is not None:
                    points.append(pt)
                    break
            attempts += 1
        return points


class CountryUrbanPointSource(GeoPointSource):
    """Point source that draws urban points from a single country."""

    def __init__(self, country_lock, max_attempts=20):
        self.country_lock = country_lock
        self.max_attempts = max_attempts

    def get_name(self):
        return f"Urban-{self.country_lock}"

    def get_points(self, n):
        """
        Collect ``n`` points from ``self.country_lock``.

        Each pass makes one ``urban_coord`` call with 100 city retries. The
        pass counter is never reset and is checked with
        ``attempts > self.max_attempts`` before each pass, so at most
        ``self.max_attempts + 1`` calls are made; if ``n`` points have not
        been gathered by then, ExhaustedSourceError is raised with the points
        collected so far.
        """
        attempts = 0
        points = []
        while len(points) < n:
            if attempts > self.max_attempts:
                # NOTE(review): ExhaustedSourceError is not imported or defined
                # in this module as visible here - reaching this line would
                # raise NameError instead. Import it from its defining module.
                raise ExhaustedSourceError(points)
            pt = urban_coord(
                city_retries=100,
                country_lock=self.country_lock,
            )
            if pt is not None:
                points.append(pt)
            attempts += 1
        return points


class CountryUrbanSourceDict(dict):
    """Dict of per-country sources that creates missing entries on ``get``."""

    def get(self, key, default=None):
        """
        Return the source stored under ``key``, creating it if absent.

        A ``None`` key returns ``default`` without touching the dict. Any
        other missing key gets a fresh ``CountryUrbanPointSource(key)`` stored
        under it, so ``default`` is only ever returned for ``None``.
        ``default`` is optional (``None``) to match ``dict.get``.
        """
        if key is None:
            return default
        if key not in self:
            self[key] = CountryUrbanPointSource(key)
        return self[key]


# NOTE(review): the second argument (20) is presumably a cache size - confirm
# against CachedGeoPointSource in .shared.
WORLD_SOURCE = CachedGeoPointSource(WorldUrbanPointSource(), 20)
COUNTRY_SOURCES = CountryUrbanSourceDict()
COUNTRY_SOURCES["us"] = CachedGeoPointSource(CountryUrbanPointSource("us"), 20)  # cache US
SOURCE_GROUP = GeoPointSourceGroup(COUNTRY_SOURCES, WORLD_SOURCE)