|
@@ -0,0 +1,129 @@
|
|
|
+import math
|
|
|
+import random
|
|
|
+import csv
|
|
|
+import logging
|
|
|
+from collections import defaultdict
|
|
|
+
|
|
|
+from .shared import point_has_streetview, GeoPointSource, CachedGeoPointSource, GeoPointSourceGroup
|
|
|
+from ..scoring import mean_earth_radius_km
|
|
|
+
|
|
|
logger = logging.getLogger(__name__)

# Maps a country code to a list of (name, latitude, longitude) tuples, one per
# urban center read from the CSV below.
URBAN_CENTERS = defaultdict(list)

# Bookkeeping filled while loading; _found_countries is reused further down the
# module to build VALID_COUNTRIES.
_found_countries = set()
_urban_center_count = 0

# newline="" lets the csv module do its own newline handling (quoted fields may
# contain line breaks). The explicit encoding keeps city names from being
# decoded with a platform-dependent default.
# NOTE(review): assumes the data file is UTF-8 encoded - confirm.
with open("./data/urban-centers.csv", newline="", encoding="utf-8") as infile:
    for row in csv.reader(infile, delimiter=",", quotechar='"'):
        if not row:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing the 4-way unpack below.
            continue
        code, name, lat, lng = row
        URBAN_CENTERS[code].append((name, float(lat), float(lng)))
        _found_countries.add(code)
        _urban_center_count += 1
logger.info(
    "Read %d urban centers from %d countries.",
    _urban_center_count,
    len(_found_countries),
)
|
|
|
+
|
|
|
def _destination_point(lat_rad, lng_rad, bearing_rad, angular_dist_rad):
    """
    Return the (latitude, longitude) in degrees reached from a start point.

    The start point is given in radians; bearing_rad is the direction of travel and
    angular_dist_rad is the distance travelled divided by the sphere's radius.
    Logic adapted from https://stackoverflow.com/a/7835325
    """
    sin_lat = math.sin(lat_rad)
    cos_lat = math.cos(lat_rad)
    sin_dor = math.sin(angular_dist_rad)
    cos_dor = math.cos(angular_dist_rad)

    # Clamp before asin: rounding can push the value a hair outside [-1, 1],
    # which would raise ValueError.
    sin_dest_lat = sin_lat * cos_dor + cos_lat * sin_dor * math.cos(bearing_rad)
    dest_lat_rad = math.asin(max(-1.0, min(1.0, sin_dest_lat)))
    dest_lng_rad = lng_rad + math.atan2(
        math.sin(bearing_rad) * sin_dor * cos_lat,
        cos_dor - sin_lat * math.sin(dest_lat_rad),
    )

    dest_lng = math.degrees(dest_lng_rad)
    if not -180.0 <= dest_lng <= 180.0:
        # Stepping across the antimeridian leaves the raw sum outside the valid
        # range; wrap it back. In-range values are left untouched.
        dest_lng = (dest_lng + 540.0) % 360.0 - 180.0
    return math.degrees(dest_lat_rad), dest_lng


def urban_coord(country_lock, city_retries=10, point_retries=10, max_dist_km=25):
    """
    Returns (latitude, longitude) of usable coord (where google has data) that is near
    a known urban center, or None if no such point was found.

    At most city_retries urban centers are drawn at random (without repetition) from
    the centers known for country_lock. Around each of them at most point_retries
    candidate points are generated by picking a random direction and a random distance
    below max_dist_km kilometers. The first candidate accepted by point_has_streetview
    is returned immediately.

    An unknown country_lock is treated as a country without urban centers: None is
    returned and URBAN_CENTERS is left unmodified.

    This function calls the streetview metadata endpoint - there is no quota consumed.
    """
    # .get() instead of indexing: URBAN_CENTERS is a defaultdict, so indexing an
    # unknown code would silently register an empty country in the global table.
    cities = URBAN_CENTERS.get(country_lock, [])
    src = random.sample(cities, k=min(city_retries, len(cities)))

    logger.info("Trying %s centers in %s", len(src), country_lock)

    for name, city_lat, city_lng in src:
        # start in a city
        logger.info("Trying at most %s points around %s", point_retries, name)
        city_lat_rad = math.radians(city_lat)
        city_lng_rad = math.radians(city_lng)
        for _ in range(point_retries):
            # turn a random direction, and go random distance
            # (distance is drawn before the angle, as in the original code, so
            # seeded runs consume the random stream in the same order)
            dist_km = random.random() * max_dist_km
            angle_rad = random.random() * 2 * math.pi
            pt_lat, pt_lng = _destination_point(
                city_lat_rad,
                city_lng_rad,
                angle_rad,
                dist_km / mean_earth_radius_km,
            )
            if point_has_streetview(pt_lat, pt_lng):
                logger.info("Point found!")
                return (pt_lat, pt_lng)

    return None
|
|
|
+
|
|
|
+
|
|
|
class WorldUrbanPointSource(GeoPointSource):
    """Point source that looks for urban points in randomly chosen countries."""

    def __init__(self, country_retries=20, max_attempts=20):
        """
        country_retries is the maximum number of countries tried per round;
        max_attempts is the number of failed rounds tolerated by get_points.
        """
        self.country_retries = country_retries
        self.max_attempts = max_attempts

    def get_name(self):
        """Return the name of this source."""
        return "Urban-global"

    def get_points(self, n):
        """
        Return a list of n (latitude, longitude) tuples.

        Each round samples at most self.country_retries distinct countries from
        URBAN_CENTERS and calls urban_coord for them in turn until one yields a
        point. A round in which no country yields a point counts as a failure;
        once more than self.max_attempts rounds have failed, ExhaustedSourceError
        is raised with the points collected so far. Successful rounds do not use
        up the failure budget, so n may exceed max_attempts.
        """
        failed_rounds = 0
        points = []
        while len(points) < n:
            # random.sample needs a real sequence (a dict keys view is rejected on
            # Python 3.11+). Countries without any center are skipped because
            # urban_coord could never return a point for them.
            codes = [code for code, centers in URBAN_CENTERS.items() if centers]
            countries = random.sample(codes, k=min(self.country_retries, len(codes)))
            for c in countries:
                logger.info("Selecting urban centers from %s", c)
                pt = urban_coord(c)
                if pt is not None:
                    points.append(pt)
                    break
            else:
                # No sampled country produced a point in this round.
                failed_rounds += 1
                if failed_rounds > self.max_attempts:
                    # NOTE(review): ExhaustedSourceError is not among the names this
                    # file imports - confirm it is in scope, otherwise this line
                    # raises NameError instead.
                    raise ExhaustedSourceError(points)
        return points
|
|
|
+
|
|
|
+
|
|
|
class CountryUrbanPointSource(GeoPointSource):
    """Point source that looks for urban points inside one fixed country."""

    def __init__(self, country_lock, max_attempts=20):
        """
        country_lock is the country code passed to urban_coord; max_attempts is
        the number of failed urban_coord calls tolerated by get_points.
        """
        self.country_lock = country_lock
        self.max_attempts = max_attempts

    def get_name(self):
        """Return the name of this source, which includes the country code."""
        return f"Urban-{self.country_lock}"

    def get_points(self, n):
        """
        Return a list of n (latitude, longitude) tuples from self.country_lock.

        urban_coord is called with 100 city retries per call. A call that returns
        None counts as a failure; once more than self.max_attempts calls have
        failed, ExhaustedSourceError is raised with the points collected so far.
        Successful calls do not use up the failure budget, so n may exceed
        max_attempts.
        """
        failed_calls = 0
        points = []
        while len(points) < n:
            pt = urban_coord(
                city_retries=100,
                country_lock=self.country_lock,
            )
            if pt is not None:
                points.append(pt)
                continue
            failed_calls += 1
            if failed_calls > self.max_attempts:
                # NOTE(review): ExhaustedSourceError is not among the names this
                # file imports - confirm it is in scope, otherwise this line
                # raises NameError instead.
                raise ExhaustedSourceError(points)
        return points
|
|
|
+
|
|
|
+
|
|
|
class CountryUrbanSourceDict(dict):
    """dict of country code -> point source that creates missing sources on get()."""

    def get(self, key, default=None):
        """
        Return the source stored for key, creating it on first use.

        A key of None returns default. Any other key that is not present yet gets a
        new CountryUrbanPointSource(key) stored under it, which is then returned, so
        default is only ever used for a None key. default is optional, matching the
        signature of dict.get.
        """
        if key is None:
            return default
        if key not in self:
            self[key] = CountryUrbanPointSource(key)
        return self[key]
|
|
|
+
|
|
|
+
|
|
|
# Country codes that appeared in the urban centers file.
VALID_COUNTRIES = tuple(_found_countries)

# World-wide source wrapped in a CachedGeoPointSource.
# NOTE(review): the second argument (20) is presumably the cache size - confirm
# against CachedGeoPointSource.
WORLD_SOURCE = CachedGeoPointSource(WorldUrbanPointSource(), 20)

# Only the US gets a cached source up front; every other country receives a plain
# CountryUrbanPointSource the first time CountryUrbanSourceDict.get is asked for it.
COUNTRY_SOURCES = CountryUrbanSourceDict(
    us=CachedGeoPointSource(CountryUrbanPointSource("us"), 20),  # cache US
)

SOURCE_GROUP = GeoPointSourceGroup(COUNTRY_SOURCES, WORLD_SOURCE)
|