123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- import math
- import random
- import csv
- import logging
- from collections import defaultdict
- from .shared import point_has_streetview, GeoPointSource, CachedGeoPointSource, GeoPointSourceGroup
- from ..scoring import mean_earth_radius_km
logger = logging.getLogger(__name__)

# Maps a country code (first CSV column) to the list of
# (name, latitude, longitude) tuples read for that code.
URBAN_CENTERS = defaultdict(list)
_found_countries = set()
_urban_center_count = 0

# Every row must hold exactly four fields: code, name, lat, lng.
# The path is relative to the process working directory.
# newline="" is what the csv module asks for, so that newlines embedded in
# quoted fields are handed to the parser untouched.
with open("./data/urban-centers.csv", newline="") as infile:
    for code, name, lat, lng in csv.reader(infile, delimiter=",", quotechar='"'):
        URBAN_CENTERS[code].append((name, float(lat), float(lng)))
        _found_countries.add(code)
        _urban_center_count += 1

logger.info(f"Read {_urban_center_count} urban centers from {len(_found_countries)} countries.")

# Snapshot of every country code seen in the CSV. The order is unspecified
# because it is taken from a set.
VALID_COUNTRIES = tuple(_found_countries)
def urban_coord(country_lock, city_retries=10, point_retries=10, max_dist_km=25):
    """
    Find a point with street view data near a known urban center.

    Up to city_retries urban centers are drawn at random, without repetition,
    from the entries URBAN_CENTERS holds for country_lock (lower-cased before
    the lookup). Around each center, up to point_retries random points are
    generated, each at most max_dist_km kilometers away, and the search stops
    at the first point for which point_has_streetview() is truthy.

    Returns a (country_lock, latitude, longitude) tuple - the lower-cased
    country code plus the coordinates in degrees - or None when none of the
    generated points has street view data, which includes the case where the
    country has no known urban centers.

    This function calls the streetview metadata endpoint - there is no quota consumed.
    """
    country_lock = country_lock.lower()
    # .get() instead of indexing: indexing the defaultdict with an unknown
    # code would silently add an empty entry to URBAN_CENTERS.
    cities = URBAN_CENTERS.get(country_lock, [])
    src = random.sample(cities, k=min(city_retries, len(cities)))
    logger.info(f"Trying {len(src)} centers in {country_lock}")
    for (name, city_lat, city_lng) in src:
        # logic adapted from https://stackoverflow.com/a/7835325
        # start in a city
        logger.info(f"Trying at most {point_retries} points around {name}")
        city_lat_rad = math.radians(city_lat)
        sin_lat = math.sin(city_lat_rad)
        cos_lat = math.cos(city_lat_rad)
        city_lng_rad = math.radians(city_lng)
        for _ in range(point_retries):
            # turn a random direction, and go random distance
            dist_km = random.random() * max_dist_km
            angle_rad = random.random() * 2 * math.pi
            d_over_radius = dist_km / mean_earth_radius_km
            sin_dor = math.sin(d_over_radius)
            cos_dor = math.cos(d_over_radius)
            sin_pt_lat = sin_lat * cos_dor + cos_lat * sin_dor * math.cos(angle_rad)
            # Rounding can push the value a hair outside [-1, 1], which
            # would make asin raise a domain error.
            sin_pt_lat = max(-1.0, min(1.0, sin_pt_lat))
            pt_lat_rad = math.asin(sin_pt_lat)
            pt_lng_rad = city_lng_rad + math.atan2(
                math.sin(angle_rad) * sin_dor * cos_lat,
                cos_dor - sin_lat * sin_pt_lat,
            )
            pt_lat = math.degrees(pt_lat_rad)
            pt_lng = math.degrees(pt_lng_rad)
            # A center close to the antimeridian can yield a longitude just
            # past +/-180; wrap it back into the conventional range.
            if not -180.0 <= pt_lng <= 180.0:
                pt_lng = (pt_lng + 540.0) % 360.0 - 180.0
            if point_has_streetview(pt_lat, pt_lng):
                logger.info("Point found!")
                return (country_lock, pt_lat, pt_lng)
    # Every candidate around every sampled center was rejected.
    return None
class WorldUrbanPointSource(GeoPointSource):
    """Point source that picks urban points from randomly chosen countries."""

    def __init__(self, country_retries=20, max_attempts=20):
        # Upper bound on the countries tried within one round of get_points.
        self.country_retries = country_retries
        # Threshold for the round counter in get_points (see the check there).
        self.max_attempts = max_attempts

    def get_name(self):
        """Return the name of this source."""
        return "Urban-global"

    def get_points(self, n):
        """
        Collect n points, each one the result of a successful urban_coord call.

        Work proceeds in rounds. A round samples up to country_retries
        distinct countries from URBAN_CENTERS and tries them in turn until
        urban_coord yields a point, so a round contributes at most one point.
        Every round counts towards the limit, successful or not, and the
        limit check runs before each round with a strict ">" comparison, so
        up to max_attempts + 1 rounds (and therefore at most
        country_retries * (max_attempts + 1) urban_coord calls) are made.

        Raises ExhaustedSourceError, constructed with the points gathered so
        far, once the round limit is exceeded before n points were found.
        """
        # NOTE(review): ExhaustedSourceError is not imported in the visible
        # part of this module - confirm where it is defined and import it,
        # otherwise the raise below surfaces as a NameError.
        attempts = 0
        points = []
        while len(points) < n:
            if attempts > self.max_attempts:
                raise ExhaustedSourceError(points)
            # random.sample requires a sequence; a dict keys view is rejected
            # with TypeError on Python 3.11+, so materialize the keys first.
            population = list(URBAN_CENTERS)
            countries = random.sample(population, k=min(self.country_retries, len(population)))
            for c in countries:
                logger.info(f"Selecting urban centers from {c}")
                pt = urban_coord(c)
                if pt is not None:
                    points.append(pt)
                    break
            attempts += 1
        return points
class CountryUrbanPointSource(GeoPointSource):
    """Point source restricted to the urban centers of a single country."""

    def __init__(self, country_lock, max_attempts=20):
        self.max_attempts = max_attempts
        self.country_lock = country_lock

    def get_name(self):
        """Return the name of this source, which embeds the country code."""
        return f"Urban-{self.country_lock}"

    def get_points(self, n):
        """
        Gather n points for this source's country through urban_coord.

        Each call to urban_coord is made with city_retries=100 and counts as
        one attempt whether or not it yields a point. The limit is checked
        before every call with a strict ">" comparison, so up to
        max_attempts + 1 calls are made before ExhaustedSourceError is
        raised with the points found so far.
        """
        # NOTE(review): ExhaustedSourceError is not imported in the visible
        # part of this module - confirm where it comes from.
        found = []
        tries = 0
        while len(found) < n:
            if tries > self.max_attempts:
                raise ExhaustedSourceError(found)
            tries += 1
            candidate = urban_coord(country_lock=self.country_lock, city_retries=100)
            if candidate is None:
                continue
            found.append(candidate)
        return found
class CountryUrbanSourceDict(dict):
    """
    Mapping of country code to point source whose get() creates missing entries.

    Only get() has this behavior; plain indexing and the other dict methods
    are inherited unchanged.
    """

    def get(self, key, default=None):
        """
        Return the source stored under key, creating it when absent.

        A key of None returns default without touching the mapping. For any
        other key that is not present yet, a CountryUrbanPointSource(key) is
        built, stored under that key, and returned, so default is only ever
        used for a None key.

        default is optional, as with dict.get, so get(key) works as callers
        of a dict expect.
        """
        if key is None:
            return default
        if key not in self:
            self[key] = CountryUrbanPointSource(key)
        return self[key]
# World-wide source wrapped in a CachedGeoPointSource.
# NOTE(review): the 20 is presumably the cache size - confirm against
# CachedGeoPointSource.
WORLD_SOURCE = CachedGeoPointSource(WorldUrbanPointSource(), 20)

# Per-country sources. Only "us" starts out registered, and it is the only
# one wrapped in a cache here; other countries get a plain
# CountryUrbanPointSource the first time they are requested through get().
COUNTRY_SOURCES = CountryUrbanSourceDict(
    us=CachedGeoPointSource(CountryUrbanPointSource("us"), 20),  # cache US
)

# Combines the per-country mapping with the world-wide source.
SOURCE_GROUP = GeoPointSourceGroup(COUNTRY_SOURCES, WORLD_SOURCE)
|