urban_centers.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. import math
  2. import random
  3. import csv
  4. import logging
  5. from collections import defaultdict
  6. from .shared import point_has_streetview, GeoPointSource, CachedGeoPointSource, GeoPointSourceGroup
  7. from ..scoring import mean_earth_radius_km
  8. logger = logging.getLogger(__name__)
  9. URBAN_CENTERS = defaultdict(list)
  10. _found_countries = set()
  11. _urban_center_count = 0
  12. with open("./data/urban-centers.csv") as infile:
  13. for code, name, lat, lng in csv.reader(infile, delimiter=",", quotechar='"'):
  14. URBAN_CENTERS[code].append((name, float(lat), float(lng)))
  15. _found_countries.add(code)
  16. _urban_center_count += 1
  17. logger.info(f"Read {_urban_center_count} urban centers from {len(_found_countries)} countries.")
  18. VALID_COUNTRIES = tuple(_found_countries)
  19. def urban_coord(country_lock, city_retries=10, point_retries=10, max_dist_km=25):
  20. """
  21. Returns (latitude, longitude) of usable coord (where google has data) that is near
  22. a known urban center. Points will be at most max_dist_km kilometers away. This function
  23. will use country_lock to determine the country from which to pull a known urban center,
  24. generate at most point_retries points around that urban center, and try at most
  25. city_retries urban centers in that country. If none of the generated points have street
  26. view data, this will return None. Otherwise, it will exit as soon as suitable point is
  27. found.
  28. This function calls the streetview metadata endpoint - there is no quota consumed.
  29. """
  30. country_lock = country_lock.lower()
  31. cities = URBAN_CENTERS[country_lock]
  32. src = random.sample(cities, k=min(city_retries, len(cities)))
  33. logger.info(f"Trying {len(src)} centers in {country_lock}")
  34. for (name, city_lat, city_lng) in src:
  35. # logic adapted from https://stackoverflow.com/a/7835325
  36. # start in a city
  37. logger.info(f"Trying at most {point_retries} points around {name}")
  38. city_lat_rad = math.radians(city_lat)
  39. sin_lat = math.sin(city_lat_rad)
  40. cos_lat = math.cos(city_lat_rad)
  41. city_lng_rad = math.radians(city_lng)
  42. for _ in range(point_retries):
  43. # turn a random direction, and go random distance
  44. dist_km = random.random() * max_dist_km
  45. angle_rad = random.random() * 2 * math.pi
  46. d_over_radius = dist_km / mean_earth_radius_km
  47. sin_dor = math.sin(d_over_radius)
  48. cos_dor = math.cos(d_over_radius)
  49. pt_lat_rad = math.asin(sin_lat * cos_dor + cos_lat * sin_dor * math.cos(angle_rad))
  50. pt_lng_rad = city_lng_rad + math.atan2(math.sin(angle_rad) * sin_dor * cos_lat, cos_dor - sin_lat * math.sin(pt_lat_rad))
  51. pt_lat = math.degrees(pt_lat_rad)
  52. pt_lng = math.degrees(pt_lng_rad)
  53. if point_has_streetview(pt_lat, pt_lng):
  54. logger.info("Point found!")
  55. return (country_lock, pt_lat, pt_lng)
  56. class WorldUrbanPointSource(GeoPointSource):
  57. def __init__(self, country_retries=20, max_attempts=20):
  58. self.country_retries = country_retries
  59. self.max_attempts = max_attempts
  60. def get_name(self):
  61. return "Urban-global"
  62. def get_points(self, n):
  63. # Will make at most self.country_retries * self.max_attempts attempts to call urban_coord
  64. attempts = 0
  65. points = []
  66. while len(points) < n:
  67. if attempts > self.max_attempts:
  68. raise ExhaustedSourceError(points)
  69. countries = random.sample(URBAN_CENTERS.keys(), k=min(self.country_retries, len(URBAN_CENTERS)))
  70. for c in countries:
  71. logger.info(f"Selecting urban centers from {c}")
  72. pt = urban_coord(c)
  73. if pt is not None:
  74. points.append(pt)
  75. break
  76. attempts += 1
  77. return points
  78. class CountryUrbanPointSource(GeoPointSource):
  79. def __init__(self, country_lock, max_attempts=20):
  80. self.country_lock = country_lock
  81. self.max_attempts = max_attempts
  82. def get_name(self):
  83. return f"Urban-{self.country_lock}"
  84. def get_points(self, n):
  85. # Will make at most self.max_attempts calls to urban_coord with 100 city retries each
  86. attempts = 0
  87. points = []
  88. while len(points) < n:
  89. if attempts > self.max_attempts:
  90. raise ExhaustedSourceError(points)
  91. pt = urban_coord(
  92. city_retries=100,
  93. country_lock=self.country_lock,
  94. )
  95. if pt is not None:
  96. points.append(pt)
  97. attempts += 1
  98. return points
  99. class CountryUrbanSourceDict(dict):
  100. def get(self, key, default):
  101. if key is None:
  102. return default
  103. if key not in self:
  104. self[key] = CountryUrbanPointSource(key)
  105. return self[key]
  106. WORLD_SOURCE = CachedGeoPointSource(WorldUrbanPointSource(), 20)
  107. COUNTRY_SOURCES = CountryUrbanSourceDict()
  108. COUNTRY_SOURCES["us"] = CachedGeoPointSource(CountryUrbanPointSource("us"), 20) # cache US
  109. SOURCE_GROUP = GeoPointSourceGroup(COUNTRY_SOURCES, WORLD_SOURCE)