download.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. """
  2. Manage the logic of downloading the pokedex and source images.
  3. """
  4. import re
  5. import json
  6. import asyncio
  7. from pathlib import Path
  8. from dataclasses import dataclass, asdict
  9. from collections import defaultdict
  10. from aiohttp import ClientSession
# Matches a bare identifier (a letter followed by letters/digits) that is
# immediately followed by a colon, capturing the identifier. download_pokedex
# uses it to wrap unquoted JS object keys in double quotes. It is purely
# textual, so it also fires on "word:" sequences inside string values
# (download_pokedex patches up the "Type: Null" case afterwards).
JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):")
# The dex from Showdown only models strawberry Alcremie, since that is the
# only sweet Showdown has, but we might as well add the rest.
# clean_dex appends each of these as a suffix to every cosmetic Alcremie forme.
ALCREMIE_SWEETS = [
    "Strawberry", "Berry", "Love", "Star",
    "Clover", "Flower", "Ribbon",
]
# https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
# There are some pokemon with notable gender differences that the dex does not
# cover; the selection below is an arbitrary judgement call.
# These are raw dex keys: clean_dex emits a "-M" and a "-F" cosmetic form for
# each key listed here.
GENDER_DIFFS = (
    "hippopotas", "hippowdon",
    "unfezant",
    "frillish", "jellicent",
    "pyroar",
    # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
)
@dataclass
class Form:
    """One form of a species as stored in the cleaned dex."""
    # form name as assigned by clean_dex (entry name, cosmetic forme name
    # with spaces turned into hyphens, or "<name>-M" / "<name>-F")
    name: str
    # sorted, de-duplicated tags produced by get_traits
    # (e.g. "mega", "gmax", "regional", "nostart")
    traits: list[str]
    # copied from the raw entry's "types"
    types: list[str]
    # copied from the raw entry's "color"
    color: str
@dataclass
class Pokemon:
    """All forms that share one dex number, grouped under a species name."""
    # dex number (the raw entries' "num"); also the key in the cleaned dex
    num: int
    # "baseSpecies" of the first grouped entry, falling back to its "name"
    species: str
    # every form that clean_dex kept for this number
    forms: list[Form]
  39. async def download_pokedex() -> dict:
  40. async with ClientSession() as session:
  41. async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
  42. res.raise_for_status()
  43. text = await res.text("utf-8")
  44. # this is not json of course, but it's close
  45. # start by taking out the ; and definition
  46. cleaned = text.replace("exports.BattlePokedex = ", "").strip(";")
  47. # then convert the keys to strings
  48. converted = re.sub(JS_TO_JSON, lambda m: f'"{m.group(1)}":', cleaned)
  49. # and fix Type: Null, Farfetch'd, Sirfetch'd
  50. fixed = converted.replace('""Type": Null"', '"Type: Null"').replace("’", "'")
  51. # then, parse it
  52. return json.loads(fixed)
  53. def get_traits(species: str, kind: str) -> list[str]:
  54. traits = []
  55. if kind in ("mega", "mega-x", "mega-y", "primal"):
  56. traits.extend(("mega", "nostart"))
  57. if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
  58. traits.extend(("gmax", "nostart"))
  59. if kind in ("alola", "galar", "hisui", "galar", "paldea"):
  60. traits.extend(("regional", kind))
  61. # special cases
  62. if species == "Tauros" and "paldea" in kind:
  63. # paldean tauros has dumb names
  64. traits.extend(("regional", "paldea"))
  65. if species == "Minior" and kind != "meteor":
  66. # minior can only start the battle in meteor form
  67. traits.append("nostart")
  68. if species == "Darmanitan" and "zen" in kind:
  69. # darmanitan cannot start in zen form
  70. traits.append("nostart")
  71. if "galar" in kind:
  72. # also there's a galar-zen form to handle
  73. traits.extend(("regional", "galar"))
  74. if species == "Palafin" and kind == "hero":
  75. # palafin can only start in zero form
  76. traits.append("nostart")
  77. if species == "Gimmighoul" and kind == "roaming":
  78. # gimmighoul roaming is only in PGO
  79. traits.append("nostart")
  80. return sorted(set(traits))
  81. def clean_dex(raw: dict) -> dict[int, Pokemon]:
  82. regrouped = defaultdict(list)
  83. for key, entry in raw.items():
  84. isNonstandard = entry.get("isNonstandard", None)
  85. baseSpecies = entry.get("baseSpecies", None)
  86. forme = entry.get("forme", None)
  87. if isNonstandard not in (None, "Past", "Unobtainable"):
  88. continue # remove CAP etc.
  89. if baseSpecies in ("Pikachu", "Pichu") and forme is not None:
  90. continue # remove pikachu spam + spiky ear pichu
  91. if forme is not None and "Totem" in forme:
  92. continue # remove totem pokemon
  93. num = entry["num"]
  94. # non-cosmetic forms get separate entries automatically
  95. # but keeping the separate unown forms would be ridiculous
  96. if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0:
  97. cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}')
  98. if key == "alcremie":
  99. # oh god this thing
  100. cosmetic = [
  101. f"{cf}-{sweet}"
  102. for cf in cosmetic
  103. for sweet in ALCREMIE_SWEETS
  104. ]
  105. regrouped[num].extend({
  106. **entry,
  107. "forme": cf.replace(" ", "-"),
  108. "formeKind": "cosmetic",
  109. } for cf in cosmetic)
  110. elif key in GENDER_DIFFS:
  111. regrouped[num].append({
  112. **entry,
  113. "forme": f'{entry["name"]}-M',
  114. "formeKind": "cosmetic",
  115. })
  116. regrouped[num].append({
  117. **entry,
  118. "forme": f'{entry["name"]}-F',
  119. "formeKind": "cosmetic",
  120. })
  121. else:
  122. regrouped[num].append({
  123. **entry,
  124. "forme": entry["name"],
  125. "formeKind": entry.get("forme", "base").lower(),
  126. })
  127. return {
  128. i: Pokemon(
  129. num=i,
  130. species=(
  131. # doubles as an assertion that forms is not empty
  132. species := (forms := regrouped[i])[0].get("baseSpecies", forms[0]["name"])
  133. ),
  134. forms=[
  135. Form(
  136. name=f.get("forme", f["name"]),
  137. traits=get_traits(species, f["formeKind"].lower()),
  138. types=f["types"],
  139. color=f["color"],
  140. ) for f in forms
  141. ]
  142. ) for i in range(1, max(regrouped.keys()) + 1)
  143. }
  144. async def load_pokedex(dex_file: str) -> dict:
  145. if Path(dex_file).is_file():
  146. with open(dex_file) as infile:
  147. loaded = json.load(infile)
  148. dex = {
  149. int(num): Pokemon(
  150. num=entry["num"],
  151. species=entry["species"],
  152. forms=[Form(**f) for f in entry["forms"]],
  153. ) for num, entry in loaded.items()
  154. }
  155. else:
  156. # first download the pokedex
  157. raw_dex = await download_pokedex()
  158. # clean and reorganize it
  159. dex = clean_dex(raw_dex)
  160. # output dex for auditing and reloading
  161. with open(dex_file, "w") as out:
  162. json.dump({
  163. str(i): asdict(pkmn)
  164. for i, pkmn in dex.items()
  165. }, out, indent=2)
  166. return dex
# Ordered (pattern, replacement) rewrites that get_showdown_urls applies to a
# lowercased form name to build the sprite file name. Plain-string patterns go
# through str.replace (every occurrence); compiled patterns go through re.sub.
# Order matters: "nidoran-m" has to be rewritten before the trailing "-m" rule
# would strip its suffix.
# NOTE(review): "-f" is a plain substring rule, so it also fires in the middle
# of a name (any form whose suffix merely starts with "f") — confirm intended.
SHOWDOWN_REPLACEMENTS = [
    ("mega-", "mega"),  # charizard, mewtwo
    ("paldea-", "paldea"),  # tauros
    ("mr. ", "mr"),  # mr mime + mr rime
    ("'d", "d"),  # farfetch'd and sirfetch'd
    ("nidoran-m", "nidoranm"),  # nidoran is a special case
    ("-f", "f"),  # gender diff forms
    (re.compile(r"-m$"), ""),  # gender diff forms
    (re.compile(r"^ho-oh$"), "hooh"),  # Ho-oh special case
]
  177. def get_showdown_urls(form: Form) -> list[tuple[str, str]]:
  178. name = form.name.lower()
  179. for pat, ins in SHOWDOWN_REPLACEMENTS:
  180. if isinstance(pat, re.Pattern):
  181. name = re.sub(pat, ins, name)
  182. else:
  183. name = name.replace(pat, ins)
  184. return [
  185. (f"https://play.pokemonshowdown.com/sprites/ani/{name}.gif", "gif"),
  186. (f"https://play.pokemonshowdown.com/sprites/ani-back/{name}.gif", "gif"),
  187. (f"https://play.pokemonshowdown.com/sprites/gen5/{name}.png", "png"),
  188. (f"https://play.pokemonshowdown.com/sprites/gen5-back/{name}.png", "png"),
  189. ]
  190. async def download(session: ClientSession, url: str, filename: str) -> tuple[str, Exception | bool]:
  191. if Path(filename).is_file():
  192. return url, False
  193. try:
  194. async with session.get(url) as res:
  195. res.raise_for_status()
  196. with open(filename, "wb") as out:
  197. out.write(await res.read())
  198. except Exception as ex:
  199. return url, ex
  200. return url, True
  201. async def download_all_for_pokemon(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exception | bool]]:
  202. results = defaultdict(dict)
  203. async with ClientSession() as session:
  204. for form in pkmn.forms:
  205. urls = []
  206. urls += get_showdown_urls(form)
  207. # TODO more sources
  208. results[form.name].update(await asyncio.gather(*[
  209. download(session, url, f"{image_dir}/{form.name}-{i}.{ext}")
  210. for i, (url, ext) in enumerate(urls)
  211. ]))
  212. return results
  213. async def download_all(image_dir: str, pkmn: list[Pokemon]) -> dict[str, dict[str, Exception | bool]]:
  214. Path(image_dir).mkdir(parents=True, exist_ok=True)
  215. log = {}
  216. for p in pkmn:
  217. log.update(await download_all_for_pokemon(p, image_dir))
  218. return log
  219. async def main(dex_file: str, image_dir: str, startIndex: int, endIndex: int, log_skipped: bool):
  220. dex = await load_pokedex(dex_file)
  221. log = await download_all(image_dir, (dex[i] for i in range(startIndex, endIndex + 1)))
  222. new_downloads = 0
  223. for form, result in log.items():
  224. for url, info in result.items():
  225. if isinstance(info, Exception):
  226. print(f"{form}: FAILED {url} - {info}")
  227. elif not info:
  228. if log_skipped:
  229. print(f"{form}: SKIPPED {url} - {info}")
  230. else:
  231. new_downloads += 1
  232. print(f"New Downloads: {new_downloads}")
  233. if __name__ == "__main__":
  234. from sys import argv
  235. # TODO make this an arg parser
  236. dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
  237. image_dir = argv[2] if len(argv) > 2 else "images"
  238. start, end = map(int, (
  239. argv[3] if len(argv) > 3 else "1-151"
  240. ).split("-")[0:2])
  241. log_skipped = len(argv) > 4 and argv[4].lower() == 'true'
  242. asyncio.run(main(dex_file, image_dir, start, end, log_skipped))