download.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
"""
Manage the logic of downloading the pokedex and source images.
"""
  4. import re
  5. import json
  6. import asyncio
  7. from pathlib import Path
  8. from dataclasses import dataclass, asdict
  9. from collections import defaultdict
  10. from aiohttp import ClientSession
  11. JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):")
  12. # the dex from showdown assumes only strawberry alcremie, since
  13. # that's what's in showdown, but we might as well add the rest
  14. ALCREMIE_SWEETS = [
  15. "Strawberry", "Berry", "Love", "Star",
  16. "Clover", "Flower", "Ribbon",
  17. ]
  18. # https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
  19. # there are some pokemon with notable gender diffs that the dex doesn't cover
  20. # judgement calls made arbitrarily
  21. GENDER_DIFFS = (
  22. "hippopotas", "hippowdon",
  23. "unfezant",
  24. "frillish", "jellicent",
  25. "pyroar",
  26. # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
  27. )
  28. @dataclass
  29. class Form:
  30. name: str
  31. traits: list[str]
  32. types: list[str]
  33. color: str
  34. @dataclass
  35. class Pokemon:
  36. num: int
  37. species: str
  38. forms: list[Form]
  39. async def download_pokedex() -> dict:
  40. async with ClientSession() as session:
  41. async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
  42. res.raise_for_status()
  43. text = await res.text("utf-8")
  44. # this is not json of course, but it's close
  45. # start by taking out the ; and definition
  46. cleaned = text.replace("exports.BattlePokedex = ", "").strip(";")
  47. # then convert the keys to strings
  48. converted = re.sub(JS_TO_JSON, lambda m: f'"{m.group(1)}":', cleaned)
  49. # and fix Type: Null, Farfetch'd, Sirfetch'd
  50. fixed = converted.replace(
  51. '""Type": Null"', '"Type: Null"').replace("\u2019", "'")
  52. # then, parse it
  53. return json.loads(fixed)
  54. def get_traits(species: str, kind: str) -> list[str]:
  55. traits = []
  56. if kind in ("mega", "mega-x", "mega-y", "primal"):
  57. traits.extend(("mega", "nostart"))
  58. if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
  59. traits.extend(("gmax", "nostart"))
  60. if kind in ("alola", "galar", "hisui", "paldea"):
  61. traits.extend(("regional", kind))
  62. # special cases
  63. if species == "Castform" and kind != "base":
  64. # castform can't start battle in weather forms
  65. traits.append("nostart")
  66. if species == "Tauros" and "paldea" in kind:
  67. # paldean tauros has dumb names
  68. traits.extend(("regional", "paldea"))
  69. if species == "Minior" and kind != "meteor":
  70. # minior can only start the battle in meteor form
  71. traits.append("nostart")
  72. if species == "Darmanitan" and "zen" in kind:
  73. # darmanitan cannot start in zen form
  74. traits.append("nostart")
  75. if "galar" in kind:
  76. # also there's a galar-zen form to handle
  77. traits.extend(("regional", "galar"))
  78. if species == "Palafin" and kind == "hero":
  79. # palafin can only start in zero form
  80. traits.append("nostart")
  81. if species == "Gimmighoul" and kind == "roaming":
  82. # gimmighoul roaming is only in PGO
  83. traits.append("nostart")
  84. return sorted(set(traits))
  85. def clean_dex(raw: dict) -> dict[int, Pokemon]:
  86. regrouped = defaultdict(list)
  87. for key, entry in raw.items():
  88. isNonstandard = entry.get("isNonstandard", None)
  89. baseSpecies = entry.get("baseSpecies", None)
  90. forme = entry.get("forme", None)
  91. if isNonstandard not in (None, "Past", "Unobtainable"):
  92. continue # remove CAP etc.
  93. if baseSpecies in ("Pikachu", "Pichu") and forme is not None:
  94. continue # remove pikachu spam + spiky ear pichu
  95. if forme is not None and "Totem" in forme:
  96. continue # remove totem pokemon
  97. num = entry["num"]
  98. # non-cosmetic forms get separate entries automatically
  99. # but keeping the separate unown forms would be ridiculous
  100. if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0:
  101. cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}')
  102. if key == "alcremie":
  103. # oh god this thing
  104. cosmetic = [
  105. f"{cf}-{sweet}"
  106. for cf in cosmetic
  107. for sweet in ALCREMIE_SWEETS
  108. ]
  109. regrouped[num].extend({
  110. **entry,
  111. "forme": cf.replace(" ", "-"),
  112. "formeKind": "cosmetic",
  113. } for cf in cosmetic)
  114. elif key in GENDER_DIFFS:
  115. regrouped[num].append({
  116. **entry,
  117. "forme": f'{entry["name"]}-M',
  118. "formeKind": "cosmetic",
  119. })
  120. regrouped[num].append({
  121. **entry,
  122. "forme": f'{entry["name"]}-F',
  123. "formeKind": "cosmetic",
  124. })
  125. else:
  126. regrouped[num].append({
  127. **entry,
  128. "forme": entry["name"],
  129. "formeKind": entry.get("forme", "base").lower(),
  130. })
  131. return {
  132. i: Pokemon(
  133. num=i,
  134. species=(
  135. # doubles as an assertion that forms is not empty
  136. species := (forms := regrouped[i])[0].get("baseSpecies", forms[0]["name"])
  137. ),
  138. forms=[
  139. Form(
  140. name=f.get("forme", f["name"]),
  141. traits=get_traits(species, f["formeKind"].lower()),
  142. types=f["types"],
  143. color=f["color"],
  144. ) for f in forms
  145. ]
  146. ) for i in range(1, max(regrouped.keys()) + 1)
  147. }
  148. async def load_pokedex(dex_file: Path, force_dex: bool) -> dict:
  149. if dex_file.is_file() and not force_dex:
  150. with open(dex_file) as infile:
  151. loaded = json.load(infile)
  152. dex = {
  153. int(num): Pokemon(
  154. num=entry["num"],
  155. species=entry["species"],
  156. forms=[Form(**f) for f in entry["forms"]],
  157. ) for num, entry in loaded.items()
  158. }
  159. else:
  160. # first download the pokedex
  161. raw_dex = await download_pokedex()
  162. # clean and reorganize it
  163. dex = clean_dex(raw_dex)
  164. # output dex for auditing and reloading
  165. with open(dex_file, "w") as out:
  166. json.dump({
  167. str(i): asdict(pkmn)
  168. for i, pkmn in dex.items()
  169. }, out, indent=2)
  170. return dex
  171. SHOWDOWN_REPLACEMENTS = [
  172. ("mega-", "mega"), # charizard, mewtwo
  173. ("paldea-", "paldea"), # tauros
  174. ("mr. ", "mr"), # mr mime + mr rime
  175. ("'d", "d"), # farfetch'd and sirfetch'd
  176. ("nidoran-m", "nidoranm"), # nidoran is a special case
  177. ("-f", "f"), # gender diff forms
  178. (re.compile(r"-m$"), ""), # gender diff forms
  179. (re.compile(r"^ho-oh$"), "hooh"), # Ho-oh special case
  180. ]
  181. def get_showdown_urls(form: Form) -> list[tuple[str, str]]:
  182. name = form.name.lower()
  183. for pat, ins in SHOWDOWN_REPLACEMENTS:
  184. if isinstance(pat, re.Pattern):
  185. name = re.sub(pat, ins, name)
  186. else:
  187. name = name.replace(pat, ins)
  188. return [
  189. (f"https://play.pokemonshowdown.com/sprites/ani/{name}.gif", "gif"),
  190. (f"https://play.pokemonshowdown.com/sprites/ani-back/{name}.gif", "gif"),
  191. (f"https://play.pokemonshowdown.com/sprites/gen5/{name}.png", "png"),
  192. (f"https://play.pokemonshowdown.com/sprites/gen5-back/{name}.png", "png"),
  193. ]
  194. SEREBII_SPECIAL = {
  195. "Castform-Rainy": "r",
  196. "Castform-Snowy": "i",
  197. "Castform-Sunny": "s",
  198. "Deoxys-Attack": "a",
  199. "Deoxys-Defense": "d",
  200. "Deoxys-Speed": "s",
  201. "Tauros-Paldea-Blaze": "b",
  202. "Tauros-Paldea-Aqua": "a",
  203. }
  204. def get_serebii_url(pkmn: Pokemon, form: Form) -> str | None:
  205. if form.name == pkmn.species:
  206. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}.png"
  207. if form.name in SEREBII_SPECIAL:
  208. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-{SEREBII_SPECIAL[form.name]}.png"
  209. if "gmax" in form.traits:
  210. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-gi.png"
  211. if "mega" in form.traits:
  212. if "Mega-X" in form.name:
  213. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-mx.png"
  214. elif "Mega-Y" in form.name:
  215. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-my.png"
  216. else:
  217. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-m.png"
  218. if "alola" in form.traits:
  219. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-a.png"
  220. if "galar" in form.traits:
  221. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-g.png"
  222. if "hisui" in form.traits:
  223. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-h.png"
  224. if "paldea" in form.traits:
  225. return f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}-p.png"
  226. async def download(session: ClientSession, url: str, filename: Path) -> tuple[str, Exception | bool]:
  227. if filename.is_file():
  228. return url, False
  229. try:
  230. async with session.get(url) as res:
  231. res.raise_for_status()
  232. with open(filename, "wb") as out:
  233. out.write(await res.read())
  234. except Exception as ex:
  235. return url, ex
  236. return url, True
  237. async def download_all_for_pokemon(pkmn: Pokemon, image_dir: Path) -> dict[str, dict[str, Exception | bool]]:
  238. results = defaultdict(dict)
  239. async with ClientSession() as session:
  240. for form in pkmn.forms:
  241. urls = []
  242. urls += get_showdown_urls(form)
  243. urls.append((get_serebii_url(pkmn, form), "png"))
  244. # TODO more sources
  245. results[form.name].update(await asyncio.gather(*[
  246. download(session, url, image_dir.joinpath(f"{form.name}-{i}.{ext}"))
  247. for i, (url, ext) in enumerate(urls) if url is not None
  248. ]))
  249. return results
  250. async def download_all(image_dir: Path, pkmn: list[Pokemon]) -> dict[str, dict[str, Exception | bool]]:
  251. image_dir.mkdir(parents=True, exist_ok=True)
  252. log = {}
  253. for p in pkmn:
  254. log.update(await download_all_for_pokemon(p, image_dir))
  255. return log
  256. KNOWN_MISSING = [
  257. "https://play.pokemonshowdown.com/sprites/ani/venusaur-gmax.gif",
  258. "https://play.pokemonshowdown.com/sprites/ani-back/venusaur-gmax.gif",
  259. "https://play.pokemonshowdown.com/sprites/ani/blastoise-gmax.gif",
  260. "https://play.pokemonshowdown.com/sprites/ani-back/blastoise-gmax.gif",
  261. "https://play.pokemonshowdown.com/sprites/ani/growlithe-hisui.gif",
  262. "https://play.pokemonshowdown.com/sprites/ani-back/growlithe-hisui.gif",
  263. "https://play.pokemonshowdown.com/sprites/ani/arcanine-hisui.gif",
  264. "https://play.pokemonshowdown.com/sprites/ani-back/arcanine-hisui.gif",
  265. "https://play.pokemonshowdown.com/sprites/ani/voltorb-hisui.gif",
  266. "https://play.pokemonshowdown.com/sprites/ani-back/voltorb-hisui.gif",
  267. "https://play.pokemonshowdown.com/sprites/ani/electrode-hisui.gif",
  268. "https://play.pokemonshowdown.com/sprites/ani-back/electrode-hisui.gif",
  269. "https://play.pokemonshowdown.com/sprites/ani/tauros-paldeacombat.gif",
  270. "https://play.pokemonshowdown.com/sprites/ani-back/tauros-paldeacombat.gif",
  271. "https://play.pokemonshowdown.com/sprites/ani/tauros-paldeablaze.gif",
  272. "https://play.pokemonshowdown.com/sprites/ani-back/tauros-paldeablaze.gif",
  273. "https://play.pokemonshowdown.com/sprites/ani/tauros-paldeaaqua.gif",
  274. "https://play.pokemonshowdown.com/sprites/ani-back/tauros-paldeaaqua.gif",
  275. "https://play.pokemonshowdown.com/sprites/ani/wooper-paldea.gif",
  276. "https://play.pokemonshowdown.com/sprites/ani-back/wooper-paldea.gif",
  277. "https://play.pokemonshowdown.com/sprites/ani/qwilfish-hisui.gif",
  278. "https://play.pokemonshowdown.com/sprites/ani-back/qwilfish-hisui.gif",
  279. "https://play.pokemonshowdown.com/sprites/ani/sneasel-hisui.gif",
  280. "https://play.pokemonshowdown.com/sprites/ani-back/sneasel-hisui.gif",
  281. ]
  282. async def main(
  283. dex_file: Path, image_dir: Path, startIndex: int, endIndex: int,
  284. log_skipped: bool, force_dex: bool, dex_only: bool
  285. ):
  286. dex = await load_pokedex(dex_file, force_dex)
  287. if dex_only:
  288. return
  289. log = await download_all(image_dir, (dex[i] for i in range(startIndex, endIndex + 1)))
  290. new_downloads = 0
  291. for form, result in log.items():
  292. for url, info in result.items():
  293. if isinstance(info, Exception):
  294. if url not in KNOWN_MISSING:
  295. print(f"{form}: FAILED {url} - {info}")
  296. elif not info:
  297. if log_skipped:
  298. print(f"{form}: SKIPPED {url} - {info}")
  299. else:
  300. print(f"{form}: SUCCESS {url}")
  301. new_downloads += 1
  302. print(f"New Downloads: {new_downloads}")
  303. if __name__ == "__main__":
  304. from argparse import ArgumentParser
  305. parser = ArgumentParser(
  306. prog="Image Retriever",
  307. description="Retrieve pokedex and images",
  308. )
  309. parser.add_argument(
  310. "-d", "--pokedex", default="data/pokedex.json", type=Path, help="Pokedex file"
  311. )
  312. parser.add_argument(
  313. "--refresh-dex", action="store_true", help="Update the pokedex"
  314. )
  315. parser.add_argument(
  316. "--pokedex-only", action="store_true", help="Quit before image download"
  317. )
  318. parser.add_argument(
  319. "-o", "--output", default="images", type=Path, help="Image output directory"
  320. )
  321. parser.add_argument(
  322. "--log-skipped", action="store_true", help="Log skipped images"
  323. )
  324. parser.add_argument(
  325. "bounds", type=lambda a: map(int, a.split("-")), default="1-151", nargs="?",
  326. help="Range of dex numbers to download, inclusive"
  327. )
  328. args = parser.parse_args()
  329. start, end = args.bounds
  330. asyncio.run(main(
  331. args.pokedex, args.output, start, end,
  332. args.log_skipped, args.refresh_dex, args.pokedex_only
  333. ))