download.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. """
  2. Manage the logic of downloading the pokedex and source images.
  3. """
  4. import re
  5. import json
  6. import asyncio
  7. from pathlib import Path
  8. from dataclasses import dataclass, asdict
  9. from collections import defaultdict
  10. from aiohttp import ClientSession
  11. JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):")
  12. # the dex from showdown assumes only strawberry alcremie, since
  13. # that's what's in showdown, but we might as well add the rest
  14. ALCREMIE_SWEETS = [
  15. "Strawberry", "Berry", "Love", "Star",
  16. "Clover", "Flower", "Ribbon",
  17. ]
  18. # https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
  19. # there are some pokemon with notable gender diffs that the dex doesn't cover
  20. # judgement calls made arbitrarily
  21. GENDER_DIFFS = (
  22. "hippopotas", "hippowdon",
  23. "unfezant",
  24. "frillish", "jellicent",
  25. "pyroar",
  26. # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
  27. )
  28. @dataclass
  29. class Form:
  30. name: str
  31. traits: list[str]
  32. types: list[str]
  33. color: str
  34. @dataclass
  35. class Pokemon:
  36. num: int
  37. species: str
  38. forms: list[Form]
  39. async def download_pokedex() -> dict:
  40. async with ClientSession() as session:
  41. async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
  42. res.raise_for_status()
  43. text = await res.text("utf-8")
  44. # this is not json of course, but it's close
  45. # start by taking out the ; and definition
  46. cleaned = text.replace("exports.BattlePokedex = ", "").strip(";")
  47. # then convert the keys to strings
  48. converted = re.sub(JS_TO_JSON, lambda m: f'"{m.group(1)}":', cleaned)
  49. # and fix Type: Null, Farfetch'd, Sirfetch'd
  50. fixed = converted.replace('""Type": Null"', '"Type: Null"').replace("’", "'")
  51. # then, parse it
  52. return json.loads(fixed)
  53. def get_traits(species: str, kind: str) -> list[str]:
  54. traits = []
  55. if kind in ("mega", "mega-x", "mega-y", "primal"):
  56. traits.extend(("mega", "nostart"))
  57. if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
  58. traits.extend(("gmax", "nostart"))
  59. if kind in ("alola", "galar", "hisui", "galar", "paldea"):
  60. traits.extend(("regional", kind))
  61. # special cases
  62. if species == "Castform" and kind != "base":
  63. # castform can't start battle in weather forms
  64. traits.append("nostart")
  65. if species == "Tauros" and "paldea" in kind:
  66. # paldean tauros has dumb names
  67. traits.extend(("regional", "paldea"))
  68. if species == "Minior" and kind != "meteor":
  69. # minior can only start the battle in meteor form
  70. traits.append("nostart")
  71. if species == "Darmanitan" and "zen" in kind:
  72. # darmanitan cannot start in zen form
  73. traits.append("nostart")
  74. if "galar" in kind:
  75. # also there's a galar-zen form to handle
  76. traits.extend(("regional", "galar"))
  77. if species == "Palafin" and kind == "hero":
  78. # palafin can only start in zero form
  79. traits.append("nostart")
  80. if species == "Gimmighoul" and kind == "roaming":
  81. # gimmighoul roaming is only in PGO
  82. traits.append("nostart")
  83. return sorted(set(traits))
  84. def clean_dex(raw: dict) -> dict[int, Pokemon]:
  85. regrouped = defaultdict(list)
  86. for key, entry in raw.items():
  87. isNonstandard = entry.get("isNonstandard", None)
  88. baseSpecies = entry.get("baseSpecies", None)
  89. forme = entry.get("forme", None)
  90. if isNonstandard not in (None, "Past", "Unobtainable"):
  91. continue # remove CAP etc.
  92. if baseSpecies in ("Pikachu", "Pichu") and forme is not None:
  93. continue # remove pikachu spam + spiky ear pichu
  94. if forme is not None and "Totem" in forme:
  95. continue # remove totem pokemon
  96. num = entry["num"]
  97. # non-cosmetic forms get separate entries automatically
  98. # but keeping the separate unown forms would be ridiculous
  99. if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0:
  100. cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}')
  101. if key == "alcremie":
  102. # oh god this thing
  103. cosmetic = [
  104. f"{cf}-{sweet}"
  105. for cf in cosmetic
  106. for sweet in ALCREMIE_SWEETS
  107. ]
  108. regrouped[num].extend({
  109. **entry,
  110. "forme": cf.replace(" ", "-"),
  111. "formeKind": "cosmetic",
  112. } for cf in cosmetic)
  113. elif key in GENDER_DIFFS:
  114. regrouped[num].append({
  115. **entry,
  116. "forme": f'{entry["name"]}-M',
  117. "formeKind": "cosmetic",
  118. })
  119. regrouped[num].append({
  120. **entry,
  121. "forme": f'{entry["name"]}-F',
  122. "formeKind": "cosmetic",
  123. })
  124. else:
  125. regrouped[num].append({
  126. **entry,
  127. "forme": entry["name"],
  128. "formeKind": entry.get("forme", "base").lower(),
  129. })
  130. return {
  131. i: Pokemon(
  132. num=i,
  133. species=(
  134. # doubles as an assertion that forms is not empty
  135. species := (forms := regrouped[i])[0].get("baseSpecies", forms[0]["name"])
  136. ),
  137. forms=[
  138. Form(
  139. name=f.get("forme", f["name"]),
  140. traits=get_traits(species, f["formeKind"].lower()),
  141. types=f["types"],
  142. color=f["color"],
  143. ) for f in forms
  144. ]
  145. ) for i in range(1, max(regrouped.keys()) + 1)
  146. }
  147. async def load_pokedex(dex_file: str) -> dict:
  148. if Path(dex_file).is_file():
  149. with open(dex_file) as infile:
  150. loaded = json.load(infile)
  151. dex = {
  152. int(num): Pokemon(
  153. num=entry["num"],
  154. species=entry["species"],
  155. forms=[Form(**f) for f in entry["forms"]],
  156. ) for num, entry in loaded.items()
  157. }
  158. else:
  159. # first download the pokedex
  160. raw_dex = await download_pokedex()
  161. # clean and reorganize it
  162. dex = clean_dex(raw_dex)
  163. # output dex for auditing and reloading
  164. with open(dex_file, "w") as out:
  165. json.dump({
  166. str(i): asdict(pkmn)
  167. for i, pkmn in dex.items()
  168. }, out, indent=2)
  169. return dex
  170. SHOWDOWN_REPLACEMENTS = [
  171. ("mega-", "mega"), # charizard, mewtwo
  172. ("paldea-", "paldea"), # tauros
  173. ("mr. ", "mr"), # mr mime + mr rime
  174. ("'d", "d"), # farfetch'd and sirfetch'd
  175. ("nidoran-m", "nidoranm"), # nidoran is a special case
  176. ("-f", "f"), # gender diff forms
  177. (re.compile(r"-m$"), ""), # gender diff forms
  178. (re.compile(r"^ho-oh$"), "hooh"), # Ho-oh special case
  179. ]
  180. def get_showdown_urls(form: Form) -> list[tuple[str, str]]:
  181. name = form.name.lower()
  182. for pat, ins in SHOWDOWN_REPLACEMENTS:
  183. if isinstance(pat, re.Pattern):
  184. name = re.sub(pat, ins, name)
  185. else:
  186. name = name.replace(pat, ins)
  187. return [
  188. (f"https://play.pokemonshowdown.com/sprites/ani/{name}.gif", "gif"),
  189. (f"https://play.pokemonshowdown.com/sprites/ani-back/{name}.gif", "gif"),
  190. (f"https://play.pokemonshowdown.com/sprites/gen5/{name}.png", "png"),
  191. (f"https://play.pokemonshowdown.com/sprites/gen5-back/{name}.png", "png"),
  192. ]
  193. async def download(session: ClientSession, url: str, filename: str) -> tuple[str, Exception | bool]:
  194. if Path(filename).is_file():
  195. return url, False
  196. try:
  197. async with session.get(url) as res:
  198. res.raise_for_status()
  199. with open(filename, "wb") as out:
  200. out.write(await res.read())
  201. except Exception as ex:
  202. return url, ex
  203. return url, True
  204. async def download_all_for_pokemon(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exception | bool]]:
  205. results = defaultdict(dict)
  206. async with ClientSession() as session:
  207. for form in pkmn.forms:
  208. urls = []
  209. urls += get_showdown_urls(form)
  210. # TODO more sources
  211. results[form.name].update(await asyncio.gather(*[
  212. download(session, url, f"{image_dir}/{form.name}-{i}.{ext}")
  213. for i, (url, ext) in enumerate(urls)
  214. ]))
  215. return results
  216. async def download_all(image_dir: str, pkmn: list[Pokemon]) -> dict[str, dict[str, Exception | bool]]:
  217. Path(image_dir).mkdir(parents=True, exist_ok=True)
  218. log = {}
  219. for p in pkmn:
  220. log.update(await download_all_for_pokemon(p, image_dir))
  221. return log
  222. async def main(dex_file: str, image_dir: str, startIndex: int, endIndex: int, log_skipped: bool):
  223. dex = await load_pokedex(dex_file)
  224. log = await download_all(image_dir, (dex[i] for i in range(startIndex, endIndex + 1)))
  225. new_downloads = 0
  226. for form, result in log.items():
  227. for url, info in result.items():
  228. if isinstance(info, Exception):
  229. print(f"{form}: FAILED {url} - {info}")
  230. elif not info:
  231. if log_skipped:
  232. print(f"{form}: SKIPPED {url} - {info}")
  233. else:
  234. new_downloads += 1
  235. print(f"New Downloads: {new_downloads}")
  236. if __name__ == "__main__":
  237. from sys import argv
  238. # TODO make this an arg parser
  239. dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
  240. image_dir = argv[2] if len(argv) > 2 else "images"
  241. start, end = map(int, (
  242. argv[3] if len(argv) > 3 else "1-151"
  243. ).split("-")[0:2])
  244. log_skipped = len(argv) > 4 and argv[4].lower() == 'true'
  245. asyncio.run(main(dex_file, image_dir, start, end, log_skipped))