download.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. """
  2. Manage the logic of downloading the pokedex and source images.
  3. """
  4. import re
  5. import json
  6. import asyncio
  7. from dataclasses import dataclass, asdict
  8. from collections import defaultdict
  9. from aiohttp import ClientSession
  10. JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):")
  11. # the dex from showdown assumes only strawberry alcremie, since
  12. # that's what's in showdown, but we might as well add the rest
  13. ALCREMIE_SWEETS = [
  14. "Strawberry", "Berry", "Love", "Star",
  15. "Clover", "Flower", "Ribbon",
  16. ]
  17. # https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
  18. # there are some pokemon with notable gender diffs that the dex doesn't cover
  19. # judgement calls made arbitrarily
  20. GENDER_DIFFS = (
  21. "hippopotas", "hippowdon",
  22. "unfezant",
  23. "frillish", "jellicent",
  24. "pyroar",
  25. # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
  26. )
  27. @dataclass
  28. class Form:
  29. name: str
  30. traits: list[str]
  31. types: list[str]
  32. color: str
  33. @dataclass
  34. class Pokemon:
  35. num: int
  36. species: str
  37. forms: list[Form]
  38. async def load_pokedex() -> dict:
  39. async with ClientSession() as session:
  40. async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
  41. res.raise_for_status()
  42. text = await res.text("utf-8")
  43. # this is not json of course, but it's close
  44. # start by taking out the ; and definition
  45. cleaned = text.replace("exports.BattlePokedex = ", "").strip(";")
  46. # then convert the keys to strings
  47. converted = re.sub(JS_TO_JSON, lambda m: f'"{m.group(1)}":', cleaned)
  48. # and fix Type: Null
  49. fixed = converted.replace('""Type": Null"', '"Type: Null"')
  50. # then, parse it
  51. return json.loads(fixed)
  52. def get_traits(species: str, kind: str) -> list[str]:
  53. traits = []
  54. if kind in ("mega", "mega-x", "mega-y", "primal"):
  55. traits.extend(("mega", "nostart"))
  56. if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
  57. traits.extend(("gmax", "nostart"))
  58. if kind in ("alola", "galar", "hisui", "galar", "paldea"):
  59. traits.extend(("regional", kind))
  60. # special cases
  61. if species == "Tauros" and "-paldea" in kind:
  62. # paldean tauros has dumb names
  63. traits.extend(("regional", "paldea"))
  64. if species == "Minior" and kind != "meteor":
  65. # minior can only start the battle in meteor form
  66. traits.append("nostart")
  67. if species == "Darmanitan" and "zen" in kind:
  68. # darmanitan cannot start in zen form
  69. traits.append("nostart")
  70. if "galar" in kind:
  71. # also there's a galar-zen form to handle
  72. traits.extend(("regional", "galar"))
  73. if species == "Palafin" and kind == "hero":
  74. # palafin can only start in zero form
  75. traits.append("nostart")
  76. if species == "Gimmighoul" and kind == "roaming":
  77. # gimmighoul roaming is only in PGO
  78. traits.append("nostart")
  79. return list(set(traits))
  80. def clean_dex(raw: dict) -> dict[int, Pokemon]:
  81. regrouped = defaultdict(list)
  82. for key, entry in raw.items():
  83. isNonstandard = entry.get("isNonstandard", None)
  84. baseSpecies = entry.get("baseSpecies", None)
  85. forme = entry.get("forme", None)
  86. if isNonstandard not in (None, "Past", "Unobtainable"):
  87. continue # remove CAP etc.
  88. if baseSpecies in ("Pikachu", "Pichu") and forme is not None:
  89. continue # remove pikachu spam + spiky ear pichu
  90. if forme == "Totem":
  91. continue # remove totem pokemon
  92. num = entry["num"]
  93. # non-cosmetic forms get separate entries automatically
  94. # but keeping the separate unown forms would be ridiculous
  95. if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0:
  96. cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}')
  97. if key == "alcremie":
  98. # oh god this thing
  99. cosmetic = [
  100. f"{cf}-{sweet}"
  101. for cf in cosmetic
  102. for sweet in ALCREMIE_SWEETS
  103. ]
  104. regrouped[num].extend({
  105. **entry,
  106. "forme": cf.replace(" ", "-"),
  107. "formeKind": "cosmetic",
  108. } for cf in cosmetic)
  109. elif key in GENDER_DIFFS:
  110. regrouped[num].append({
  111. **entry,
  112. "forme": f'{entry["name"]}-M',
  113. "formeKind": "cosmetic",
  114. })
  115. regrouped[num].append({
  116. **entry,
  117. "forme": f'{entry["name"]}-F',
  118. "formeKind": "cosmetic",
  119. })
  120. else:
  121. regrouped[num].append({
  122. **entry,
  123. "forme": entry["name"],
  124. "formeKind": entry.get("forme", "base").lower(),
  125. })
  126. cleaned = {}
  127. for i in range(1, max(regrouped.keys()) + 1):
  128. forms = regrouped[i]
  129. # double check there's no skipped or duped entries
  130. assert len(forms) > 0 and i not in cleaned
  131. species = forms[0].get("baseSpecies", forms[0]["name"])
  132. cleaned[i] = Pokemon(
  133. num=i,
  134. species=species,
  135. forms=[
  136. Form(
  137. name=f.get("forme", f["name"]),
  138. traits=get_traits(species, f["formeKind"]),
  139. types=f["types"],
  140. color=f["color"],
  141. ) for f in forms
  142. ]
  143. )
  144. return cleaned
  145. async def main(dex_file: str):
  146. # first download the pokedex
  147. raw_dex = await load_pokedex()
  148. # clean and reorganize it
  149. dex = clean_dex(raw_dex)
  150. # output dex for auditing
  151. with open(dex_file, "w") as out:
  152. json.dump({str(i): asdict(pkmn) for i, pkmn in dex.items()}, out, indent=2)
  153. # TODO actually progress to images
  154. if __name__ == "__main__":
  155. from sys import argv
  156. dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
  157. asyncio.run(main(dex_file))