123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 |
- """
- Manage the logic of downloading the pokedex and source images.
- """
- import re
- import json
- import asyncio
- from pathlib import Path
- from dataclasses import dataclass, asdict
- from collections import defaultdict
- from aiohttp import ClientSession
# matches a bare JavaScript object key (letters/digits, starting with a
# letter, immediately followed by a colon) so it can be wrapped in quotes
# NOTE(review): keys that start with a digit are not matched by this pattern
JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):")

# the dex from showdown assumes only strawberry alcremie, since
# that's what's in showdown, but we might as well add the rest
ALCREMIE_SWEETS = [
    "Strawberry",
    "Berry",
    "Love",
    "Star",
    "Clover",
    "Flower",
    "Ribbon",
]

# https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
# pokemon with notable gender differences that the dex doesn't cover;
# which ones made the cut is an arbitrary judgement call
GENDER_DIFFS = (
    "hippopotas",
    "hippowdon",
    "unfezant",
    "frillish",
    "jellicent",
    "pyroar",
    # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
)
@dataclass
class Form:
    """A single form of a species as stored in the cleaned pokedex."""

    # form name; also used to build sprite urls and image file names
    name: str
    # trait tags for this form, as produced by get_traits
    traits: list[str]
    # types copied from the raw dex entry
    types: list[str]
    # color copied from the raw dex entry
    color: str
@dataclass
class Pokemon:
    """One pokedex number together with every form kept for it."""

    # national dex number; also the key of this entry in the cleaned dex
    num: int
    # base species name shared by all of the forms
    species: str
    # every form kept for this species, in the order they were collected
    forms: list[Form]
async def load_pokedex() -> dict:
    """Download the Pokemon Showdown pokedex and parse it into a dict.

    The endpoint serves a JavaScript assignment rather than JSON, so the
    text is rewritten into valid JSON before it is parsed.

    Returns:
        The parsed pokedex object.

    Raises:
        Whatever ``raise_for_status`` raises on an HTTP error status, and
        ``json.JSONDecodeError`` if the rewritten text is still not valid JSON.
    """
    async with ClientSession() as session:
        async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
            res.raise_for_status()
            text = await res.text("utf-8")
    # this is not json of course, but it's close
    # start by taking out the definition, then surrounding whitespace, then
    # the ; (whitespace goes first so a trailing newline can't shield the ;)
    cleaned = text.replace("exports.BattlePokedex = ", "").strip().strip(";")
    # then convert the keys to strings
    converted = JS_TO_JSON.sub(r'"\1":', cleaned)
    # and fix Type: Null, whose name looks like a key to the regex
    fixed = converted.replace('""Type": Null"', '"Type: Null"')
    # then, parse it
    return json.loads(fixed)
def get_traits(species: str, kind: str) -> list[str]:
    """Work out the trait tags for one form of a species.

    Args:
        species: base species name, e.g. "Tauros".
        kind: the form kind: the lower-cased forme name from the dex
            (e.g. "mega-x", "galar"), "base", or "cosmetic".

    Returns:
        The trait tags, de-duplicated and sorted alphabetically.
    """
    traits = set()
    if kind in ("mega", "mega-x", "mega-y", "primal"):
        traits.update(("mega", "nostart"))
    if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
        traits.update(("gmax", "nostart"))
    if kind in ("alola", "galar", "hisui", "paldea"):
        traits.update(("regional", kind))

    # special cases
    if species == "Tauros" and "paldea" in kind:
        # paldean tauros has dumb names: the kind looks like "paldea-combat",
        # so it misses the exact match above and has no leading "-" either
        traits.update(("regional", "paldea"))
    if species == "Minior" and kind != "meteor":
        # minior can only start the battle in meteor form
        traits.add("nostart")
    if species == "Darmanitan" and "zen" in kind:
        # darmanitan cannot start in zen form
        traits.add("nostart")
        if "galar" in kind:
            # also there's a galar-zen form to handle
            traits.update(("regional", "galar"))
    if species == "Palafin" and kind == "hero":
        # palafin can only start in zero form
        traits.add("nostart")
    if species == "Gimmighoul" and kind == "roaming":
        # gimmighoul roaming is only in PGO
        traits.add("nostart")
    return sorted(traits)
def clean_dex(raw: dict) -> dict[int, Pokemon]:
    """Filter the raw dex and regroup its entries by pokedex number.

    Args:
        raw: the parsed dex, mapping an entry key to its entry dict.
            It is only read, never modified.

    Returns:
        A dict mapping every number from 1 up to the highest number kept
        to a Pokemon holding all of the forms collected for that number.

    Raises:
        ValueError: if no entry survives the filtering.
        IndexError: if some number in that range ends up with no forms.
    """
    regrouped = defaultdict(list)
    for key, entry in raw.items():
        nonstandard = entry.get("isNonstandard")
        base_species = entry.get("baseSpecies")
        forme = entry.get("forme")
        if nonstandard not in (None, "Past", "Unobtainable"):
            continue  # remove CAP etc.
        if base_species in ("Pikachu", "Pichu") and forme is not None:
            continue  # remove pikachu spam + spiky ear pichu
        if forme is not None and "Totem" in forme:
            continue  # remove totem pokemon

        num = entry["num"]
        cosmetic_formes = entry.get("cosmeticFormes", [])
        # non-cosmetic forms get separate entries automatically
        # but keeping the separate unown forms would be ridiculous
        if key != "unown" and cosmetic_formes:
            # build a new list so the caller's raw entry is left untouched
            cosmetic = [*cosmetic_formes, f'{entry["name"]}-{entry["baseForme"]}']
            if key == "alcremie":
                # oh god this thing
                cosmetic = [
                    f"{cf}-{sweet}"
                    for cf in cosmetic
                    for sweet in ALCREMIE_SWEETS
                ]
            regrouped[num].extend(
                {**entry, "forme": cf.replace(" ", "-"), "formeKind": "cosmetic"}
                for cf in cosmetic
            )
        elif key in GENDER_DIFFS:
            # one cosmetic form per gender, male first
            regrouped[num].extend(
                {**entry, "forme": f'{entry["name"]}-{gender}', "formeKind": "cosmetic"}
                for gender in ("M", "F")
            )
        else:
            regrouped[num].append({
                **entry,
                "forme": entry["name"],
                "formeKind": entry.get("forme", "base").lower(),
            })

    dex = {}
    for num in range(1, max(regrouped) + 1):
        forms = regrouped[num]
        # indexing doubles as an assertion that forms is not empty
        first = forms[0]
        species = first.get("baseSpecies", first["name"])
        dex[num] = Pokemon(
            num=num,
            species=species,
            forms=[
                Form(
                    name=f.get("forme", f["name"]),
                    traits=get_traits(species, f["formeKind"]),
                    types=f["types"],
                    color=f["color"],
                )
                for f in forms
            ],
        )
    return dex
def get_showdown_urls(species: str, form: Form) -> list[tuple[str, str]]:
    """List the showdown sprite urls for a form.

    Args:
        species: species name (currently unused).
        form: the form whose name selects the sprite.

    Returns:
        (url, file extension) pairs: animated front and back gifs first,
        then the gen5 front and back pngs.
    """
    name = form.name.lower()
    # the sprite names fuse the mega suffix into a single word
    for dashed, fused in (("mega-y", "megay"), ("mega-x", "megax")):
        name = name.replace(dashed, fused)

    animated = [
        (f"https://play.pokemonshowdown.com/sprites/ani/{name}.gif", "gif"),
        (f"https://play.pokemonshowdown.com/sprites/ani-back/{name}.gif", "gif"),
    ]
    static = [
        (f"https://play.pokemonshowdown.com/sprites/gen5/{name}.png", "png"),
        (f"https://play.pokemonshowdown.com/sprites/gen5-back/{name}.png", "png"),
    ]
    return animated + static
async def download(session: ClientSession, url: str, filename: str) -> tuple[str, Exception | bool]:
    """Fetch one url into a file unless that file already exists.

    Args:
        session: the session used for the request.
        url: what to fetch.
        filename: where to store the response body.

    Returns:
        The url paired with False if the file was already there, True if
        it was downloaded, or the exception that made the download fail.
    """
    target = Path(filename)
    if target.is_file():
        return url, False
    try:
        async with session.get(url) as res:
            res.raise_for_status()
            # read the whole body before touching the disk: creating the
            # file first would leave an empty file behind on a failed read,
            # and the next run would mistake it for a finished download
            payload = await res.read()
        target.write_bytes(payload)
    except Exception as ex:
        # best effort: report the failure to the caller instead of raising
        return url, ex
    return url, True
async def download_all(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exception | bool]]:
    """Download every known image for every form of one pokemon.

    Args:
        pkmn: the pokemon whose forms should be fetched.
        image_dir: directory the image files are written into.

    Returns:
        For each form name, a mapping from url to the outcome that
        download reported for it.
    """
    results = defaultdict(dict)
    async with ClientSession() as session:
        for form in pkmn.forms:
            per_form = results[form.name]
            sources = [*get_showdown_urls(pkmn.species, form)]
            # TODO more
            # the images of one form are fetched concurrently
            jobs = [
                download(session, url, f"{image_dir}/{form.name}-{i}.{ext}")
                for i, (url, ext) in enumerate(sources)
            ]
            outcomes = await asyncio.gather(*jobs)
            per_form.update(outcomes)
    return results
async def main(dex_file: str, image_dir: str):
    """Load or build the cleaned pokedex, then download images.

    If dex_file exists it is reloaded from disk; otherwise the dex is
    downloaded, cleaned and written to dex_file. For now only the images
    of dex number 286 are fetched, and the per-form results are printed.

    Args:
        dex_file: path of the cleaned pokedex json file.
        image_dir: directory the images are written into (created if needed).
    """
    dex_path = Path(dex_file)
    if dex_path.is_file():
        with open(dex_path, encoding="utf-8") as infile:
            loaded = json.load(infile)
        dex = {
            int(num): Pokemon(
                num=entry["num"],
                species=entry["species"],
                forms=[Form(**f) for f in entry["forms"]],
            )
            for num, entry in loaded.items()
        }
    else:
        # first download the pokedex
        raw_dex = await load_pokedex()
        # clean and reorganize it
        dex = clean_dex(raw_dex)
        # output dex for auditing and reloading; the target directory may
        # not exist yet on a first run, so create it before writing
        dex_path.parent.mkdir(parents=True, exist_ok=True)
        with open(dex_path, "w", encoding="utf-8") as out:
            json.dump({
                str(i): asdict(pkmn)
                for i, pkmn in dex.items()
            }, out, indent=2)

    Path(image_dir).mkdir(parents=True, exist_ok=True)
    log = await download_all(dex[286], image_dir)
    # the log is keyed by form name; each value maps url -> outcome
    for form_name, result in log.items():
        print(form_name, "-", str(result))
    # TODO actually get all images
if __name__ == "__main__":
    import sys

    # optional positional arguments: [dex_file] [image_dir]
    cli_args = sys.argv[1:]
    dex_file = cli_args[0] if cli_args else "data/pokedex.json"
    image_dir = cli_args[1] if len(cli_args) > 1 else "images"
    asyncio.run(main(dex_file, image_dir))
|