""" Manage the logic of downloading the pokedex and source images. """

import re
import json
import asyncio
from pathlib import Path
from dataclasses import dataclass, asdict
from collections import defaultdict

from aiohttp import ClientSession


# matches a bare JS object key (identifier followed by a colon)
JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):")

# the dex from showdown assumes only strawberry alcremie, since
# that's what's in showdown, but we might as well add the rest
ALCREMIE_SWEETS = [
    "Strawberry",
    "Berry",
    "Love",
    "Star",
    "Clover",
    "Flower",
    "Ribbon",
]

# https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
# there are some pokemon with notable gender diffs that the dex doesn't cover
# judgement calls made arbitrarily
GENDER_DIFFS = (
    "hippopotas",
    "hippowdon",
    "unfezant",
    "frillish",
    "jellicent",
    "pyroar",
    # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
)


@dataclass
class Form:
    """One form of a pokemon, as stored in the cleaned dex."""

    name: str
    traits: list[str]
    types: list[str]
    color: str


@dataclass
class Pokemon:
    """A dex number together with its species name and all of its forms."""

    num: int
    species: str
    forms: list[Form]


async def download_pokedex() -> dict:
    """Fetch showdown's ``pokedex.js`` and parse it into a dict.

    The file is a JS assignment rather than JSON, so it is massaged into
    JSON with text substitutions before being handed to ``json.loads``.

    Raises whatever aiohttp raises on a failed request (including a bad
    HTTP status) and ``json.JSONDecodeError`` if the massaged text is
    still not valid JSON.
    """
    async with ClientSession() as session:
        async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
            res.raise_for_status()
            text = await res.text("utf-8")
    # this is not json of course, but it's close
    # start by taking out the ; and definition
    cleaned = text.replace("exports.BattlePokedex = ", "").strip(";")
    # then convert the keys to strings
    converted = JS_TO_JSON.sub(lambda m: f'"{m.group(1)}":', cleaned)
    # and fix Type: Null (mangled by the key conversion above), Farfetch'd, Sirfetch'd
    fixed = converted.replace('""Type": Null"', '"Type: Null"').replace("\u2019", "'")
    # then, parse it
    return json.loads(fixed)


def get_traits(species: str, kind: str) -> list[str]:
    """Return the sorted, de-duplicated trait tags for one form.

    ``species`` is the base species name and ``kind`` the lower-cased form
    kind (e.g. ``"base"``, ``"mega-x"``, ``"galar"``, ``"cosmetic"``).
    """
    traits = set()
    if kind in ("mega", "mega-x", "mega-y", "primal"):
        traits.update(("mega", "nostart"))
    if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
        traits.update(("gmax", "nostart"))
    if kind in ("alola", "galar", "hisui", "paldea"):
        traits.update(("regional", kind))

    # special cases
    if species == "Castform" and kind != "base":
        # castform can't start battle in weather forms
        traits.add("nostart")
    if species == "Tauros" and "paldea" in kind:
        # paldean tauros has dumb names
        traits.update(("regional", "paldea"))
    if species == "Minior" and kind != "meteor":
        # minior can only start the battle in meteor form
        traits.add("nostart")
    if species == "Darmanitan" and "zen" in kind:
        # darmanitan cannot start in zen form
        traits.add("nostart")
        if "galar" in kind:
            # also there's a galar-zen form to handle
            traits.update(("regional", "galar"))
    if species == "Palafin" and kind == "hero":
        # palafin can only start in zero form
        traits.add("nostart")
    if species == "Gimmighoul" and kind == "roaming":
        # gimmighoul roaming is only in PGO
        traits.add("nostart")
    return sorted(traits)


def clean_dex(raw: dict) -> dict[int, Pokemon]:
    """Filter the raw showdown dex and regroup its entries by dex number.

    Entries whose ``isNonstandard`` is anything other than missing,
    ``"Past"`` or ``"Unobtainable"`` are dropped, as are Pikachu/Pichu
    formes and Totem formes. Cosmetic formes and the hand-picked
    ``GENDER_DIFFS`` are expanded into separate forms. ``raw`` is not
    modified.

    Raises ``ValueError`` if nothing survives filtering and ``IndexError``
    if any dex number between 1 and the highest one has no forms.
    """
    regrouped = defaultdict(list)
    for key, entry in raw.items():
        is_nonstandard = entry.get("isNonstandard", None)
        base_species = entry.get("baseSpecies", None)
        forme = entry.get("forme", None)
        if is_nonstandard not in (None, "Past", "Unobtainable"):
            continue  # remove CAP etc.
        if base_species in ("Pikachu", "Pichu") and forme is not None:
            continue  # remove pikachu spam + spiky ear pichu
        if forme is not None and "Totem" in forme:
            continue  # remove totem pokemon
        num = entry["num"]
        cosmetic = entry.get("cosmeticFormes", [])
        # non-cosmetic forms get separate entries automatically
        # but keeping the separate unown forms would be ridiculous
        if key != "unown" and cosmetic:
            # build a new list rather than appending in place, so the
            # caller's raw dex is left untouched and re-running is safe
            cosmetic = [*cosmetic, f'{entry["name"]}-{entry["baseForme"]}']
            if key == "alcremie":
                # oh god this thing
                cosmetic = [
                    f"{cf}-{sweet}"
                    for cf in cosmetic
                    for sweet in ALCREMIE_SWEETS
                ]
            regrouped[num].extend(
                {
                    **entry,
                    "forme": cf.replace(" ", "-"),
                    "formeKind": "cosmetic",
                }
                for cf in cosmetic
            )
        elif key in GENDER_DIFFS:
            for suffix in ("M", "F"):
                regrouped[num].append({
                    **entry,
                    "forme": f'{entry["name"]}-{suffix}',
                    "formeKind": "cosmetic",
                })
        else:
            regrouped[num].append({
                **entry,
                "forme": entry["name"],
                "formeKind": entry.get("forme", "base").lower(),
            })

    dex = {}
    for i in range(1, max(regrouped.keys()) + 1):
        forms = regrouped[i]
        # indexing [0] doubles as an assertion that forms is not empty
        species = forms[0].get("baseSpecies", forms[0]["name"])
        dex[i] = Pokemon(
            num=i,
            species=species,
            forms=[
                Form(
                    name=f.get("forme", f["name"]),
                    traits=get_traits(species, f["formeKind"].lower()),
                    types=f["types"],
                    color=f["color"],
                )
                for f in forms
            ],
        )
    return dex


async def load_pokedex(dex_file: Path, force_dex: bool) -> dict:
    """Load the cleaned dex from ``dex_file``, or download and rebuild it.

    The cached file is used when it exists and ``force_dex`` is false.
    Otherwise the dex is downloaded, cleaned, and written back to
    ``dex_file`` (creating its parent directory if needed) for auditing
    and reloading. Returns a dict mapping dex number to ``Pokemon``.
    """
    if dex_file.is_file() and not force_dex:
        with open(dex_file, encoding="utf-8") as infile:
            loaded = json.load(infile)
        return {
            int(num): Pokemon(
                num=entry["num"],
                species=entry["species"],
                forms=[Form(**f) for f in entry["forms"]],
            )
            for num, entry in loaded.items()
        }

    # first download the pokedex
    raw_dex = await download_pokedex()
    # clean and reorganize it
    dex = clean_dex(raw_dex)
    # output dex for auditing and reloading; the default location lives in
    # a subdirectory that may not exist yet
    dex_file.parent.mkdir(parents=True, exist_ok=True)
    with open(dex_file, "w", encoding="utf-8") as out:
        json.dump({
            str(i): asdict(pkmn)
            for i, pkmn in dex.items()
        }, out, indent=2)
    return dex


# (pattern, replacement) pairs applied in order to a lower-cased form name.
# NOTE(review): plain-string patterns replace every occurrence, so "-f" also
# rewrites a name like "rotom-frost" -- confirm that is intended.
SHOWDOWN_REPLACEMENTS = [
    ("mega-", "mega"),  # charizard, mewtwo
    ("paldea-", "paldea"),  # tauros
    ("mr. ", "mr"),  # mr mime + mr rime
    ("'d", "d"),  # farfetch'd and sirfetch'd
    ("nidoran-m", "nidoranm"),  # nidoran is a special case
    ("-f", "f"),  # gender diff forms
    (re.compile(r"-m$"), ""),  # gender diff forms
    (re.compile(r"^ho-oh$"), "hooh"),  # Ho-oh special case
]


def get_showdown_urls(form: Form) -> list[tuple[str, str]]:
    """Return ``(url, file extension)`` pairs for the showdown sprites of ``form``.

    Four URLs are produced: animated front/back gifs and gen5 front/back pngs.
    """
    name = form.name.lower()
    for pat, ins in SHOWDOWN_REPLACEMENTS:
        if isinstance(pat, re.Pattern):
            name = pat.sub(ins, name)
        else:
            name = name.replace(pat, ins)
    base = "https://play.pokemonshowdown.com/sprites"
    return [
        (f"{base}/ani/{name}.gif", "gif"),
        (f"{base}/ani-back/{name}.gif", "gif"),
        (f"{base}/gen5/{name}.png", "png"),
        (f"{base}/gen5-back/{name}.png", "png"),
    ]


# form name -> serebii art suffix, for forms the trait rules don't cover
SEREBII_SPECIAL = {
    "Castform-Rainy": "r",
    "Castform-Snowy": "i",
    "Castform-Sunny": "s",
    "Deoxys-Attack": "a",
    "Deoxys-Defense": "d",
    "Deoxys-Speed": "s",
    "Tauros-Paldea-Blaze": "b",
    "Tauros-Paldea-Aqua": "a",
}

# regional trait -> serebii art suffix, checked in this order
_SEREBII_REGIONAL = (
    ("alola", "a"),
    ("galar", "g"),
    ("hisui", "h"),
    ("paldea", "p"),
)


def get_serebii_url(pkmn: Pokemon, form: Form) -> str | None:
    """Return the serebii art URL for ``form``, or ``None`` if no rule applies.

    Rules are tried in order: base form (name equals species), the
    ``SEREBII_SPECIAL`` table, gmax, mega (X / Y / plain), then regional
    traits.
    """
    art = f"https://www.serebii.net/pokemon/art/{pkmn.num:03d}"
    if form.name == pkmn.species:
        return f"{art}.png"
    if form.name in SEREBII_SPECIAL:
        return f"{art}-{SEREBII_SPECIAL[form.name]}.png"
    if "gmax" in form.traits:
        return f"{art}-gi.png"
    if "mega" in form.traits:
        if "Mega-X" in form.name:
            return f"{art}-mx.png"
        if "Mega-Y" in form.name:
            return f"{art}-my.png"
        return f"{art}-m.png"
    for trait, suffix in _SEREBII_REGIONAL:
        if trait in form.traits:
            return f"{art}-{suffix}.png"
    return None


async def download(session: ClientSession, url: str, filename: Path) -> tuple[str, Exception | bool]:
    """Download ``url`` to ``filename`` unless that file already exists.

    Returns ``(url, outcome)`` where outcome is ``False`` if the file was
    already present (skipped), ``True`` if it was downloaded, or the
    exception that was raised. Failures are returned rather than raised so
    that one bad URL does not abort a whole batch.
    """
    if filename.is_file():
        return url, False
    try:
        async with session.get(url) as res:
            res.raise_for_status()
            # read the whole body before creating the file: otherwise a
            # failed read leaves an empty file behind that later runs
            # would treat as already downloaded
            payload = await res.read()
        filename.write_bytes(payload)
    except Exception as ex:
        return url, ex
    return url, True


async def download_all_for_pokemon(pkmn: Pokemon, image_dir: Path) -> dict[str, dict[str, Exception | bool]]:
    """Download every known image for every form of ``pkmn`` into ``image_dir``.

    Files are named ``{form name}-{source index}.{ext}``. Returns a mapping
    of form name to ``{url: outcome}`` with outcomes as returned by
    ``download``.
    """
    results = defaultdict(dict)
    async with ClientSession() as session:
        for form in pkmn.forms:
            urls = []
            urls += get_showdown_urls(form)
            urls.append((get_serebii_url(pkmn, form), "png"))
            # TODO more sources
            # the index is taken before filtering out missing URLs, so each
            # source keeps a stable file index
            results[form.name].update(await asyncio.gather(*[
                download(session, url, image_dir.joinpath(f"{form.name}-{i}.{ext}"))
                for i, (url, ext) in enumerate(urls)
                if url is not None
            ]))
    return results


async def download_all(image_dir: Path, pkmn: list[Pokemon]) -> dict[str, dict[str, Exception | bool]]:
    """Download images for each pokemon in ``pkmn``, one pokemon at a time.

    Creates ``image_dir`` if needed and returns the merged per-form log.
    """
    image_dir.mkdir(parents=True, exist_ok=True)
    log = {}
    for p in pkmn:
        log.update(await download_all_for_pokemon(p, image_dir))
    return log


# URLs that are expected to fail; failures for these are not reported
KNOWN_MISSING = [
    "https://play.pokemonshowdown.com/sprites/ani/venusaur-gmax.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/venusaur-gmax.gif",
    "https://play.pokemonshowdown.com/sprites/ani/blastoise-gmax.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/blastoise-gmax.gif",
    "https://play.pokemonshowdown.com/sprites/ani/growlithe-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/growlithe-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani/arcanine-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/arcanine-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani/voltorb-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/voltorb-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani/electrode-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/electrode-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani/tauros-paldeacombat.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/tauros-paldeacombat.gif",
    "https://play.pokemonshowdown.com/sprites/ani/tauros-paldeablaze.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/tauros-paldeablaze.gif",
    "https://play.pokemonshowdown.com/sprites/ani/tauros-paldeaaqua.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/tauros-paldeaaqua.gif",
    "https://play.pokemonshowdown.com/sprites/ani/wooper-paldea.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/wooper-paldea.gif",
    "https://play.pokemonshowdown.com/sprites/ani/qwilfish-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/qwilfish-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani/sneasel-hisui.gif",
    "https://play.pokemonshowdown.com/sprites/ani-back/sneasel-hisui.gif",
]


async def main(
    dex_file: Path,
    image_dir: Path,
    startIndex: int,
    endIndex: int,
    log_skipped: bool,
    force_dex: bool,
    dex_only: bool
):
    """Load the dex, then download images for dex numbers startIndex..endIndex.

    Both bounds are inclusive. Stops after loading the dex when
    ``dex_only`` is set. Prints one line per download outcome (failures
    for ``KNOWN_MISSING`` URLs are suppressed; skips are shown only with
    ``log_skipped``) and a final count of new downloads.
    """
    dex = await load_pokedex(dex_file, force_dex)
    if dex_only:
        return

    wanted = range(startIndex, endIndex + 1)
    # report out-of-range numbers up front instead of dying with a KeyError
    # partway through the downloads
    absent = [i for i in wanted if i not in dex]
    if absent:
        print(f"Skipping {len(absent)} dex number(s) not in the pokedex ({absent[0]} to {absent[-1]})")
    log = await download_all(image_dir, [dex[i] for i in wanted if i in dex])

    known_missing = set(KNOWN_MISSING)
    new_downloads = 0
    for form, result in log.items():
        for url, info in result.items():
            if isinstance(info, Exception):
                if url not in known_missing:
                    print(f"{form}: FAILED {url} - {info}")
            elif not info:
                if log_skipped:
                    print(f"{form}: SKIPPED {url} - {info}")
            else:
                print(f"{form}: SUCCESS {url}")
                new_downloads += 1
    print(f"New Downloads: {new_downloads}")


if __name__ == "__main__":
    from argparse import ArgumentParser, ArgumentTypeError

    def parse_bounds(text: str) -> tuple[int, int]:
        """Parse ``"START-END"`` (or a single number) into inclusive bounds."""
        first, sep, last = text.partition("-")
        try:
            start = int(first)
            end = int(last) if sep else start
        except ValueError:
            # raised eagerly so argparse prints a usage error, not a traceback
            raise ArgumentTypeError(
                f"expected START-END or a single dex number, got {text!r}"
            ) from None
        return start, end

    parser = ArgumentParser(
        prog="Image Retriever",
        description="Retrieve pokedex and images",
    )
    parser.add_argument(
        "-d", "--pokedex",
        default="data/pokedex.json",
        type=Path,
        help="Pokedex file"
    )
    parser.add_argument(
        "--refresh-dex",
        action="store_true",
        help="Update the pokedex"
    )
    parser.add_argument(
        "--pokedex-only",
        action="store_true",
        help="Quit before image download"
    )
    parser.add_argument(
        "-o", "--output",
        default="images",
        type=Path,
        help="Image output directory"
    )
    parser.add_argument(
        "--log-skipped",
        action="store_true",
        help="Log skipped images"
    )
    parser.add_argument(
        "bounds",
        type=parse_bounds,
        default="1-151",
        nargs="?",
        help="Range of dex numbers to download, inclusive"
    )
    args = parser.parse_args()
    start, end = args.bounds
    asyncio.run(main(
        args.pokedex,
        args.output,
        start,
        end,
        args.log_skipped,
        args.refresh_dex,
        args.pokedex_only
    ))