""" Manage the logic of downloading the pokedex and source images. """ import re import json import asyncio from pathlib import Path from dataclasses import dataclass, asdict from collections import defaultdict from aiohttp import ClientSession JS_TO_JSON = re.compile(r"\b([a-zA-Z][a-zA-Z0-9]*?):") # the dex from showdown assumes only strawberry alcremie, since # that's what's in showdown, but we might as well add the rest ALCREMIE_SWEETS = [ "Strawberry", "Berry", "Love", "Star", "Clover", "Flower", "Ribbon", ] # https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences # there are some pokemon with notable gender diffs that the dex doesn't cover # judgement calls made arbitrarily GENDER_DIFFS = ( "hippopotas", "hippowdon", "unfezant", "frillish", "jellicent", "pyroar", # meowstic, indeedee, basculegion, oinkologne are already handled in the dex ) @dataclass class Form: name: str traits: list[str] types: list[str] color: str @dataclass class Pokemon: num: int species: str forms: list[Form] async def download_pokedex() -> dict: async with ClientSession() as session: async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res: res.raise_for_status() text = await res.text("utf-8") # this is not json of course, but it's close # start by taking out the ; and definition cleaned = text.replace("exports.BattlePokedex = ", "").strip(";") # then convert the keys to strings converted = re.sub(JS_TO_JSON, lambda m: f'"{m.group(1)}":', cleaned) # and fix Type: Null, Farfetch'd, Sirfetch'd fixed = converted.replace('""Type": Null"', '"Type: Null"').replace("’", "'") # then, parse it return json.loads(fixed) def get_traits(species: str, kind: str) -> list[str]: traits = [] if kind in ("mega", "mega-x", "mega-y", "primal"): traits.extend(("mega", "nostart")) if kind in ("gmax", "eternamax", "rapid-strike-gmax"): traits.extend(("gmax", "nostart")) if kind in ("alola", "galar", "hisui", "galar", "paldea"): traits.extend(("regional", kind)) # special cases if species == "Tauros" and "paldea" in kind: # paldean tauros has dumb names traits.extend(("regional", "paldea")) if species == "Minior" and kind != "meteor": # minior can only start the battle in meteor form traits.append("nostart") if species == "Darmanitan" and "zen" in kind: # darmanitan cannot start in zen form traits.append("nostart") if "galar" in kind: # also there's a galar-zen form to handle traits.extend(("regional", "galar")) if species == "Palafin" and kind == "hero": # palafin can only start in zero form traits.append("nostart") if species == "Gimmighoul" and kind == "roaming": # gimmighoul roaming is only in PGO traits.append("nostart") return sorted(set(traits)) def clean_dex(raw: dict) -> dict[int, Pokemon]: regrouped = defaultdict(list) for key, entry in raw.items(): isNonstandard = entry.get("isNonstandard", None) baseSpecies = entry.get("baseSpecies", None) forme = entry.get("forme", None) if isNonstandard not in (None, "Past", "Unobtainable"): continue # remove CAP etc. if baseSpecies in ("Pikachu", "Pichu") and forme is not None: continue # remove pikachu spam + spiky ear pichu if forme is not None and "Totem" in forme: continue # remove totem pokemon num = entry["num"] # non-cosmetic forms get separate entries automatically # but keeping the separate unown forms would be ridiculous if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0: cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}') if key == "alcremie": # oh god this thing cosmetic = [ f"{cf}-{sweet}" for cf in cosmetic for sweet in ALCREMIE_SWEETS ] regrouped[num].extend({ **entry, "forme": cf.replace(" ", "-"), "formeKind": "cosmetic", } for cf in cosmetic) elif key in GENDER_DIFFS: regrouped[num].append({ **entry, "forme": f'{entry["name"]}-M', "formeKind": "cosmetic", }) regrouped[num].append({ **entry, "forme": f'{entry["name"]}-F', "formeKind": "cosmetic", }) else: regrouped[num].append({ **entry, "forme": entry["name"], "formeKind": entry.get("forme", "base").lower(), }) return { i: Pokemon( num=i, species=( # doubles as an assertion that forms is not empty species := (forms := regrouped[i])[0].get("baseSpecies", forms[0]["name"]) ), forms=[ Form( name=f.get("forme", f["name"]), traits=get_traits(species, f["formeKind"].lower()), types=f["types"], color=f["color"], ) for f in forms ] ) for i in range(1, max(regrouped.keys()) + 1) } async def load_pokedex(dex_file: str) -> dict: if Path(dex_file).is_file(): with open(dex_file) as infile: loaded = json.load(infile) dex = { int(num): Pokemon( num=entry["num"], species=entry["species"], forms=[Form(**f) for f in entry["forms"]], ) for num, entry in loaded.items() } else: # first download the pokedex raw_dex = await download_pokedex() # clean and reorganize it dex = clean_dex(raw_dex) # output dex for auditing and reloading with open(dex_file, "w") as out: json.dump({ str(i): asdict(pkmn) for i, pkmn in dex.items() }, out, indent=2) return dex SHOWDOWN_REPLACEMENTS = [ ("mega-", "mega"), # charizard, mewtwo ("paldea-", "paldea"), # tauros ("mr. ", "mr"), # mr mime + mr rime ("'d", "d"), # farfetch'd and sirfetch'd ("nidoran-m", "nidoranm"), # nidoran is a special case ("-f", "f"), # gender diff forms (re.compile(r"-m$"), ""), # gender diff forms ] def get_showdown_urls(form: Form) -> list[tuple[str, str]]: name = form.name.lower() for pat, ins in SHOWDOWN_REPLACEMENTS: if isinstance(pat, re.Pattern): name = re.sub(pat, ins, name) else: name = name.replace(pat, ins) return [ (f"https://play.pokemonshowdown.com/sprites/ani/{name}.gif", "gif"), (f"https://play.pokemonshowdown.com/sprites/ani-back/{name}.gif", "gif"), (f"https://play.pokemonshowdown.com/sprites/gen5/{name}.png", "png"), (f"https://play.pokemonshowdown.com/sprites/gen5-back/{name}.png", "png"), ] async def download(session: ClientSession, url: str, filename: str) -> tuple[str, Exception | bool]: if Path(filename).is_file(): return url, False try: async with session.get(url) as res: res.raise_for_status() with open(filename, "wb") as out: out.write(await res.read()) except Exception as ex: return url, ex return url, True async def download_all_for_pokemon(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exception | bool]]: results = defaultdict(dict) async with ClientSession() as session: for form in pkmn.forms: urls = [] urls += get_showdown_urls(form) # TODO more sources results[form.name].update(await asyncio.gather(*[ download(session, url, f"{image_dir}/{form.name}-{i}.{ext}") for i, (url, ext) in enumerate(urls) ])) return results async def download_all(image_dir: str, pkmn: list[Pokemon]) -> dict[str, dict[str, Exception | bool]]: Path(image_dir).mkdir(parents=True, exist_ok=True) log = {} for p in pkmn: log.update(await download_all_for_pokemon(p, image_dir)) return log async def main(dex_file: str, image_dir: str, startIndex: int, endIndex: int, log_skipped: bool): dex = await load_pokedex(dex_file) log = await download_all(image_dir, (dex[i] for i in range(startIndex, endIndex + 1))) new_downloads = 0 for form, result in log.items(): for url, info in result.items(): if isinstance(info, Exception): print(f"{form}: FAILED {url} - {info}") elif not info: if log_skipped: print(f"{form}: SKIPPED {url} - {info}") else: new_downloads += 1 print(f"New Downloads: {new_downloads}") if __name__ == "__main__": from sys import argv # TODO make this an arg parser dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json" image_dir = argv[2] if len(argv) > 2 else "images" start, end = map(int, ( argv[3] if len(argv) > 3 else "1-151" ).split("-")[0:2]) log_skipped = len(argv) > 4 and argv[4].lower() == 'true' asyncio.run(main(dex_file, image_dir, start, end, log_skipped))