"""Scrape Pokemon Showdown animated sprites and write colour statistics.

For every sprite that survives the name filters below, the front and back
animations are downloaded, their non-outline pixels are collected, and size /
inertia / mean statistics are computed for the whole sprite and for k-means
colour clusters in both CAM02-UCS ("jab") and sRGB space. The results are
written to ``database-v3.js`` as a JavaScript array literal.
"""

import io
import math
import itertools
import multiprocessing
from typing import Callable, NamedTuple, Optional

from PIL import Image
from bs4 import BeautifulSoup
from colorspacious import cspace_convert
from scipy.cluster import vq
import requests
import numpy as np

import ingest

extension = ".gif"
cluster_seed = 20220328
cluster_attempts = 10
base = "https://play.pokemonshowdown.com/sprites/ani/"
back_base = "https://play.pokemonshowdown.com/sprites/ani-back/"
# seconds to wait on the sprite server before giving up on a request
request_timeout = 30

# removing all forms of a pokemon, and also pokestars
start_with_filters = [
    # no significant visual changes
    "arceus-", "silvally-", "genesect-", "pumpkaboo-", "gourgeist-",
    "unown-", "giratina-",
    # cannot start the battle in alternate form
    "castform-", "cherrim-", "aegislash-", "xerneas-", "wishiwashi-",
    "eiscue-", "mimikyu-", "cramorant-", "morpeko-",
    # weird event thing
    "greninja-", "eevee-", "pikachu-", "zarude-", "magearna-",
    # pokestars
    "pokestar",
]

# removing all forms of a type
end_with_filters = [
    "-mega", "-megax", "-megay", "-primal", "-ultra", "-gmax", "-eternamax",
    "-totem", "-f", "-b", "-old", "-shiny", "-eternalflower",
    "-rapidstrikegmax",
]

# removing pokemon entirely
full_filters = [
    # darmanitan zen forms (cannot start in zen)
    "darmanitan-galarzen", "darmanitan-zen",
    # minior core forms (cannot start in anything but -meteor, renamed below)
    "minior", "minior-blue", "minior-green", "minior-indigo",
    "minior-orange", "minior-red", "minior-violet", "minior-yellow",
    # gimmighoul roaming (cannot start roaming)
    "gimmighoul-roaming",
    # palafin hero (cannot start as hero)
    "palafin-hero",
    # because it is a create-a-pokemon
    "argalis", "arghonaut", "brattler", "breezi", "caimanoe", "cawdet",
    "colossoil", "coribalis", "cupra", "cyclohm", "dorsoil", "duohm",
    "electrelk", "embirch", "fawnifer", "flarelm", "floatoy", "krillowatt",
    "krolowatt", "miasmite", "monohm", "necturine", "nohface", "privatyke",
    "pyroak", "rebble", "revenankh", "saharaja", "snugglow", "solotl",
    "swirlpool", "syclant", "syclar", "tactite", "venomicon",
    "venomicon-epilogue", "volkritter", "voodoll", "astrolotl", "aurumoth",
    "caribolt", "cawmodore", "chromera", "crucibelle", "equilibra", "fidgit",
    "jumbao", "justyke", "kerfluffle", "kitsunoh", "krilowatt", "malaconda",
    "miasmaw", "mollux", "naviathan", "necturna", "pajantom", "plasmanta",
    "pluffle", "protowatt", "scratchet", "smogecko", "smoguana", "smokomodo",
    "snaelstrom", "stratagem", "tomohawk", "volkraken", "voodoom",
    # typos/duplicates
    "0", "arctovolt", "buffalant", "burmy-plant", "darmanitan-standard",
    "deerling-spring", "deoxys-rs", "gastrodon-west", "klinklang-back",
    "krikretot", "marenie", "marowak-alolan", "meloetta-aria",
    "pichu-spikyeared", "polteageist-chipped", "rattata-alolan", "regidragon",
    "sawsbuck-spring", "shaymin-land", "shellos-west", "sinistea-chipped",
    "wormadam-plant", "pumpkabo-super", "magcargo%20", "meowstic-female",
    "ratatta-a", "ratatta-alola", "raticate-a", "rotom-h", "rotom-m",
    "rotom-s", "rotom-w",
    # not a pokemon
    "substitute", "egg", "egg-manaphy", "missingno",
]

# force certain pokemon to stay
force_keep = [
    "meowstic-f", "unfezant-f", "pyroar-f"
]

# rename certain pokemon after the fact
rename = {
    # dash consistency
    "nidoranm": "nidoran-m",
    "nidoranf": "nidoran-f",
    "porygonz": "porygon-z",
    "tapubulu": "tapu-bulu",
    "tapufini": "tapu-fini",
    "tapukoko": "tapu-koko",
    "tapulele": "tapu-lele",
    "hooh": "ho-oh",
    "mimejr": "mime-jr",
    "mrmime": "mr-mime",
    "mrmime-galar": "mr-mime-galar",
    "mrrime": "mr-rime",
    "jangmoo": "jangmo-o",
    "hakamoo": "hakamo-o",
    "kommoo": "kommo-o",
    "typenull": "type-null",
    "oricorio-pompom": "oricorio-pom-pom",
    "necrozma-duskmane": "necrozma-dusk-mane",
    "necrozma-dawnwings": "necrozma-dawn-wings",
    "toxtricity-lowkey": "toxtricity-low-key",
    # rename forms
    "shellos": "shellos-west",
    "shaymin": "shaymin-land",
    "meloetta": "meloetta-aria",
    "keldeo": "keldeo-ordinary",
    "hoopa": "hoopa-confined",
    "burmy": "burmy-plant",
    "wormadam": "wormadam-plant",
    "deerling": "deerling-spring",
    "sawsbuck": "sawsbuck-spring",
    "vivillon": "vivillon-meadow",
    "basculin": "basculin-redstriped",
    "meowstic": "meowstic-male",
    "meowstic-f": "meowstic-female",
    "pyroar-f": "pyroar-female",
    "flabebe": "flabebe-red",
    "floette": "floette-red",
    "florges": "florges-red",
    "minior-meteor": "minior",
    "sinistea": "sinistea-phony",
    "polteageist": "polteageist-phony",
    "gastrodon": "gastrodon-west",
    "furfrou": "furfrou-natural",
    "wishiwashi": "wishiwashi-school",
    "tornadus": "tornadus-incarnate",
    "landorus": "landorus-incarnate",
    "thundurus": "thundurus-incarnate",
    "calyrex-ice": "calyrex-ice-rider",
    "calyrex-shadow": "calyrex-shadow-rider",
    "urshifu-rapidstrike": "urshifu-rapid-strike",
    "zacian": "zacian-hero",
    "zamazenta": "zamazenta-hero",
}


def get_all_pokemon(url: str, ext: str = extension) -> list[str]:
    """Return the sprite names linked from *url* that pass the name filters.

    Every ``<a href>`` on the page ending in *ext* is a candidate. A candidate
    is kept when it is listed in ``force_keep``, or when it is not in
    ``full_filters`` and matches none of ``start_with_filters`` /
    ``end_with_filters``. Returned names have *ext* stripped.

    Raises:
        requests.RequestException: if the listing cannot be fetched.
    """
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Anchors without an href yield None; skip them instead of crashing.
    hrefs = [
        href
        for a in soup.find_all("a")
        if (href := a.get("href")) and href.endswith(ext)
    ]

    # Build the lookup structures once rather than once per candidate.
    keep = {name + ext for name in force_keep}
    banned = {full + ext for full in full_filters}
    prefixes = tuple(start_with_filters)
    suffixes = tuple(ending + ext for ending in end_with_filters)

    return [
        # removesuffix honours the actual extension length (and an empty ext)
        href.removesuffix(ext)
        for href in hrefs
        if href in keep
        or not (
            href in banned
            or href.startswith(prefixes)
            or href.endswith(suffixes)
        )
    ]


def load_image(base: str, name: str, ext: str = extension) -> Image.Image:
    """Download ``base + name + ext`` and open it as a PIL image.

    Raises:
        requests.RequestException: if the download fails or returns an HTTP
            error status.
    """
    response = requests.get(base + name + ext, timeout=request_timeout)
    # Fail with a clear HTTP error instead of handing an error page to PIL.
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))


def get_all_pixels(im: Image.Image) -> list[tuple[int, int, int]]:
    """Collect the (r, g, b) values of every pixel in every frame of *im*.

    Pixels for which ``ingest.is_outline(r, g, b, a)`` is true are dropped.
    Images without an ``n_frames`` attribute are treated as single-frame.
    """
    rgb_pixels = []
    for frame in range(getattr(im, "n_frames", 1)):
        im.seek(frame)
        rgb_pixels.extend(
            (r, g, b)
            for r, g, b, a in im.convert("RGBA").getdata()
            if not ingest.is_outline(r, g, b, a)
        )
    return rgb_pixels


def merge_dist_jab(p: np.ndarray, q: np.ndarray) -> Optional[float]:
    """Return the lightness gap between two J'a'b' colours if they may merge.

    Two colours are mergeable when the angle between their (a, b) vectors is
    at most 10 degrees and their J difference is at most 20. In that case the
    J difference is returned; otherwise ``None``.

    A colour with zero chroma has no defined hue angle, so such pairs are
    reported as not mergeable (``None``) rather than dividing by zero.
    """
    pj, pa, pb = p
    qj, qa, qb = q
    light_diff = abs(pj - qj)

    chroma_product = math.sqrt((pa ** 2 + pb ** 2) * (qa ** 2 + qb ** 2))
    if not chroma_product > 0:  # also catches NaN
        return None

    # Rounding can push the cosine a hair outside [-1, 1] for (anti)parallel
    # vectors, which would make math.acos raise a domain error.
    cos_hue = max(-1.0, min(1.0, (pa * qa + pb * qb) / chroma_product))
    hue_angle = math.degrees(math.acos(cos_hue))

    return light_diff if hue_angle <= 10 and light_diff <= 20 else None


def merge_dist_rgb(p: np.ndarray, q: np.ndarray) -> Optional[float]:
    """Convert two sRGB255 colours to CAM02-UCS and apply ``merge_dist_jab``."""
    return merge_dist_jab(*cspace_convert(np.array([p, q]), "sRGB255", "CAM02-UCS"))


def score_clustering_jab(means: list[np.ndarray]) -> float:
    """Return the mean squared a-b plane distance over all pairs of *means*.

    Returns 0.0 when there are fewer than two means (no pairs to compare).
    """
    score = 0
    count = 0
    for p, q in itertools.combinations(means, 2):
        # squared dist in the a-b plane
        _, pa, pb = p
        _, qa, qb = q
        score += (pa - qa) ** 2 + (pb - qb) ** 2
        count += 1
    return score / count if count else 0.0


def score_clustering_rgb(means: list[np.ndarray]) -> float:
    """Convert sRGB255 *means* to CAM02-UCS and apply ``score_clustering_jab``."""
    return score_clustering_jab(list(cspace_convert(np.array(means), "sRGB255", "CAM02-UCS")))


class Stats(NamedTuple):
    """Summary of a set of pixels.

    ``inertia``, ``mu`` and ``nu`` are whatever ``ingest.inertia``,
    ``ingest.mu`` and ``ingest.nu`` return for the pixels (for clusters,
    ``mu`` is the k-means centroid instead).
    """

    size: int
    inertia: float
    mu: np.ndarray
    nu: np.ndarray


def merge_stats(s1: Stats, s2: Stats) -> Stats:
    """Combine two ``Stats`` into one.

    Sizes are summed; ``inertia``, ``mu`` and ``nu`` are averaged, weighted by
    each input's share of the combined size.
    """
    ts = s1.size + s2.size
    f1 = s1.size / ts
    f2 = s2.size / ts
    return Stats(
        size=ts,
        inertia=s1.inertia * f1 + s2.inertia * f2,
        mu=s1.mu * f1 + s2.mu * f2,
        nu=s1.nu * f1 + s2.nu * f2,
    )


def flatten_stats(ss: list[Stats], target_len: int = 40) -> list[float]:
    """Flatten *ss* into one list, zero-padded up to *target_len* entries.

    Each ``Stats`` contributes ``size, inertia, *mu, *nu`` in that order. If
    the flattened list is already *target_len* or longer, no padding is added
    and nothing is truncated.
    """
    to_return = []
    for s in ss:
        to_return += [s.size, s.inertia, *s.mu, *s.nu]
    return to_return + ([0] * (target_len - len(to_return)))


def compute_stats(
    pixels: np.ndarray,
    clustering_scorer: Callable[[list[np.ndarray]], float],
    merge_dist: Callable[[np.ndarray, np.ndarray], Optional[float]],
) -> list[Stats]:
    """Compute statistics for all *pixels* and for their best colour clusters.

    k-means is run ``cluster_attempts`` times for each k in (2, 3, 4) with
    deterministic seeds; the clustering with the highest *clustering_scorer*
    value wins. If more than two clusters result, the single pair of clusters
    with the smallest non-``None`` *merge_dist* is merged (at most one merge).

    Returns:
        ``[total_stats, *cluster_stats]`` where ``total_stats`` covers every
        pixel.
    """
    total_stats = Stats(
        size=len(pixels),
        inertia=ingest.inertia(pixels),
        mu=ingest.mu(pixels),
        nu=ingest.nu(pixels),
    )

    # run k-means multiple times, for multiple k's, trying to maximize the clustering_scorer
    samples = pixels.astype(float)  # loop-invariant conversion
    best = None
    for k in (2, 3, 4):
        for attempt in range(cluster_attempts):
            means, labels = vq.kmeans2(samples, k, minit="++", seed=cluster_seed + attempt)
            score = clustering_scorer(means)
            if best is None or best[0] < score:
                best = (score, means, labels)
    _, best_means, best_labels = best

    cluster_stats = []
    for i, mean in enumerate(best_means):
        cluster_pixels = pixels[best_labels == i]
        cluster_stats.append(Stats(
            size=len(cluster_pixels),
            inertia=ingest.inertia(cluster_pixels),
            mu=mean,
            nu=ingest.nu(cluster_pixels),
        ))

    # assuming there are still more than two clusters,
    # attempt to merge the closest if they're close enough
    if len(cluster_stats) > 2:
        # first, find all the options
        options = []
        for i, j in itertools.combinations(range(len(cluster_stats)), 2):
            ci = cluster_stats[i]
            cj = cluster_stats[j]
            if (dist := merge_dist(ci.mu, cj.mu)) is not None:
                rest = [c for idx, c in enumerate(cluster_stats) if idx not in (i, j)]
                options.append((dist, [merge_stats(ci, cj), *rest]))
        # if there are multiple options, use the closest,
        # otherwise leaves cluster_stats the same
        if options:
            cluster_stats = min(options, key=lambda option: option[0])[1]

    return [total_stats, *cluster_stats]


def get_stats(name: str) -> list[float]:
    """Download the front and back sprites of *name* and return flat statistics.

    The result is ``[pixel_count, *jab_stats, *rgb_stats]``. Each flattened
    stats list has its leading element (the total size, already reported as
    ``pixel_count``) dropped.
    """
    front = get_all_pixels(load_image(base, name))
    back = get_all_pixels(load_image(back_base, name))
    rgb_pixels = np.array(front + back)
    jab_pixels = cspace_convert(rgb_pixels, "sRGB255", "CAM02-UCS")

    jab_stats = flatten_stats(compute_stats(
        jab_pixels,
        score_clustering_jab,
        merge_dist_jab,
    ))[1:]
    rgb_stats = flatten_stats(compute_stats(
        rgb_pixels,
        score_clustering_rgb,
        merge_dist_rgb,
    ))[1:]

    return [len(rgb_pixels), *jab_stats, *rgb_stats]


def ingest_and_format(pair: tuple[int, str]) -> tuple[str, Optional[str], Optional[str]]:
    """Pool worker: compute and format the database line for one sprite.

    Defined at module level so worker processes can resolve it under any
    multiprocessing start method.

    Returns:
        ``(name, line, error)``. On success ``line`` is the formatted output
        line and ``error`` is ``None``; on failure ``line`` is ``None`` and
        ``error`` is ``repr()`` of the exception. The failure is returned
        rather than stored in a global because each worker process has its
        own copy of module state, invisible to the parent.
    """
    index, name = pair
    try:
        print(f"Ingesting #{index+1}: {name}...")
        stats = get_stats(name)
        format_name = rename.get(name, name)
        print(f"Finished #{index+1}: {name}, saving under {format_name}")
        line = f' [ "{format_name}", {", ".join(str(n) for n in stats)} ],\n'
        return name, line, None
    except Exception as e:
        # Top-level boundary for one sprite: report it and keep the batch going.
        print(e)
        # repr() keeps the result picklable regardless of the exception type.
        return name, None, repr(e)


def main() -> None:
    """Ingest every filtered sprite in parallel and write ``database-v3.js``."""
    pkmn = get_all_pokemon(back_base)
    print("Found", len(pkmn), "sprites...")

    lines = []
    errors = []
    with multiprocessing.Pool(4) as pool:
        for name, line, error in pool.imap_unordered(ingest_and_format, enumerate(pkmn), 100):
            if line is not None:
                lines.append(line)
            else:
                errors.append((name, error))
    lines.sort()

    print(f"Calculated {len(lines)} statistics, writing...")
    with open("database-v3.js", "w", encoding="utf-8") as outfile:
        outfile.write("const databaseV3 = [\n")
        outfile.writelines(lines)
        outfile.write("];\n")

    print("Errors:", errors)


if __name__ == "__main__":
    main()