"""Scrape Pokemon Showdown animated sprites and write per-sprite colour stats.

For every sprite that survives the name filters below, the front and back
GIFs are downloaded, their pixels are pooled, and summary statistics plus a
small k-means clustering are computed in both CAM02-UCS ("Jab") and sRGB
space. One row per sprite is written to ``database-v3.js``.
"""

import io
import math
from collections.abc import Callable
from itertools import combinations

from PIL import Image
from bs4 import BeautifulSoup
from colorspacious import cspace_convert
from scipy.cluster import vq
import requests
import numpy as np

import ingest

cluster_seed = 20220326
cluster_attempts = 3

# seconds to wait on any single HTTP request before giving up
request_timeout = 30

base = "https://play.pokemonshowdown.com/sprites/ani/"
back_base = "https://play.pokemonshowdown.com/sprites/ani-back/"

# removing all forms of a pokemon, and also pokestars
start_with_filters = [
    # no significant visual changes
    "arceus-", "silvally-", "genesect-", "pumpkaboo-", "gourgeist-", "unown-", "giratina-",
    # cannot start the battle in alternate form
    "castform-", "cherrim-", "aegislash-", "xerneas-", "wishiwashi-",
    "eiscue-", "mimikyu-", "cramorant-",
    # was spelled "morepeko-", which can never match a Morpeko sprite name
    "morpeko-",
    # weird event thing
    "greninja-", "eevee-", "pikachu-", "zarude-", "magearna-",
    # pokestars
    "pokestar",
]

# removing all forms of a type
end_with_filters = [
    "-mega.gif", "-megax.gif", "-megay.gif", "-primal.gif", "-ultra.gif",
    "-gmax.gif", "-eternamax.gif", "-totem.gif", "-f.gif", "-b.gif",
]

# removing pokemon entirely
full_filters = [
    # darmanitan zen forms (cannot start in zen)
    "darmanitan-galarzen.gif", "darmanitan-zen.gif",
    # minior core forms (cannot start in anything but -meteor, renamed below)
    "minior.gif", "minior-blue.gif", "minior-green.gif", "minior-indigo.gif",
    "minior-orange.gif", "minior-violet.gif", "minior-yellow.gif",
    # because it is a create-a-pokemon
    "astrolotl.gif", "aurumoth.gif", "caribolt.gif", "cawmodore.gif", "chromera.gif",
    "crucibelle.gif", "equilibra.gif", "fidgit.gif", "jumbao.gif", "justyke.gif",
    "kerfluffle.gif", "kitsunoh.gif", "krilowatt.gif", "malaconda.gif", "miasmaw.gif",
    "mollux.gif", "naviathan.gif", "necturna.gif", "pajantom.gif", "plasmanta.gif",
    "pluffle.gif", "protowatt.gif", "scratchet.gif", "smogecko.gif", "smoguana.gif",
    "smokomodo.gif", "snaelstrom.gif", "stratagem.gif", "tomohawk.gif", "volkraken.gif",
    "voodoom.gif",
    # typos/duplicates
    "buffalant.gif", "klinklang-back.gif", "krikretot.gif", "pumpkabo-super.gif",
    "magcargo%20.gif", "ratatta-a.gif", "ratatta-alola.gif", "raticate-a.gif",
    "rotom-h.gif", "rotom-m.gif", "rotom-s.gif", "rotom-w.gif",
    # not a pokemon
    "substitute.gif",
]

# force certain pokemon to stay
force_keep = [
    "meowstic-f.gif", "unfezant-f.gif", "pyroar-f.gif"
]

# rename certain pokemon after the fact
rename = {
    # dash consistency
    "nidoranm": "nidoran-m",
    "nidoranf": "nidoran-f",
    "porygonz": "porygon-z",
    "tapubulu": "tapu-bulu",
    "tapufini": "tapu-fini",
    "tapukoko": "tapu-koko",
    "tapulele": "tapu-lele",
    "hooh": "ho-oh",
    "mimejr": "mime-jr",
    "mrmime": "mr-mime",
    "mrmime-galar": "mr-mime-galar",
    "mrrime": "mr-rime",
    "jangmoo": "jangmo-o",
    "hakamoo": "hakamo-o",
    "kommoo": "kommo-o",
    "typenull": "type-null",
    "oricorio-pompom": "oricorio-pom-pom",
    "necrozma-duskmane": "necrozma-dusk-mane",
    "necrozma-dawnwings": "necrozma-dawn-wings",
    "toxtricity-lowkey": "toxtricity-low-key",
    # rename forms
    "shellos": "shellos-west",
    "shaymin": "shaymin-land",
    "meloetta": "meloetta-aria",
    "keldeo": "keldeo-ordinary",
    "hoopa": "hoopa-confined",
    "burmy": "burmy-plant",
    "wormadam": "wormadam-plant",
    "deerling": "deerling-spring",
    "sawsbuck": "sawsbuck-spring",
    "vivillon": "vivillon-meadow",
    "basculin": "basculin-redstriped",
    "meowstic": "meowstic-male",
    "meowstic-f": "meowstic-female",
    "pyroar-f": "pyroar-female",
    "flabebe": "flabebe-red",
    "floette": "floette-red",
    "florges": "florges-red",
    "minior-meteor": "minior",
    "sinistea": "sinistea-phony",
    "polteageist": "polteageist-phony",
    "gastrodon": "gastrodon-west",
    "furfrou": "furfrou-natural",
    "wishiwashi": "wishiwashi-school",
    "tornadus": "tornadus-incarnate",
    "landorus": "landorus-incarnate",
    "thundurus": "thundurus-incarnate",
    "calyrex-ice": "calyrex-ice-rider",
    "calyrex-shadow": "calyrex-shadow-rider",
    "urshifu-rapidstrike": "urshifu-rapid-strike",
    "zacian": "zacian-hero",
    "zamazenta": "zamazenta-hero",
}


def get_all_pokemon() -> list[str]:
    """Return the sprite names (without ``.gif``) listed at ``back_base``.

    Every ``<a href>`` ending in ``.gif`` on the index page is a candidate.
    A candidate is kept if it is in ``force_keep``, or if it matches none of
    ``full_filters``, ``start_with_filters`` and ``end_with_filters``.

    Raises:
        requests.RequestException: if the index page cannot be fetched.
    """
    response = requests.get(back_base, timeout=request_timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # anchors without an href yield None, which must not reach .endswith()
    gifs = [
        href
        for a in soup.find_all("a")
        if (href := a.get("href")) and href.endswith(".gif")
    ]

    # str.startswith / str.endswith accept a tuple of alternatives
    prefixes = tuple(start_with_filters)
    suffixes = tuple(end_with_filters)
    excluded = set(full_filters)
    kept = set(force_keep)
    return [
        g[: -len(".gif")]
        for g in gifs
        if g in kept
        or (
            g not in excluded
            and not g.startswith(prefixes)
            and not g.endswith(suffixes)
        )
    ]


def load_image(base: str, name: str) -> Image.Image:
    """Download ``base + name + ".gif"`` and open it as a PIL image.

    Raises:
        requests.RequestException: on a network failure or non-2xx status.
    """
    response = requests.get(base + name + ".gif", timeout=request_timeout)
    # fail with the HTTP status rather than letting PIL choke on an error page
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))


def get_all_pixels(im: Image.Image) -> list[tuple[int, int, int]]:
    """Collect the RGB value of every pixel from every frame of ``im``.

    Each frame is converted to RGBA; pixels for which ``ingest.is_outline``
    returns a truthy value are skipped.
    """
    rgb_pixels = []
    # images without an n_frames attribute are treated as a single frame
    for fr in range(getattr(im, "n_frames", 1)):
        im.seek(fr)
        rgb_pixels += [
            (r, g, b)
            for r, g, b, a in im.convert("RGBA").getdata()
            if not ingest.is_outline(r, g, b, a)
        ]
    return rgb_pixels


def chromatic_dist_jab(p: np.ndarray, q: np.ndarray) -> float:
    """Return the Euclidean distance between ``p`` and ``q`` in the a-b plane.

    Both arguments are 3-component colours; the first component is ignored.
    """
    _, pa, pb = p
    _, qa, qb = q
    return math.sqrt((pa - qa) ** 2 + (pb - qb) ** 2)


def chromatic_dist_rgb(p: np.ndarray, q: np.ndarray) -> float:
    """Return the chromatic distance between two sRGB255 colours.

    Both colours are converted to CAM02-UCS and compared with
    ``chromatic_dist_jab``.
    """
    return chromatic_dist_jab(*cspace_convert(np.array([p, q]), "sRGB255", "CAM02-UCS"))


def _mean_pairwise_dist(
    means: np.ndarray, chroma_dist: Callable[[np.ndarray, np.ndarray], float]
) -> float:
    """Return the average ``chroma_dist`` over every unordered pair of means."""
    pairs = list(combinations(means, 2))
    return sum(chroma_dist(p, q) for p, q in pairs) / len(pairs)


def compute_stats(
    pixels: np.ndarray,
    chroma_dist: Callable[[np.ndarray, np.ndarray], float],
    verbose: bool = False,
) -> list[float]:
    """Compute whole-image and per-cluster statistics for ``pixels``.

    Args:
        pixels: array of 3-component colours, one row per pixel.
        chroma_dist: distance function applied to pairs of cluster means.
        verbose: when true, print which ``k`` was selected.

    Returns:
        ``[inertia, *mu (3 components), *nu (3 components)]`` followed by the
        cluster data, zero-padded to 8 * 4 = 32 components.
    """
    total_stats = [ingest.inertia(pixels), *ingest.mu(pixels), *ingest.nu(pixels)]

    # attempt all of k=2,3,4 and select the "best" k
    # for a normal k-means problem this involves using the cluster variance or similar
    # but for this, what we really want to detect is a tripartite vs bipartite pokemon
    # these means need to be *distinct* to avoid punishing a pokemon with two shades
    # of the same color
    # for that reason, the average distance between the means is used instead:
    # the distance between the 2 means for the k=2 case is compared against
    # the mean of the three distances for the k=3 case, and the mean of the six
    # distances for the k=4 case
    # also, since the actual color is most important, as opposed to shades/hues, a
    # "chromatic" distance is used (distance in the a-b plane)
    # finally, a few different clusterings are attempted per k (different seeds),
    # in the hopes of avoiding a poor local optimum
    data = pixels.astype(float)
    # start below any real distance and always accept the first clustering, so
    # a sprite whose means all have zero chromatic distance still gets a result
    best_dist = -math.inf
    best_means = None
    best_labels = None
    for k in (2, 3, 4):
        for attempt in range(cluster_attempts):
            means, labels = vq.kmeans2(data, k, minit="++", seed=cluster_seed + attempt)
            dist = _mean_pairwise_dist(means, chroma_dist)
            if math.isnan(dist):
                # NaN compares false against everything; rank it as worst
                dist = -math.inf
            if best_means is None or dist > best_dist:
                best_dist, best_means, best_labels = dist, means, labels

    # per cluster: size, inertia, the cluster mean, and nu of the cluster pixels
    cluster_stats = []
    for i, mean in enumerate(best_means):
        cluster_pixels = pixels[best_labels == i]
        cluster_stats += [
            len(cluster_pixels),
            ingest.inertia(cluster_pixels),
            *mean,
            *ingest.nu(cluster_pixels),
        ]
    # pad with zeros (fake empty clusters) so every row has the same width;
    # the padding can then be ignored by the UI
    cluster_stats += [0] * (32 - len(cluster_stats))
    if verbose:
        print(f" Selected k={len(best_means)}")
    return [*total_stats, *cluster_stats]


def get_stats(name: str, verbose: bool = False) -> list[float]:
    """Download the front and back sprites for ``name`` and compute its row.

    Returns:
        ``[pixel count, *stats in Jab space, *stats in RGB space]`` where each
        stats group is the output of ``compute_stats``.
    """
    front = get_all_pixels(load_image(base, name))
    back = get_all_pixels(load_image(back_base, name))
    rgb_pixels = np.array(front + back)
    jab_pixels = cspace_convert(rgb_pixels, "sRGB255", "CAM02-UCS")

    if verbose:
        print(" Jab space...")
    jab_stats = compute_stats(jab_pixels, chromatic_dist_jab, verbose)
    if verbose:
        print(" RGB space...")
    rgb_stats = compute_stats(rgb_pixels, chromatic_dist_rgb, verbose)
    return [len(rgb_pixels), *jab_stats, *rgb_stats]


def main() -> None:
    """Ingest every sprite and write ``database-v3.js`` in the working directory.

    A sprite that fails for any reason is reported, recorded, and skipped so
    that one bad download does not abort the whole run.
    """
    pkmn = get_all_pokemon()
    print("Found", len(pkmn), "sprites...")
    errors = []
    with open("database-v3.js", "w", encoding="utf-8") as outfile:
        outfile.write("const databaseV3 = [\n")
        for name in pkmn:
            print("Ingesting", name, "...")
            try:
                stats = get_stats(name, verbose=True)
                outfile.write(f' [ "{rename.get(name, name)}", {", ".join(str(n) for n in stats)} ],\n')
            except Exception as e:  # top-level boundary: log and carry on
                print(e)
                errors.append((name, e))
        outfile.write("];\n")
    print("Errors:", errors)


if __name__ == "__main__":
    main()