"""Scrape Pokemon Showdown animated sprites and write per-sprite colour statistics to database-v3.js."""
import io
import itertools
import math
import multiprocessing
from typing import Callable, NamedTuple, Optional

import numpy as np
import requests
from bs4 import BeautifulSoup
from colorspacious import cspace_convert
from PIL import Image
from scipy.cluster import vq

import ingest
# File extension of the sprite files that are listed and downloaded.
extension = ".gif"

# k-means settings: each k is tried `cluster_attempts` times, and attempt i is
# seeded with `cluster_seed + i`.
cluster_seed = 20220328
cluster_attempts = 10

# Directory listings holding the front-facing and back-facing animated sprites.
_sprite_root = "https://play.pokemonshowdown.com/sprites/"
base = _sprite_root + "ani/"
back_base = _sprite_root + "ani-back/"
# Sprite-name prefixes to drop: every alternate form of the listed pokemon,
# plus anything whose name begins with "pokestar".
start_with_filters = [
    # forms without significant visual changes
    "arceus-",
    "silvally-",
    "genesect-",
    "pumpkaboo-",
    "gourgeist-",
    "unown-",
    "giratina-",
    # forms a pokemon cannot start the battle in
    "castform-",
    "cherrim-",
    "aegislash-",
    "xerneas-",
    "wishiwashi-",
    "eiscue-",
    "mimikyu-",
    "cramorant-",
    "morpeko-",
    # weird event-only variants
    "greninja-",
    "eevee-",
    "pikachu-",
    "zarude-",
    "magearna-",
    # pokestars
    "pokestar",
]
# Sprite-name suffixes to drop: every sprite of these form types, whatever the
# pokemon. Names listed in force_keep are exempt.
end_with_filters = [
    "-mega",
    "-megax",
    "-megay",
    "-primal",
    "-ultra",
    "-gmax",
    "-eternamax",
    "-totem",
    "-f",
    "-b",
    "-old",
    "-shiny",
    "-eternalflower",
    "-rapidstrikegmax",
]
# Exact sprite names to drop entirely.
full_filters = [
    # darmanitan zen forms (a battle cannot start in zen)
    "darmanitan-galarzen",
    "darmanitan-zen",
    # minior core forms: only -meteor can start a battle, and the rename table
    # maps it back to plain "minior"
    "minior",
    "minior-blue", "minior-green", "minior-indigo", "minior-orange",
    "minior-red", "minior-violet", "minior-yellow",
    # gimmighoul roaming (cannot start roaming)
    "gimmighoul-roaming",
    # palafin hero (cannot start as hero)
    "palafin-hero",
    # create-a-pokemon entries
    "argalis", "arghonaut", "brattler", "breezi", "caimanoe",
    "cawdet", "colossoil", "coribalis", "cupra", "cyclohm",
    "dorsoil", "duohm", "electrelk", "embirch", "fawnifer",
    "flarelm", "floatoy", "krillowatt", "krolowatt", "miasmite",
    "monohm", "necturine", "nohface", "privatyke", "pyroak",
    "rebble", "revenankh", "saharaja", "snugglow", "solotl",
    "swirlpool", "syclant", "syclar", "tactite", "venomicon",
    "venomicon-epilogue", "volkritter", "voodoll",
    "astrolotl", "aurumoth", "caribolt", "cawmodore", "chromera",
    "crucibelle", "equilibra", "fidgit", "jumbao", "justyke",
    "kerfluffle", "kitsunoh", "krilowatt", "malaconda", "miasmaw",
    "mollux", "naviathan", "necturna", "pajantom", "plasmanta",
    "pluffle", "protowatt", "scratchet", "smogecko", "smoguana",
    "smokomodo", "snaelstrom", "stratagem", "tomohawk", "volkraken",
    "voodoom",
    # typos / duplicates of other sprites
    "0", "arctovolt", "buffalant", "burmy-plant", "darmanitan-standard",
    "deerling-spring", "deoxys-rs", "gastrodon-west", "klinklang-back",
    "krikretot", "marenie", "marowak-alolan", "meloetta-aria",
    "pichu-spikyeared", "polteageist-chipped", "rattata-alolan",
    "regidragon", "sawsbuck-spring", "shaymin-land", "shellos-west",
    "sinistea-chipped", "wormadam-plant", "pumpkabo-super", "magcargo%20",
    "meowstic-female", "ratatta-a", "ratatta-alola", "raticate-a",
    "rotom-h", "rotom-m", "rotom-s", "rotom-w",
    # not a pokemon
    "substitute", "egg", "egg-manaphy", "missingno",
]
# Sprite names that are always kept, even when one of the filters would
# otherwise remove them.
force_keep = [
    "meowstic-f",
    "unfezant-f",
    "pyroar-f",
]
# Sprite name -> name written to the output file. Names that are not listed
# here are written unchanged.
rename = {
    # --- insert the dashes the sprite file names leave out ---
    "nidoranm": "nidoran-m",
    "nidoranf": "nidoran-f",
    "porygonz": "porygon-z",
    "tapubulu": "tapu-bulu",
    "tapufini": "tapu-fini",
    "tapukoko": "tapu-koko",
    "tapulele": "tapu-lele",
    "hooh": "ho-oh",
    "mimejr": "mime-jr",
    "mrmime": "mr-mime",
    "mrmime-galar": "mr-mime-galar",
    "mrrime": "mr-rime",
    "jangmoo": "jangmo-o",
    "hakamoo": "hakamo-o",
    "kommoo": "kommo-o",
    "typenull": "type-null",
    "oricorio-pompom": "oricorio-pom-pom",
    "necrozma-duskmane": "necrozma-dusk-mane",
    "necrozma-dawnwings": "necrozma-dawn-wings",
    "toxtricity-lowkey": "toxtricity-low-key",

    # --- give forms an explicit form name ---
    "shellos": "shellos-west",
    "shaymin": "shaymin-land",
    "meloetta": "meloetta-aria",
    "keldeo": "keldeo-ordinary",
    "hoopa": "hoopa-confined",
    "burmy": "burmy-plant",
    "wormadam": "wormadam-plant",
    "deerling": "deerling-spring",
    "sawsbuck": "sawsbuck-spring",
    "vivillon": "vivillon-meadow",
    "basculin": "basculin-redstriped",
    "meowstic": "meowstic-male",
    "meowstic-f": "meowstic-female",
    "pyroar-f": "pyroar-female",
    "flabebe": "flabebe-red",
    "floette": "floette-red",
    "florges": "florges-red",
    "minior-meteor": "minior",
    "sinistea": "sinistea-phony",
    "polteageist": "polteageist-phony",
    "gastrodon": "gastrodon-west",
    "furfrou": "furfrou-natural",
    "wishiwashi": "wishiwashi-school",
    "tornadus": "tornadus-incarnate",
    "landorus": "landorus-incarnate",
    "thundurus": "thundurus-incarnate",
    "calyrex-ice": "calyrex-ice-rider",
    "calyrex-shadow": "calyrex-shadow-rider",
    "urshifu-rapidstrike": "urshifu-rapid-strike",
    "zacian": "zacian-hero",
    "zamazenta": "zamazenta-hero",
}
def get_all_pokemon(url: str, ext: str = extension) -> list[str]:
    """Return the sprite names linked from ``url`` that survive the filters.

    A link is considered when its ``href`` ends with ``ext``. Its name (the
    ``href`` without ``ext``) is kept when it is listed in ``force_keep``, or
    when it is not in ``full_filters``, does not start with any entry of
    ``start_with_filters`` and does not end with any entry of
    ``end_with_filters``.

    Args:
        url: Address of the HTML page listing the sprite files.
        ext: File extension of the sprites, including the leading dot.

    Returns:
        The surviving names, in the order they appear on the page.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    response = requests.get(url)
    # Fail loudly on an error page instead of silently parsing it into an
    # empty sprite list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Build the lookup structures once instead of once per link.
    always_kept = set(force_keep)
    dropped_names = set(full_filters)
    dropped_prefixes = tuple(start_with_filters)
    dropped_suffixes = tuple(end_with_filters)

    names = []
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        # Anchors without an href yield None and must be skipped.
        if not href or not href.endswith(ext):
            continue
        # Strip the actual extension rather than assuming it is 4 characters.
        name = href.removesuffix(ext)
        if name in always_kept:
            names.append(name)
            continue
        filtered = (
            name in dropped_names
            or href.startswith(dropped_prefixes)
            or name.endswith(dropped_suffixes)
        )
        if not filtered:
            names.append(name)
    return names
def load_image(base: str, name: str, ext: str = extension) -> Image.Image:
    """Download the sprite at ``base + name + ext`` and open it as an image.

    Args:
        base: URL prefix of the sprite directory (shadows the module-level
            ``base`` inside this function).
        name: Sprite name without extension.
        ext: File extension, including the leading dot.

    Returns:
        The image opened from the downloaded bytes.

    Raises:
        requests.HTTPError: If the download returns an error status.
    """
    response = requests.get(base + name + ext)
    # Surface a missing sprite as an HTTP error naming the URL, rather than
    # letting PIL fail on the body of an error page.
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))
def get_all_pixels(im: Image.Image) -> list[tuple[int, int, int]]:
    """Collect the RGB values of every non-outline pixel in every frame.

    Each frame is converted to RGBA; pixels for which ``ingest.is_outline``
    is true are skipped and the alpha value is dropped from the rest.
    Images without an ``n_frames`` attribute are treated as single-frame.
    """
    frame_count = getattr(im, "n_frames", 1)
    rgb_pixels = []
    for frame_index in range(frame_count):
        im.seek(frame_index)
        for r, g, b, a in im.convert("RGBA").getdata():
            if ingest.is_outline(r, g, b, a):
                continue
            rgb_pixels.append((r, g, b))
    return rgb_pixels
def merge_dist_jab(p: np.ndarray, q: np.ndarray) -> Optional[float]:
    """Return the lightness gap between two Jab colours if they may be merged.

    Two colours are mergeable when the angle between their (a, b) vectors is
    at most 10 degrees and their J values differ by at most 20.

    Args:
        p: First colour as a ``(J, a, b)`` triple.
        q: Second colour as a ``(J, a, b)`` triple.

    Returns:
        ``abs(pJ - qJ)`` when the colours are mergeable, otherwise ``None``.
        ``None`` is also returned when either colour has zero chroma
        (``a == b == 0``), because its hue angle is undefined.
    """
    pj, pa, pb = p
    qj, qa, qb = q
    light_diff = abs(pj - qj)

    chroma_product = math.hypot(pa, pb) * math.hypot(qa, qb)
    if chroma_product == 0:
        # Achromatic colour: no hue direction to compare (avoids a 0/0).
        return None

    cos_hue = (pa * qa + pb * qb) / chroma_product
    # Rounding can push the cosine of (anti)parallel vectors just outside
    # [-1, 1], which would make math.acos raise a domain error. The argument
    # order keeps a NaN as NaN so it still fails the comparison below.
    cos_hue = max(min(cos_hue, 1.0), -1.0)
    hue_angle = math.degrees(math.acos(cos_hue))

    return light_diff if hue_angle <= 10 and light_diff <= 20 else None
def merge_dist_rgb(p: np.array, q: np.array) -> float:
    """Apply ``merge_dist_jab`` to two sRGB255 colours.

    Both colours are converted from "sRGB255" to "CAM02-UCS" in a single
    call, and the converted pair is handed to ``merge_dist_jab``, whose
    result (a distance, or ``None`` when the colours should not be merged)
    is returned as-is.
    """
    rgb_pair = np.array([p, q])
    p_jab, q_jab = cspace_convert(rgb_pair, "sRGB255", "CAM02-UCS")
    return merge_dist_jab(p_jab, q_jab)
def score_clustering_jab(means: list[np.array]) -> float:
    """Score a clustering by how far apart its means are in the a-b plane.

    Args:
        means: Cluster means as ``(J, a, b)`` triples; the J component is
            ignored.

    Returns:
        The mean squared distance in the a-b plane over all unordered pairs
        of means, or ``0.0`` when there are fewer than two means (no pairs).
    """
    pair_dists = [
        (pa - qa) ** 2 + (pb - qb) ** 2
        for (_, pa, pb), (_, qa, qb) in itertools.combinations(means, 2)
    ]
    # With fewer than two means there is no pair to average over; the
    # original divided by zero here.
    if not pair_dists:
        return 0.0
    return sum(pair_dists) / len(pair_dists)
def score_clustering_rgb(means: list[np.array]) -> float:
    """Score sRGB255 cluster means with ``score_clustering_jab``.

    The means are converted from "sRGB255" to "CAM02-UCS" and the converted
    rows are passed on as a list.
    """
    rgb_means = np.array(means)
    jab_means = cspace_convert(rgb_means, "sRGB255", "CAM02-UCS")
    return score_clustering_jab(list(jab_means))
class Stats(NamedTuple):
    """Summary statistics for one group of pixels."""

    # Number of pixels in the group.
    size: int
    # Value of ingest.inertia for the group.
    inertia: float
    # ingest.mu of the group, or the k-means centroid for a cluster.
    mu: np.ndarray
    # Value of ingest.nu for the group.
    nu: np.ndarray


def merge_stats(s1: Stats, s2: Stats) -> Stats:
    """Combine two groups' statistics, weighting each by its pixel count.

    Args:
        s1: Statistics of the first group.
        s2: Statistics of the second group.

    Returns:
        A ``Stats`` whose size is the sum of both sizes and whose inertia,
        mu and nu are the size-weighted averages of the inputs. If one group
        is empty the other is returned unchanged; if both are empty, ``s2``
        is returned.
    """
    # An empty group carries zero weight. Returning the other group directly
    # avoids a division by zero when both are empty, and keeps undefined
    # (e.g. NaN) statistics of an empty group out of the result.
    if s1.size == 0:
        return s2
    if s2.size == 0:
        return s1

    total_size = s1.size + s2.size
    w1 = s1.size / total_size
    w2 = s2.size / total_size
    return Stats(
        size=total_size,
        inertia=s1.inertia * w1 + s2.inertia * w2,
        mu=s1.mu * w1 + s2.mu * w2,
        nu=s1.nu * w1 + s2.nu * w2,
    )
def flatten_stats(ss: list[Stats], target_len: int = 40) -> list[float]:
    """Flatten a list of ``Stats`` into one zero-padded list of numbers.

    Each entry contributes its size, its inertia, the elements of mu and
    then the elements of nu. The result is padded with zeros up to
    ``target_len``; a longer result is returned as-is, not truncated.
    """
    flat = []
    for stat in ss:
        flat.extend((stat.size, stat.inertia, *stat.mu, *stat.nu))
    missing = target_len - len(flat)
    # A non-positive multiplier yields an empty list, so nothing is appended.
    flat.extend([0] * missing)
    return flat
def _best_clustering(
    pixels: np.ndarray,
    clustering_scorer: Callable[[list[np.ndarray]], float],
) -> tuple[np.ndarray, np.ndarray]:
    """Run k-means for k in (2, 3, 4) and return the best-scoring (means, labels)."""
    # kmeans2 wants floats; convert once instead of on every attempt.
    points = pixels.astype(float)
    best_score = best_means = best_labels = None
    for k in (2, 3, 4):
        for attempt in range(cluster_attempts):
            means, labels = vq.kmeans2(points, k, minit="++", seed=cluster_seed + attempt)
            score = clustering_scorer(means)
            # Strict comparison: on a tie the earliest run wins.
            if best_score is None or best_score < score:
                best_score, best_means, best_labels = score, means, labels
    return best_means, best_labels


def _merge_closest_pair(
    cluster_stats: list[Stats],
    merge_dist: Callable[[np.ndarray, np.ndarray], Optional[float]],
) -> list[Stats]:
    """Merge the mergeable pair of clusters with the smallest distance, if any."""
    options = []
    for i, j in itertools.combinations(range(len(cluster_stats)), 2):
        ci = cluster_stats[i]
        cj = cluster_stats[j]
        dist = merge_dist(ci.mu, cj.mu)
        if dist is None:
            continue
        rest = [c for idx, c in enumerate(cluster_stats) if idx not in (i, j)]
        options.append((dist, [merge_stats(ci, cj), *rest]))
    if not options:
        # No pair is close enough: leave the clusters untouched.
        return cluster_stats
    return min(options, key=lambda option: option[0])[1]


def compute_stats(
    pixels: np.ndarray,
    clustering_scorer: Callable[[list[np.ndarray]], float],
    merge_dist: Callable[[np.ndarray, np.ndarray], Optional[float]],
) -> list[Stats]:
    """Compute statistics for all pixels and for their best colour clusters.

    k-means is run ``cluster_attempts`` times for each k in (2, 3, 4), and
    the run with the highest ``clustering_scorer`` value is kept. If that
    run has more than two clusters, the closest pair that ``merge_dist``
    accepts is merged once.

    Args:
        pixels: Array of pixel colours, one row per pixel.
        clustering_scorer: Maps the cluster means of a run to a score;
            higher is better.
        merge_dist: Maps two cluster means to a merge distance, or ``None``
            when the two clusters must not be merged.

    Returns:
        The statistics of all pixels, followed by one ``Stats`` per final
        cluster (the merged cluster first when a merge happened).
    """
    total_stats = Stats(
        size=len(pixels),
        inertia=ingest.inertia(pixels),
        mu=ingest.mu(pixels),
        nu=ingest.nu(pixels),
    )

    best_means, best_labels = _best_clustering(pixels, clustering_scorer)

    cluster_stats = []
    for label, mean in enumerate(best_means):
        cluster_pixels = pixels[best_labels == label]
        cluster_stats.append(Stats(
            size=len(cluster_pixels),
            inertia=ingest.inertia(cluster_pixels),
            mu=mean,
            nu=ingest.nu(cluster_pixels),
        ))

    # Only merge when that still leaves at least two clusters.
    if len(cluster_stats) > 2:
        cluster_stats = _merge_closest_pair(cluster_stats, merge_dist)

    return [total_stats, *cluster_stats]
def get_stats(name: str) -> list[float]:
    """Download a pokemon's front and back sprites and compute its statistics.

    The pixels of both sprites are pooled, and statistics are computed
    once in CAM02-UCS and once in sRGB255. The first element of each
    flattened list is dropped and the pooled pixel count is emitted once
    at the front instead.

    Returns:
        ``[pixel count, *jab statistics, *rgb statistics]``.
    """
    rgb_pixels = np.array(
        get_all_pixels(load_image(base, name))
        + get_all_pixels(load_image(back_base, name))
    )
    jab_pixels = cspace_convert(rgb_pixels, "sRGB255", "CAM02-UCS")

    colour_spaces = (
        (jab_pixels, score_clustering_jab, merge_dist_jab),
        (rgb_pixels, score_clustering_rgb, merge_dist_rgb),
    )
    row = [len(rgb_pixels)]
    for pixels, scorer, merger in colour_spaces:
        flat = flatten_stats(compute_stats(pixels, scorer, merger))
        row.extend(flat[1:])
    return row
def ingest_and_format(pair: tuple[int, str]) -> tuple[str, Optional[str], Optional[str]]:
    """Compute one pokemon's statistics and format them as an output line.

    Defined at module level so worker processes can import it under every
    multiprocessing start method, not only "fork".

    Args:
        pair: ``(index, name)`` as produced by ``enumerate`` over the names.

    Returns:
        ``(name, line, None)`` on success, or ``(name, None, error)`` on
        failure. The error is sent back as ``repr(exception)`` so it always
        pickles; a list appended to inside a worker never reaches the parent
        process.
    """
    index, name = pair
    try:
        print(f"Ingesting #{index+1}: {name}...")
        stats = get_stats(name)
        format_name = rename.get(name, name)
        print(f"Finished #{index+1}: {name}, saving under {format_name}")
        line = f' [ "{format_name}", {", ".join(str(n) for n in stats)} ],\n'
    except Exception as e:
        # Per-sprite boundary: report the failure and carry on with the rest.
        print(e)
        return name, None, repr(e)
    return name, line, None


def main() -> None:
    """Ingest every sprite in parallel and write the sorted rows to database-v3.js."""
    pkmn = get_all_pokemon(back_base)
    print("Found", len(pkmn), "sprites...")

    lines = []
    errors = []
    with multiprocessing.Pool(4) as pool:
        for name, line, error in pool.imap_unordered(ingest_and_format, enumerate(pkmn), 100):
            if error is not None:
                errors.append((name, error))
            else:
                lines.append(line)
    # Results arrive in arbitrary order; sort once for a stable output file.
    lines.sort()

    print(f"Calculated {len(lines)} statistics, writing...")
    with open("database-v3.js", "w", encoding="utf-8") as outfile:
        outfile.write("const databaseV3 = [\n")
        outfile.writelines(lines)
        outfile.write("];\n")
    print("Errors:", errors)


if __name__ == "__main__":
    main()