anim_ingest.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. import io
  2. import math
  3. from PIL import Image
  4. from bs4 import BeautifulSoup
  5. from colorspacious import cspace_convert
  6. from scipy.cluster import vq
  7. import requests
  8. import numpy as np
  9. import ingest
# Base RNG seed for k-means; clustering attempt i is seeded with cluster_seed + i.
cluster_seed = 20220326
# Number of differently seeded k-means runs tried for each candidate k.
cluster_attempts = 3
# URL prefix of the animated front sprites ("<name>.gif" is appended to it).
base = "https://play.pokemonshowdown.com/sprites/ani/"
# URL prefix of the animated back sprites; the page served at this URL is also
# scraped for links to enumerate the available sprite names.
back_base = "https://play.pokemonshowdown.com/sprites/ani-back/"
  14. # removing all forms of a pokemon, and also pokestars
  15. start_with_filters = [
  16. # no significant visual changes
  17. "arceus-", "silvally-", "genesect-", "pumpkaboo-", "gourgeist-", "unown-", "giratina-",
  18. # cannot start the battle in alternate form
  19. "castform-", "cherrim-", "aegislash-", "xerneas-", "wishiwashi-", "eiscue-", "mimikyu-",
  20. "cramorant-", "morepeko-",
  21. # weird event thing
  22. "greninja-", "eevee-", "pikachu-", "zarude-", "magearna-",
  23. # pokestars
  24. "pokestar",
  25. ]
# Sprite filename suffixes to drop: removes every sprite of a given form type.
# Filenames listed in force_keep are exempt from this filter.
end_with_filters = [
    "-mega.gif", "-megax.gif", "-megay.gif", "-primal.gif", "-ultra.gif",
    "-gmax.gif", "-eternamax.gif", "-totem.gif", "-f.gif", "-b.gif",
]
# Exact sprite filenames to drop entirely.
# NOTE: the odd spellings in the "typos/duplicates" group are intentional; they
# are listed so that those misnamed files get filtered out.
full_filters = [
    # darmanitan zen forms (cannot start in zen)
    "darmanitan-galarzen.gif", "darmanitan-zen.gif",
    # minior core forms (cannot start in anything but -meteor, renamed below)
    "minior.gif", "minior-blue.gif", "minior-green.gif", "minior-indigo.gif",
    "minior-orange.gif", "minior-violet.gif", "minior-yellow.gif",
    # because it is a create-a-pokemon
    "astrolotl.gif", "aurumoth.gif", "caribolt.gif", "cawmodore.gif", "chromera.gif", "crucibelle.gif",
    "equilibra.gif", "fidgit.gif", "jumbao.gif", "justyke.gif", "kerfluffle.gif", "kitsunoh.gif",
    "krilowatt.gif", "malaconda.gif", "miasmaw.gif", "mollux.gif", "naviathan.gif", "necturna.gif",
    "pajantom.gif", "plasmanta.gif", "pluffle.gif", "protowatt.gif", "scratchet.gif", "smogecko.gif",
    "smoguana.gif", "smokomodo.gif", "snaelstrom.gif", "stratagem.gif", "tomohawk.gif", "volkraken.gif", "voodoom.gif",
    # typos/duplicates
    "buffalant.gif", "klinklang-back.gif", "krikretot.gif", "pumpkabo-super.gif", "magcargo%20.gif",
    "ratatta-a.gif", "ratatta-alola.gif", "raticate-a.gif",
    "rotom-h.gif", "rotom-m.gif", "rotom-s.gif", "rotom-w.gif",
    # not a pokemon
    "substitute.gif",
]
# Force certain pokemon to stay: these filenames bypass every filter (without
# this they would be removed by the "-f.gif" suffix filter).
force_keep = [ "meowstic-f.gif", "unfezant-f.gif", "pyroar-f.gif" ]
# Rename certain pokemon after the fact: maps a sprite name (filename without
# ".gif") to the name written to the output database. Names that are not keys
# here are written unchanged.
rename = {
    # dash consistency
    "nidoranm": "nidoran-m",
    "nidoranf": "nidoran-f",
    "porygonz": "porygon-z",
    "tapubulu": "tapu-bulu",
    "tapufini": "tapu-fini",
    "tapukoko": "tapu-koko",
    "tapulele": "tapu-lele",
    "hooh": "ho-oh",
    "mimejr": "mime-jr",
    "mrmime": "mr-mime",
    "mrmime-galar": "mr-mime-galar",
    "mrrime": "mr-rime",
    "jangmoo": "jangmo-o",
    "hakamoo": "hakamo-o",
    "kommoo": "kommo-o",
    "typenull": "type-null",
    "oricorio-pompom": "oricorio-pom-pom",
    "necrozma-duskmane": "necrozma-dusk-mane",
    "necrozma-dawnwings": "necrozma-dawn-wings",
    "toxtricity-lowkey": "toxtricity-low-key",
    # rename forms (give the default sprite an explicit form name)
    "shellos": "shellos-west",
    "shaymin": "shaymin-land",
    "meloetta": "meloetta-aria",
    "keldeo": "keldeo-ordinary",
    "hoopa": "hoopa-confined",
    "burmy": "burmy-plant",
    "wormadam": "wormadam-plant",
    "deerling": "deerling-spring",
    "sawsbuck": "sawsbuck-spring",
    "vivillon": "vivillon-meadow",
    "basculin": "basculin-redstriped",
    "meowstic": "meowstic-male",
    "meowstic-f": "meowstic-female",
    "pyroar-f": "pyroar-female",
    "flabebe": "flabebe-red",
    "floette": "floette-red",
    "florges": "florges-red",
    "minior-meteor": "minior",
    "sinistea": "sinistea-phony",
    "polteageist": "polteageist-phony",
    "gastrodon": "gastrodon-west",
    "furfrou": "furfrou-natural",
    "wishiwashi": "wishiwashi-school",
    "tornadus": "tornadus-incarnate",
    "landorus": "landorus-incarnate",
    "thundurus": "thundurus-incarnate",
    "calyrex-ice": "calyrex-ice-rider",
    "calyrex-shadow": "calyrex-shadow-rider",
    "urshifu-rapidstrike": "urshifu-rapid-strike",
    "zacian": "zacian-hero",
    "zamazenta": "zamazenta-hero",
}
  109. def get_all_pokemon() -> list[str]:
  110. soup = BeautifulSoup(requests.get(back_base).text, "html.parser")
  111. gifs = [href for a in soup.find_all("a") if (href := a.get("href")).endswith("gif")]
  112. return [
  113. g[:-4]
  114. for g in gifs
  115. if g in force_keep or (
  116. g not in full_filters
  117. and not any(g.startswith(f) for f in start_with_filters)
  118. and not any(g.endswith(f) for f in end_with_filters)
  119. )
  120. ]
  121. def load_image(base: str, name: str) -> Image:
  122. return Image.open(io.BytesIO(requests.get(base + name + ".gif").content))
  123. def get_all_pixels(im: Image) -> list[tuple[int, int, int]]:
  124. rgb_pixels = []
  125. for fr in range(getattr(im, "n_frames", 1)):
  126. im.seek(fr)
  127. rgb_pixels += [
  128. (r, g, b)
  129. for r, g, b, a in im.convert("RGBA").getdata()
  130. if not ingest.is_outline(r, g, b, a)
  131. ]
  132. return rgb_pixels
  133. def chromatic_dist_jab(p: np.array, q: np.array) -> float:
  134. _, pa, pb = p
  135. _, qa, qb = q
  136. return math.sqrt((pa - qa) ** 2 + (pb - qb) ** 2)
  137. def chromatic_dist_rgb(p: np.array, q: np.array) -> float:
  138. return chromatic_dist_jab(*cspace_convert(np.array([p, q]), "sRGB255", "CAM02-UCS"))
  139. def compute_stats(pixels: np.array, chroma_dist, verbose: bool = False) -> list[float]:
  140. # return [inertia, *mu (3 components), *nu (3 components), and all cluster data (8 * 4 = 32 components)]
  141. total_stats = [ingest.inertia(pixels), *ingest.mu(pixels), *ingest.nu(pixels)]
  142. # attempt all of k=2,3,4 and select the "best" k
  143. # for a normal k-means problem this involves using the cluster variance or similar
  144. # but for this, what we really want to detect is a tripartite vs bipartite pokemon
  145. # these means need to be *distinct* to avoid punishing a pokemon with two shades
  146. # of the same color
  147. # for that reason, the average distance between the means is used instead
  148. # rather, the distance between the 2 means for the k=2 case is compared against
  149. # the mean of the three distances for the k=3 case, and the mean of the six
  150. # differences for the k=4 case
  151. # also, since the actual color is most important, as opposed to shades/hues, a
  152. # "chromatic" distance is used (distance in the a-b plane)
  153. # finally, a few different clusterings are actually attempted, in the hopes of
  154. # avoiding local maxima
  155. # result is then padded to have 2 fake empty clusters which can then be ignored by the UI
  156. best_dist = 0
  157. best_means = None
  158. best_labels = None
  159. for i in range(cluster_attempts):
  160. kmeans2, labels2 = vq.kmeans2(pixels.astype(float), 2, minit="++", seed=cluster_seed + i)
  161. dist2 = chroma_dist(*kmeans2)
  162. if dist2 > best_dist:
  163. best_dist, best_means, best_labels = dist2, kmeans2, labels2
  164. for i in range(cluster_attempts):
  165. kmeans3, labels3 = vq.kmeans2(pixels.astype(float), 3, minit="++", seed=cluster_seed + i)
  166. dist3 = ( chroma_dist(kmeans3[0], kmeans3[1])
  167. + chroma_dist(kmeans3[1], kmeans3[2])
  168. + chroma_dist(kmeans3[2], kmeans3[0])
  169. ) / 3
  170. if dist3 > best_dist:
  171. best_dist, best_means, best_labels = dist3, kmeans3, labels3
  172. for i in range(cluster_attempts):
  173. kmeans4, labels4 = vq.kmeans2(pixels.astype(float), 4, minit="++", seed=cluster_seed + i)
  174. dist4 = ( chroma_dist(kmeans4[0], kmeans4[1])
  175. + chroma_dist(kmeans4[0], kmeans4[2])
  176. + chroma_dist(kmeans4[0], kmeans4[3])
  177. + chroma_dist(kmeans4[1], kmeans4[2])
  178. + chroma_dist(kmeans4[1], kmeans4[3])
  179. + chroma_dist(kmeans4[2], kmeans4[3])
  180. ) / 6
  181. if dist4 > best_dist:
  182. best_dist, best_means, best_labels = dist4, kmeans4, labels4
  183. cluster_stats = []
  184. for i in range(len(best_means)):
  185. cluster_pixels = pixels[best_labels == i]
  186. cluster_stats += [len(cluster_pixels), ingest.inertia(cluster_pixels), *best_means[i], *ingest.nu(cluster_pixels)]
  187. cluster_stats += [0] * (32 - len(cluster_stats))
  188. if verbose:
  189. print(f" Selected k={len(best_means)}")
  190. return [*total_stats, *cluster_stats]
  191. def get_stats(name: str, verbose: bool = False) -> list[float]:
  192. front = get_all_pixels(load_image(base, name))
  193. back = get_all_pixels(load_image(back_base, name))
  194. rgb_pixels = np.array(front + back)
  195. jab_pixels = cspace_convert(rgb_pixels, "sRGB255", "CAM02-UCS")
  196. if verbose:
  197. print(f" Jab space...")
  198. jab_stats = compute_stats(jab_pixels, chromatic_dist_jab, verbose)
  199. if verbose:
  200. print(f" RGB space...")
  201. rgb_stats = compute_stats(rgb_pixels, chromatic_dist_rgb, verbose)
  202. return [len(rgb_pixels), *jab_stats, *rgb_stats]
  203. if __name__ == "__main__":
  204. pkmn = get_all_pokemon()
  205. print("Found", len(pkmn), "sprites...")
  206. errors = []
  207. with open("database-v3.js", "w") as outfile:
  208. outfile.write("const databaseV3 = [\n")
  209. for name in pkmn:
  210. print("Ingesting", name, "...")
  211. try:
  212. stats = get_stats(name, verbose=True)
  213. outfile.write(f' [ "{rename.get(name, name)}", {", ".join(str(n) for n in stats)} ],\n')
  214. except Exception as e:
  215. print(e)
  216. errors.append((name, e))
  217. outfile.write("];\n")
  218. print("Errors:", errors)