|
@@ -19,9 +19,10 @@ Goals:
|
|
|
+ Improved clustering logic
|
|
|
+ Parallel, in the same way as anim-ingest
|
|
|
+ Async requests for downloads
|
|
|
- * Include more info about the pokemon (form, display name, icon sprite source)
|
|
|
+ + Include more info about the pokemon (form, display name, icon sprite source)
|
|
|
+ + Include megas/gmax/etc, tagged so the UI can filter them
|
|
|
* Include more images (get more stills from pokemondb + serebii)
|
|
|
- * Include shinies + megas, tagged so the UI can filter them
|
|
|
+ * Include shinies
|
|
|
* Fallback automatically (try showdown animated, then showdown gen5, then pdb)
|
|
|
* Filtering system more explicit and easier to work around
|
|
|
* Output a record of ingest for auditing
|
|
@@ -87,6 +88,26 @@ Stats = NamedTuple("Stats", [
|
|
|
("bhat", float),
|
|
|
])
|
|
|
|
|
|
# Colour statistics computed from one set of images (built by get_data).
class Data(NamedTuple):
    # statistics over every pixel of every image
    total: Stats
    # statistics of each colour cluster found in those pixels
    clusters: list[Stats]
|
|
|
+
|
|
|
# One form of a pokemon species, as listed in Pokemon.forms.
#
# Declared with class syntax so the generated type name always matches the
# module-level binding; a mismatched name shows up in repr() and makes
# instances impossible to pickle (pickle looks the class up by its name).
class FormInfo(NamedTuple):
    # name of the form
    name: str
    # filter tags for the form, as produced by get_traits
    traits: list[str]
    # the "types" field of the form's pokedex entry
    types: list[str]
    # the "color" field of the form's pokedex entry
    color: str
    # image statistics; None until they have been computed
    data: Data | None
|
|
|
+
|
|
|
# One pokedex number together with every form listed under it.
class Pokemon(NamedTuple):
    # pokedex number
    num: int
    # name of the base species
    species: str
    # icon sprite source; None until it has been found
    sprite: str | None
    # every form recorded for this pokedex number
    forms: list[FormInfo]
|
|
|
+
|
|
|
|
|
|
def calc_statistics(pixels: np.array) -> Stats:
|
|
|
# mean pixel of the image, (L-bar, a-bar, b-bar)
|
|
@@ -141,15 +162,6 @@ def find_clusters(pixels: np.array, cluster_attempts=5, seed=0) -> list[Stats]:
|
|
|
return [calc_statistics(pixels[labels == i]) for i in range(len(means))]
|
|
|
|
|
|
|
|
|
-Data = NamedTuple("Data", [
|
|
|
- ("name", str),
|
|
|
- ("sprite", str),
|
|
|
- ("traits", list[str]),
|
|
|
- ("total", Stats),
|
|
|
- ("clusters", list[Stats]),
|
|
|
-])
|
|
|
-
|
|
|
-
|
|
|
def get_pixels(img: Image) -> np.array:
|
|
|
rgb = []
|
|
|
for fr in range(getattr(img, "n_frames", 1)):
|
|
@@ -173,37 +185,61 @@ async def load_all_images(urls: list[str]) -> list[Image.Image]:
|
|
|
return await asyncio.gather(*(load_image(session, url) for url in urls))
|
|
|
|
|
|
|
|
|
-def get_data(name, seed=0) -> Data:
|
|
|
- images = asyncio.get_event_loop().run_until_complete(load_all_images([
|
|
|
- # TODO source images
|
|
|
- ]))
|
|
|
-
|
|
|
def get_data(urls: list[str], seed=0) -> Data:
    """Download the images at ``urls`` and compute their colour statistics.

    All images are loaded concurrently, their pixels are pooled into a single
    array, and the statistics are computed over that pooled array.

    Args:
        urls: Image URLs to download; must not be empty.
        seed: Seed forwarded to ``find_clusters``.

    Returns:
        A ``Data`` holding the overall statistics and the per-cluster ones.

    Raises:
        ValueError: If ``urls`` is empty.
    """
    if not urls:
        # np.concatenate raises a ValueError with an unhelpful message when
        # handed an empty list; fail early with the same type instead.
        raise ValueError("get_data needs at least one image URL")

    # asyncio.run owns the event loop for the duration of the call, which
    # avoids the deprecated get_event_loop() lookup when no loop is running.
    images = asyncio.run(load_all_images(urls))
    # TODO error handling
    pixels = np.concatenate([get_pixels(img) for img in images])
    return Data(
        total=calc_statistics(pixels),
        clusters=find_clusters(pixels, seed=seed),
    )
|
|
|
|
|
|
|
|
|
-def get_data_for_all(pokemon: list[str], seed=0) -> Generator[Data, None, None]:
|
|
|
- with multiprocessing.Pool(4) as pool:
|
|
|
- yield from pool.imap_unordered(lambda n: get_data(n, seed=seed), enumerate(pokemon), 100)
|
|
|
-
|
|
|
-
|
|
|
-def name2id(name: str) -> str:
|
|
|
- return name.replace(" ", "").replace("-", "").lower()
|
|
|
def get_traits(species: str, form: dict) -> list[str]:
    """Work out the filter tags ("traits") for one form of a species.

    Tags produced here: "mega", "gmax", "regional" (always paired with the
    region name), and "nostart" for forms a pokemon cannot start a battle in.

    Args:
        species: Name of the base species, e.g. "Tauros".
        form: Pokedex entry for the form; its "formeKind" value is the
            lower-case kind of the form.

    Returns:
        The traits without duplicates, in the order they were first added,
        so the result is identical from one run to the next.

    Raises:
        KeyError: If ``form`` has no "formeKind" key.
    """
    kind = form["formeKind"]
    traits = []
    if kind in ("mega", "mega-x", "mega-y", "primal"):
        traits.extend(("mega", "nostart"))
    if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
        traits.extend(("gmax", "nostart"))
    if kind in ("alola", "galar", "hisui", "paldea"):
        traits.extend(("regional", kind))

    # special cases
    if species == "Tauros" and "paldea" in kind:
        # paldean tauros has dumb names: the region is only part of the kind.
        # Matching on "paldea" rather than "-paldea" also covers kinds that
        # start with the region (presumably "paldea-combat" etc. -- confirm
        # against the pokedex data).
        traits.extend(("regional", "paldea"))
    if species == "Minior" and kind != "meteor":
        # minior can only start the battle in meteor form
        traits.append("nostart")
    if species == "Darmanitan" and "zen" in kind:
        # darmanitan cannot start in zen form
        traits.append("nostart")
        if "galar" in kind:
            # also there's a galar-zen form to handle
            traits.extend(("regional", "galar"))
    if species == "Palafin" and kind == "hero":
        # palafin can only start in zero form
        traits.append("nostart")
    if species == "Gimmighoul" and kind == "roaming":
        # gimmighoul roaming is only in PGO
        traits.append("nostart")

    # dict.fromkeys drops duplicates but keeps insertion order; a set would
    # reorder the strings differently on every run (hash randomisation)
    return list(dict.fromkeys(traits))
|
|
|
+
|
|
|
+
|
|
|
# Pokedex keys of species that get separate "-M" and "-F" cosmetic forms.
# Source: https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
# Some pokemon have notable gender differences that the dex doesn't cover;
# which of them are listed here is an arbitrary judgement call.
# meowstic, indeedee, basculegion, oinkologne are already handled in the dex.
GENDER_DIFFS = (
    "hippopotas",
    "hippowdon",
    "unfezant",
    "frillish",
    "jellicent",
    "pyroar",
)
|
|
|
|
|
|
|
|
|
-def load_pokedex(path: str) -> dict:
|
|
|
+def load_pokedex(path: str) -> Generator[Pokemon, None, None]:
|
|
|
with open(path) as infile:
|
|
|
pkdx_raw = json.load(infile)
|
|
|
|
|
@@ -214,7 +250,7 @@ def load_pokedex(path: str) -> dict:
|
|
|
# non-cosmetic forms get separate entries automatically
|
|
|
# but keeping the separate unown forms would be ridiculous
|
|
|
if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0:
|
|
|
- cosmetic.append(f'{key}-{entry["baseForme"].replace(" ", "-")}')
|
|
|
+ cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}')
|
|
|
if key == "alcremie":
|
|
|
# oh god this thing
|
|
|
cosmetic = [
|
|
@@ -225,20 +261,62 @@ def load_pokedex(path: str) -> dict:
|
|
|
"Clover", "Flower", "Ribbon",
|
|
|
]
|
|
|
]
|
|
|
- pkdx[num].extend((name2id(cf), {
|
|
|
+ pkdx[num].extend({
|
|
|
+ **entry,
|
|
|
+ "forme": cf.replace(" ", "-"),
|
|
|
+ "formeKind": "cosmetic",
|
|
|
+ } for cf in cosmetic)
|
|
|
+ elif key in GENDER_DIFFS:
|
|
|
+ pkdx[num].append({
|
|
|
**entry,
|
|
|
- "forme": cf,
|
|
|
- }) for cf in cosmetic)
|
|
|
+ "forme": f'{entry["name"]}-M',
|
|
|
+ "formeKind": "cosmetic",
|
|
|
+ })
|
|
|
+ pkdx[num].append({
|
|
|
+ **entry,
|
|
|
+ "forme": f'{entry["name"]}-F',
|
|
|
+ "formeKind": "cosmetic",
|
|
|
+ })
|
|
|
else:
|
|
|
- pkdx[num].append((key, entry))
|
|
|
+ pkdx[num].append({
|
|
|
+ **entry,
|
|
|
+ "forme": entry["name"],
|
|
|
+ "formeKind": entry.get("forme", "base").lower(),
|
|
|
+ })
|
|
|
|
|
|
- for i in range(min(pkdx.keys()), max(pkdx.keys()) + 1):
|
|
|
+ for i in range(1, max(pkdx.keys()) + 1):
|
|
|
+ forms = pkdx[i]
|
|
|
# double check there's no skipped entries
|
|
|
- assert len(pkdx[i]) > 0
|
|
|
-
|
|
|
- return pkdx
|
|
|
+ assert len(forms) > 0
|
|
|
+ # yield forms
|
|
|
+ species = forms[0].get("baseSpecies", forms[0]["name"])
|
|
|
+ yield Pokemon(
|
|
|
+ num=i,
|
|
|
+ species=species,
|
|
|
+ sprite=None, # found later
|
|
|
+ forms=[
|
|
|
+ FormInfo(
|
|
|
+ name=f.get("forme", f["name"]),
|
|
|
+ traits=get_traits(species, f),
|
|
|
+ types=f["types"],
|
|
|
+ color=f["color"],
|
|
|
+ data=None, # found later
|
|
|
+ ) for f in forms
|
|
|
+ ]
|
|
|
+ )
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    from sys import argv

    # optional positional arguments: pokedex path, output path, log path;
    # each guard checks for the exact index it is about to read
    dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
    out_file = argv[2] if len(argv) > 2 else "data/database-latest.js"
    log_file = argv[3] if len(argv) > 3 else "ingest.log"

    # load_pokedex is a generator; materialise it so entries can be indexed
    pkdx = list(load_pokedex(dex_file))

    # spot-check a few entries (named tuples are serialised as JSON arrays)
    print(json.dumps(pkdx[5], indent=2))
    print(json.dumps(pkdx[285], indent=2))
    print(json.dumps(pkdx[773], indent=2))

    # TODO: compute image data in parallel, then write out_file and log_file
    # with multiprocessing.Pool(4) as pool:
    #     yield from pool.imap_unordered(lambda n: get_data(n, seed=seed), pokemon, 100)
|