Forráskód Böngészése

Update pokedex processing

Kirk Trombley 2 éve
szülő
commit
96367ab39b
2 módosított fájl, 123 hozzáadás és 43 törlés
  1. 3 1
      .gitignore
  2. 120 42
      ingest2.py

+ 3 - 1
.gitignore

@@ -7,4 +7,6 @@ counts.csv
 __pycache__/
 .venv/
 
-.vscode/
+.vscode/
+
+*.log

+ 120 - 42
ingest2.py

@@ -19,9 +19,10 @@ Goals:
  + Improved clustering logic
  + Parallel, in the same way as anim-ingest
  + Async requests for downloads
- * Include more info about the pokemon (form, display name, icon sprite source)
+ + Include more info about the pokemon (form, display name, icon sprite source)
+ + Include megas/gmax/etc, tagged so the UI can filter them
  * Include more images (get more stills from pokemondb + serebii)
- * Include shinies + megas, tagged so the UI can filter them
+ * Include shinies 
  * Fallback automatically (try showdown animated, then showdown gen5, then pdb)
  * Filtering system more explicit and easier to work around
  * Output a record of ingest for auditing
@@ -87,6 +88,26 @@ Stats = NamedTuple("Stats", [
   ("bhat", float),
 ])
 
+Data = NamedTuple("Data", [
+  ("total", Stats),
+  ("clusters", list[Stats]),
+])
+
+FormInfo = NamedTuple("FormInfo", [  # typename must match the bound name so pickle/repr resolve the class
+  ("name", str),
+  ("traits", list[str]),
+  ("types", list[str]),
+  ("color", str),
+  ("data", Data | None),  # None until it is filled in later
+])
+
+Pokemon = NamedTuple("Pokemon", [
+  ("num", int),
+  ("species", str),
+  ("sprite", str | None),  # None until it is filled in later
+  ("forms", list[FormInfo]),
+])
+
 
 def calc_statistics(pixels: np.array) -> Stats:
   # mean pixel of the image, (L-bar, a-bar, b-bar)
@@ -141,15 +162,6 @@ def find_clusters(pixels: np.array, cluster_attempts=5, seed=0) -> list[Stats]:
   return [calc_statistics(pixels[labels == i]) for i in range(len(means))]
 
 
-Data = NamedTuple("Data", [
-  ("name", str),
-  ("sprite", str),
-  ("traits", list[str]),
-  ("total", Stats),
-  ("clusters", list[Stats]),
-])
-
-
 def get_pixels(img: Image) -> np.array:
   rgb = []
   for fr in range(getattr(img, "n_frames", 1)):
@@ -173,37 +185,61 @@ async def load_all_images(urls: list[str]) -> list[Image.Image]:
     return await asyncio.gather(*(load_image(session, url) for url in urls))
 
 
-def get_data(name, seed=0) -> Data:
-  images = asyncio.get_event_loop().run_until_complete(load_all_images([
-    # TODO source images
-  ]))
-
+def get_data(urls: list[str], seed=0) -> Data:  # load every url, pool the pixels, return total stats + clusters
+  images = asyncio.run(load_all_images(urls))  # asyncio.run manages the loop; get_event_loop() with no running loop is deprecated
   # TODO error handling
-
   pixels = np.concatenate([get_pixels(img) for img in images])
-
   return Data(
-    # TODO name normalization
-    name=name,
-    # TODO sprite URL discovery
-    sprite=f"https://img.pokemondb.net/sprites/sword-shield/icon/{name}.png",
-    # TODO trait analysis
-    traits=[],
     total=calc_statistics(pixels),
     clusters=find_clusters(pixels, seed=seed),
   )
 
 
-def get_data_for_all(pokemon: list[str], seed=0) -> Generator[Data, None, None]:
-  with multiprocessing.Pool(4) as pool:
-    yield from pool.imap_unordered(lambda n: get_data(n, seed=seed), enumerate(pokemon), 100)
-
-
-def name2id(name: str) -> str:
-  return name.replace(" ", "").replace("-", "").lower()
+def get_traits(species: str, form: dict) -> list[str]:  # derive filter tags from the form's "formeKind"
+  kind = form["formeKind"]
+  traits = []
+  if kind in ("mega", "mega-x", "mega-y", "primal"):
+    traits.extend(("mega", "nostart"))
+  if kind in ("gmax", "eternamax", "rapid-strike-gmax"):
+    traits.extend(("gmax", "nostart"))
+  if kind in ("alola", "galar", "hisui", "paldea"):
+    traits.extend(("regional", kind))
+
+  # special cases
+  if species == "Tauros" and "-paldea" in kind:
+    # paldean tauros has dumb names -- NOTE(review): only fires when "-paldea" is inside kind; confirm against the dex
+    traits.extend(("regional", "paldea"))
+  if species == "Minior" and kind != "meteor":
+    # minior can only start the battle in meteor form
+    traits.append("nostart")
+  if species == "Darmanitan" and "zen" in kind:
+    # darmanitan cannot start in zen form
+    traits.append("nostart")
+    if "galar" in kind:
+      # also there's a galar-zen form to handle
+      traits.extend(("regional", "galar"))
+  if species == "Palafin" and kind == "hero":
+    # palafin can only start in zero form
+    traits.append("nostart")
+  if species == "Gimmighoul" and kind == "roaming":
+    # gimmighoul roaming is only in PGO
+    traits.append("nostart")
+
+  return sorted(set(traits))  # dedupe; sorted so the tag order is stable between runs
+
+
+# https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_gender_differences
+# there are some pokemon with notable gender diffs that the dex doesn't cover
+# judgement calls made arbitrarily
+GENDER_DIFFS = (  # dex keys that load_pokedex expands into "<name>-M" / "<name>-F" cosmetic forms
+  "hippopotas", "hippowdon", 
+  "unfezant", "frillish", "jellicent",
+  "pyroar",
+  # meowstic, indeedee, basculegion, oinkologne are already handled in the dex
+)
 
 
-def load_pokedex(path: str) -> dict:
+def load_pokedex(path: str) -> Generator[Pokemon, None, None]:
   with open(path) as infile:
     pkdx_raw = json.load(infile)
 
@@ -214,7 +250,7 @@ def load_pokedex(path: str) -> dict:
     # non-cosmetic forms get separate entries automatically
     # but keeping the separate unown forms would be ridiculous
     if key != "unown" and len(cosmetic := entry.get("cosmeticFormes", [])) > 0:
-      cosmetic.append(f'{key}-{entry["baseForme"].replace(" ", "-")}')
+      cosmetic.append(f'{entry["name"]}-{entry["baseForme"]}')
       if key == "alcremie":
         # oh god this thing
         cosmetic = [
@@ -225,20 +261,62 @@ def load_pokedex(path: str) -> dict:
             "Clover", "Flower", "Ribbon",
           ]
         ]
-      pkdx[num].extend((name2id(cf), {
+      pkdx[num].extend({
+        **entry,
+        "forme": cf.replace(" ", "-"),
+        "formeKind": "cosmetic",
+      } for cf in cosmetic)
+    elif key in GENDER_DIFFS:
+      pkdx[num].append({
         **entry,
-        "forme": cf,
-      }) for cf in cosmetic)
+        "forme": f'{entry["name"]}-M',
+        "formeKind": "cosmetic",
+      })
+      pkdx[num].append({
+        **entry,
+        "forme": f'{entry["name"]}-F',
+        "formeKind": "cosmetic",
+      })
     else:
-      pkdx[num].append((key, entry))
+      pkdx[num].append({
+        **entry,
+        "forme": entry["name"],
+        "formeKind": entry.get("forme", "base").lower(),
+      })
 
-  for i in range(min(pkdx.keys()), max(pkdx.keys()) + 1):
+  for i in range(1, max(pkdx.keys()) + 1):
+    forms = pkdx[i]
     # double check there's no skipped entries
-    assert len(pkdx[i]) > 0
-
-  return pkdx
+    assert len(forms) > 0
+    # yield forms
+    species = forms[0].get("baseSpecies", forms[0]["name"])
+    yield Pokemon(
+      num=i,
+      species=species,
+      sprite=None,  # found later
+      forms=[
+        FormInfo(
+          name=f.get("forme", f["name"]),
+          traits=get_traits(species, f),
+          types=f["types"],
+          color=f["color"],
+          data=None,  # found later
+        ) for f in forms
+      ]
+    )
 
 
 if __name__ == "__main__":
   from sys import argv
-  load_pokedex(argv[1] if len(argv) > 1 else "data/pokedex.json")
+  dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
+  out_file = argv[2] if len(argv) > 2 else "data/database-latest.js"
+  log_file = argv[3] if len(argv) > 3 else "ingest.log"  # argv[3] only exists when len(argv) > 3
+
+  pkdx = list(load_pokedex(dex_file))  # load_pokedex requires the dex path
+
+  print(json.dumps(pkdx[5], indent=2))
+  print(json.dumps(pkdx[285], indent=2))
+  print(json.dumps(pkdx[773], indent=2))
+
+  # with multiprocessing.Pool(4) as pool:
+  #   yield from pool.imap_unordered(lambda n: get_data(n, seed=seed), pokemon, 100)