瀏覽代碼

Improve download logic, add showdown name transforms

Kirk Trombley 2 年之前
父節點
當前提交
187d691fe0
共有 1 個文件被更改,包括 67 次插入和 35 次删除
  1. 67 35
      tools/download.py

+ 67 - 35
tools/download.py

@@ -49,7 +49,7 @@ class Pokemon:
   forms: list[Form]
 
 
-async def load_pokedex() -> dict:
+async def download_pokedex() -> dict:
   async with ClientSession() as session:
     async with session.get("https://play.pokemonshowdown.com/data/pokedex.js") as res:
       res.raise_for_status()
@@ -165,8 +165,42 @@ def clean_dex(raw: dict) -> dict[int, Pokemon]:
   }
 
 
-def get_showdown_urls(species: str, form: Form) -> list[tuple[str, str]]:
-  name = form.name.lower().replace("mega-y", "megay").replace("mega-x", "megax")
+async def load_pokedex(dex_file: str) -> dict:
+  """
+  Return the cleaned pokedex, keyed by integer dex number.
+
+  If dex_file exists it is parsed as the JSON cache written by an earlier run; otherwise the
+  pokedex is downloaded, cleaned, and written to dex_file for auditing and reloading.
+  """
+  dex_path = Path(dex_file)
+  if dex_path.is_file():
+    with dex_path.open(encoding="utf-8") as infile:
+      loaded = json.load(infile)
+    dex = {
+      int(num): Pokemon(
+        num=entry["num"],
+        species=entry["species"],
+        forms=[Form(**f) for f in entry["forms"]],
+      ) for num, entry in loaded.items()
+    }
+  else:
+    # first download the pokedex
+    raw_dex = await download_pokedex()
+    # clean and reorganize it
+    dex = clean_dex(raw_dex)
+    # the cache's directory (e.g. "data/" for the default path) may not exist yet
+    dex_path.parent.mkdir(parents=True, exist_ok=True)
+    # output dex for auditing and reloading
+    with dex_path.open("w", encoding="utf-8") as out:
+      json.dump({
+        str(i): asdict(pkmn)
+        for i, pkmn in dex.items()
+      }, out, indent=2)
+
+  return dex
+
+
+def get_showdown_urls(form: Form) -> list[tuple[str, str]]:
+  name = form.name.lower().replace(
+    "mega-", "mega"  # charizard, mewtwo
+  ).replace(
+    "paldea-", "paldea"  # tauros
+  ).replace(
+    "mr. ", "mr"  # mr mime + mr rime
+  ).replace(
+    "'d", "d"  # farfetch'd and sirfetch'd
+  )
   return [
     (f"https://play.pokemonshowdown.com/sprites/ani/{name}.gif", "gif"),
     (f"https://play.pokemonshowdown.com/sprites/ani-back/{name}.gif", "gif"),
@@ -188,13 +222,13 @@ async def download(session: ClientSession, url: str, filename: str) -> tuple[str
   return url, True
 
 
-async def download_all(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exception | bool]]:
+async def download_all_for_pokemon(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exception | bool]]:
   results = defaultdict(dict)
   async with ClientSession() as session:
     for form in pkmn.forms:
       urls = []
-      urls += get_showdown_urls(pkmn.species, form)
-      # TODO more
+      urls += get_showdown_urls(form)
+      # TODO more sources
       results[form.name].update(await asyncio.gather(*[
         download(session, url, f"{image_dir}/{form.name}-{i}.{ext}")
         for i, (url, ext) in enumerate(urls)
@@ -202,39 +236,37 @@ async def download_all(pkmn: Pokemon, image_dir: str) -> dict[str, dict[str, Exc
   return results
 
 
-async def main(dex_file: str, image_dir: str):
-  if Path(dex_file).is_file():
-    with open(dex_file) as infile:
-      loaded = json.load(infile)
-    dex = {
-      int(num): Pokemon(
-        num=entry["num"],
-        species=entry["species"],
-        forms=[Form(**f) for f in entry["forms"]],
-      ) for num, entry in loaded.items()
-    }
-  else:
-    # first download the pokedex
-    raw_dex = await load_pokedex()
-    # clean and reorganize it
-    dex = clean_dex(raw_dex)
-    # output dex for auditing and reloading
-    with open(dex_file, "w") as out:
-      json.dump({
-        str(i): asdict(pkmn)
-        for i, pkmn in dex.items()
-      }, out, indent=2)
-
+async def download_all(image_dir: str, pkmn: list[Pokemon]) -> dict[str, dict[str, Exception | bool]]:
+  """
+  Create image_dir if needed, then download images for each Pokemon in pkmn, one at a time.
+
+  Returns a single dict that merges the per-Pokemon results of download_all_for_pokemon.
+  """
   Path(image_dir).mkdir(parents=True, exist_ok=True)
-  log = await download_all(dex[286], image_dir)
-  for url, result in log.items():
-    print(url, "-", str(result))
-  # TODO actually get all images
-
+  log = {}
+  for p in pkmn:
+    log.update(await download_all_for_pokemon(p, image_dir))
+  return log
+
+
+async def main(dex_file: str, image_dir: str, startIndex: int, endIndex: int, log_skipped: bool):
+  """
+  Download images for every dex number in startIndex..endIndex (inclusive) and print a report.
+
+  Failed downloads are always printed; results that are falsy (printed as SKIPPED) are only
+  shown when log_skipped is set. Numbers in the range that are not in the dex are reported
+  and ignored.
+  """
+  dex = await load_pokedex(dex_file)
+  # Resolve the range eagerly: a lazy generator would raise KeyError inside download_all,
+  # part-way through the run, and the results gathered so far would never be printed.
+  requested = range(startIndex, endIndex + 1)
+  missing = [i for i in requested if i not in dex]
+  if missing:
+    print(f"Not in pokedex, ignoring: {missing}")
+  log = await download_all(image_dir, [dex[i] for i in requested if i in dex])
+  new_downloads = 0
+  for form, result in log.items():
+    for url, info in result.items():
+      if isinstance(info, Exception):
+        print(f"{form}: FAILED {url} - {info}")
+      elif not info:
+        if log_skipped:
+          print(f"{form}: SKIPPED {url} - {info}")
+      else:
+        new_downloads += 1
+  print(f"New Downloads: {new_downloads}")
 
 if __name__ == "__main__":
   from sys import argv
+  # TODO make this an arg parser
   dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
   image_dir = argv[2] if len(argv) > 2 else "images"
+  # argv[3] is "start-end"; a lone number selects just that one entry
+  bounds = (argv[3] if len(argv) > 3 else "1-151").split("-")[0:2]
+  start, end = int(bounds[0]), int(bounds[-1])
+  log_skipped = len(argv) > 4 and argv[4].lower() == 'true'
 
-  asyncio.run(main(dex_file, image_dir))
+  asyncio.run(main(dex_file, image_dir, start, end, log_skipped))