123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- """
- Goals:
- + Use OKLab
- + Improved clustering logic
- + Parallel, in the same way as anim-ingest
- + Async requests for downloads
- + Include more info about the pokemon (form, display name, icon sprite source)
- + Include megas/gmax/etc, tagged so the UI can filter them
- * Include more images (get more stills from pokemondb + serebii)
- * Include shinies
- * Fall back automatically (try showdown animated, then showdown gen5, then pokemondb)
- * Make the filtering system more explicit and easier to work around
- * Output a record of ingest for auditing
- * Automatically retry a partially failed ingest, using the ingest record
- """
- # async def load_image(session: ClientSession, url: str) -> Image.Image:
- # async with session.get(url) as res:
- # res.raise_for_status()
- # return Image.open(BytesIO(await res.read()))
- # async def load_all_images(urls: list[str]) -> tuple[list[Image.Image], list[Exception]]:
- # async with ClientSession() as session:
- # results = await asyncio.gather(
- # *(load_image(session, url) for url in urls),
- # return_exceptions=True
- # )
- # success = []
- # errors = []
- # for r in results:
- # (success if isinstance(r, Image.Image) else errors).append(r)
- # return success, errors
- # def get_urls(target: Pokemon, form: FormInfo) -> list[str]:
- # lower_name = form.name.lower()
- # return [
- # f"https://play.pokemonshowdown.com/sprites/ani/{lower_name}.gif",
- # f"https://play.pokemonshowdown.com/sprites/ani-back/{lower_name}.gif",
- # f"https://play.pokemonshowdown.com/sprites/gen5/{lower_name}.png",
- # f"https://play.pokemonshowdown.com/sprites/gen5-back/{lower_name}.png",
- # f"https://img.pokemondb.net/sprites/home/normal/{lower_name}.png",
- # # TODO: add other sources, but make sure we never cross-contaminate forms;
- # # pulling the wrong form's image for something would be a nightmare to debug
- # # f"https://www.serebii.net/scarletviolet/pokemon/new/{target.num}-{???}.png"
- # # f"https://www.serebii.net/pokemon/art/{target.num}-{???}.png"
- # ]
- # async def set_data(target: Pokemon, seed=0) -> list[Exception]:
- # all_errors = []
- # for form in target.forms:
- # print(f" #{target.num} - Ingesting Form: {form.name}")
- # urls = get_urls(target, form)
- # print(f" #{target.num} - Attempting {len(urls)} potential sources")
- # images, errors = await load_all_images(urls)
- # all_errors.extend(errors)
- # print(f" #{target.num} - Loaded {len(images)} sources")
- # try:
- # pixels = np.concatenate([get_pixels(img) for img in images])
- # print(f" #{target.num} - Summarizing {len(pixels)} total pixels")
- # total = calc_statistics(pixels)
- # print(f" #{target.num} - Begin clustering")
- # clusters = find_clusters(pixels, seed=seed)
- # print(f" #{target.num} - End clustering, chose k={len(clusters)}")
- # form.data = Data(total=total, clusters=clusters)
- # except Exception as e:
- # all_errors.append(e)
- # return all_errors
- # async def ingest(pool_size: int, seed: int) -> tuple[list[str], list[str]]:
- # computed = []
- # errors = []
- # loop = asyncio.get_event_loop()
- # with ProcessPoolExecutor(pool_size) as exec:
- # print(f"Ingesting #{start} - #{end}")
- # for pkmn in pkdx[start - 1:end]:
- # print(f"Ingesting #{pkmn.num}: {pkmn.species}...")
- # new_errors = await set_data(pkmn, seed)
- # loop.run_in_executor(exec, set_data, pkmn, seed)
-
- # computed.append(loop.run_in_executor(pool, ingest(p)))
- # try:
- # errors.extend(new_errors)
- # print(f"Finished #{pkmn.num}: {len(new_errors)} error(s)")
- # return json.dumps(asdict(pkmn))
- # except Exception as e:
- # print(e)
- # errors.append(e)
- # if __name__ == "__main__":
- # from sys import argv
- # dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
- # out_file = argv[2] if len(argv) > 2 else "data/database-latest.db"
- # dex_span = argv[3] if len(argv) > 3 else "1-151"
- # log_file = argv[4] if len(argv) > 4 else "errors-latest.log"
- # set_seed = argv[5] if len(argv) > 5 else "20230304"
- # start, end = map(int, dex_span.split("-", maxsplit=1))
- # seed = int(set_seed)
- # errors = []
- # pkdx = list(load_pokedex(dex_file))
- # loop = asyncio.new_event_loop()
- # with open(log_file, "w") as log:
- # # TODO better logging
- # log.writelines(str(e) for e in errors)
- # with open(out_file, "a") as db:
- # for _, line in computed:
- # db.write(line)
- # db.write("\n")
|