""" Goals: + Use OKLab + Improved clustering logic + Parallel, in the same way as anim-ingest + Async requests for downloads + Include more info about the pokemon (form, display name, icon sprite source) + Include megas/gmax/etc, tagged so the UI can filter them * Include more images (get more stills from pokemondb + serebii) * Include shinies * Fallback automatically (try showdown animated, then showdown gen5, then pdb) * Filtering system more explicit and easier to work around * Output a record of ingest for auditing * Automatic retry of a partially failed ingest, using record """ # async def load_image(session: ClientSession, url: str) -> Image.Image: # async with session.get(url) as res: # res.raise_for_status() # return Image.open(BytesIO(await res.read())) # async def load_all_images(urls: list[str]) -> tuple[list[Image.Image], list[Exception]]: # async with ClientSession() as session: # results = await asyncio.gather( # *(load_image(session, url) for url in urls), # return_exceptions=True # ) # success = [] # errors = [] # for r in results: # (success if isinstance(r, Image.Image) else errors).append(r) # return success, errors # def get_urls(target: Pokemon, form: FormInfo) -> list[str]: # lower_name = form.name.lower() # return [ # f"https://play.pokemonshowdown.com/sprites/ani/{lower_name}.gif", # f"https://play.pokemonshowdown.com/sprites/ani-back/{lower_name}.gif", # f"https://play.pokemonshowdown.com/sprites/gen5/{lower_name}.png", # f"https://play.pokemonshowdown.com/sprites/gen5-back/{lower_name}.png", # f"https://img.pokemondb.net/sprites/home/normal/{lower_name}.png", # # TODO other sources - want to make sure we never cross contaminate though... 
#         # if we pull the wrong form for something it will be a nightmare to debug
#         # f"https://www.serebii.net/scarletviolet/pokemon/new/{target.num}-{???}.png"
#         # f"https://www.serebii.net/pokemon/art/{target.num}-{???}.png"
#     ]


# async def set_data(target: Pokemon, seed=0) -> list[Exception]:
#     all_errors = []
#     for form in target.forms:
#         print(f" #{target.num} - Ingesting Form: {form.name}")
#         urls = get_urls(target, form)
#         print(f" #{target.num} - Attempting {len(urls)} potential sources")
#         images, errors = await load_all_images(urls)
#         all_errors.extend(errors)
#         print(f" #{target.num} - Loaded {len(images)} sources")
#         try:
#             pixels = np.concatenate([get_pixels(img) for img in images])
#             print(f" #{target.num} - Summarizing {len(pixels)} total pixels")
#             total = calc_statistics(pixels)
#             print(f" #{target.num} - Begin clustering")
#             clusters = find_clusters(pixels, seed=seed)
#             print(f" #{target.num} - End clustering, chose k={len(clusters)}")
#             form.data = Data(total=total, clusters=clusters)
#         except Exception as e:
#             all_errors.append(e)
#     return all_errors


# NOTE(review): `ingest` below is an unfinished, disabled draft. Its line
# breaks and nesting were restored from a flattened copy, so the placement of
# the try/except relative to the loop is a guess - confirm before re-enabling.
# It also uses `pool` and `p`, which are not defined anywhere in this draft.

# async def ingest(pool_size: int, seed: int) -> tuple[list[str], list[str]]:
#     computed = []
#     errors = []
#     loop = asyncio.get_event_loop()
#     with ProcessPoolExecutor(pool_size) as exec:
#         print(f"Ingesting #{start} - #{end}")
#         for pkmn in pkdx[start - 1:end]:
#             print(f"Ingesting #{pkmn.num}: {pkmn.species}...")
#             new_errors = await set_data(pkmn, seed)
#             loop.run_in_executor(exec, set_data, pkmn, seed)
#             computed.append(loop.run_in_executor(pool, ingest(p)))
#     try:
#         errors.extend(new_errors)
#         print(f"Finished #{pkmn.num}: {len(new_errors)} error(s)")
#         return json.dumps(asdict(pkmn))
#     except Exception as e:
#         print(e)
#         errors.append(e)


# if __name__ == "__main__":
#     from sys import argv
#     dex_file = argv[1] if len(argv) > 1 else "data/pokedex.json"
#     out_file = argv[2] if len(argv) > 2 else "data/database-latest.db"
#     dex_span = argv[3] if len(argv) > 3 else "1-151"
#     log_file = argv[4] if len(argv) > 4 else "errors-latest.log"
#     set_seed = argv[5] if len(argv) > 5 else "20230304"
#     start, end = map(int, dex_span.split("-", maxsplit=1))
#     seed = int(set_seed)
#     errors = []
#     pkdx = list(load_pokedex(dex_file))
#     loop = asyncio.new_event_loop()
#     with open(log_file, "w") as log:
#         # TODO better logging
#         log.writelines(str(e) for e in errors)
#     with open(out_file, "a") as db:
#         for _, line in computed:
#             db.write(line)
#             db.write("\n")