#!/usr/bin/env python3
from collections import namedtuple

import numpy as np
from scipy.cluster import vq
from PIL import Image
from colorspacious import cspace_convert

# When True, inertia(), mu(), nu() and clusters() each print a progress line
# every time they are called.
verbose = False
# Fixed seed passed to vq.kmeans2 in clusters().
seed = 20220211


def is_outline(r: int, g: int, b: int, a: int) -> bool:
    """Tell whether an RGBA pixel is fully transparent or pure black."""
    fully_transparent = a == 0
    pure_black = (r, g, b) == (0, 0, 0)
    return fully_transparent or pure_black


def inertia(pixels: np.ndarray) -> float:
    """Return the mean squared Euclidean norm of the rows of *pixels*.

    This is the sum of the squares of every component of every pixel,
    divided by the number of pixels (rows).

    Args:
        pixels: array with one pixel per row.

    Returns:
        The inertia as a float; ``0.0`` when *pixels* has no rows.
    """
    if verbose:
        print("  Computing inertia...")
    count = len(pixels)
    if count == 0:
        # Nothing to average over; avoid a 0/0 division.
        return 0.0
    # Square in float64 so narrow integer dtypes (e.g. uint8) cannot wrap
    # around, and let NumPy do the reduction instead of Python's builtin
    # sum(), which would walk the array row by row.
    return float(np.square(pixels, dtype=np.float64).sum() / count)


def mu(pixels: np.array) -> np.array:
    """Return the mean pixel: the per-component average over all rows."""
    if verbose:
        print("  Computing mu...")
    mean_pixel = np.mean(pixels, axis=0)
    return mean_pixel


def nu(pixels: np.array) -> np.array:
    """Return the mean of the pixels after scaling each one to unit length."""
    if verbose:
        print("  Computing nu...")
    # Squared Euclidean norm of every row, kept as a column so that it
    # broadcasts against the rows of `pixels` in the division below.
    squared_norms = (pixels * pixels).sum(axis=1, keepdims=True)
    unit_pixels = pixels / np.sqrt(squared_norms)
    return unit_pixels.mean(axis=0)


def clusters(pixels: np.array) -> tuple[np.array, np.array, np.array, np.array]:
    """Split the pixels into three k-means clusters.

    Returns the cluster means followed by the pixels assigned to the first,
    second and third cluster.  The choice of k = 3 is somewhat arbitrary.
    """
    if verbose:
        print("  Computing clusters...")
    centroids, assignment = vq.kmeans2(pixels.astype(float), 3, minit="++", seed=seed)
    # Select the rows of the original (unconverted) array label by label.
    members = [pixels[assignment == label] for label in range(3)]
    return centroids, members[0], members[1], members[2]


def all_stats(pixels: np.array) -> np.array:
    """Compute every statistic for a set of pixels as one flat array.

    Layout: inertia, mean pixel (mu) and mean normalized pixel (nu) of the
    whole set, then for each of the three k-means clusters in turn its size,
    its inertia, its k-means mean and its nu.
    """
    centroids, *groups = clusters(pixels)

    # statistics over all pixels
    stats = [inertia(pixels), *mu(pixels), *nu(pixels)]

    # statistics per cluster
    for centroid, group in zip(centroids, groups):
        stats.extend([len(group), inertia(group), *centroid, *nu(group)])

    return np.array(stats)

def ingest_png(file_name: str) -> tuple[str, list[float]]:
    """Read one PNG and compute its color statistics.

    The image name is the last "/"-separated component of *file_name* with
    everything from its first "." onwards removed.  Transparent and
    pure-black pixels (see ``is_outline``) are ignored; the remaining pixels
    are analysed both as sRGB values and after conversion to CAM02-UCS.

    Args:
        file_name: path of the PNG file to read.

    Returns:
        ``(name, values)`` where ``values`` is the number of retained pixels,
        followed by ``all_stats`` of the CAM02-UCS pixels, followed by
        ``all_stats`` of the RGB pixels.

    Raises:
        ValueError: if every pixel of the image is transparent or pure black.
    """
    print(f"Ingesting {file_name}")

    # image name - strip leading path and trailing extension.
    # Index [-1] rather than [1]: a bare file name without any "/" splits
    # into a single element, and [1] would raise IndexError.
    name = file_name.rsplit("/", maxsplit=1)[-1].split(".", maxsplit=1)[0]

    # read non-outline pixels of image; the with-block closes the underlying
    # file as soon as the pixel data has been extracted
    with Image.open(file_name) as image:
        rgb_pixels = np.array([
            (r, g, b)
            for r, g, b, a in image.convert("RGBA").getdata()
            if not is_outline(r, g, b, a)
        ])

    # Fail with a clear message here rather than with an obscure error from
    # the color conversion or the clustering further down.
    if len(rgb_pixels) == 0:
        raise ValueError(f"{file_name}: no non-outline pixels to analyse")

    # convert RGB pixels to CAM02 values
    jab_pixels = cspace_convert(rgb_pixels, "sRGB255", "CAM02-UCS")

    # compute metrics, flatten to a single list
    return name, [len(rgb_pixels), *all_stats(jab_pixels), *all_stats(rgb_pixels)]

if __name__ == "__main__":
    import json
    import os
    import sys

    # Directory to scan for PNG files: first command-line argument,
    # defaulting to "pngs".
    png_dir = sys.argv[1] if len(sys.argv) >= 2 else "pngs"

    # os.listdir returns entries in arbitrary order; sort them so the
    # generated file is the same from run to run.  Listing before opening
    # the output also means an unreadable directory does not truncate an
    # existing database file.
    png_files = sorted(fn for fn in os.listdir(png_dir) if fn.endswith(".png"))

    with open("database-v2.js", "w", encoding="utf-8") as outfile:
        outfile.write("const databaseV2 = [\n")
        for fn in png_files:
            # ingest_png derives the image name by splitting on "/", so the
            # path is joined with a literal "/" here.
            name, ra = ingest_png(png_dir + "/" + fn)
            # json.dumps quotes the name and escapes any quote or backslash
            # so the emitted JavaScript string literal stays valid.
            quoted_name = json.dumps(name, ensure_ascii=False)
            values = ", ".join(str(n) for n in ra)
            outfile.write(f"  [ {quoted_name}, {values} ],\n")
        outfile.write("];\n")