/*
 * XORCE Core Engine - Implementation
 *
 * Algebraic foundation for the XOR Algebra Compiler.
 * Implements phase kernels, holonomy computation, and verification.
 *
 * Zero external dependencies (libc only).
 */

#include "core.h"
#include <stdlib.h>
#include <string.h>

/* ============================================================================
 * UTILITY FUNCTIONS
 * ============================================================================ */

/*
 * Count the set bits of a 32-bit word.
 * GCC and Clang get their builtin; every other compiler falls back to a
 * portable bit-parallel summation.
 */
static inline int popcount(uint32_t x)
{
#if defined(__GNUC__) || defined(__clang__)
    const int bits = __builtin_popcount(x);
    return bits;
#else
    /* Sum neighbouring bits into 2-bit fields, then 4-bit, then 8-bit. */
    x -= (x >> 1) & 0x55555555;
    x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
    x = ((x >> 4) + x) & 0x0F0F0F0F;
    /* Fold the four per-byte counts into the low byte. */
    x += x >> 8;
    x += x >> 16;
    return x & 0x3F;
#endif
}

/* ============================================================================
 * OMEGA OPERATIONS
 * ============================================================================ */

/*
 * Allocate a phase kernel for dimension `dim`.
 *
 * The kernel is a size x size table of signs with size = 2^dim, and every
 * entry starts at +1 (the flat kernel).
 *
 * Returns the new kernel, or NULL if `dim` is too large for the table to
 * be sized without overflow or if an allocation fails.  Release the
 * result with omega_free().
 */
xorc_omega_t* omega_create(xorc_dim_t dim)
{
    /*
     * size is a 32-bit value computed as 1 << dim, so dim must be below
     * 32 (a larger shift is undefined).  The table holds size * size
     * entries, so 2 * dim must also stay below the width of size_t
     * (assuming 8-bit bytes).  A negative dim, should xorc_dim_t be
     * signed, converts to a huge value here and is rejected as well.
     */
    const unsigned long long bits = (unsigned long long)dim;
    if (bits >= 32 || 2 * bits >= sizeof(size_t) * 8) {
        return NULL;
    }

    const uint32_t size = (uint32_t)1 << bits;
    const size_t table_size = (size_t)size * size;

    /* The byte count handed to malloc must not wrap either. */
    if (table_size > (size_t)-1 / sizeof(xorc_sign_t)) {
        return NULL;
    }

    xorc_omega_t *omega = (xorc_omega_t*)malloc(sizeof(xorc_omega_t));
    if (!omega) {
        return NULL;
    }

    omega->dim = dim;
    omega->size = size;

    /* Allocate the size x size table */
    omega->data = (xorc_sign_t*)malloc(table_size * sizeof(xorc_sign_t));
    if (!omega->data) {
        free(omega);
        return NULL;
    }

    /* Initialize to +1 (flat default) */
    for (size_t i = 0; i < table_size; i++) {
        omega->data[i] = 1;
    }

    return omega;
}

/*
 * Release a kernel obtained from omega_create(): first its sign table,
 * then the descriptor itself.  A NULL argument is ignored.
 */
void omega_free(xorc_omega_t *omega)
{
    if (!omega) {
        return;
    }

    free(omega->data);
    free(omega);
}

/*
 * Read omega(u, v) from the sign table.
 * Row u starts at offset u * size; v selects the column.  Neither index
 * is range-checked here.
 */
xorc_sign_t omega_get(xorc_omega_t *omega, xorc_addr_t u, xorc_addr_t v)
{
    const size_t row_start = (size_t)u * omega->size;
    return omega->data[row_start + v];
}

/*
 * Store sign s as omega(u, v).
 * Row u starts at offset u * size; v selects the column.  Neither index
 * is range-checked here.
 */
void omega_set(xorc_omega_t *omega, xorc_addr_t u, xorc_addr_t v, xorc_sign_t s)
{
    const size_t row_start = (size_t)u * omega->size;
    omega->data[row_start + v] = s;
}

/* ============================================================================
 * KERNEL INITIALIZATION (Section VI.1)
 * ============================================================================ */

/*
 * Reset the kernel to the flat one: every entry of the size x size
 * table becomes +1.
 */
void omega_init_flat(xorc_omega_t *omega)
{
    const size_t entries = (size_t)omega->size * omega->size;
    size_t idx = 0;

    while (idx < entries) {
        omega->data[idx] = 1;
        idx++;
    }
}

/*
 * Fill the kernel with the Pauli phase for `qubits` qubits.
 *
 * An address is read as (x, z): x is the lower n bits, z the next n bits
 * (n = qubits).  The kernel is
 *     omega(u, v) = (-1)^(x2 . z1)
 * where x2 is the x-part of v, z1 is the z-part of u and "." is the dot
 * product over F_2.
 *
 * Every (u, v) with u, v < omega->size is written.  The relation between
 * `qubits` and the kernel dimension is not checked here (presumably the
 * caller passes a kernel with dim = 2 * qubits).
 */
void omega_init_pauli(xorc_omega_t *omega, uint8_t qubits)
{
    const unsigned n = qubits;
    const uint32_t size = omega->size;

    /*
     * Shifting a 32-bit value by 32 or more is undefined.  For such n a
     * 32-bit address has no bits at position n or above, so the z-part
     * is 0 and the x-part is the whole word; the kernel is then flat.
     */
    const int shift_ok = n < 32;
    const uint32_t mask = shift_ok ? (((uint32_t)1 << n) - 1) : ~(uint32_t)0;

    for (uint32_t u = 0; u < size; u++) {
        /* z-part of u: bits n .. 2n-1 */
        const uint32_t z1 = shift_ok ? ((u >> n) & mask) : 0;

        for (uint32_t v = 0; v < size; v++) {
            /* x-part of v: bits 0 .. n-1 */
            const uint32_t x2 = v & mask;
            /* Parity of the overlap is the F_2 dot product. */
            const int odd = popcount(x2 & z1) & 1;
            omega_set(omega, u, v, odd ? -1 : 1);
        }
    }
}

/*
 * Fill the kernel with the Clifford algebra Cl(p,q) phase, n = p + q.
 *
 *     omega(u, v) = (-1)^(tau XOR sigma)
 *
 *   tau   = XOR over index pairs i < j of (bit_i(u) AND bit_j(v))
 *   sigma = XOR over i in [p, n)       of (bit_i(u) AND bit_i(v))
 *
 * Both terms are folded into one parity bit below.
 */
void omega_init_clifford(xorc_omega_t *omega, uint8_t p, uint8_t q)
{
    const uint8_t n = p + q;
    const uint32_t size = omega->size;

    for (uint32_t u = 0; u < size; u++) {
        for (uint32_t v = 0; v < size; v++) {
            int parity = 0;

            /* tau: one contribution per pair (lo, hi) with lo < hi */
            for (int lo = 0; lo < n; lo++) {
                for (int hi = lo + 1; hi < n; hi++) {
                    parity ^= ((u >> lo) & 1) & ((v >> hi) & 1);
                }
            }

            /* sigma: bits shared by u and v at positions p .. n-1 */
            for (int k = p; k < n; k++) {
                parity ^= ((u & v) >> k) & 1;
            }

            omega_set(omega, u, v, parity ? -1 : 1);
        }
    }
}

/*
 * Fill the kernel using the Cayley-Dickson doubling construction.
 *
 *   Level 0: reals       (1 element, flat)
 *   Level 1: complex     (2 elements)
 *   Level 2: quaternions (4 elements)
 *   Level 3: octonions   (8 elements)
 *
 * From XORCE.md section VI.1, one doubling step with u = (p, eps) and
 * v = (q, eta), eps, eta in {0,1} being the newly added top bit:
 *   eps=0, eta=0: omega_prev(p,q)
 *   eps=0, eta=1: omega_prev(p,q) * omega_prev(q,q)
 *   eps=1, eta=0: omega_prev(p,q)
 *   eps=1, eta=1: -omega_prev(q,p) * omega_prev(q,q)
 *
 * The kernel is always reset to flat first.  Level L then fills the
 * 2^L x 2^L corner of the table; entries outside that corner stay +1.
 *
 * The function cannot report errors, so in two situations it returns
 * with the kernel left flat:
 *   - `level` does not fit the kernel (2^level > omega->size), which
 *     would otherwise write outside the table;
 *   - the scratch buffer for a large level cannot be allocated.
 */
void omega_init_cayley(xorc_omega_t *omega, uint8_t level)
{
    /* Scratch held on the stack covers levels up to 4 (8 x 8 = 64 entries). */
    enum { STACK_ENTRIES = 64 };
    xorc_sign_t stack_buf[STACK_ENTRIES];
    xorc_sign_t *heap_buf = NULL;
    xorc_sign_t *prev = stack_buf;

    /* Every level starts from the flat kernel; level 0 is just that. */
    omega_init_flat(omega);
    if (level == 0) {
        return;
    }

    /*
     * Reject levels the table cannot hold.  This also keeps every shift
     * below 32 bits and guarantees the uint8_t loop counter cannot wrap.
     */
    if (level >= 32 || ((uint32_t)1 << level) > omega->size) {
        return;
    }

    /*
     * The largest snapshot is the one taken for the last step.  It is a
     * quarter (or less) of the kernel table, so its byte size cannot
     * overflow if the kernel table itself was allocated.  Allocating it
     * once, before any step runs, means a failure leaves the flat kernel
     * rather than a partly doubled one.
     */
    const uint32_t max_prev_size = (uint32_t)1 << (level - 1);
    const size_t max_prev_entries = (size_t)max_prev_size * max_prev_size;
    if (max_prev_entries > STACK_ENTRIES) {
        heap_buf = (xorc_sign_t*)malloc(max_prev_entries * sizeof(xorc_sign_t));
        if (!heap_buf) {
            return;
        }
        prev = heap_buf;
    }

    /* Apply the doubling construction once per level. */
    for (uint8_t lv = 1; lv <= level; lv++) {
        const uint32_t prev_size = (uint32_t)1 << (lv - 1);
        const uint32_t curr_size = (uint32_t)1 << lv;
        const uint32_t low_mask = prev_size - 1;

        /* Snapshot the previous level; the step below overwrites it. */
        for (uint32_t i = 0; i < prev_size; i++) {
            for (uint32_t j = 0; j < prev_size; j++) {
                prev[(size_t)i * prev_size + j] = omega_get(omega, i, j);
            }
        }

        /* Compute the new level. */
        for (uint32_t u = 0; u < curr_size; u++) {
            const uint32_t p_idx = u & low_mask;
            const int eps = (u >> (lv - 1)) & 1;

            for (uint32_t v = 0; v < curr_size; v++) {
                const uint32_t q_idx = v & low_mask;
                const int eta = (v >> (lv - 1)) & 1;

                const xorc_sign_t w_pq = prev[(size_t)p_idx * prev_size + q_idx];
                xorc_sign_t result = w_pq;   /* both eta=0 cases */

                if (eta) {
                    const xorc_sign_t w_qq = prev[(size_t)q_idx * prev_size + q_idx];
                    if (eps) {
                        const xorc_sign_t w_qp = prev[(size_t)q_idx * prev_size + p_idx];
                        result = -w_qp * w_qq;
                    } else {
                        result = w_pq * w_qq;
                    }
                }

                omega_set(omega, u, v, result);
            }
        }
    }

    free(heap_buf);
}

/* ============================================================================
 * HOLONOMY COMPUTATION (Section VI.2)
 * ============================================================================ */

/*
 * 3-bubble (associator holonomy) of the triple (u, v, w):
 *
 *   Omega(u,v,w) = omega(u,v) * omega(u^v, w) * omega(v,w) * omega(u, v^w)
 */
xorc_sign_t bubble3(xorc_omega_t *omega, xorc_addr_t u, xorc_addr_t v, xorc_addr_t w)
{
    const xorc_addr_t uv = u ^ v;
    const xorc_addr_t vw = v ^ w;

    return omega_get(omega, u, v)
         * omega_get(omega, uv, w)
         * omega_get(omega, v, w)
         * omega_get(omega, u, vw);
}

/*
 * 2-bubble (commutator holonomy) of the pair (u, v):
 *
 *   B(u,v) = omega(u,v) * omega(v,u)
 */
xorc_sign_t bubble2(xorc_omega_t *omega, xorc_addr_t u, xorc_addr_t v)
{
    const xorc_sign_t forward = omega_get(omega, u, v);
    const xorc_sign_t backward = omega_get(omega, v, u);

    return forward * backward;
}

/*
 * Build a holonomy summary for a kernel.
 *
 * Counts the pairs (u, v) whose 2-bubble is -1 (commutativity failures)
 * and the triples (u, v, w) whose 3-bubble is -1 (associativity
 * failures), over all addresses below omega->size.
 *
 * Returns a newly allocated summary (free it with holo_free()), or NULL
 * if the allocation fails.
 */
xorc_holo_t* holo_compute(xorc_omega_t *omega)
{
    xorc_holo_t *summary = (xorc_holo_t*)malloc(sizeof(xorc_holo_t));
    if (summary == NULL) {
        return NULL;
    }

    const uint32_t size = omega->size;

    summary->dim = omega->dim;
    summary->size = omega->size;
    summary->assoc_failures = 0;
    summary->comm_failures = 0;

    /* One sweep over (u, v) handles the pair check and, nested, the triples. */
    for (uint32_t u = 0; u < size; u++) {
        for (uint32_t v = 0; v < size; v++) {
            if (bubble2(omega, u, v) == -1) {
                summary->comm_failures++;
            }

            for (uint32_t w = 0; w < size; w++) {
                if (bubble3(omega, u, v, w) == -1) {
                    summary->assoc_failures++;
                }
            }
        }
    }

    return summary;
}

/*
 * Release a holonomy summary returned by holo_compute().
 * The summary is a single allocation, so one free() suffices; passing
 * NULL is harmless because free(NULL) does nothing.
 */
void holo_free(xorc_holo_t *holo)
{
    free(holo);
}

/* ============================================================================
 * VERIFICATION PREDICATES (Section VI.3)
 * ============================================================================ */

/*
 * True when the summary recorded no associativity (3-bubble) failures.
 */
bool holo_is_associative(xorc_holo_t *holo)
{
    if (holo->assoc_failures != 0) {
        return false;
    }
    return true;
}

/*
 * True when the summary recorded no commutativity (2-bubble) failures.
 */
bool holo_is_commutative(xorc_holo_t *holo)
{
    if (holo->comm_failures != 0) {
        return false;
    }
    return true;
}

/*
 * Count the central elements of the kernel.
 *
 *   Center = { z : B(z,v) = +1 for all v }
 *
 * Note that the return value is the number of such elements.
 */
uint32_t holo_center_dim(xorc_omega_t *omega)
{
    const uint32_t size = omega->size;
    uint32_t central_count = 0;

    for (uint32_t z = 0; z < size; z++) {
        /* Advance v until the first anticommuting partner, if any. */
        uint32_t v = 0;
        while (v < size && bubble2(omega, z, v) != -1) {
            v++;
        }

        /* Reaching the end means z commutes with everything. */
        if (v == size) {
            central_count++;
        }
    }

    return central_count;
}

/*
 * Count the radical elements of the kernel.
 *
 *   Radical = { r : omega(r,v) = omega(v,r) for all v
 *                   AND bubble3(r,u,v) = +1 for all u,v }
 *
 * Note that the return value is the number of such elements.
 */
uint32_t holo_radical_dim(xorc_omega_t *omega)
{
    const uint32_t size = omega->size;
    uint32_t members = 0;

    for (uint32_t r = 0; r < size; r++) {
        bool member = true;

        /* Symmetry test; stops at the first mismatch. */
        for (uint32_t v = 0; member && v < size; v++) {
            member = (omega_get(omega, r, v) == omega_get(omega, v, r));
        }

        /* Associativity test; skipped once r has been ruled out. */
        for (uint32_t u = 0; member && u < size; u++) {
            for (uint32_t v = 0; member && v < size; v++) {
                member = (bubble3(omega, r, u, v) != -1);
            }
        }

        if (member) {
            members++;
        }
    }

    return members;
}

/* ============================================================================
 * ALGEBRA ELEMENT OPERATIONS (Section VI.5)
 * ============================================================================ */

/*
 * Product of two basis elements:
 *   delta_u * delta_v = omega(u,v) . delta_{u XOR v}
 * The sign goes into .coeff, the resulting basis address into .basis.
 */
xorc_elem_t elem_mul(xorc_omega_t *omega, xorc_addr_t u, xorc_addr_t v)
{
    xorc_elem_t product;

    product.basis = u ^ v;
    product.coeff = omega_get(omega, u, v);

    return product;
}

/*
 * Product of two signed basis elements:
 *   (a.coeff . delta_{a.basis}) * (b.coeff . delta_{b.basis})
 *     = a.coeff . b.coeff . omega(a.basis, b.basis) . delta_{a.basis XOR b.basis}
 */
xorc_elem_t elem_mul2(xorc_omega_t *omega, xorc_elem_t a, xorc_elem_t b)
{
    const xorc_sign_t twist = omega_get(omega, a.basis, b.basis);
    xorc_elem_t product;

    product.basis = a.basis ^ b.basis;
    product.coeff = a.coeff * b.coeff * twist;

    return product;
}

/*
 * Associator sign: how (a*b)*c compares with a*(b*c).
 *
 *   (a*b)*c = omega(a,b) . omega(a^b, c) . delta_{a^b^c}
 *   a*(b*c) = omega(b,c) . omega(a, b^c) . delta_{a^b^c}
 *
 * Both land on the same basis element, so only the signs can differ.
 * Dividing by a sign is the same as multiplying by it (+1 and -1 are
 * their own inverses), so the ratio is
 *   omega(a,b) . omega(a^b, c) . omega(b,c) . omega(a, b^c)
 * which is exactly bubble3(omega, a, b, c).
 */
xorc_sign_t elem_assoc(xorc_omega_t *omega, xorc_addr_t a, xorc_addr_t b, xorc_addr_t c)
{
    const xorc_sign_t associator = bubble3(omega, a, b, c);
    return associator;
}

/*
 * Commutator sign: how a*b compares with b*a.
 *
 *   a*b = omega(a,b) . delta_{a^b}
 *   b*a = omega(b,a) . delta_{b^a} = omega(b,a) . delta_{a^b}
 *
 * Both land on the same basis element, so only the signs can differ;
 * their ratio omega(a,b) / omega(b,a) equals omega(a,b) . omega(b,a),
 * which is exactly bubble2(omega, a, b).
 */
xorc_sign_t elem_comm(xorc_omega_t *omega, xorc_addr_t a, xorc_addr_t b)
{
    const xorc_sign_t commutator = bubble2(omega, a, b);
    return commutator;
}

/*
 * List the central elements of the kernel.
 *
 *   Center = { z : B(z,v) = +1 for all v }
 *
 * Central addresses are written to center_list in increasing order and
 * their number is returned.  No capacity is passed in, so center_list
 * must have room for up to omega->size entries.
 */
uint32_t holo_center_list(xorc_omega_t *omega, xorc_addr_t *center_list)
{
    const uint32_t size = omega->size;
    uint32_t found = 0;

    for (uint32_t z = 0; z < size; z++) {
        bool commutes_with_all = true;

        /* Stop scanning as soon as one partner anticommutes. */
        for (uint32_t v = 0; v < size && commutes_with_all; v++) {
            commutes_with_all = (bubble2(omega, z, v) != -1);
        }

        if (!commutes_with_all) {
            continue;
        }

        center_list[found] = z;
        found++;
    }

    return found;
}

/*
 * List the radical elements of the kernel.
 *
 *   Radical = { r : omega(r,v) = omega(v,r) for all v
 *                   AND bubble3(r,u,v) = +1 for all u,v }
 *
 * Radical addresses are written to radical_list in increasing order and
 * their number is returned.  No capacity is passed in, so radical_list
 * must have room for up to omega->size entries.
 */
uint32_t holo_radical_list(xorc_omega_t *omega, xorc_addr_t *radical_list)
{
    const uint32_t size = omega->size;
    uint32_t found = 0;

    for (uint32_t r = 0; r < size; r++) {
        bool member = true;

        /* Symmetry test; stops at the first mismatch. */
        for (uint32_t v = 0; member && v < size; v++) {
            member = (omega_get(omega, r, v) == omega_get(omega, v, r));
        }

        /* Associativity test; skipped once r has been ruled out. */
        for (uint32_t u = 0; member && u < size; u++) {
            for (uint32_t v = 0; member && v < size; v++) {
                member = (bubble3(omega, r, u, v) != -1);
            }
        }

        if (!member) {
            continue;
        }

        radical_list[found] = r;
        found++;
    }

    return found;
}

/* ============================================================================
 * SHA-256 IMPLEMENTATION (Section VI.4)
 * Minimal, self-contained implementation. No external dependencies.
 * ============================================================================ */

/*
 * SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2): the
 * first 32 bits of the fractional parts of the cube roots of the first
 * 64 prime numbers.  sha256_transform() adds sha256_k[i] in round i.
 */
static const uint32_t sha256_k[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

/*
 * Rotate a 32-bit word right by n bit positions.
 *
 * The count is reduced modulo 32 before use.  Without that, n == 0 would
 * evaluate x << 32, and shifting a 32-bit value by its full width is
 * undefined behaviour in C.  With the reduction, a count of 0 (or any
 * multiple of 32) returns x unchanged.
 */
static inline uint32_t rotr(uint32_t x, int n)
{
    const unsigned r = (unsigned)n & 31u;

    /* (32 - r) & 31 maps r == 0 to a left shift of 0 rather than 32. */
    return (x >> r) | (x << ((32u - r) & 31u));
}

/*
 * SHA-256 logical functions on 32-bit words (FIPS 180-4, section 4.1.2):
 *   CH   - "choose": each bit of x picks the matching bit of y (x bit 1)
 *          or of z (x bit 0)
 *   MAJ  - bitwise majority of x, y and z
 *   EP0  - upper-case Sigma0, EP1 - upper-case Sigma1; both are used in
 *          the round function of sha256_transform()
 *   SIG0 - lower-case sigma0, SIG1 - lower-case sigma1; both are used to
 *          expand the message schedule in sha256_transform()
 * These are macros and mention their argument more than once, so pass
 * only expressions without side effects.
 */
#define CH(x, y, z)  (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define EP0(x)       (rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22))
#define EP1(x)       (rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25))
#define SIG0(x)      (rotr(x, 7) ^ rotr(x, 18) ^ ((x) >> 3))
#define SIG1(x)      (rotr(x, 17) ^ rotr(x, 19) ^ ((x) >> 10))

/*
 * SHA-256 streaming state, used by sha256_init(), sha256_update(),
 * sha256_transform() and sha256_final().
 */
typedef struct {
    /* Running hash value, eight 32-bit words. */
    uint32_t state[8];
    /*
     * Message length in bits.  sha256_update() adds 512 per compressed
     * block; sha256_final() adds the bytes still waiting in buffer.
     */
    uint64_t bitcount;
    /* Bytes of the current, not yet compressed, 64-byte block. */
    uint8_t buffer[64];
    /* Number of valid bytes at the start of buffer. */
    uint32_t buflen;
} sha256_ctx_t;

/*
 * Compress one 64-byte block into the running hash.
 *   ctx  - hashing context; only ctx->state is read and updated here
 *   data - the 64 bytes of the block, read as big-endian 32-bit words
 */
static void sha256_transform(sha256_ctx_t *ctx, const uint8_t *data)
{
    uint32_t sched[64];
    uint32_t v[8];
    int t;
    int k;

    /* The first 16 schedule words are the block itself, big-endian. */
    for (t = 0; t < 16; t++) {
        const uint8_t *word = data + 4 * t;
        sched[t] = ((uint32_t)word[0] << 24)
                 | ((uint32_t)word[1] << 16)
                 | ((uint32_t)word[2] << 8)
                 | ((uint32_t)word[3]);
    }

    /* The remaining 48 words are expanded from earlier ones. */
    for (t = 16; t < 64; t++) {
        sched[t] = SIG1(sched[t - 2]) + sched[t - 7]
                 + SIG0(sched[t - 15]) + sched[t - 16];
    }

    /* v[0] .. v[7] play the roles of the working variables a .. h. */
    for (k = 0; k < 8; k++) {
        v[k] = ctx->state[k];
    }

    /* 64 rounds */
    for (t = 0; t < 64; t++) {
        const uint32_t t1 = v[7] + EP1(v[4]) + CH(v[4], v[5], v[6])
                          + sha256_k[t] + sched[t];
        const uint32_t t2 = EP0(v[0]) + MAJ(v[0], v[1], v[2]);

        /* Move every variable down one slot: h=g, g=f, ..., b=a. */
        for (k = 7; k > 0; k--) {
            v[k] = v[k - 1];
        }
        v[4] += t1;        /* e = d + t1 (slot 4 now holds the old d) */
        v[0] = t1 + t2;    /* a = t1 + t2 */
    }

    /* Fold the block's result into the running hash. */
    for (k = 0; k < 8; k++) {
        ctx->state[k] += v[k];
    }
}

/*
 * Prepare a context for a new message: load the SHA-256 initial hash
 * value and clear the length counter and the buffer fill level.
 */
static void sha256_init(sha256_ctx_t *ctx)
{
    static const uint32_t initial_hash[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };

    for (int k = 0; k < 8; k++) {
        ctx->state[k] = initial_hash[k];
    }

    ctx->buflen = 0;
    ctx->bitcount = 0;
}

/*
 * Feed len bytes of message into the context.
 * Bytes collect in ctx->buffer; each time it holds a full 64-byte block
 * the block is compressed, 512 is added to the bit count and the buffer
 * starts over.
 */
static void sha256_update(sha256_ctx_t *ctx, const uint8_t *data, size_t len)
{
    const uint8_t *src = data;
    size_t remaining = len;

    while (remaining > 0) {
        /* Copy as much as fits into what is left of the current block. */
        const size_t room = sizeof ctx->buffer - ctx->buflen;
        const size_t take = (remaining < room) ? remaining : room;

        memcpy(ctx->buffer + ctx->buflen, src, take);
        ctx->buflen += (uint32_t)take;
        src += take;
        remaining -= take;

        if (ctx->buflen == 64) {
            sha256_transform(ctx, ctx->buffer);
            ctx->bitcount += 512;
            ctx->buflen = 0;
        }
    }
}

/*
 * Finish the message and write the 32-byte digest to hash.
 * Applies the SHA-256 padding (a 0x80 byte, zeros, then the 64-bit
 * big-endian message length in bits), compresses the final block(s) and
 * serialises the eight state words big-endian.
 */
static void sha256_final(sha256_ctx_t *ctx, uint8_t *hash)
{
    /* Count the bytes still waiting in the buffer. */
    ctx->bitcount += ctx->buflen * 8;

    /* Marker byte directly after the message. */
    ctx->buffer[ctx->buflen++] = 0x80;

    /*
     * If the 8-byte length no longer fits in this block, zero-fill and
     * compress it, then continue with a fresh block.
     */
    if (ctx->buflen > 56) {
        memset(ctx->buffer + ctx->buflen, 0x00, 64 - ctx->buflen);
        sha256_transform(ctx, ctx->buffer);
        ctx->buflen = 0;
    }

    /* Zero-fill up to the length field. */
    memset(ctx->buffer + ctx->buflen, 0x00, 56 - ctx->buflen);
    ctx->buflen = 56;

    /* Message length in bits, most significant byte first. */
    for (unsigned k = 0; k < 8; k++) {
        ctx->buffer[56 + k] = (uint8_t)(ctx->bitcount >> (56 - 8 * k));
    }

    sha256_transform(ctx, ctx->buffer);

    /* Digest: each state word, most significant byte first. */
    for (unsigned w = 0; w < 8; w++) {
        const uint32_t word = ctx->state[w];
        uint8_t *out = hash + 4 * w;

        out[0] = (uint8_t)(word >> 24);
        out[1] = (uint8_t)(word >> 16);
        out[2] = (uint8_t)(word >> 8);
        out[3] = (uint8_t)(word);
    }
}

/*
 * One-shot SHA-256.
 *   data - message bytes
 *   len  - number of bytes at data
 *   hash - receives the 32-byte digest
 */
void sha256(const uint8_t *data, size_t len, uint8_t *hash)
{
    sha256_ctx_t hasher;

    sha256_init(&hasher);
    sha256_update(&hasher, data, len);
    sha256_final(&hasher, hash);
}

/*
 * Compute the seal of a byte buffer, which is its SHA-256 digest.
 *   data - bytes to seal
 *   len  - number of bytes at data
 *   seal - receives the 32-byte digest
 */
void seal_compute(const uint8_t *data, size_t len, uint8_t *seal)
{
    sha256(data, len, seal);
}

/*
 * Check a buffer against a previously computed seal.
 * Recomputes the SHA-256 digest of data and returns true only when all
 * 32 bytes equal those at seal.
 */
bool seal_verify(const uint8_t *data, size_t len, const uint8_t *seal)
{
    uint8_t expected[32];

    sha256(data, len, expected);

    if (memcmp(expected, seal, 32) != 0) {
        return false;
    }
    return true;
}
