/*
 * XORCE Lexer - Implementation
 *
 * Tokenizes XORCE source language per section VII.1.
 * Zero external dependencies (libc only).
 */

#include "core.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>

/* ============================================================================
 * TOKEN TYPES
 * ============================================================================ */

/*
 * Every kind of token the lexer can produce.
 *
 * The enumerators keep their declaration order, so their numeric values are
 * stable: TOK_EOF is 0 and each following entry is one greater.
 */
typedef enum {
    /* End of input (also returned after an unexpected character). */
    TOK_EOF = 0,

    /* Tokens that carry a payload in token_t.val. */
    TOK_IDENT,
    TOK_NUMBER,
    TOK_HEX,
    TOK_STRING,

    /* Reserved words; see the keyword table. */
    TOK_KERNEL,
    TOK_VERIFY,
    TOK_EXPORT,
    TOK_AS,
    TOK_FLAT,
    TOK_PAULI,
    TOK_CLIFFORD,
    TOK_CAYLEY,
    TOK_CUSTOM,
    TOK_ASSOCIATIVE,
    TOK_COMMUTATIVE,
    TOK_CENTER,
    TOK_RADICAL,

    /* Single-character punctuation and operators. */
    TOK_LPAREN,
    TOK_RPAREN,
    TOK_LBRACE,
    TOK_RBRACE,
    TOK_COLON,
    TOK_SEMI,
    TOK_COMMA,
    TOK_EQ,
    TOK_PLUS,
    TOK_MINUS
} token_kind_t;

/*
 * A single lexed token.
 *
 * Which member of `val` is meaningful depends on `kind`; for kinds that
 * carry no payload the union is unused. Tokens are passed around by value,
 * so copies of a TOK_IDENT/TOK_STRING token share the same `val.str`
 * pointer.
 */
typedef struct {
    token_kind_t kind;
    union {
        char *str;      /* For TOK_IDENT, TOK_STRING: malloc'd, NUL-terminated
                           text; release with token_free() */
        uint32_t num;   /* For TOK_NUMBER, TOK_HEX: the literal's value */
    } val;
    int line;           /* Line of the token's first character (1-based) */
    int col;            /* Column of the token's first character (1-based) */
} token_t;

/*
 * Lexer state: a cursor over a NUL-terminated source buffer plus a
 * one-token lookahead slot used by lex_peek()/lex_next().
 *
 * Set up with lex_init(). The source text is referenced, not copied.
 */
typedef struct {
    const char *src;    /* Source text (pointer stored as given to lex_init) */
    int pos;            /* Current position: index into src of the next unread char */
    int len;            /* Source length, strlen(src) taken at lex_init time */
    int line;           /* Current line (1-based) */
    int col;            /* Current column (1-based) */
    token_t current;    /* Current peeked token; meaningful only if has_peeked */
    int has_peeked;     /* Whether current is valid (set by lex_peek, cleared by lex_next) */
} lexer_t;

/* ============================================================================
 * KEYWORD TABLE
 * ============================================================================ */

/*
 * One row of the keyword table: the exact spelling of a reserved word and
 * the token kind it lexes to. A row with text == NULL terminates the table.
 */
typedef struct {
    const char *text;   /* Keyword spelling, compared with strcmp (case-sensitive) */
    token_kind_t kind;  /* Token kind produced for that spelling */
} keyword_entry_t;

static const keyword_entry_t keywords[] = {
    { "kernel",      TOK_KERNEL },
    { "verify",      TOK_VERIFY },
    { "export",      TOK_EXPORT },
    { "as",          TOK_AS },
    { "flat",        TOK_FLAT },
    { "pauli",       TOK_PAULI },
    { "clifford",    TOK_CLIFFORD },
    { "cayley",      TOK_CAYLEY },
    { "custom",      TOK_CUSTOM },
    { "associative", TOK_ASSOCIATIVE },
    { "commutative", TOK_COMMUTATIVE },
    { "center",      TOK_CENTER },
    { "radical",     TOK_RADICAL },
    { NULL,          TOK_EOF }
};

/* ============================================================================
 * LEXER HELPERS
 * ============================================================================ */

/* Returns 1 once the cursor has consumed the whole source, 0 otherwise. */
static int lex_eof(lexer_t *l)
{
    return !(l->pos < l->len);
}

/*
 * Look at the character under the cursor without consuming it.
 * Returns '\0' when the cursor is at end of input.
 */
static char lex_char(lexer_t *l)
{
    return lex_eof(l) ? '\0' : l->src[l->pos];
}

/*
 * Consume one character and return it, keeping line/col in step:
 * a newline moves to column 1 of the next line, anything else moves one
 * column right. At end of input nothing is consumed and '\0' is returned.
 */
static char lex_advance(lexer_t *l)
{
    if (lex_eof(l)) {
        return '\0';
    }

    const char consumed = l->src[l->pos];
    l->pos += 1;

    if (consumed != '\n') {
        l->col += 1;
        return consumed;
    }

    l->line += 1;
    l->col = 1;
    return consumed;
}

/*
 * Move the cursor past any run of blanks (space, tab, CR, LF) and
 * `//` line comments. A comment extends up to, but not including, the
 * newline that ends it; that newline is then skipped as ordinary
 * whitespace. Stops at the first character that starts a token, or at end
 * of input.
 */
static void lex_skip_whitespace(lexer_t *l)
{
    for (;;) {
        if (lex_eof(l)) {
            return;
        }

        switch (lex_char(l)) {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                lex_advance(l);
                continue;

            case '/':
                /* A lone '/' is not a comment: leave it for the scanner. */
                if (l->pos + 1 >= l->len || l->src[l->pos + 1] != '/') {
                    return;
                }
                /* The cursor is on '/', so at least one char is consumed. */
                do {
                    lex_advance(l);
                } while (!lex_eof(l) && lex_char(l) != '\n');
                continue;

            default:
                return;
        }
    }
}

/* Returns 1 if c may begin an identifier (a letter or '_'), else 0. */
static int is_ident_start(char c)
{
    if (c == '_') {
        return 1;
    }
    /* Cast first: passing a negative char to <ctype.h> is undefined. */
    return isalpha((unsigned char)c) != 0;
}

/* Returns 1 if c may continue an identifier (letter, digit or '_'), else 0. */
static int is_ident_char(char c)
{
    if (c == '_') {
        return 1;
    }
    /* Cast first: passing a negative char to <ctype.h> is undefined. */
    return isalnum((unsigned char)c) != 0;
}

/*
 * Nonzero if c is a hexadecimal digit (0-9, a-f, A-F), zero otherwise.
 * The result is whatever isxdigit() returns, so test it for truth rather
 * than comparing against 1.
 */
static int is_hex_digit(char c)
{
    /* Cast first: passing a negative char to <ctype.h> is undefined. */
    const unsigned char uc = (unsigned char)c;
    return isxdigit(uc);
}

static token_kind_t lookup_keyword(const char *text)
{
    for (int i = 0; keywords[i].text != NULL; i++) {
        if (strcmp(text, keywords[i].text) == 0) {
            return keywords[i].kind;
        }
    }
    return TOK_IDENT;
}

/* ============================================================================
 * LEXER API
 * ============================================================================ */

/*
 * Prepare a lexer to tokenize `src`.
 *
 * l:   lexer state to initialise; every field is overwritten.
 * src: NUL-terminated source text. The pointer is stored, not copied, so
 *      the text must remain valid while the lexer is in use. A NULL `src`
 *      is treated as empty input (the first token is TOK_EOF).
 *
 * The cursor starts at line 1, column 1 with no lookahead token cached.
 * The length is kept in an int, so sources longer than INT_MAX bytes are
 * not supported.
 */
void lex_init(lexer_t *l, const char *src)
{
    /* strlen(NULL) is undefined behaviour; lex nothing instead. */
    if (src == NULL) {
        src = "";
    }

    l->src = src;
    l->pos = 0;
    l->len = (int)strlen(src);
    l->line = 1;
    l->col = 1;
    l->has_peeked = 0;

    /* Fully initialise the lookahead slot so it never holds garbage. */
    l->current.kind = TOK_EOF;
    l->current.val.str = NULL;
    l->current.line = 1;
    l->current.col = 1;
}

static token_t lex_scan(lexer_t *l)
{
    token_t tok;
    tok.val.str = NULL;

    lex_skip_whitespace(l);

    tok.line = l->line;
    tok.col = l->col;

    if (lex_eof(l)) {
        tok.kind = TOK_EOF;
        return tok;
    }

    char c = lex_char(l);

    /* Single character tokens */
    switch (c) {
        case '(':
            lex_advance(l);
            tok.kind = TOK_LPAREN;
            return tok;
        case ')':
            lex_advance(l);
            tok.kind = TOK_RPAREN;
            return tok;
        case '{':
            lex_advance(l);
            tok.kind = TOK_LBRACE;
            return tok;
        case '}':
            lex_advance(l);
            tok.kind = TOK_RBRACE;
            return tok;
        case ':':
            lex_advance(l);
            tok.kind = TOK_COLON;
            return tok;
        case ';':
            lex_advance(l);
            tok.kind = TOK_SEMI;
            return tok;
        case ',':
            lex_advance(l);
            tok.kind = TOK_COMMA;
            return tok;
        case '=':
            lex_advance(l);
            tok.kind = TOK_EQ;
            return tok;
        case '+':
            lex_advance(l);
            tok.kind = TOK_PLUS;
            return tok;
        case '-':
            lex_advance(l);
            tok.kind = TOK_MINUS;
            return tok;
    }

    /* String literal */
    if (c == '"') {
        lex_advance(l);
        int start = l->pos;
        while (!lex_eof(l) && lex_char(l) != '"' && lex_char(l) != '\n') {
            lex_advance(l);
        }
        int end = l->pos;
        if (lex_char(l) == '"') {
            lex_advance(l);
        } else {
            fprintf(stderr, "error:%d:%d: unterminated string\n", tok.line, tok.col);
        }
        int len = end - start;
        tok.val.str = (char*)malloc(len + 1);
        if (tok.val.str) {
            memcpy(tok.val.str, l->src + start, len);
            tok.val.str[len] = '\0';
        }
        tok.kind = TOK_STRING;
        return tok;
    }

    /* Identifier or keyword */
    if (is_ident_start(c)) {
        int start = l->pos;
        while (!lex_eof(l) && is_ident_char(lex_char(l))) {
            lex_advance(l);
        }
        int end = l->pos;
        int len = end - start;
        char *text = (char*)malloc(len + 1);
        if (text) {
            memcpy(text, l->src + start, len);
            text[len] = '\0';
        }
        tok.kind = lookup_keyword(text);
        if (tok.kind == TOK_IDENT) {
            tok.val.str = text;
        } else {
            free(text);
            tok.val.str = NULL;
        }
        return tok;
    }

    /* Number (decimal or hex) */
    if (isdigit((unsigned char)c)) {
        /* Check for hex: 0x */
        if (c == '0' && l->pos + 1 < l->len &&
            (l->src[l->pos + 1] == 'x' || l->src[l->pos + 1] == 'X')) {
            lex_advance(l); /* Skip '0' */
            lex_advance(l); /* Skip 'x' */
            uint32_t val = 0;
            while (!lex_eof(l) && is_hex_digit(lex_char(l))) {
                char h = lex_char(l);
                val *= 16;
                if (h >= '0' && h <= '9') {
                    val += h - '0';
                } else if (h >= 'a' && h <= 'f') {
                    val += 10 + (h - 'a');
                } else if (h >= 'A' && h <= 'F') {
                    val += 10 + (h - 'A');
                }
                lex_advance(l);
            }
            tok.kind = TOK_HEX;
            tok.val.num = val;
            return tok;
        }
        /* Decimal number */
        uint32_t val = 0;
        while (!lex_eof(l) && isdigit((unsigned char)lex_char(l))) {
            val = val * 10 + (lex_char(l) - '0');
            lex_advance(l);
        }
        tok.kind = TOK_NUMBER;
        tok.val.num = val;
        return tok;
    }

    /* Unknown character */
    fprintf(stderr, "error:%d:%d: unexpected character '%c'\n", l->line, l->col, c);
    lex_advance(l);
    tok.kind = TOK_EOF;
    return tok;
}

/*
 * Consume and return the next token.
 *
 * If lex_peek() has already scanned a token, that cached token is handed
 * out and the cache is marked empty; otherwise a fresh token is scanned.
 * The token is returned by value: one obtained from lex_peek() and then
 * from lex_next() is the same token, and both copies share one val.str
 * pointer, so it must be passed to token_free() only once.
 */
token_t lex_next(lexer_t *l)
{
    if (!l->has_peeked) {
        return lex_scan(l);
    }

    l->has_peeked = 0;
    return l->current;
}

/*
 * Return the next token without consuming it.
 *
 * The first call scans a token and caches it in the lexer; further calls
 * return the same cached token until lex_next() consumes it. The result is
 * a by-value copy whose val.str (for TOK_IDENT/TOK_STRING) points at the
 * same heap string as the cached token, so do not free a peeked token that
 * will later be taken with lex_next().
 */
token_t lex_peek(lexer_t *l)
{
    if (l->has_peeked) {
        return l->current;
    }

    l->current = lex_scan(l);
    l->has_peeked = 1;
    return l->current;
}

/* ============================================================================
 * TOKEN NAME FOR DEBUGGING
 * ============================================================================ */

/*
 * Human-readable name of a token kind, for diagnostics and debugging.
 *
 * Payload-carrying kinds and end of input get an upper-case class name
 * ("IDENT", "EOF", ...); keywords and punctuation are shown as their
 * source spelling. Any value that is not a known kind yields "?".
 * The returned string is static and must not be freed.
 */
const char* token_name(token_kind_t kind)
{
    static const char *const names[] = {
        [TOK_EOF]         = "EOF",
        [TOK_IDENT]       = "IDENT",
        [TOK_NUMBER]      = "NUMBER",
        [TOK_HEX]         = "HEX",
        [TOK_STRING]      = "STRING",
        [TOK_KERNEL]      = "kernel",
        [TOK_VERIFY]      = "verify",
        [TOK_EXPORT]      = "export",
        [TOK_AS]          = "as",
        [TOK_FLAT]        = "flat",
        [TOK_PAULI]       = "pauli",
        [TOK_CLIFFORD]    = "clifford",
        [TOK_CAYLEY]      = "cayley",
        [TOK_CUSTOM]      = "custom",
        [TOK_ASSOCIATIVE] = "associative",
        [TOK_COMMUTATIVE] = "commutative",
        [TOK_CENTER]      = "center",
        [TOK_RADICAL]     = "radical",
        [TOK_LPAREN]      = "(",
        [TOK_RPAREN]      = ")",
        [TOK_LBRACE]      = "{",
        [TOK_RBRACE]      = "}",
        [TOK_COLON]       = ":",
        [TOK_SEMI]        = ";",
        [TOK_COMMA]       = ",",
        [TOK_EQ]          = "=",
        [TOK_PLUS]        = "+",
        [TOK_MINUS]       = "-"
    };
    const unsigned count = (unsigned)(sizeof names / sizeof names[0]);

    /* The unsigned cast makes negative values fail the range check too. */
    const unsigned index = (unsigned)kind;
    if (index >= count || names[index] == NULL) {
        return "?";
    }
    return names[index];
}

/*
 * Release the heap string owned by a token.
 *
 * Only TOK_IDENT and TOK_STRING carry one; for every other kind this is a
 * no-op. The pointer is cleared afterwards, so calling this again on the
 * same token object is harmless. Other by-value copies of the token still
 * hold the old pointer and must not be freed or read afterwards.
 */
void token_free(token_t *tok)
{
    const int owns_text = (tok->kind == TOK_IDENT) || (tok->kind == TOK_STRING);
    if (!owns_text) {
        return;
    }

    free(tok->val.str);
    tok->val.str = NULL;
}
