/*
 * NASM-compatible re2c lexer
 *
 *  Copyright (C) 2001-2007  Peter Johnson
 *
 *  Portions based on re2c's example code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <util.h>
RCSID("$Id: nasm-token.re,v 1.1.1.1 2012/03/29 17:20:59 uid42307 Exp $");

#include <libyasm.h>

#include "modules/parsers/nasm/nasm-parser.h"


#define YYCURSOR        cursor
#define YYLIMIT         (s->lim)
#define YYMARKER        (s->ptr)
#define YYFILL(n)       {}

#define RETURN(i)       {s->cur = cursor; parser_nasm->tokch = s->tok[0]; \
                         return i;}

#define SCANINIT()      {s->tok = cursor;}

#define TOK             ((char *)s->tok)
#define TOKLEN          (size_t)(cursor-s->tok)


/* starting size of string buffer */
#define STRBUF_ALLOC_SIZE       128

/* string buffer used when parsing strings/character constants */
static YYCTYPE *strbuf = NULL;

/* length of strbuf (including terminating NULL character) */
static size_t strbuf_size = 0;

static int linechg_numcount;

/*!re2c
  any = [\001-\377];
  digit = [0-9];
  iletter = [a-zA-Z];
  bindigit = [01_];
  octdigit = [0-7_];
  hexdigit = [0-9a-fA-F_];
  ws = [ \t\r];
  quot = ["'];
*/

static int
handle_dot_label(YYSTYPE *lvalp, char *tok, size_t toklen, size_t zeropos,
                 yasm_parser_nasm *parser_nasm)
{
    /* check for special non-local labels like ..start */
    if (tok[zeropos+1] == '.') {
        lvalp->str_val = yasm__xstrndup(tok+zeropos, toklen-zeropos);
        /* check for special non-local ..@label */
        if (lvalp->str_val[zeropos+2] == '@')
            return NONLOCAL_ID;
        return SPECIAL_ID;
    }

    if (!parser_nasm->locallabel_base) {
        lvalp->str_val = yasm__xstrndup(tok+zeropos, toklen-zeropos);
        yasm_warn_set(YASM_WARN_GENERAL,
                      N_("no non-local label before `%s'"),
                      lvalp->str_val);
    } else {
        size_t len = toklen - zeropos + parser_nasm->locallabel_base_len;
        lvalp->str_val = yasm_xmalloc(len + 1);
        strcpy(lvalp->str_val, parser_nasm->locallabel_base);
        strncat(lvalp->str_val, tok+zeropos, toklen-zeropos);
        lvalp->str_val[len] = '\0';
    }

    return LOCAL_ID;
}

int
nasm_parser_lex(YYSTYPE *lvalp, yasm_parser_nasm *parser_nasm)
{
    yasm_scanner *s = &parser_nasm->s;
    YYCTYPE *cursor = s->cur;
    YYCTYPE endch;
    size_t count;
    YYCTYPE savech;

    /* Handle one token of lookahead */
    if (parser_nasm->peek_token != NONE) {
        int tok = parser_nasm->peek_token;
        *lvalp = parser_nasm->peek_tokval;  /* structure copy */
        parser_nasm->tokch = parser_nasm->peek_tokch;
        parser_nasm->peek_token = NONE;
        return tok;
    }

    /* Catch EOL (EOF from the scanner perspective) */
    if (s->eof && cursor == s->eof)
        return 0;

    /* Jump to proper "exclusive" states */
    switch (parser_nasm->state) {
        case DIRECTIVE:
            goto directive;
        case SECTION_DIRECTIVE:
            goto section_directive;
        case DIRECTIVE2:
            goto directive2;
        case LINECHG:
            goto linechg;
        case LINECHG2:
            goto linechg2;
        default:
            break;
    }

scan:
    SCANINIT();

    /*!re2c
        /* standard decimal integer */
        digit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->intn = yasm_intnum_create_dec(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }
        /* 10010011b - binary number */

        [01] bindigit* 'b' {
            s->tok[TOKLEN-1] = '\0'; /* strip off 'b' */
            lvalp->intn = yasm_intnum_create_bin(TOK);
            RETURN(INTNUM);
        }

        /* 777q or 777o - octal number */
        [0-7] octdigit* [qQoO] {
            s->tok[TOKLEN-1] = '\0'; /* strip off 'q' or 'o' */
            lvalp->intn = yasm_intnum_create_oct(TOK);
            RETURN(INTNUM);
        }

        /* 0AAh form of hexidecimal number */
        digit hexdigit* 'h' {
            s->tok[TOKLEN-1] = '\0'; /* strip off 'h' */
            lvalp->intn = yasm_intnum_create_hex(TOK);
            RETURN(INTNUM);
        }

        /* $0AA and 0xAA forms of hexidecimal number */
        (("$" digit) | '0x') hexdigit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            if (s->tok[1] == 'x' || s->tok[1] == 'X')
                /* skip 0 and x */
                lvalp->intn = yasm_intnum_create_hex(TOK+2);
            else
                /* don't skip 0 */
                lvalp->intn = yasm_intnum_create_hex(TOK+1);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        /* floating point value */
        digit+ "." digit* ('e' [-+]? digit+)? {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->flt = yasm_floatnum_create(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(FLTNUM);
        }

        /* string/character constant values */
        quot {
            endch = s->tok[0];
            goto stringconst;
        }

        /* %line linenum+lineinc filename */
        "%line" {
            parser_nasm->state = LINECHG;
            linechg_numcount = 0;
            RETURN(LINE);
        }

        /* size specifiers */
        'byte'          { lvalp->int_info = 8; RETURN(SIZE_OVERRIDE); }
        'hword'         {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)/2;
            RETURN(SIZE_OVERRIDE);
        }
        'word'          {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch);
            RETURN(SIZE_OVERRIDE);
        }
        'dword' | 'long'        {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*2;
            RETURN(SIZE_OVERRIDE);
        }
        'qword'         {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*4;
            RETURN(SIZE_OVERRIDE);
        }
        'tword'         { lvalp->int_info = 80; RETURN(SIZE_OVERRIDE); }
        'dqword'        {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
            RETURN(SIZE_OVERRIDE);
        }
        'oword'        {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
            RETURN(SIZE_OVERRIDE);
        }
        'yword'        {
            lvalp->int_info = 256;
            RETURN(SIZE_OVERRIDE);
        }

        /* pseudo-instructions */
        'db'            { lvalp->int_info = 8; RETURN(DECLARE_DATA); }
        'dhw'           {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)/2;
            RETURN(DECLARE_DATA);
        }
        'dw'            {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch);
            RETURN(DECLARE_DATA);
        }
        'dd'            {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*2;
            RETURN(DECLARE_DATA);
        }
        'dq'            {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*4;
            RETURN(DECLARE_DATA);
        }
        'dt'            { lvalp->int_info = 80; RETURN(DECLARE_DATA); }
        'ddq'           {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
            RETURN(DECLARE_DATA);
        }
        'do'           {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
            RETURN(DECLARE_DATA);
        }
        'dy'           {
            lvalp->int_info = 256;
            RETURN(DECLARE_DATA);
        }

        'resb'          { lvalp->int_info = 8; RETURN(RESERVE_SPACE); }
        'reshw'         {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)/2;
            RETURN(RESERVE_SPACE);
        }
        'resw'          {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch);
            RETURN(RESERVE_SPACE);
        }
        'resd'          {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*2;
            RETURN(RESERVE_SPACE);
        }
        'resq'          {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*4;
            RETURN(RESERVE_SPACE);
        }
        'rest'          { lvalp->int_info = 80; RETURN(RESERVE_SPACE); }
        'resdq'         {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
            RETURN(RESERVE_SPACE);
        }
        'reso'         {
            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
            RETURN(RESERVE_SPACE);
        }
        'resy'         {
            lvalp->int_info = 256;
            RETURN(RESERVE_SPACE);
        }

        'incbin'        { RETURN(INCBIN); }

        'equ'           { RETURN(EQU); }

        'times'         { RETURN(TIMES); }

        'seg'           { RETURN(SEG); }
        'wrt'           { RETURN(WRT); }

        'abs'           { RETURN(ABS); }
        'rel'           { RETURN(REL); }

        'nosplit'       { RETURN(NOSPLIT); }
        'strict'        { RETURN(STRICT); }

        /* operators */
        "<<"                    { RETURN(LEFT_OP); }
        ">>"                    { RETURN(RIGHT_OP); }
        "//"                    { RETURN(SIGNDIV); }
        "%%"                    { RETURN(SIGNMOD); }
        "$$"                    { RETURN(START_SECTION_ID); }
        [-+|^*&/%~$():=,\[]     { RETURN(s->tok[0]); }
        "]"                     { RETURN(s->tok[0]); }

        /* local label (.label) */
        "." [a-zA-Z0-9_$#@~.?]+ {
            RETURN(handle_dot_label(lvalp, TOK, TOKLEN, 0, parser_nasm));
        }

        /* forced identifier */
        "$" [a-zA-Z0-9_$#@~.?]+ {
            if (TOK[1] == '.') {
                /* handle like .label */
                RETURN(handle_dot_label(lvalp, TOK, TOKLEN, 1, parser_nasm));
            }
            lvalp->str_val = yasm__xstrndup(TOK+1, TOKLEN-1);
            RETURN(ID);
        }

        /* identifier that may be a register, instruction, etc. */
        [a-zA-Z_?@][a-zA-Z0-9_$#@~.?]* {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            if (parser_nasm->state != INSTRUCTION) {
                uintptr_t prefix;
                switch (yasm_arch_parse_check_insnprefix
                        (p_object->arch, TOK, TOKLEN, cur_line, &lvalp->bc,
                         &prefix)) {
                    case YASM_ARCH_INSN:
                        parser_nasm->state = INSTRUCTION;
                        s->tok[TOKLEN] = savech;
                        RETURN(INSN);
                    case YASM_ARCH_PREFIX:
                        lvalp->arch_data = prefix;
                        s->tok[TOKLEN] = savech;
                        RETURN(PREFIX);
                    default:
                        break;
                }
            }
            switch (yasm_arch_parse_check_regtmod
                    (p_object->arch, TOK, TOKLEN, &lvalp->arch_data)) {
                case YASM_ARCH_REG:
                    s->tok[TOKLEN] = savech;
                    RETURN(REG);
                case YASM_ARCH_SEGREG:
                    s->tok[TOKLEN] = savech;
                    RETURN(SEGREG);
                case YASM_ARCH_TARGETMOD:
                    s->tok[TOKLEN] = savech;
                    RETURN(TARGETMOD);
                default:
                    s->tok[TOKLEN] = savech;
            }
            /* Propagate errors in case we got a warning from the arch */
            yasm_errwarn_propagate(parser_nasm->errwarns, cur_line);
            /* Just an identifier, return as such. */
            lvalp->str_val = yasm__xstrndup(TOK, TOKLEN);
            RETURN(ID);
        }

        ";" (any \ [\000])*     { goto scan; }

        ws+                     { goto scan; }

        [\000]                  {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto scan;
        }
    */

    /* %line linenum+lineinc filename */
linechg:
    SCANINIT();

    /*!re2c
        digit+ {
            linechg_numcount++;
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->intn = yasm_intnum_create_dec(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        [\000] {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        "+" {
            RETURN(s->tok[0]);
        }

        ws+ {
            if (linechg_numcount == 2) {
                parser_nasm->state = LINECHG2;
                goto linechg2;
            }
            goto linechg;
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto linechg;
        }
    */

linechg2:
    SCANINIT();

    /*!re2c
        [\000] {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        "\r" { }

        (any \ [\000])+ {
            parser_nasm->state = LINECHG;
            lvalp->str_val = yasm__xstrndup(TOK, TOKLEN);
            RETURN(FILENAME);
        }
    */

    /* directive: [name value] */
directive:
    SCANINIT();

    /*!re2c
        [\]\000] {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        [a-zA-Z_][a-zA-Z_0-9]* {
            lvalp->str_val = yasm__xstrndup(TOK, TOKLEN);
            if (yasm__strcasecmp(lvalp->str_val, "section") == 0 ||
                yasm__strcasecmp(lvalp->str_val, "segment") == 0)
                parser_nasm->state = SECTION_DIRECTIVE;
            else
                parser_nasm->state = DIRECTIVE2;
            RETURN(DIRECTIVE_NAME);
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto directive;
        }
    */

    /* section directive (the section name portion thereof) */
section_directive:
    SCANINIT();

    /*!re2c
        [a-zA-Z0-9_$#@~.?-]+ {
            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
            lvalp->str.len = TOKLEN;
            parser_nasm->state = DIRECTIVE2;
            RETURN(STRING);
        }

        quot            {
            parser_nasm->state = DIRECTIVE2;
            endch = s->tok[0];
            goto stringconst;
        }

        ws+             {
            parser_nasm->state = DIRECTIVE2;
            goto section_directive;
        }

        "]" {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        [\000]          {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto section_directive;
        }
    */

    /* inner part of directive */
directive2:
    SCANINIT();

    /*!re2c
        /* standard decimal integer */
        digit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->intn = yasm_intnum_create_dec(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }
        /* 10010011b - binary number */

        [01] bindigit* 'b' {
            s->tok[TOKLEN-1] = '\0'; /* strip off 'b' */
            lvalp->intn = yasm_intnum_create_bin(TOK);
            RETURN(INTNUM);
        }

        /* 777q or 777o - octal number */
        [0-7] octdigit* [qQoO] {
            s->tok[TOKLEN-1] = '\0'; /* strip off 'q' or 'o' */
            lvalp->intn = yasm_intnum_create_oct(TOK);
            RETURN(INTNUM);
        }

        /* 0AAh form of hexidecimal number */
        digit hexdigit* 'h' {
            s->tok[TOKLEN-1] = '\0'; /* strip off 'h' */
            lvalp->intn = yasm_intnum_create_hex(TOK);
            RETURN(INTNUM);
        }

        /* $0AA and 0xAA forms of hexidecimal number */
        (("$" digit) | '0x') hexdigit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            if (s->tok[1] == 'x' || s->tok[1] == 'X')
                /* skip 0 and x */
                lvalp->intn = yasm_intnum_create_hex(TOK+2);
            else
                /* don't skip 0 */
                lvalp->intn = yasm_intnum_create_hex(TOK+1);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        /* string/character constant values */
        quot {
            endch = s->tok[0];
            goto stringconst;
        }

        /* operators */
        "<<"                    { RETURN(LEFT_OP); }
        ">>"                    { RETURN(RIGHT_OP); }
        "//"                    { RETURN(SIGNDIV); }
        "%%"                    { RETURN(SIGNMOD); }
        [-+|^*&/%~$():=,\[]     { RETURN(s->tok[0]); }

        /* handle ] for directives */
        "]" {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        /* forced identifier; within directive, don't strip '$', this is
         * handled later.
         */
        "$" [a-zA-Z0-9_$#@~.?]+ {
            lvalp->str_val = yasm__xstrndup(TOK, TOKLEN);
            RETURN(ID);
        }

        /* identifier; within directive, no local label mechanism */
        [a-zA-Z_.?][a-zA-Z0-9_$#@~.?]* {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            switch (yasm_arch_parse_check_regtmod
                    (p_object->arch, TOK, TOKLEN, &lvalp->arch_data)) {
                case YASM_ARCH_REG:
                    s->tok[TOKLEN] = savech;
                    RETURN(REG);
                default:
                    s->tok[TOKLEN] = savech;
            }
            /* Propagate errors in case we got a warning from the arch */
            yasm_errwarn_propagate(parser_nasm->errwarns, cur_line);
            /* Just an identifier, return as such. */
            lvalp->str_val = yasm__xstrndup(TOK, TOKLEN);
            RETURN(ID);
        }

        ";" (any \ [\000])*     { goto directive2; }

        ws+                     { goto directive2; }

        [\000]                  {
            parser_nasm->state = INITIAL;
            RETURN(s->tok[0]);
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto scan;
        }
     */

    /* string/character constant values */
stringconst:
    strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE);
    strbuf_size = STRBUF_ALLOC_SIZE;
    count = 0;

stringconst_scan:
    SCANINIT();

    /*!re2c
        [\000]  {
            yasm_error_set(YASM_ERROR_SYNTAX, N_("unterminated string"));
            strbuf[count] = '\0';
            lvalp->str.contents = (char *)strbuf;
            lvalp->str.len = count;
            RETURN(STRING);
        }

        any     {
            if (s->tok[0] == endch) {
                strbuf[count] = '\0';
                lvalp->str.contents = (char *)strbuf;
                lvalp->str.len = count;
                RETURN(STRING);
            }

            strbuf[count++] = s->tok[0];
            if (count >= strbuf_size) {
                strbuf = yasm_xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
                strbuf_size += STRBUF_ALLOC_SIZE;
            }

            goto stringconst_scan;
        }
    */
}
