blob: 423835b6c0299e1adde98dc78e551f91696578b7 [file] [log] [blame]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tools/re2c/scanner.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/globals.h"
#include "tools/re2c/parser.h"
/* Fallback MAX for when no included header supplies one.
 * Each argument may be evaluated more than once. */
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif
/* Number of bytes fill() asks fread for on each call; fill() also grows the
 * buffer whenever less than this much free space remains past s->lim. */
#define BSIZE 8192
/* Hook macros in the form the re2c-generated matcher code expects: the input
 * character type, the current read position, the end of the buffered data,
 * and the backtracking marker. */
#define YYCTYPE unsigned char
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
/* Refill hook: reloads the buffer through fill() and picks up the possibly
 * relocated cursor.  The requested amount n is not used. */
#define YYFILL(n) {cursor = fill(s, cursor);}
/* Store the local cursor back into the scanner state, then return i. */
#define RETURN(i) {s->cur = cursor; return i;}
/* Forward declaration; the definition follows Scanner_init. */
static unsigned char *fill(Scanner*, unsigned char*);
/*
 * Prepare *s for reading from the stream i.
 *
 * No buffer is allocated here: every buffer pointer starts out NULL, the
 * token line/column counters are zeroed and the current line number
 * starts at 1.
 */
void
Scanner_init(Scanner *s, FILE *i)
{
    /* Buffer bookkeeping: nothing has been read yet. */
    s->eof = NULL;
    s->top = NULL;
    s->lim = NULL;
    s->pos = NULL;
    s->cur = NULL;
    s->ptr = NULL;
    s->tok = NULL;
    s->bot = NULL;

    /* Position counters. */
    s->tline = 0;
    s->tchar = 0;
    s->cline = 1;

    s->in = i;
}
/*
 * Refill the scanner's input buffer and return the (possibly relocated)
 * cursor.
 *
 * Does nothing once the end of input has been seen (s->eof is set).
 * Otherwise:
 *   1. Bytes in front of the current token are discarded by sliding the
 *      live data [tok, lim) down to the start of the buffer.
 *   2. If fewer than BSIZE bytes of free space remain past lim, a larger
 *      buffer is allocated, the live data is copied into it and the old
 *      buffer is freed.
 *   3. Up to BSIZE bytes are read from s->in.  A short read is taken as
 *      the end of input: a NUL byte is stored right after the data and
 *      s->eof is set to point one past that NUL.
 *
 * Whenever the data moves, tok, ptr, pos, lim and the caller's cursor are
 * shifted by the same amount.  The process exits with status 1 if the
 * buffer cannot be allocated.
 */
static unsigned char *
fill(Scanner *s, unsigned char *cursor)
{
    if(!s->eof){
        size_t cnt = s->tok - s->bot;
        if(cnt){
            /* Source and destination lie in the same buffer and overlap
             * whenever the live data is longer than the discarded prefix,
             * so memmove is required here (memcpy would be undefined). */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            cursor -= cnt;
            s->pos -= cnt;
            s->lim -= cnt;
        }
        if((s->top - s->lim) < BSIZE){
            unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE) + 1);
            if(!buf){
                fprintf(stderr, "out of memory\n");
                exit(1);
            }
            /* On the very first call there is no old data (and no old
             * buffer) to copy from. */
            if(s->lim != s->tok)
                memcpy(buf, s->tok, s->lim - s->tok);
            /* Rebase every pointer from the old buffer onto the new one. */
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            cursor = &buf[cursor - s->bot];
            s->pos = &buf[s->pos - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BSIZE];
            free(s->bot);
            s->bot = buf;
        }
        if((cnt = fread(s->lim, 1, BSIZE, s->in)) != BSIZE){
            /* Short read: terminate the data with a NUL sentinel and leave
             * eof pointing one past it. */
            s->eof = &s->lim[cnt]; *s->eof++ = '\0';
        }
        s->lim += cnt;
    }
    return cursor;
}
/*
 * Named pattern fragments used by the scanner rules further down.
 *
 *   zero     the NUL byte
 *   any      any byte value (octal 000 through 377)
 *   dot      any byte except newline
 *   esc      any byte except newline and backslash
 *   istring  inverted character class: "[^" ... "]"
 *   cstring  character class: "[" ... "]"
 *   dstring  double-quoted string
 *   sstring  single-quoted string
 *   letter   ASCII letter
 *   digit    decimal digit
 *
 * In the four string forms a backslash followed by any non-newline byte
 * is accepted as an escape, so the closing delimiter can appear escaped
 * inside the text; none of them can span a newline.
 */
/*!re2c
zero = "\000";
any = [\000-\377];
dot = any \ [\n];
esc = dot \ [\\];
istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ;
cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ;
dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\"";
sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ;
letter = [a-zA-Z];
digit = [0-9];
*/
int
Scanner_echo(Scanner *s, FILE *out)
{
unsigned char *cursor = s->cur;
int ignore_eoc = 0;
/* Catch EOF */
if (s->eof && cursor == s->eof)
return 0;
s->tok = cursor;
echo:
/*!re2c
"/*!re2c" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out);
s->tok = cursor;
RETURN(1); }
"/*!max:re2c" {
fprintf(out, "#define YYMAXFILL %u\n", maxFill);
s->tok = s->pos = cursor;
ignore_eoc = 1;
goto echo;
}
"*" "/" {
if (ignore_eoc) {
ignore_eoc = 0;
} else {
fwrite(s->tok, 1, cursor - s->tok, out);
}
s->tok = s->pos = cursor;
goto echo;
}
"\n" { fwrite(s->tok, 1, cursor - s->tok, out);
s->tok = s->pos = cursor; s->cline++; oline++;
goto echo; }
zero { fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */
if(cursor == s->eof) { RETURN(0); } }
any { goto echo; }
*/
}
/*
 * Scan and return the next token inside a rule block, starting at s->cur.
 *
 * Before each match attempt the token start is stored in s->tok, its
 * offset from s->pos in s->tchar and its line number in s->tline.  Blanks,
 * tabs, newlines and comments (which may nest) are skipped.
 *
 * Return value:
 *   0          at the end-of-comment marker that closes the rule block,
 *              or when the cursor equals s->eof right after a newline
 *   STRING     double-quoted string (yylval.regexp from strToRE) or
 *              single-quoted string (from strToCaseInsensitiveRE)
 *   RANGE      character class (ranToRE), inverted class (invToRE), or a
 *              single "." (mkDot); result in yylval.regexp
 *   CLOSE      one of the operators * + ? (stored in yylval.op)
 *   CLOSESIZE  a repetition count in braces: exact, min,max or min-only;
 *              yylval.extop holds minsize/maxsize, where maxsize is never
 *              below minsize and is -1 for the min-only form
 *   ID         identifier; yylval.symbol comes from Symbol_find
 *   CODE       brace-delimited action text with balanced nested braces;
 *              yylval.token comes from Token_new with the token text and
 *              s->tline
 *   otherwise  the character itself, for any of ( ) | = ; / \
 *
 * Unterminated strings and ranges, and action code that is still open when
 * the cursor equals s->eof right after a newline, are reported through
 * Scanner_fatal.  Any other unexpected byte is reported on stderr and
 * skipped.
 */
int
Scanner_scan(Scanner *s)
{
unsigned char *cursor = s->cur;
/* Nesting depth of braces while in "code" and of comments while in "comment". */
unsigned int depth;
scan:
/* Remember where this token starts, for error reporting and token text. */
s->tchar = cursor - s->pos;
s->tline = s->cline;
s->tok = cursor;
/*!re2c
"{" { depth = 1;
goto code;
}
"/*" { depth = 1;
goto comment; }
"*/" { s->tok = cursor;
RETURN(0); }
dstring { s->cur = cursor;
yylval.regexp = strToRE(Scanner_token(s));
return STRING; }
sstring { s->cur = cursor;
yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s));
return STRING; }
"\"" { Scanner_fatal(s, "unterminated string constant (missing \")"); }
"'" { Scanner_fatal(s, "unterminated string constant (missing ')"); }
istring { s->cur = cursor;
yylval.regexp = invToRE(Scanner_token(s));
return RANGE; }
cstring { s->cur = cursor;
yylval.regexp = ranToRE(Scanner_token(s));
return RANGE; }
"[" { Scanner_fatal(s, "unterminated range (missing ])"); }
[()|=;/\\] { RETURN(*s->tok); }
[*+?] { yylval.op = *s->tok;
RETURN(CLOSE); }
"{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1);
yylval.extop.maxsize = atoi((char *)s->tok+1);
RETURN(CLOSESIZE); }
"{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1);
yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1));
RETURN(CLOSESIZE); }
"{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)s->tok+1);
yylval.extop.maxsize = -1;
RETURN(CLOSESIZE); }
letter (letter|digit)* { SubStr substr;
s->cur = cursor;
substr = Scanner_token(s);
yylval.symbol = Symbol_find(&substr);
return ID; }
[ \t]+ { goto scan; }
"\n" { if(cursor == s->eof) RETURN(0);
s->pos = cursor; s->cline++;
goto scan;
}
"." { s->cur = cursor;
yylval.regexp = mkDot();
return RANGE;
}
any { fprintf(stderr, "unexpected character: '%c'\n", *s->tok);
goto scan;
}
*/
code:
/*!re2c
"}" { if(--depth == 0){
s->cur = cursor;
yylval.token = Token_new(Scanner_token(s), s->tline);
return CODE;
}
goto code; }
"{" { ++depth;
goto code; }
"\n" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'");
s->pos = cursor; s->cline++;
goto code;
}
dstring | sstring | any { goto code; }
*/
comment:
/*!re2c
"*/" { if(--depth == 0)
goto scan;
else
goto comment; }
"/*" { ++depth;
goto comment; }
"\n" { if(cursor == s->eof) RETURN(0);
s->tok = s->pos = cursor; s->cline++;
goto comment;
}
any { goto comment; }
*/
}
/*
 * Report an unrecoverable scanning error and terminate the program.
 *
 * Writes the position of the current token (s->tline, and s->tchar + 1 so
 * that the column is counted from 1) followed by msg to stderr, then
 * exits with status 1.  Does not return.
 */
void
Scanner_fatal(Scanner *s, const char *msg)
{
    fprintf(stderr,
            "line %d, column %d: %s\n",
            s->tline,
            s->tchar + 1,
            msg);

    exit(1);
}