| /* |
| * Mesa 3-D graphics library |
| * Version: 6.6 |
| * |
| * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
| * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /** |
| * \file grammar.c |
| * syntax parsing engine |
| * \author Michal Krol |
| */ |
| |
| #ifndef GRAMMAR_PORT_BUILD |
| #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file |
| #endif |
| |
| /* |
| */ |
| |
| /* |
| INTRODUCTION |
| ------------ |
| |
| The task is to check the syntax of an input string. Input string is a stream of ASCII |
| characters terminated with a null-character ('\0'). Checking it using C language is |
| difficult and hard to implement without bugs. It is hard to maintain and make changes when |
| the syntax changes. |
| |
| This is because of a high redundancy of the C code. Large blocks of code are duplicated with |
| only small changes. Even use of macros does not solve the problem because macros cannot |
| erase the complexity of the problem. |
| |
| The resolution is to create a new language that will be highly oriented to our task. Once |
| we describe a particular syntax, we are done. We can then focus on the code that implements |
| the language. The size and complexity of it is relatively small than the code that directly |
| checks the syntax. |
| |
| First, we must implement our new language. Here, the language is implemented in C, but it |
| could also be implemented in any other language. The code is listed below. We must take |
| a good care that it is bug free. This is simple because the code is simple and clean. |
| |
| Next, we must describe the syntax of our new language in itself. Once created and checked |
| manually that it is correct, we can use it to check another scripts. |
| |
| Note that our new language loading code does not have to check the syntax. It is because we |
| assume that the script describing itself is correct, and other scripts can be syntactically |
| checked by the former script. The loading code must only do semantic checking which leads us to |
| simple resolving references. |
| |
| THE LANGUAGE |
| ------------ |
| |
| Here I will describe the syntax of the new language (further called "Synek"). It is mainly a |
| sequence of declarations terminated by a semicolon. The declaration consists of a symbol, |
| which is an identifier, and its definition. A definition is in turn a sequence of specifiers |
| connected with ".and" or ".or" operator. These operators cannot be mixed together in a one |
| definition. Specifier can be a symbol, string, character, character range or a special |
| keyword ".true" or ".false". |
| |
| On the very beginning of the script there is a declaration of a root symbol and is in the form: |
| .syntax <root_symbol>; |
| The <root_symbol> must be on of the symbols in declaration sequence. The syntax is correct if |
| the root symbol evaluates to true. A symbol evaluates to true if the definition associated with |
| the symbol evaluates to true. Definition evaluation depends on the operator used to connect |
| specifiers in the definition. If ".and" operator is used, definition evaluates to true if and |
| only if all the specifiers evaluate to true. If ".or" operator is used, definition evalutes to |
| true if any of the specifiers evaluates to true. If definition contains only one specifier, |
| it is evaluated as if it was connected with ".true" keyword by ".and" operator. |
| |
| If specifier is a ".true" keyword, it always evaluates to true. |
| |
| If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false |
| when it does not evaluate to true. |
| |
| Character range specifier is in the form: |
| '<first_character>' - '<second_character>' |
| If specifier is a character range, it evaluates to true if character in the stream is greater |
| or equal to <first_character> and less or equal to <second_character>. In that situation |
| the stream pointer is advanced to point to next character in the stream. All C-style escape |
| sequences are supported although trigraph sequences are not. The comparisions are performed |
| on 8-bit unsigned integers. |
| |
| Character specifier is in the form: |
| '<single_character>' |
| It evaluates to true if the following character range specifier evaluates to true: |
| '<single_character>' - '<single_character>' |
| |
| String specifier is in the form: |
| "<string>" |
| Let N be the number of characters in <string>. Let <string>[i] designate i-th character in |
| <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N) |
| the following character specifier evaluates to true: |
| '<string>[i]' |
| If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'. |
| |
| Symbol specifier can be optionally preceded by a ".loop" keyword in the form: |
| .loop <symbol> (1) |
| where <symbol> is defined as follows: |
| <symbol> <definition>; (2) |
| Construction (1) is replaced by the following code: |
| <symbol$1> |
| and declaration (2) is replaced by the following: |
| <symbol$1> <symbol$2> .or .true; |
| <symbol$2> <symbol> .and <symbol$1>; |
| <symbol> <definition>; |
| |
| Synek supports also a register mechanizm. User can, in its SYN file, declare a number of |
| registers that can be accessed in the syn body. Each reg has its name and a default value. |
| The register is one byte wide. The C code can change the default value by calling |
| grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is |
| a sequence of specifiers joined with .and or .or operator. And now each specifier can be |
| prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)" |
| where <operator> can be == or !=. If the condition evaluates to false, the specifier |
| evaluates to .false. Otherwise it evalutes to the specifier. |
| |
| ESCAPE SEQUENCES |
| ---------------- |
| |
| Synek supports all escape sequences in character specifiers. The mapping table is listed below. |
| All occurences of the characters in the first column are replaced with the corresponding |
| character in the second column. |
| |
| Escape sequence Represents |
| ------------------------------------------------------------------------------------------------ |
| \a Bell (alert) |
| \b Backspace |
| \f Formfeed |
| \n New line |
| \r Carriage return |
| \t Horizontal tab |
| \v Vertical tab |
| \' Single quotation mark |
| \" Double quotation mark |
| \\ Backslash |
| \? Literal question mark |
| \ooo ASCII character in octal notation |
| \xhhh ASCII character in hexadecimal notation |
| ------------------------------------------------------------------------------------------------ |
| |
| RAISING ERRORS |
| -------------- |
| |
| Any specifier can be followed by a special construction that is executed when the specifier |
| evaluates to false. The construction is in the form: |
| .error <ERROR_TEXT> |
| <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is |
| in the form: |
| .errtext <ERROR_TEXT> "<error_desc>" |
| When specifier evaluates to false and this construction is present, parsing is stopped |
| immediately and <error_desc> is returned as a result of parsing. The error position is also |
| returned and it is meant as an offset from the beggining of the stream to the character that |
| was valid so far. Example: |
| |
| (**** syntax script ****) |
| |
| .syntax program; |
| .errtext MISSING_SEMICOLON "missing ';'" |
| program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and |
| .loop space .and '\0'; |
| declaration "declare" .and .loop space .and identifier; |
| space ' '; |
| |
| (**** sample code ****) |
| |
| declare foo , |
| |
| In the example above checking the sample code will result in error message "missing ';'" and |
| error position 12. The sample code is not correct. Note the presence of '\0' specifier to |
| assure that there is no code after semicolon - only spaces. |
| <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case, |
| the identifier and dollar signs are replaced by a string retrieved by invoking symbol with |
| the identifier name. The starting position is the error position. The lenght of the resulting |
| string is the position after invoking the symbol. |
| |
| PRODUCTION |
| ---------- |
| |
| Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers |
| that evaluate to true. That is, every specifier and optional error construction can be followed |
| by a number of emit constructions that are in the form: |
| .emit <parameter> |
| <paramater> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by |
| 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration |
| in the form: |
| .emtcode <identifier> <hex_number> |
| |
| When given specifier evaluates to true, all emits associated with the specifier are output |
| in order they were declared. A star means that last-read character should be output instead |
| of constant value. Example: |
| |
| (**** syntax script ****) |
| |
| .syntax foobar; |
| .emtcode WORD_FOO 0x01 |
| .emtcode WORD_BAR 0x02 |
| foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00; |
| FOO "foo" .and SPACE; |
| BAR "bar" .and SPACE; |
| SPACE ' ' .or '\0'; |
| |
| (**** sample text 1 ****) |
| |
| foo |
| |
| (**** sample text 2 ****) |
| |
| foobar |
| |
| For both samples the result will be one-element array. For first sample text it will be |
| value 1, for second - 0. Note that every text will be accepted because of presence of |
| .true as an alternative. |
| |
| Another example: |
| |
| (**** syntax script ****) |
| |
| .syntax declaration; |
| .emtcode VARIABLE 0x01 |
| declaration "declare" .and .loop space .and |
| identifier .emit VARIABLE .and (1) |
| .true .emit 0x00 .and (2) |
| .loop space .and ';'; |
| space ' ' .or '\t'; |
| identifier .loop id_char .emit *; (3) |
| id_char 'a'-'z' .or 'A'-'Z' .or '_'; |
| |
| (**** sample code ****) |
| |
| declare fubar; |
| |
| In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to |
| true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used |
| to terminate the string with null to signal when the string ends. Specifier (3) outputs |
| all characters that make declared identifier. The result of sample code will be the |
| following array: |
| { 1, 'f', 'u', 'b', 'a', 'r', 0 } |
| |
| If .emit is followed by dollar $, it means that current position should be output. Current |
| position is a 32-bit unsigned integer distance from the very beginning of the parsed string to |
| first character consumed by the specifier associated with the .emit instruction. Current |
| position is stored in the output buffer in Little-Endian convention (the lowest byte comes |
| first). |
| */ |
| |
| #include <stdio.h> |
| |
| static void mem_free (void **); |
| |
| /* |
| internal error messages |
| */ |
| static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory"; |
| static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'"; |
| static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object"; |
| static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'"; |
| /*static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";*/ |
| static const byte *UNREFERENCED_IDENTIFIER =(byte *) "internal error 1006: unreferenced identifier '$'"; |
| |
| static const byte *error_message = NULL; /* points to one of the error messages above */ |
| static byte *error_param = NULL; /* this is inserted into error_message in place of $ */ |
| static int error_position = -1; |
| |
| static byte *unknown = (byte *) "???"; |
| |
| static void clear_last_error (void) |
| { |
| /* reset error message */ |
| error_message = NULL; |
| |
| /* free error parameter - if error_param is a "???" don't free it - it's static */ |
| if (error_param != unknown) |
| mem_free ((void **) (void *) &error_param); |
| else |
| error_param = NULL; |
| |
| /* reset error position */ |
| error_position = -1; |
| } |
| |
| static void set_last_error (const byte *msg, byte *param, int pos) |
| { |
| /* error message can be set only once */ |
| if (error_message != NULL) |
| { |
| mem_free ((void **) (void *) ¶m); |
| return; |
| } |
| |
| error_message = msg; |
| |
| /* if param is NULL, set error_param to unknown ("???") */ |
| /* note: do not try to strdup the "???" - it may be that we are here because of */ |
| /* out of memory error so strdup can fail */ |
| if (param != NULL) |
| error_param = param; |
| else |
| error_param = unknown; |
| |
| error_position = pos; |
| } |
| |
| /* |
| memory management routines |
| */ |
| static void *mem_alloc (size_t size) |
| { |
| void *ptr = grammar_alloc_malloc (size); |
| if (ptr == NULL) |
| set_last_error (OUT_OF_MEMORY, NULL, -1); |
| return ptr; |
| } |
| |
| static void *mem_copy (void *dst, const void *src, size_t size) |
| { |
| return grammar_memory_copy (dst, src, size); |
| } |
| |
| static void mem_free (void **ptr) |
| { |
| grammar_alloc_free (*ptr); |
| *ptr = NULL; |
| } |
| |
| static void *mem_realloc (void *ptr, size_t old_size, size_t new_size) |
| { |
| void *ptr2 = grammar_alloc_realloc (ptr, old_size, new_size); |
| if (ptr2 == NULL) |
| set_last_error (OUT_OF_MEMORY, NULL, -1); |
| return ptr2; |
| } |
| |
| static byte *str_copy_n (byte *dst, const byte *src, size_t max_len) |
| { |
| return grammar_string_copy_n (dst, src, max_len); |
| } |
| |
| static byte *str_duplicate (const byte *str) |
| { |
| byte *new_str = grammar_string_duplicate (str); |
| if (new_str == NULL) |
| set_last_error (OUT_OF_MEMORY, NULL, -1); |
| return new_str; |
| } |
| |
| static int str_equal (const byte *str1, const byte *str2) |
| { |
| return grammar_string_compare (str1, str2) == 0; |
| } |
| |
| static int str_equal_n (const byte *str1, const byte *str2, unsigned int n) |
| { |
| return grammar_string_compare_n (str1, str2, n) == 0; |
| } |
| |
| static int |
| str_length (const byte *str) |
| { |
| return (int) (grammar_string_length (str)); |
| } |
| |
| /* |
| useful macros |
| */ |
| #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\ |
| static void _Ty##_append (_Ty **x, _Ty *nx) {\ |
| while (*x) x = &(**x).next;\ |
| *x = nx;\ |
| } |
| |
| /* |
| string to byte map typedef |
| */ |
| typedef struct map_byte_ |
| { |
| byte *key; |
| byte data; |
| struct map_byte_ *next; |
| } map_byte; |
| |
| static void map_byte_create (map_byte **ma) |
| { |
| *ma = (map_byte *) mem_alloc (sizeof (map_byte)); |
| if (*ma) |
| { |
| (**ma).key = NULL; |
| (**ma).data = '\0'; |
| (**ma).next = NULL; |
| } |
| } |
| |
| static void map_byte_destroy (map_byte **ma) |
| { |
| if (*ma) |
| { |
| map_byte_destroy (&(**ma).next); |
| mem_free ((void **) &(**ma).key); |
| mem_free ((void **) ma); |
| } |
| } |
| |
| GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte) |
| |
| /* |
| searches the map for the specified key, |
| returns pointer to the element with the specified key if it exists |
| returns NULL otherwise |
| */ |
| static map_byte *map_byte_locate (map_byte **ma, const byte *key) |
| { |
| while (*ma) |
| { |
| if (str_equal ((**ma).key, key)) |
| return *ma; |
| |
| ma = &(**ma).next; |
| } |
| |
| set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1); |
| return NULL; |
| } |
| |
| /* |
| searches the map for specified key, |
| if the key is matched, *data is filled with data associated with the key, |
| returns 0 if the key is matched, |
| returns 1 otherwise |
| */ |
| static int map_byte_find (map_byte **ma, const byte *key, byte *data) |
| { |
| map_byte *found = map_byte_locate (ma, key); |
| if (found != NULL) |
| { |
| *data = found->data; |
| |
| return 0; |
| } |
| |
| return 1; |
| } |
| |
| /* |
| regbyte context typedef |
| |
| Each regbyte consists of its name and a default value. These are static and created at |
| grammar script compile-time, for example the following line: |
| .regbyte vertex_blend 0x00 |
| adds a new regbyte named "vertex_blend" to the static list and initializes it to 0. |
| When the script is executed, this regbyte can be accessed by name for read and write. When a |
| particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx |
| stack. The new entry contains information abot which regbyte it references and its new value. |
| When a given regbyte is accessed for read, the stack is searched top-down to find an |
| entry that references the regbyte. The first matching entry is used to return the current |
| value it holds. If no entry is found, the default value is returned. |
| */ |
| typedef struct regbyte_ctx_ |
| { |
| map_byte *m_regbyte; |
| byte m_current_value; |
| struct regbyte_ctx_ *m_prev; |
| } regbyte_ctx; |
| |
| static void regbyte_ctx_create (regbyte_ctx **re) |
| { |
| *re = (regbyte_ctx *) mem_alloc (sizeof (regbyte_ctx)); |
| if (*re) |
| { |
| (**re).m_regbyte = NULL; |
| (**re).m_prev = NULL; |
| } |
| } |
| |
| static void regbyte_ctx_destroy (regbyte_ctx **re) |
| { |
| if (*re) |
| { |
| mem_free ((void **) re); |
| } |
| } |
| |
| static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg) |
| { |
| /* first lookup in the register stack */ |
| while (*re != NULL) |
| { |
| if ((**re).m_regbyte == reg) |
| return (**re).m_current_value; |
| |
| re = &(**re).m_prev; |
| } |
| |
| /* if not found - return the default value */ |
| return reg->data; |
| } |
| |
| /* |
| emit type typedef |
| */ |
| typedef enum emit_type_ |
| { |
| et_byte, /* explicit number */ |
| et_stream, /* eaten character */ |
| et_position /* current position */ |
| } emit_type; |
| |
| /* |
| emit destination typedef |
| */ |
| typedef enum emit_dest_ |
| { |
| ed_output, /* write to the output buffer */ |
| ed_regbyte /* write a particular regbyte */ |
| } emit_dest; |
| |
| /* |
| emit typedef |
| */ |
| typedef struct emit_ |
| { |
| emit_dest m_emit_dest; |
| emit_type m_emit_type; /* ed_output */ |
| byte m_byte; /* et_byte */ |
| map_byte *m_regbyte; /* ed_regbyte */ |
| byte *m_regname; /* ed_regbyte - temporary */ |
| struct emit_ *m_next; |
| } emit; |
| |
| static void emit_create (emit **em) |
| { |
| *em = (emit *) mem_alloc (sizeof (emit)); |
| if (*em) |
| { |
| (**em).m_emit_dest = ed_output; |
| (**em).m_emit_type = et_byte; |
| (**em).m_byte = '\0'; |
| (**em).m_regbyte = NULL; |
| (**em).m_regname = NULL; |
| (**em).m_next = NULL; |
| } |
| } |
| |
| static void emit_destroy (emit **em) |
| { |
| if (*em) |
| { |
| emit_destroy (&(**em).m_next); |
| mem_free ((void **) &(**em).m_regname); |
| mem_free ((void **) em); |
| } |
| } |
| |
| static unsigned int emit_size (emit *_E) |
| { |
| unsigned int n = 0; |
| |
| while (_E != NULL) |
| { |
| if (_E->m_emit_dest == ed_output) |
| { |
| if (_E->m_emit_type == et_position) |
| n += 4; /* position is a 32-bit unsigned integer */ |
| else |
| n++; |
| } |
| _E = _E->m_next; |
| } |
| |
| return n; |
| } |
| |
| static int emit_push (emit *_E, byte *_P, byte c, unsigned int _Pos, regbyte_ctx **_Ctx) |
| { |
| while (_E != NULL) |
| { |
| if (_E->m_emit_dest == ed_output) |
| { |
| if (_E->m_emit_type == et_byte) |
| *_P++ = _E->m_byte; |
| else if (_E->m_emit_type == et_stream) |
| *_P++ = c; |
| else /* _Em->type == et_position */ |
| { |
| *_P++ = (byte) (_Pos); |
| *_P++ = (byte) (_Pos >> 8); |
| *_P++ = (byte) (_Pos >> 16); |
| *_P++ = (byte) (_Pos >> 24); |
| } |
| } |
| else |
| { |
| regbyte_ctx *new_rbc; |
| regbyte_ctx_create (&new_rbc); |
| if (new_rbc == NULL) |
| return 1; |
| |
| new_rbc->m_prev = *_Ctx; |
| new_rbc->m_regbyte = _E->m_regbyte; |
| *_Ctx = new_rbc; |
| |
| if (_E->m_emit_type == et_byte) |
| new_rbc->m_current_value = _E->m_byte; |
| else if (_E->m_emit_type == et_stream) |
| new_rbc->m_current_value = c; |
| } |
| |
| _E = _E->m_next; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| error typedef |
| */ |
| typedef struct error_ |
| { |
| byte *m_text; |
| byte *m_token_name; |
| struct rule_ *m_token; |
| } error; |
| |
| static void error_create (error **er) |
| { |
| *er = (error *) mem_alloc (sizeof (error)); |
| if (*er) |
| { |
| (**er).m_text = NULL; |
| (**er).m_token_name = NULL; |
| (**er).m_token = NULL; |
| } |
| } |
| |
| static void error_destroy (error **er) |
| { |
| if (*er) |
| { |
| mem_free ((void **) &(**er).m_text); |
| mem_free ((void **) &(**er).m_token_name); |
| mem_free ((void **) er); |
| } |
| } |
| |
| struct dict_; |
| |
| static byte * |
| error_get_token (error *, struct dict_ *, const byte *, int); |
| |
| /* |
| condition operand type typedef |
| */ |
| typedef enum cond_oper_type_ |
| { |
| cot_byte, /* constant 8-bit unsigned integer */ |
| cot_regbyte /* pointer to byte register containing the current value */ |
| } cond_oper_type; |
| |
| /* |
| condition operand typedef |
| */ |
| typedef struct cond_oper_ |
| { |
| cond_oper_type m_type; |
| byte m_byte; /* cot_byte */ |
| map_byte *m_regbyte; /* cot_regbyte */ |
| byte *m_regname; /* cot_regbyte - temporary */ |
| } cond_oper; |
| |
| /* |
| condition type typedef |
| */ |
| typedef enum cond_type_ |
| { |
| ct_equal, |
| ct_not_equal |
| } cond_type; |
| |
| /* |
| condition typedef |
| */ |
| typedef struct cond_ |
| { |
| cond_type m_type; |
| cond_oper m_operands[2]; |
| } cond; |
| |
| static void cond_create (cond **co) |
| { |
| *co = (cond *) mem_alloc (sizeof (cond)); |
| if (*co) |
| { |
| (**co).m_operands[0].m_regname = NULL; |
| (**co).m_operands[1].m_regname = NULL; |
| } |
| } |
| |
| static void cond_destroy (cond **co) |
| { |
| if (*co) |
| { |
| mem_free ((void **) &(**co).m_operands[0].m_regname); |
| mem_free ((void **) &(**co).m_operands[1].m_regname); |
| mem_free ((void **) co); |
| } |
| } |
| |
| /* |
| specifier type typedef |
| */ |
| typedef enum spec_type_ |
| { |
| st_false, |
| st_true, |
| st_byte, |
| st_byte_range, |
| st_string, |
| st_identifier, |
| st_identifier_loop, |
| st_debug |
| } spec_type; |
| |
| /* |
| specifier typedef |
| */ |
| typedef struct spec_ |
| { |
| spec_type m_spec_type; |
| byte m_byte[2]; /* st_byte, st_byte_range */ |
| byte *m_string; /* st_string */ |
| struct rule_ *m_rule; /* st_identifier, st_identifier_loop */ |
| emit *m_emits; |
| error *m_errtext; |
| cond *m_cond; |
| struct spec_ *next; |
| } spec; |
| |
| static void spec_create (spec **sp) |
| { |
| *sp = (spec *) mem_alloc (sizeof (spec)); |
| if (*sp) |
| { |
| (**sp).m_spec_type = st_false; |
| (**sp).m_byte[0] = '\0'; |
| (**sp).m_byte[1] = '\0'; |
| (**sp).m_string = NULL; |
| (**sp).m_rule = NULL; |
| (**sp).m_emits = NULL; |
| (**sp).m_errtext = NULL; |
| (**sp).m_cond = NULL; |
| (**sp).next = NULL; |
| } |
| } |
| |
| static void spec_destroy (spec **sp) |
| { |
| if (*sp) |
| { |
| spec_destroy (&(**sp).next); |
| emit_destroy (&(**sp).m_emits); |
| error_destroy (&(**sp).m_errtext); |
| mem_free ((void **) &(**sp).m_string); |
| cond_destroy (&(**sp).m_cond); |
| mem_free ((void **) sp); |
| } |
| } |
| |
| GRAMMAR_IMPLEMENT_LIST_APPEND(spec) |
| |
| /* |
| operator typedef |
| */ |
| typedef enum oper_ |
| { |
| op_none, |
| op_and, |
| op_or |
| } oper; |
| |
| /* |
| rule typedef |
| */ |
| typedef struct rule_ |
| { |
| oper m_oper; |
| spec *m_specs; |
| struct rule_ *next; |
| int m_referenced; |
| } rule; |
| |
| static void rule_create (rule **ru) |
| { |
| *ru = (rule *) mem_alloc (sizeof (rule)); |
| if (*ru) |
| { |
| (**ru).m_oper = op_none; |
| (**ru).m_specs = NULL; |
| (**ru).next = NULL; |
| (**ru).m_referenced = 0; |
| } |
| } |
| |
| static void rule_destroy (rule **ru) |
| { |
| if (*ru) |
| { |
| rule_destroy (&(**ru).next); |
| spec_destroy (&(**ru).m_specs); |
| mem_free ((void **) ru); |
| } |
| } |
| |
| GRAMMAR_IMPLEMENT_LIST_APPEND(rule) |
| |
| /* |
| returns unique grammar id |
| */ |
| static grammar next_valid_grammar_id (void) |
| { |
| static grammar id = 0; |
| |
| return ++id; |
| } |
| |
| /* |
| dictionary typedef |
| */ |
| typedef struct dict_ |
| { |
| rule *m_rulez; |
| rule *m_syntax; |
| rule *m_string; |
| map_byte *m_regbytes; |
| grammar m_id; |
| struct dict_ *next; |
| } dict; |
| |
| static void dict_create (dict **di) |
| { |
| *di = (dict *) mem_alloc (sizeof (dict)); |
| if (*di) |
| { |
| (**di).m_rulez = NULL; |
| (**di).m_syntax = NULL; |
| (**di).m_string = NULL; |
| (**di).m_regbytes = NULL; |
| (**di).m_id = next_valid_grammar_id (); |
| (**di).next = NULL; |
| } |
| } |
| |
| static void dict_destroy (dict **di) |
| { |
| if (*di) |
| { |
| rule_destroy (&(**di).m_rulez); |
| map_byte_destroy (&(**di).m_regbytes); |
| mem_free ((void **) di); |
| } |
| } |
| |
| GRAMMAR_IMPLEMENT_LIST_APPEND(dict) |
| |
| static void dict_find (dict **di, grammar key, dict **data) |
| { |
| while (*di) |
| { |
| if ((**di).m_id == key) |
| { |
| *data = *di; |
| return; |
| } |
| |
| di = &(**di).next; |
| } |
| |
| *data = NULL; |
| } |
| |
| static dict *g_dicts = NULL; |
| |
| /* |
| byte array typedef |
| */ |
| typedef struct barray_ |
| { |
| byte *data; |
| unsigned int len; |
| } barray; |
| |
| static void barray_create (barray **ba) |
| { |
| *ba = (barray *) mem_alloc (sizeof (barray)); |
| if (*ba) |
| { |
| (**ba).data = NULL; |
| (**ba).len = 0; |
| } |
| } |
| |
| static void barray_destroy (barray **ba) |
| { |
| if (*ba) |
| { |
| mem_free ((void **) &(**ba).data); |
| mem_free ((void **) ba); |
| } |
| } |
| |
| /* |
| reallocates byte array to requested size, |
| returns 0 on success, |
| returns 1 otherwise |
| */ |
| static int barray_resize (barray **ba, unsigned int nlen) |
| { |
| byte *new_pointer; |
| |
| if (nlen == 0) |
| { |
| mem_free ((void **) &(**ba).data); |
| (**ba).data = NULL; |
| (**ba).len = 0; |
| |
| return 0; |
| } |
| else |
| { |
| new_pointer = (byte *) mem_realloc ((**ba).data, (**ba).len * sizeof (byte), |
| nlen * sizeof (byte)); |
| if (new_pointer) |
| { |
| (**ba).data = new_pointer; |
| (**ba).len = nlen; |
| |
| return 0; |
| } |
| } |
| |
| return 1; |
| } |
| |
| /* |
| adds byte array pointed by *nb to the end of array pointed by *ba, |
| returns 0 on success, |
| returns 1 otherwise |
| */ |
| static int barray_append (barray **ba, barray **nb) |
| { |
| const unsigned int len = (**ba).len; |
| |
| if (barray_resize (ba, (**ba).len + (**nb).len)) |
| return 1; |
| |
| mem_copy ((**ba).data + len, (**nb).data, (**nb).len); |
| |
| return 0; |
| } |
| |
| /* |
| adds emit chain pointed by em to the end of array pointed by *ba, |
| returns 0 on success, |
| returns 1 otherwise |
| */ |
| static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc) |
| { |
| unsigned int count = emit_size (em); |
| |
| if (barray_resize (ba, (**ba).len + count)) |
| return 1; |
| |
| return emit_push (em, (**ba).data + ((**ba).len - count), c, pos, rbc); |
| } |
| |
| /* |
| byte pool typedef |
| */ |
| typedef struct bytepool_ |
| { |
| byte *_F; |
| unsigned int _Siz; |
| } bytepool; |
| |
| static void bytepool_destroy (bytepool **by) |
| { |
| if (*by != NULL) |
| { |
| mem_free ((void **) &(**by)._F); |
| mem_free ((void **) by); |
| } |
| } |
| |
| static void bytepool_create (bytepool **by, int len) |
| { |
| *by = (bytepool *) (mem_alloc (sizeof (bytepool))); |
| if (*by != NULL) |
| { |
| (**by)._F = (byte *) (mem_alloc (sizeof (byte) * len)); |
| (**by)._Siz = len; |
| |
| if ((**by)._F == NULL) |
| bytepool_destroy (by); |
| } |
| } |
| |
| static int bytepool_reserve (bytepool *by, unsigned int n) |
| { |
| byte *_P; |
| |
| if (n <= by->_Siz) |
| return 0; |
| |
| /* byte pool can only grow and at least by doubling its size */ |
| n = n >= by->_Siz * 2 ? n : by->_Siz * 2; |
| |
| /* reallocate the memory and adjust pointers to the new memory location */ |
| _P = (byte *) (mem_realloc (by->_F, sizeof (byte) * by->_Siz, sizeof (byte) * n)); |
| if (_P != NULL) |
| { |
| by->_F = _P; |
| by->_Siz = n; |
| return 0; |
| } |
| |
| return 1; |
| } |
| |
| /* |
| string to string map typedef |
| */ |
| typedef struct map_str_ |
| { |
| byte *key; |
| byte *data; |
| struct map_str_ *next; |
| } map_str; |
| |
| static void map_str_create (map_str **ma) |
| { |
| *ma = (map_str *) mem_alloc (sizeof (map_str)); |
| if (*ma) |
| { |
| (**ma).key = NULL; |
| (**ma).data = NULL; |
| (**ma).next = NULL; |
| } |
| } |
| |
| static void map_str_destroy (map_str **ma) |
| { |
| if (*ma) |
| { |
| map_str_destroy (&(**ma).next); |
| mem_free ((void **) &(**ma).key); |
| mem_free ((void **) &(**ma).data); |
| mem_free ((void **) ma); |
| } |
| } |
| |
| GRAMMAR_IMPLEMENT_LIST_APPEND(map_str) |
| |
| /* |
| searches the map for specified key, |
| if the key is matched, *data is filled with data associated with the key, |
| returns 0 if the key is matched, |
| returns 1 otherwise |
| */ |
| static int map_str_find (map_str **ma, const byte *key, byte **data) |
| { |
| while (*ma) |
| { |
| if (str_equal ((**ma).key, key)) |
| { |
| *data = str_duplicate ((**ma).data); |
| if (*data == NULL) |
| return 1; |
| |
| return 0; |
| } |
| |
| ma = &(**ma).next; |
| } |
| |
| set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1); |
| return 1; |
| } |
| |
| /* |
| string to rule map typedef |
| */ |
| typedef struct map_rule_ |
| { |
| byte *key; |
| rule *data; |
| struct map_rule_ *next; |
| } map_rule; |
| |
| static void map_rule_create (map_rule **ma) |
| { |
| *ma = (map_rule *) mem_alloc (sizeof (map_rule)); |
| if (*ma) |
| { |
| (**ma).key = NULL; |
| (**ma).data = NULL; |
| (**ma).next = NULL; |
| } |
| } |
| |
| static void map_rule_destroy (map_rule **ma) |
| { |
| if (*ma) |
| { |
| map_rule_destroy (&(**ma).next); |
| mem_free ((void **) &(**ma).key); |
| mem_free ((void **) ma); |
| } |
| } |
| |
| GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule) |
| |
| /* |
| searches the map for specified key, |
| if the key is matched, *data is filled with data associated with the key, |
| returns 0 if the is matched, |
| returns 1 otherwise |
| */ |
| static int map_rule_find (map_rule **ma, const byte *key, rule **data) |
| { |
| while (*ma) |
| { |
| if (str_equal ((**ma).key, key)) |
| { |
| *data = (**ma).data; |
| |
| return 0; |
| } |
| |
| ma = &(**ma).next; |
| } |
| |
| set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1); |
| return 1; |
| } |
| |
| /* |
| returns 1 if given character is a white space, |
| returns 0 otherwise |
| */ |
| static int is_space (byte c) |
| { |
| return c == ' ' || c == '\t' || c == '\n' || c == '\r'; |
| } |
| |
| /* |
| advances text pointer by 1 if character pointed by *text is a space, |
| returns 1 if a space has been eaten, |
| returns 0 otherwise |
| */ |
| static int eat_space (const byte **text) |
| { |
| if (is_space (**text)) |
| { |
| (*text)++; |
| |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| returns 1 if text points to C-style comment start string, |
| returns 0 otherwise |
| */ |
| static int is_comment_start (const byte *text) |
| { |
| return text[0] == '/' && text[1] == '*'; |
| } |
| |
| /* |
| advances text pointer to first character after C-style comment block - if any, |
| returns 1 if C-style comment block has been encountered and eaten, |
| returns 0 otherwise |
| */ |
| static int eat_comment (const byte **text) |
| { |
| if (is_comment_start (*text)) |
| { |
| /* *text points to comment block - skip two characters to enter comment body */ |
| *text += 2; |
| /* skip any character except consecutive '*' and '/' */ |
| while (!((*text)[0] == '*' && (*text)[1] == '/')) |
| (*text)++; |
| /* skip those two terminating characters */ |
| *text += 2; |
| |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| advances text pointer to first character that is neither space nor C-style comment block |
| */ |
| static void eat_spaces (const byte **text) |
| { |
| while (eat_space (text) || eat_comment (text)) |
| ; |
| } |
| |
| /* |
| resizes string pointed by *ptr to successfully add character c to the end of the string, |
| returns 0 on success, |
| returns 1 otherwise |
| */ |
| static int string_grow (byte **ptr, unsigned int *len, byte c) |
| { |
| /* reallocate the string in 16-byte increments */ |
| if ((*len & 0x0F) == 0x0F || *ptr == NULL) |
| { |
| byte *tmp = (byte *) mem_realloc (*ptr, ((*len + 1) & ~0x0F) * sizeof (byte), |
| ((*len + 1 + 0x10) & ~0x0F) * sizeof (byte)); |
| if (tmp == NULL) |
| return 1; |
| |
| *ptr = tmp; |
| } |
| |
| if (c) |
| { |
| /* append given character */ |
| (*ptr)[*len] = c; |
| (*len)++; |
| } |
| (*ptr)[*len] = '\0'; |
| |
| return 0; |
| } |
| |
| /* |
| returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _ |
| returns 0 otherwise |
| */ |
| static int is_identifier (byte c) |
| { |
| return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; |
| } |
| |
| /* |
| copies characters from *text to *id until non-identifier character is encountered, |
| assumes that *id points to NULL object - caller is responsible for later freeing the string, |
| text pointer is advanced to point past the copied identifier, |
| returns 0 if identifier was successfully copied, |
| returns 1 otherwise |
| */ |
| static int get_identifier (const byte **text, byte **id) |
| { |
| const byte *t = *text; |
| byte *p = NULL; |
| unsigned int len = 0; |
| |
| if (string_grow (&p, &len, '\0')) |
| return 1; |
| |
| /* loop while next character in buffer is valid for identifiers */ |
| while (is_identifier (*t)) |
| { |
| if (string_grow (&p, &len, *t++)) |
| { |
| mem_free ((void **) (void *) &p); |
| return 1; |
| } |
| } |
| |
| *text = t; |
| *id = p; |
| |
| return 0; |
| } |
| |
| /* |
| converts sequence of DEC digits pointed by *text until non-DEC digit is encountered, |
| advances text pointer past the converted sequence, |
| returns the converted value |
| */ |
| static unsigned int dec_convert (const byte **text) |
| { |
| unsigned int value = 0; |
| |
| while (**text >= '0' && **text <= '9') |
| { |
| value = value * 10 + **text - '0'; |
| (*text)++; |
| } |
| |
| return value; |
| } |
| |
| /* |
| returns 1 if given character is HEX digit 0-9, A-F or a-f, |
| returns 0 otherwise |
| */ |
| static int is_hex (byte c) |
| { |
| return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); |
| } |
| |
| /* |
| returns value of passed character as if it was HEX digit |
| */ |
| static unsigned int hex2dec (byte c) |
| { |
| if (c >= '0' && c <= '9') |
| return c - '0'; |
| if (c >= 'A' && c <= 'F') |
| return c - 'A' + 10; |
| return c - 'a' + 10; |
| } |
| |
| /* |
| converts sequence of HEX digits pointed by *text until non-HEX digit is encountered, |
| advances text pointer past the converted sequence, |
| returns the converted value |
| */ |
| static unsigned int hex_convert (const byte **text) |
| { |
| unsigned int value = 0; |
| |
| while (is_hex (**text)) |
| { |
| value = value * 0x10 + hex2dec (**text); |
| (*text)++; |
| } |
| |
| return value; |
| } |
| |
| /* |
| returns 1 if given character is OCT digit 0-7, |
| returns 0 otherwise |
| */ |
| static int is_oct (byte c) |
| { |
| return c >= '0' && c <= '7'; |
| } |
| |
| /* |
| returns value of passed character as if it was OCT digit |
| */ |
| static int oct2dec (byte c) |
| { |
| return c - '0'; |
| } |
| |
| static byte get_escape_sequence (const byte **text) |
| { |
| int value = 0; |
| |
| /* skip '\' character */ |
| (*text)++; |
| |
| switch (*(*text)++) |
| { |
| case '\'': |
| return '\''; |
| case '"': |
| return '\"'; |
| case '?': |
| return '\?'; |
| case '\\': |
| return '\\'; |
| case 'a': |
| return '\a'; |
| case 'b': |
| return '\b'; |
| case 'f': |
| return '\f'; |
| case 'n': |
| return '\n'; |
| case 'r': |
| return '\r'; |
| case 't': |
| return '\t'; |
| case 'v': |
| return '\v'; |
| case 'x': |
| return (byte) hex_convert (text); |
| } |
| |
| (*text)--; |
| if (is_oct (**text)) |
| { |
| value = oct2dec (*(*text)++); |
| if (is_oct (**text)) |
| { |
| value = value * 010 + oct2dec (*(*text)++); |
| if (is_oct (**text)) |
| value = value * 010 + oct2dec (*(*text)++); |
| } |
| } |
| |
| return (byte) value; |
| } |
| |
| /* |
| copies characters from *text to *str until " or ' character is encountered, |
| assumes that *str points to NULL object - caller is responsible for later freeing the string, |
| assumes that *text points to " or ' character that starts the string, |
| text pointer is advanced to point past the " or ' character, |
| returns 0 if string was successfully copied, |
| returns 1 otherwise |
| */ |
| static int get_string (const byte **text, byte **str) |
| { |
| const byte *t = *text; |
| byte *p = NULL; |
| unsigned int len = 0; |
| byte term_char; |
| |
| if (string_grow (&p, &len, '\0')) |
| return 1; |
| |
| /* read " or ' character that starts the string */ |
| term_char = *t++; |
| /* while next character is not the terminating character */ |
| while (*t && *t != term_char) |
| { |
| byte c; |
| |
| if (*t == '\\') |
| c = get_escape_sequence (&t); |
| else |
| c = *t++; |
| |
| if (string_grow (&p, &len, c)) |
| { |
| mem_free ((void **) (void *) &p); |
| return 1; |
| } |
| } |
| /* skip " or ' character that ends the string */ |
| t++; |
| |
| *text = t; |
| *str = p; |
| return 0; |
| } |
| |
| /* |
| gets emit code, the syntax is: |
| ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character> |
| assumes that *text already points to <symbol>, |
| returns 0 if emit code is successfully read, |
| returns 1 otherwise |
| */ |
| static int get_emtcode (const byte **text, map_byte **ma) |
| { |
| const byte *t = *text; |
| map_byte *m = NULL; |
| |
| map_byte_create (&m); |
| if (m == NULL) |
| return 1; |
| |
| if (get_identifier (&t, &m->key)) |
| { |
| map_byte_destroy (&m); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| if (*t == '\'') |
| { |
| byte *c; |
| |
| if (get_string (&t, &c)) |
| { |
| map_byte_destroy (&m); |
| return 1; |
| } |
| |
| m->data = (byte) c[0]; |
| mem_free ((void **) (void *) &c); |
| } |
| else if (t[0] == '0' && (t[1] == 'x' || t[1] == 'X')) |
| { |
| /* skip HEX "0x" or "0X" prefix */ |
| t += 2; |
| m->data = (byte) hex_convert (&t); |
| } |
| else |
| { |
| m->data = (byte) dec_convert (&t); |
| } |
| |
| eat_spaces (&t); |
| |
| *text = t; |
| *ma = m; |
| return 0; |
| } |
| |
| /* |
| gets regbyte declaration, the syntax is: |
| ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character> |
| assumes that *text already points to <symbol>, |
| returns 0 if regbyte is successfully read, |
| returns 1 otherwise |
| */ |
| static int get_regbyte (const byte **text, map_byte **ma) |
| { |
| /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */ |
| return get_emtcode (text, ma); |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise |
| */ |
| static int get_errtext (const byte **text, map_str **ma) |
| { |
| const byte *t = *text; |
| map_str *m = NULL; |
| |
| map_str_create (&m); |
| if (m == NULL) |
| return 1; |
| |
| if (get_identifier (&t, &m->key)) |
| { |
| map_str_destroy (&m); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| if (get_string (&t, &m->data)) |
| { |
| map_str_destroy (&m); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| *text = t; |
| *ma = m; |
| return 0; |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise, |
| */ |
| static int get_error (const byte **text, error **er, map_str *maps) |
| { |
| const byte *t = *text; |
| byte *temp = NULL; |
| |
| if (*t != '.') |
| return 0; |
| |
| t++; |
| if (get_identifier (&t, &temp)) |
| return 1; |
| eat_spaces (&t); |
| |
| if (!str_equal ((byte *) "error", temp)) |
| { |
| mem_free ((void **) (void *) &temp); |
| return 0; |
| } |
| |
| mem_free ((void **) (void *) &temp); |
| |
| error_create (er); |
| if (*er == NULL) |
| return 1; |
| |
| if (*t == '\"') |
| { |
| if (get_string (&t, &(**er).m_text)) |
| { |
| error_destroy (er); |
| return 1; |
| } |
| eat_spaces (&t); |
| } |
| else |
| { |
| if (get_identifier (&t, &temp)) |
| { |
| error_destroy (er); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| if (map_str_find (&maps, temp, &(**er).m_text)) |
| { |
| mem_free ((void **) (void *) &temp); |
| error_destroy (er); |
| return 1; |
| } |
| |
| mem_free ((void **) (void *) &temp); |
| } |
| |
| /* try to extract "token" from "...$token$..." */ |
| { |
| byte *processed = NULL; |
| unsigned int len = 0; |
| int i = 0; |
| |
| if (string_grow (&processed, &len, '\0')) |
| { |
| error_destroy (er); |
| return 1; |
| } |
| |
| while (i < str_length ((**er).m_text)) |
| { |
| /* check if the dollar sign is repeated - if so skip it */ |
| if ((**er).m_text[i] == '$' && (**er).m_text[i + 1] == '$') |
| { |
| if (string_grow (&processed, &len, '$')) |
| { |
| mem_free ((void **) (void *) &processed); |
| error_destroy (er); |
| return 1; |
| } |
| |
| i += 2; |
| } |
| else if ((**er).m_text[i] != '$') |
| { |
| if (string_grow (&processed, &len, (**er).m_text[i])) |
| { |
| mem_free ((void **) (void *) &processed); |
| error_destroy (er); |
| return 1; |
| } |
| |
| i++; |
| } |
| else |
| { |
| if (string_grow (&processed, &len, '$')) |
| { |
| mem_free ((void **) (void *) &processed); |
| error_destroy (er); |
| return 1; |
| } |
| |
| { |
| /* length of token being extracted */ |
| unsigned int tlen = 0; |
| |
| if (string_grow (&(**er).m_token_name, &tlen, '\0')) |
| { |
| mem_free ((void **) (void *) &processed); |
| error_destroy (er); |
| return 1; |
| } |
| |
| /* skip the dollar sign */ |
| i++; |
| |
| while ((**er).m_text[i] != '$') |
| { |
| if (string_grow (&(**er).m_token_name, &tlen, (**er).m_text[i])) |
| { |
| mem_free ((void **) (void *) &processed); |
| error_destroy (er); |
| return 1; |
| } |
| |
| i++; |
| } |
| |
| /* skip the dollar sign */ |
| i++; |
| } |
| } |
| } |
| |
| mem_free ((void **) &(**er).m_text); |
| (**er).m_text = processed; |
| } |
| |
| *text = t; |
| return 0; |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise, |
| */ |
| static int get_emits (const byte **text, emit **em, map_byte *mapb) |
| { |
| const byte *t = *text; |
| byte *temp = NULL; |
| emit *e = NULL; |
| emit_dest dest; |
| |
| if (*t != '.') |
| return 0; |
| |
| t++; |
| if (get_identifier (&t, &temp)) |
| return 1; |
| eat_spaces (&t); |
| |
| /* .emit */ |
| if (str_equal ((byte *) "emit", temp)) |
| dest = ed_output; |
| /* .load */ |
| else if (str_equal ((byte *) "load", temp)) |
| dest = ed_regbyte; |
| else |
| { |
| mem_free ((void **) (void *) &temp); |
| return 0; |
| } |
| |
| mem_free ((void **) (void *) &temp); |
| |
| emit_create (&e); |
| if (e == NULL) |
| return 1; |
| |
| e->m_emit_dest = dest; |
| |
| if (dest == ed_regbyte) |
| { |
| if (get_identifier (&t, &e->m_regname)) |
| { |
| emit_destroy (&e); |
| return 1; |
| } |
| eat_spaces (&t); |
| } |
| |
| /* 0xNN */ |
| if (*t == '0' && (t[1] == 'x' || t[1] == 'X')) |
| { |
| t += 2; |
| e->m_byte = (byte) hex_convert (&t); |
| |
| e->m_emit_type = et_byte; |
| } |
| /* NNN */ |
| else if (*t >= '0' && *t <= '9') |
| { |
| e->m_byte = (byte) dec_convert (&t); |
| |
| e->m_emit_type = et_byte; |
| } |
| /* * */ |
| else if (*t == '*') |
| { |
| t++; |
| |
| e->m_emit_type = et_stream; |
| } |
| /* $ */ |
| else if (*t == '$') |
| { |
| t++; |
| |
| e->m_emit_type = et_position; |
| } |
| /* 'c' */ |
| else if (*t == '\'') |
| { |
| if (get_string (&t, &temp)) |
| { |
| emit_destroy (&e); |
| return 1; |
| } |
| e->m_byte = (byte) temp[0]; |
| |
| mem_free ((void **) (void *) &temp); |
| |
| e->m_emit_type = et_byte; |
| } |
| else |
| { |
| if (get_identifier (&t, &temp)) |
| { |
| emit_destroy (&e); |
| return 1; |
| } |
| |
| if (map_byte_find (&mapb, temp, &e->m_byte)) |
| { |
| mem_free ((void **) (void *) &temp); |
| emit_destroy (&e); |
| return 1; |
| } |
| |
| mem_free ((void **) (void *) &temp); |
| |
| e->m_emit_type = et_byte; |
| } |
| |
| eat_spaces (&t); |
| |
| if (get_emits (&t, &e->m_next, mapb)) |
| { |
| emit_destroy (&e); |
| return 1; |
| } |
| |
| *text = t; |
| *em = e; |
| return 0; |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise, |
| */ |
| static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb) |
| { |
| const byte *t = *text; |
| spec *s = NULL; |
| |
| spec_create (&s); |
| if (s == NULL) |
| return 1; |
| |
| /* first - read optional .if statement */ |
| if (*t == '.') |
| { |
| const byte *u = t; |
| byte *keyword = NULL; |
| |
| /* skip the dot */ |
| u++; |
| |
| if (get_identifier (&u, &keyword)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| |
| /* .if */ |
| if (str_equal ((byte *) "if", keyword)) |
| { |
| cond_create (&s->m_cond); |
| if (s->m_cond == NULL) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| |
| /* skip the left paren */ |
| eat_spaces (&u); |
| u++; |
| |
| /* get the left operand */ |
| eat_spaces (&u); |
| if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| s->m_cond->m_operands[0].m_type = cot_regbyte; |
| |
| /* get the operator (!= or ==) */ |
| eat_spaces (&u); |
| if (*u == '!') |
| s->m_cond->m_type = ct_not_equal; |
| else |
| s->m_cond->m_type = ct_equal; |
| u += 2; |
| eat_spaces (&u); |
| |
| if (u[0] == '0' && (u[1] == 'x' || u[1] == 'X')) |
| { |
| /* skip the 0x prefix */ |
| u += 2; |
| |
| /* get the right operand */ |
| s->m_cond->m_operands[1].m_byte = hex_convert (&u); |
| s->m_cond->m_operands[1].m_type = cot_byte; |
| } |
| else /*if (*u >= '0' && *u <= '9')*/ |
| { |
| /* get the right operand */ |
| s->m_cond->m_operands[1].m_byte = dec_convert (&u); |
| s->m_cond->m_operands[1].m_type = cot_byte; |
| } |
| |
| /* skip the right paren */ |
| eat_spaces (&u); |
| u++; |
| |
| eat_spaces (&u); |
| |
| t = u; |
| } |
| |
| mem_free ((void **) (void *) &keyword); |
| } |
| |
| if (*t == '\'') |
| { |
| byte *temp = NULL; |
| |
| if (get_string (&t, &temp)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| if (*t == '-') |
| { |
| byte *temp2 = NULL; |
| |
| /* skip the '-' character */ |
| t++; |
| eat_spaces (&t); |
| |
| if (get_string (&t, &temp2)) |
| { |
| mem_free ((void **) (void *) &temp); |
| spec_destroy (&s); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| s->m_spec_type = st_byte_range; |
| s->m_byte[0] = *temp; |
| s->m_byte[1] = *temp2; |
| |
| mem_free ((void **) (void *) &temp2); |
| } |
| else |
| { |
| s->m_spec_type = st_byte; |
| *s->m_byte = *temp; |
| } |
| |
| mem_free ((void **) (void *) &temp); |
| } |
| else if (*t == '"') |
| { |
| if (get_string (&t, &s->m_string)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| s->m_spec_type = st_string; |
| } |
| else if (*t == '.') |
| { |
| byte *keyword = NULL; |
| |
| /* skip the dot */ |
| t++; |
| |
| if (get_identifier (&t, &keyword)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| /* .true */ |
| if (str_equal ((byte *) "true", keyword)) |
| { |
| s->m_spec_type = st_true; |
| } |
| /* .false */ |
| else if (str_equal ((byte *) "false", keyword)) |
| { |
| s->m_spec_type = st_false; |
| } |
| /* .debug */ |
| else if (str_equal ((byte *) "debug", keyword)) |
| { |
| s->m_spec_type = st_debug; |
| } |
| /* .loop */ |
| else if (str_equal ((byte *) "loop", keyword)) |
| { |
| if (get_identifier (&t, &s->m_string)) |
| { |
| mem_free ((void **) (void *) &keyword); |
| spec_destroy (&s); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| s->m_spec_type = st_identifier_loop; |
| } |
| mem_free ((void **) (void *) &keyword); |
| } |
| else |
| { |
| if (get_identifier (&t, &s->m_string)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| s->m_spec_type = st_identifier; |
| } |
| |
| if (get_error (&t, &s->m_errtext, maps)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| |
| if (get_emits (&t, &s->m_emits, mapb)) |
| { |
| spec_destroy (&s); |
| return 1; |
| } |
| |
| *text = t; |
| *sp = s; |
| return 0; |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise, |
| */ |
| static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb) |
| { |
| const byte *t = *text; |
| rule *r = NULL; |
| |
| rule_create (&r); |
| if (r == NULL) |
| return 1; |
| |
| if (get_spec (&t, &r->m_specs, maps, mapb)) |
| { |
| rule_destroy (&r); |
| return 1; |
| } |
| |
| while (*t != ';') |
| { |
| byte *op = NULL; |
| spec *sp = NULL; |
| |
| /* skip the dot that precedes "and" or "or" */ |
| t++; |
| |
| /* read "and" or "or" keyword */ |
| if (get_identifier (&t, &op)) |
| { |
| rule_destroy (&r); |
| return 1; |
| } |
| eat_spaces (&t); |
| |
| if (r->m_oper == op_none) |
| { |
| /* .and */ |
| if (str_equal ((byte *) "and", op)) |
| r->m_oper = op_and; |
| /* .or */ |
| else |
| r->m_oper = op_or; |
| } |
| |
| mem_free ((void **) (void *) &op); |
| |
| if (get_spec (&t, &sp, maps, mapb)) |
| { |
| rule_destroy (&r); |
| return 1; |
| } |
| |
| spec_append (&r->m_specs, sp); |
| } |
| |
| /* skip the semicolon */ |
| t++; |
| eat_spaces (&t); |
| |
| *text = t; |
| *ru = r; |
| return 0; |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise, |
| */ |
| static int update_dependency (map_rule *mapr, byte *symbol, rule **ru) |
| { |
| if (map_rule_find (&mapr, symbol, ru)) |
| return 1; |
| |
| (**ru).m_referenced = 1; |
| |
| return 0; |
| } |
| |
| /* |
| returns 0 on success, |
| returns 1 otherwise, |
| */ |
| static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol, |
| byte **string_symbol, map_byte *regbytes) |
| { |
| rule *rulez = di->m_rulez; |
| |
| /* update dependecies for the root and lexer symbols */ |
| if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) || |
| (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string))) |
| return 1; |
| |
| mem_free ((void **) syntax_symbol); |
| mem_free ((void **) string_symbol); |
| |
| /* update dependecies for the rest of the rules */ |
| while (rulez) |
| { |
| spec *sp = rulez->m_specs; |
| |
| /* iterate through all the specifiers */ |
| while (sp) |
| { |
| /* update dependency for identifier */ |
| if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop) |
| { |
| if (update_dependency (mapr, sp->m_string, &sp->m_rule)) |
| return 1; |
| |
| mem_free ((void **) &sp->m_string); |
| } |
| |
| /* some errtexts reference to a rule */ |
| if (sp->m_errtext && sp->m_errtext->m_token_name) |
| { |
| if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token)) |
| return 1; |
| |
| mem_free ((void **) &sp->m_errtext->m_token_name); |
| } |
| |
| /* update dependency for condition */ |
| if (sp->m_cond) |
| { |
| int i; |
| for (i = 0; i < 2; i++) |
| if (sp->m_cond->m_operands[i].m_type == cot_regbyte) |
| { |
| sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (®bytes, |
| sp->m_cond->m_operands[i].m_regname); |
| |
| if (sp->m_cond->m_operands[i].m_regbyte == NULL) |
| return 1; |
| |
| mem_free ((void **) &sp->m_cond->m_operands[i].m_regname); |
| } |
| } |
| |
| /* update dependency for all .load instructions */ |
| if (sp->m_emits) |
| { |
| emit *em = sp->m_emits; |
| while (em != NULL) |
| { |
| if (em->m_emit_dest == ed_regbyte) |
| { |
| em->m_regbyte = map_byte_locate (®bytes, em->m_regname); |
| |
| if (em->m_regbyte == NULL) |
| return 1; |
| |
| mem_free ((void **) &em->m_regname); |
| } |
| |
| em = em->m_next; |
| } |
| } |
| |
| sp = sp->next; |
| } |
| |
| rulez = rulez->next; |
| } |
| |
| /* check for unreferenced symbols */ |
| rulez = di->m_rulez; |
| while (rulez != NULL) |
| { |
| if (!rulez->m_referenced) |
| { |
| map_rule *ma = mapr; |
| while (ma) |
| { |
| if (ma->data == rulez) |
| { |
| set_last_error (UNREFERENCED_IDENTIFIER, str_duplicate (ma->key), -1); |
| return 1; |
| } |
| ma = ma->next; |
| } |
| } |
| rulez = rulez->next; |
| } |
| |
| return 0; |
| } |
| |
| static int satisfies_condition (cond *co, regbyte_ctx *ctx) |
| { |
| byte values[2]; |
| int i; |
| |
| if (co == NULL) |
| return 1; |
| |
| for (i = 0; i < 2; i++) |
| switch (co->m_operands[i].m_type) |
| { |
| case cot_byte: |
| values[i] = co->m_operands[i].m_byte; |
| break; |
| case cot_regbyte: |
| values[i] = regbyte_ctx_extract (&ctx, co->m_operands[i].m_regbyte); |
| break; |
| } |
| |
| switch (co->m_type) |
| { |
| case ct_equal: |
| return values[0] == values[1]; |
| case ct_not_equal: |
| return values[0] != values[1]; |
| } |
| |
| return 0; |
| } |
| |
| static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit) |
| { |
| while (top != limit) |
| { |
| regbyte_ctx *rbc = top->m_prev; |
| regbyte_ctx_destroy (&top); |
| top = rbc; |
| } |
| } |
| |
| typedef enum match_result_ |
| { |
| mr_not_matched, /* the examined string does not match */ |
| mr_matched, /* the examined string matches */ |
| mr_error_raised, /* mr_not_matched + error has been raised */ |
| mr_dont_emit, /* used by identifier loops only */ |
| mr_internal_error /* an internal error has occured such as out of memory */ |
| } match_result; |
| |
| /* |
| * This function does the main job. It parses the text and generates output data. |
| */ |
| static match_result |
| match (dict *di, const byte *text, int *index, rule *ru, barray **ba, int filtering_string, |
| regbyte_ctx **rbc) |
| { |
| int ind = *index; |
| match_result status = mr_not_matched; |
| spec *sp = ru->m_specs; |
| regbyte_ctx *ctx = *rbc; |
| |
| /* for every specifier in the rule */ |
| while (sp) |
| { |
| int i, len, save_ind = ind; |
| barray *array = NULL; |
| |
| if (satisfies_condition (sp->m_cond, ctx)) |
| { |
| switch (sp->m_spec_type) |
| { |
| case st_identifier: |
| barray_create (&array); |
| if (array == NULL) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| |
| status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx); |
| |
| if (status == mr_internal_error) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| return mr_internal_error; |
| } |
| break; |
| case st_string: |
| len = str_length (sp->m_string); |
| |
| /* prefilter the stream */ |
| if (!filtering_string && di->m_string) |
| { |
| barray *ba; |
| int filter_index = 0; |
| match_result result; |
| regbyte_ctx *null_ctx = NULL; |
| |
| barray_create (&ba); |
| if (ba == NULL) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| |
| result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx); |
| |
| if (result == mr_internal_error) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&ba); |
| return mr_internal_error; |
| } |
| |
| if (result != mr_matched) |
| { |
| barray_destroy (&ba); |
| status = mr_not_matched; |
| break; |
| } |
| |
| barray_destroy (&ba); |
| |
| if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len)) |
| { |
| status = mr_not_matched; |
| break; |
| } |
| |
| status = mr_matched; |
| ind += len; |
| } |
| else |
| { |
| status = mr_matched; |
| for (i = 0; status == mr_matched && i < len; i++) |
| if (text[ind + i] != sp->m_string[i]) |
| status = mr_not_matched; |
| |
| if (status == mr_matched) |
| ind += len; |
| } |
| break; |
| case st_byte: |
| status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched; |
| if (status == mr_matched) |
| ind++; |
| break; |
| case st_byte_range: |
| status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ? |
| mr_matched : mr_not_matched; |
| if (status == mr_matched) |
| ind++; |
| break; |
| case st_true: |
| status = mr_matched; |
| break; |
| case st_false: |
| status = mr_not_matched; |
| break; |
| case st_debug: |
| status = ru->m_oper == op_and ? mr_matched : mr_not_matched; |
| break; |
| case st_identifier_loop: |
| barray_create (&array); |
| if (array == NULL) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| |
| status = mr_dont_emit; |
| for (;;) |
| { |
| match_result result; |
| |
| save_ind = ind; |
| result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx); |
| |
| if (result == mr_error_raised) |
| { |
| status = result; |
| break; |
| } |
| else if (result == mr_matched) |
| { |
| if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) || |
| barray_append (ba, &array)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| return mr_internal_error; |
| } |
| barray_destroy (&array); |
| barray_create (&array); |
| if (array == NULL) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| } |
| else if (result == mr_internal_error) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| return mr_internal_error; |
| } |
| else |
| break; |
| } |
| break; |
| } |
| } |
| else |
| { |
| status = mr_not_matched; |
| } |
| |
| if (status == mr_error_raised) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| |
| return mr_error_raised; |
| } |
| |
| if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| |
| if (sp->m_errtext) |
| { |
| set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text, |
| ind), ind); |
| |
| return mr_error_raised; |
| } |
| |
| return mr_not_matched; |
| } |
| |
| if (status == mr_matched) |
| { |
| if (sp->m_emits) |
| if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| return mr_internal_error; |
| } |
| |
| if (array) |
| if (barray_append (ba, &array)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| barray_destroy (&array); |
| return mr_internal_error; |
| } |
| } |
| |
| barray_destroy (&array); |
| |
| /* if the rule operator is a logical or, we pick up the first matching specifier */ |
| if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit)) |
| { |
| *index = ind; |
| *rbc = ctx; |
| return mr_matched; |
| } |
| |
| sp = sp->next; |
| } |
| |
| /* everything went fine - all specifiers match up */ |
| if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit)) |
| { |
| *index = ind; |
| *rbc = ctx; |
| return mr_matched; |
| } |
| |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_not_matched; |
| } |
| |
| static match_result |
| fast_match (dict *di, const byte *text, int *index, rule *ru, int *_PP, bytepool *_BP, |
| int filtering_string, regbyte_ctx **rbc) |
| { |
| int ind = *index; |
| int _P = filtering_string ? 0 : *_PP; |
| int _P2; |
| match_result status = mr_not_matched; |
| spec *sp = ru->m_specs; |
| regbyte_ctx *ctx = *rbc; |
| |
| /* for every specifier in the rule */ |
| while (sp) |
| { |
| int i, len, save_ind = ind; |
| |
| _P2 = _P + (sp->m_emits ? emit_size (sp->m_emits) : 0); |
| if (bytepool_reserve (_BP, _P2)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| |
| if (satisfies_condition (sp->m_cond, ctx)) |
| { |
| switch (sp->m_spec_type) |
| { |
| case st_identifier: |
| status = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx); |
| |
| if (status == mr_internal_error) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| break; |
| case st_string: |
| len = str_length (sp->m_string); |
| |
| /* prefilter the stream */ |
| if (!filtering_string && di->m_string) |
| { |
| int filter_index = 0; |
| match_result result; |
| regbyte_ctx *null_ctx = NULL; |
| |
| result = fast_match (di, text + ind, &filter_index, di->m_string, NULL, _BP, 1, &null_ctx); |
| |
| if (result == mr_internal_error) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| |
| if (result != mr_matched) |
| { |
| status = mr_not_matched; |
| break; |
| } |
| |
| if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len)) |
| { |
| status = mr_not_matched; |
| break; |
| } |
| |
| status = mr_matched; |
| ind += len; |
| } |
| else |
| { |
| status = mr_matched; |
| for (i = 0; status == mr_matched && i < len; i++) |
| if (text[ind + i] != sp->m_string[i]) |
| status = mr_not_matched; |
| |
| if (status == mr_matched) |
| ind += len; |
| } |
| break; |
| case st_byte: |
| status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched; |
| if (status == mr_matched) |
| ind++; |
| break; |
| case st_byte_range: |
| status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ? |
| mr_matched : mr_not_matched; |
| if (status == mr_matched) |
| ind++; |
| break; |
| case st_true: |
| status = mr_matched; |
| break; |
| case st_false: |
| status = mr_not_matched; |
| break; |
| case st_debug: |
| status = ru->m_oper == op_and ? mr_matched : mr_not_matched; |
| break; |
| case st_identifier_loop: |
| status = mr_dont_emit; |
| for (;;) |
| { |
| match_result result; |
| |
| save_ind = ind; |
| result = fast_match (di, text, &ind, sp->m_rule, &_P2, _BP, filtering_string, &ctx); |
| |
| if (result == mr_error_raised) |
| { |
| status = result; |
| break; |
| } |
| else if (result == mr_matched) |
| { |
| if (!filtering_string) |
| { |
| if (sp->m_emits != NULL) |
| { |
| if (emit_push (sp->m_emits, _BP->_F + _P, text[ind - 1], save_ind, &ctx)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| } |
| |
| _P = _P2; |
| _P2 += sp->m_emits ? emit_size (sp->m_emits) : 0; |
| if (bytepool_reserve (_BP, _P2)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| } |
| } |
| else if (result == mr_internal_error) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| else |
| break; |
| } |
| break; |
| } |
| } |
| else |
| { |
| status = mr_not_matched; |
| } |
| |
| if (status == mr_error_raised) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| |
| return mr_error_raised; |
| } |
| |
| if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| |
| if (sp->m_errtext) |
| { |
| set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text, |
| ind), ind); |
| |
| return mr_error_raised; |
| } |
| |
| return mr_not_matched; |
| } |
| |
| if (status == mr_matched) |
| { |
| if (sp->m_emits != NULL) { |
| const byte ch = (ind <= 0) ? 0 : text[ind - 1]; |
| if (emit_push (sp->m_emits, _BP->_F + _P, ch, save_ind, &ctx)) |
| { |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_internal_error; |
| } |
| |
| } |
| _P = _P2; |
| } |
| |
| /* if the rule operator is a logical or, we pick up the first matching specifier */ |
| if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit)) |
| { |
| *index = ind; |
| *rbc = ctx; |
| if (!filtering_string) |
| *_PP = _P; |
| return mr_matched; |
| } |
| |
| sp = sp->next; |
| } |
| |
| /* everything went fine - all specifiers match up */ |
| if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit)) |
| { |
| *index = ind; |
| *rbc = ctx; |
| if (!filtering_string) |
| *_PP = _P; |
| return mr_matched; |
| } |
| |
| free_regbyte_ctx_stack (ctx, *rbc); |
| return mr_not_matched; |
| } |
| |
| static byte * |
| error_get_token (error *er, dict *di, const byte *text, int ind) |
| { |
| byte *str = NULL; |
| |
| if (er->m_token) |
| { |
| barray *ba; |
| int filter_index = 0; |
| regbyte_ctx *ctx = NULL; |
| |
| barray_create (&ba); |
| if (ba != NULL) |
| { |
| if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched && |
| filter_index) |
| { |
| str = (byte *) mem_alloc (filter_index + 1); |
| if (str != NULL) |
| { |
| str_copy_n (str, text + ind, filter_index); |
| str[filter_index] = '\0'; |
| } |
| } |
| barray_destroy (&ba); |
| } |
| } |
| |
| return str; |
| } |
| |
| typedef struct grammar_load_state_ |
| { |
| dict *di; |
| byte *syntax_symbol; |
| byte *string_symbol; |
| map_str *maps; |
| map_byte *mapb; |
| map_rule *mapr; |
| } grammar_load_state; |
| |
| static void grammar_load_state_create (grammar_load_state **gr) |
| { |
| *gr = (grammar_load_state *) mem_alloc (sizeof (grammar_load_state)); |
| if (*gr) |
| { |
| (**gr).di = NULL; |
| (**gr).syntax_symbol = NULL; |
| (**gr).string_symbol = NULL; |
| (**gr).maps = NULL; |
| (**gr).mapb = NULL; |
| (**gr).mapr = NULL; |
| } |
| } |
| |
| static void grammar_load_state_destroy (grammar_load_state **gr) |
| { |
| if (*gr) |
| { |
| dict_destroy (&(**gr).di); |
| mem_free ((void **) &(**gr).syntax_symbol); |
| mem_free ((void **) &(**gr).string_symbol); |
| map_str_destroy (&(**gr).maps); |
| map_byte_destroy (&(**gr).mapb); |
| map_rule_destroy (&(**gr).mapr); |
| mem_free ((void **) gr); |
| } |
| } |
| |
| |
| static void error_msg(int line, const char *msg) |
| { |
| fprintf(stderr, "Error in grammar_load_from_text() at line %d: %s\n", line, msg); |
| } |
| |
| |
| /* |
| the API |
| */ |
| grammar grammar_load_from_text (const byte *text) |
| { |
| grammar_load_state *g = NULL; |
| grammar id = 0; |
| |
| clear_last_error (); |
| |
| grammar_load_state_create (&g); |
| if (g == NULL) { |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| dict_create (&g->di); |
| if (g->di == NULL) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| eat_spaces (&text); |
| |
| /* skip ".syntax" keyword */ |
| text += 7; |
| eat_spaces (&text); |
| |
| /* retrieve root symbol */ |
| if (get_identifier (&text, &g->syntax_symbol)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| eat_spaces (&text); |
| |
| /* skip semicolon */ |
| text++; |
| eat_spaces (&text); |
| |
| while (*text) |
| { |
| byte *symbol = NULL; |
| int is_dot = *text == '.'; |
| |
| if (is_dot) |
| text++; |
| |
| if (get_identifier (&text, &symbol)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| eat_spaces (&text); |
| |
| /* .emtcode */ |
| if (is_dot && str_equal (symbol, (byte *) "emtcode")) |
| { |
| map_byte *ma = NULL; |
| |
| mem_free ((void **) (void *) &symbol); |
| |
| if (get_emtcode (&text, &ma)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| map_byte_append (&g->mapb, ma); |
| } |
| /* .regbyte */ |
| else if (is_dot && str_equal (symbol, (byte *) "regbyte")) |
| { |
| map_byte *ma = NULL; |
| |
| mem_free ((void **) (void *) &symbol); |
| |
| if (get_regbyte (&text, &ma)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| map_byte_append (&g->di->m_regbytes, ma); |
| } |
| /* .errtext */ |
| else if (is_dot && str_equal (symbol, (byte *) "errtext")) |
| { |
| map_str *ma = NULL; |
| |
| mem_free ((void **) (void *) &symbol); |
| |
| if (get_errtext (&text, &ma)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| map_str_append (&g->maps, ma); |
| } |
| /* .string */ |
| else if (is_dot && str_equal (symbol, (byte *) "string")) |
| { |
| mem_free ((void **) (void *) &symbol); |
| |
| if (g->di->m_string != NULL) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| if (get_identifier (&text, &g->string_symbol)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| /* skip semicolon */ |
| eat_spaces (&text); |
| text++; |
| eat_spaces (&text); |
| } |
| else |
| { |
| rule *ru = NULL; |
| map_rule *ma = NULL; |
| |
| if (get_rule (&text, &ru, g->maps, g->mapb)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| rule_append (&g->di->m_rulez, ru); |
| |
| /* if a rule consist of only one specifier, give it an ".and" operator */ |
| if (ru->m_oper == op_none) |
| ru->m_oper = op_and; |
| |
| map_rule_create (&ma); |
| if (ma == NULL) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, ""); |
| return 0; |
| } |
| |
| ma->key = symbol; |
| ma->data = ru; |
| map_rule_append (&g->mapr, ma); |
| } |
| } |
| |
| if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol, |
| g->di->m_regbytes)) |
| { |
| grammar_load_state_destroy (&g); |
| error_msg(__LINE__, "update_dependencies() failed"); |
| return 0; |
| } |
| |
| dict_append (&g_dicts, g->di); |
| id = g->di->m_id; |
| g->di = NULL; |
| |
| grammar_load_state_destroy (&g); |
| |
| return id; |
| } |
| |
| int grammar_set_reg8 (grammar id, const byte *name, byte value) |
| { |
| dict *di = NULL; |
| map_byte *reg = NULL; |
| |
| clear_last_error (); |
| |
| dict_find (&g_dicts, id, &di); |
| if (di == NULL) |
| { |
| set_last_error (INVALID_GRAMMAR_ID, NULL, -1); |
| return 0; |
| } |
| |
| reg = map_byte_locate (&di->m_regbytes, name); |
| if (reg == NULL) |
| { |
| set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1); |
| return 0; |
| } |
| |
| reg->data = value; |
| return 1; |
| } |
| |
| /* |
| internal checking function used by both grammar_check and grammar_fast_check functions |
| */ |
| static int _grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size, |
| unsigned int estimate_prod_size, int use_fast_path) |
| { |
| dict *di = NULL; |
| int index = 0; |
| |
| clear_last_error (); |
| |
| dict_find (&g_dicts, id, &di); |
| if (di == NULL) |
| { |
| set_last_error (INVALID_GRAMMAR_ID, NULL, -1); |
| return 0; |
| } |
| |
| *prod = NULL; |
| *size = 0; |
| |
| if (use_fast_path) |
| { |
| regbyte_ctx *rbc = NULL; |
| bytepool *bp = NULL; |
| int _P = 0; |
| |
| bytepool_create (&bp, estimate_prod_size); |
| if (bp == NULL) |
| return 0; |
| |
| if (fast_match (di, text, &index, di->m_syntax, &_P, bp, 0, &rbc) != mr_matched) |
| { |
| bytepool_destroy (&bp); |
| free_regbyte_ctx_stack (rbc, NULL); |
| return 0; |
| } |
| |
| free_regbyte_ctx_stack (rbc, NULL); |
| |
| *prod = bp->_F; |
| *size = _P; |
| bp->_F = NULL; |
| bytepool_destroy (&bp); |
| } |
| else |
| { |
| regbyte_ctx *rbc = NULL; |
| barray *ba = NULL; |
| |
| barray_create (&ba); |
| if (ba == NULL) |
| return 0; |
| |
| if (match (di, text, &index, di->m_syntax, &ba, 0, &rbc) != mr_matched) |
| { |
| barray_destroy (&ba); |
| free_regbyte_ctx_stack (rbc, NULL); |
| return 0; |
| } |
| |
| free_regbyte_ctx_stack (rbc, NULL); |
| |
| *prod = (byte *) mem_alloc (ba->len * sizeof (byte)); |
| if (*prod == NULL) |
| { |
| barray_destroy (&ba); |
| return 0; |
| } |
| |
| mem_copy (*prod, ba->data, ba->len * sizeof (byte)); |
| *size = ba->len; |
| barray_destroy (&ba); |
| } |
| |
| return 1; |
| } |
| |
| int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size) |
| { |
| return _grammar_check (id, text, prod, size, 0, 0); |
| } |
| |
| int grammar_fast_check (grammar id, const byte *text, byte **prod, unsigned int *size, |
| unsigned int estimate_prod_size) |
| { |
| return _grammar_check (id, text, prod, size, estimate_prod_size, 1); |
| } |
| |
| int grammar_destroy (grammar id) |
| { |
| dict **di = &g_dicts; |
| |
| clear_last_error (); |
| |
| while (*di != NULL) |
| { |
| if ((**di).m_id == id) |
| { |
| dict *tmp = *di; |
| *di = (**di).next; |
| dict_destroy (&tmp); |
| return 1; |
| } |
| |
| di = &(**di).next; |
| } |
| |
| set_last_error (INVALID_GRAMMAR_ID, NULL, -1); |
| return 0; |
| } |
| |
| static void append_character (const char x, byte *text, int *dots_made, int *len, int size) |
| { |
| if (*dots_made == 0) |
| { |
| if (*len < size - 1) |
| { |
| text[(*len)++] = x; |
| text[*len] = '\0'; |
| } |
| else |
| { |
| int i; |
| for (i = 0; i < 3; i++) |
| if (--(*len) >= 0) |
| text[*len] = '.'; |
| *dots_made = 1; |
| } |
| } |
| } |
| |
| void grammar_get_last_error (byte *text, unsigned int size, int *pos) |
| { |
| int len = 0, dots_made = 0; |
| const byte *p = error_message; |
| |
| *text = '\0'; |
| |
| if (p) |
| { |
| while (*p) |
| { |
| if (*p == '$') |
| { |
| const byte *r = error_param; |
| |
| while (*r) |
| { |
| append_character (*r++, text, &dots_made, &len, (int) size); |
| } |
| |
| p++; |
| } |
| else |
| { |
| append_character (*p++, text, &dots_made, &len, size); |
| } |
| } |
| } |
| |
| *pos = error_position; |
| } |