/*
  Copyright (C) 1997-2000 Etienne M. Gagnon, M.Sc.. All rights reserved.

  This work is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published
  by the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This work is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this work in the file "COPYING-LESSER"; if not,
  write to the Free Software Foundation, Inc., 59 Temple Place,
  Suite 330, Boston, MA 02111-1307 USA

  If you have any question, send an electronic message to
  Etienne M. Gagnon, M.Sc. , or write to:

    Etienne M. Gagnon
    J-Meg inc.
    11348 Brunet
    Montreal-Nord (Quebec)
    H1G 5G1 Canada

  If you you want to submit a bug report, or a patch, please visit the
  SableCC Grammars web site:

    http://sablecc.sourceforge.net/

  If you want to discuss this grammar, please subscribe to the SableCC
  mailing list. You can find all the necessary information on the
  SableCC web site at:

    http://www.sable.mcgill.ca/sablecc/
*/

/* This grammar defines the SableCC 2.x input language. */

/* NOTE: this file uses end-of-line comments, so the line structure      *
 * matters: each such comment must end at its own line break, otherwise  *
 * it swallows the definitions that follow it.                           */

Package org.sablecc.sablecc2x; // Root Java package for generated files.

Helpers

/* These are character sets and regular expressions used in the
   definition of tokens. */

    /* Character classes. */
    all = [0 .. 0xFFFF];
    lowercase = ['a' .. 'z'];
    uppercase = ['A' .. 'Z'];
    digit = ['0' .. '9'];
    hex_digit = [digit + [['a' .. 'f'] + ['A' .. 'F']]];

    /* Whitespace characters, given by their decimal character codes. */
    tab = 9;
    cr = 13;
    lf = 10;
    eol = cr lf | cr | lf; // This takes care of different platforms

    /* Complement sets used to delimit comments, characters and strings. */
    not_cr_lf = [all - [cr + lf]];
    not_star = [all - '*'];
    not_star_slash = [not_star - '/'];

    blank = (' ' | tab | eol)+;

    /* A short comment runs to the end of the line; a long comment runs  *
     * up to the first star-slash pair.                                  */
    short_comment = '//' not_cr_lf* eol;
    long_comment =
        '/*' not_star* '*'+ (not_star_slash not_star* '*'+)* '/';
    comment = short_comment | long_comment;

    /* "letter" is used by package identifiers (pkg_id); "id_part" is    *
     * used by grammar identifiers (id), which are lowercase-only.       */
    letter = lowercase | uppercase | '_' | '$';
    id_part = lowercase (lowercase | digit)*;

States
    normal, /* The first state is the initial state. */
    package;

Tokens

/* These are token definitions. It is allowed to use helper regular  *
 * expressions in the body of a token definition.                    *
 * On a given input, the longest valid definition is chosen. In      *
 * case of a match, the definition that appears first is chosen.     *
 * Example: on input -> 's' <- "char" will have precedence on        *
 * "string", because it appears first.                               */

/* pkg_id is only listed for the "package" state. */
{package}
    pkg_id = letter (letter | digit)*;

/* The 'Package' keyword is listed for "normal" with a transition to    *
 * "package".                                                            */
{normal->package}
    package = 'Package';

    /* Section keywords. */
    states = 'States';
    helpers = 'Helpers';
    tokens = 'Tokens';
    ignored = 'Ignored';
    productions = 'Productions';

    /* Prefixes used as "T." and "P." by the "specifier" production. */
    token_specifier = 'T';
    production_specifier = 'P';

    dot = '.';
    d_dot = '..';

/* The semicolon is listed for both states; from "package" it           *
 * transitions back to "normal".                                         */
{normal, package->normal}
    semicolon = ';';

    /* Punctuation and operators. */
    equal = '=';
    l_bkt = '[';
    r_bkt = ']';
    l_par = '(';
    r_par = ')';
    l_brace = '{';
    r_brace = '}';
    plus = '+';
    minus = '-';
    q_mark = '?';
    star = '*';
    bar = '|';
    comma = ',';
    slash = '/';
    arrow = '->';
    colon = ':';

    id = id_part ('_' id_part)*;

    /* Character literals: quoted, decimal code, or hexadecimal code. */
    char = ''' not_cr_lf ''';
    dec_char = digit+;
    hex_char = '0' ('x' | 'X') hex_digit+;

    string = ''' [not_cr_lf - ''']+ ''';

    blank = blank;
    comment = comment;

Ignored Tokens

/* These tokens are simply ignored by the parser. */

    blank,
    comment;

Productions

/* These are the productions of the grammar. The first production is *
 * used by the implicit start production:                            *
 *   start = (first production) EOF;                                 *
 * ?, * and + have the same meaning as in a regular expression.      *
 * In case a token and a production share the same name, the use of  *
 * P. (for production) or T. (for token) is required.                *
 * Each alternative can be explicitly named by preceding it with a   *
 * name enclosed in braces.                                          *
 * Each alternative element can be explicitly named by preceding it  *
 * with a name enclosed in brackets and followed by a colon.         */

    /* Top level: every section of a specification is optional. */
    grammar =
        P.package? P.helpers? P.states? P.tokens? ign_tokens? P.productions?;

    /* Package section. */
    package =
        T.package pkg_name;

    pkg_name =
        pkg_id [pkg_ids]:pkg_name_tail* semicolon;

    pkg_name_tail =
        dot pkg_id;

    /* Helpers section. */
    helpers =
        T.helpers [helper_defs]:helper_def+;

    helper_def =
        id equal reg_exp semicolon;

    /* States section. */
    states =
        T.states id_list semicolon;

    id_list =
        id [ids]:id_list_tail*;

    id_list_tail =
        comma id;

    /* Tokens section. */
    tokens =
        T.tokens [token_defs]:token_def+;

    token_def =
        state_list? id equal reg_exp look_ahead? semicolon;

    state_list =
        l_brace id transition? [state_lists]:state_list_tail* r_brace;

    state_list_tail =
        comma id transition?;

    transition =
        arrow id;

    /* Ignored Tokens section. */
    ign_tokens =
        ignored T.tokens id_list? semicolon;

    look_ahead =
        slash reg_exp;

    /* Regular expressions: alternation of concatenations of unary      *
     * expressions.                                                      */
    reg_exp =
        concat [concats]:reg_exp_tail*;

    reg_exp_tail =
        bar concat;

    concat =
        [un_exps]:un_exp*;

    un_exp =
        basic un_op?;

    basic =
        {char} P.char |
        {set} set |
        {string} string |
        {id} id |
        {reg_exp} l_par reg_exp r_par;

    char =
        {char} T.char |
        {dec} dec_char |
        {hex} hex_char;

    set =
        {operation} l_bkt [left]:basic bin_op [right]:basic r_bkt |
        {interval} l_bkt [left]:P.char d_dot [right]:P.char r_bkt;

    un_op =
        {star} star |
        {q_mark} q_mark |
        {plus} plus;

    bin_op =
        {plus} plus |
        {minus} minus;

    /* Productions section. */
    productions =
        T.productions [prods]:prod+;

    prod =
        id equal alts semicolon;

    alts =
        alt [alts]:alts_tail*;

    alts_tail =
        bar alt;

    alt =
        {parsed} alt_name? [elems]:elem* |
        {ignored} l_par alt_name? [elems]:elem* r_par;

    alt_name =
        l_brace id r_brace;

    elem =
        elem_name? specifier? id un_op?;

    elem_name =
        l_bkt id r_bkt colon;

    specifier =
        {token} token_specifier dot |
        {production} production_specifier dot;
This site was last updated by the webmaster
on 2007/6/24.