/* A SableCC Grammar (org.sablecc.sablecc2x) */

/*

Copyright (C) 1997-2000 Etienne M. Gagnon, M.Sc. .
All rights reserved.

This work is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

This work is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this work in the file "COPYING-LESSER"; if not,
write to the Free Software Foundation, Inc., 59 Temple Place, Suite
330, Boston, MA 02111-1307  USA

If you have any question, send an electronic message to
Etienne M. Gagnon, M.Sc. , or write to:

Etienne M. Gagnon
J-Meg inc.
11348 Brunet
Montreal-Nord (Quebec)
H1G 5G1  Canada

If you want to submit a bug report, or a patch, please visit the
SableCC Grammars web site: 
  
  http://sablecc.sourceforge.net/

If you want to discuss this grammar, please subscribe to the
SableCC mailing list.  You can find all the necessary information
on the SableCC web site at: 
 
  http://www.sable.mcgill.ca/sablecc/

*/


/* This grammar defines the SableCC 2.x input language. */

// Package declaration. Because this file is written in the language it
// defines, this line is itself scanned by the rules below: the 'Package'
// keyword token carries the state list {normal->package}, the name parts are
// matched by pkg_id (declared for the `package` state), and the semicolon
// token carries {normal, package->normal}.
Package org.sablecc.sablecc2x; // Root Java package for generated files.

Helpers

/* These are character sets and regular expressions used in the definition of tokens. */
   
    // Every character code from 0 through 0xFFFF.
    all = [0 .. 0xFFFF];  
    lowercase = ['a' .. 'z'];
    uppercase = ['A' .. 'Z'];
    digit = ['0' .. '9'];
    // Decimal digits plus the letters a-f in either case.
    hex_digit = [digit + [['a' .. 'f'] + ['A' .. 'F']]];

    // Control characters, written as decimal character codes.
    tab = 9;
    cr = 13;
    lf = 10;
    eol = cr lf | cr | lf;        // This takes care of different platforms

    // Complement sets used by the comment and literal expressions.
    not_cr_lf = [all - [cr + lf]];
    not_star = [all - '*'];
    // Any character that is neither '*' nor '/'.
    not_star_slash = [not_star - '/'];

    // One or more spaces, tabs or line terminators.
    blank = (' ' | tab | eol)+;

    // A short comment runs from '//' up to and including a line terminator.
    // The trailing eol is not optional, so a '//' comment on the very last
    // line of a file matches only if that line ends with a line terminator.
    short_comment = '//' not_cr_lf* eol;
    // A long comment stops at the first run of '*' that is directly followed
    // by '/'; there is no provision for nesting.
    long_comment = '/*' not_star* '*'+ (not_star_slash not_star* '*'+)* '/';
    comment = short_comment | long_comment;

    // `letter` is used by the pkg_id token: letters of either case, '_' or '$'.
    letter = lowercase | uppercase | '_' | '$'; 
    // `id_part` is used by the id token: a lowercase letter followed by
    // lowercase letters and digits.
    id_part = lowercase (lowercase | digit)*;

// Lexer states. In the Tokens section, the 'Package' keyword token is declared
// with {normal->package} and the semicolon token with {normal, package->normal};
// pkg_id is declared for the `package` state only.
States
    normal, /* The first state is the initial state. */
    package;

Tokens

/* These are token definitions. It is allowed to use helper regular *
 * expressions in the body of a token definition.                   *
 * On a given input, the longest valid definition is chosen. In     *
 * case of a tie, the definition that appears first is chosen.      *
 * Example: on input -> 's' <- "char" will have precedence on       *
 * "string", because it appears first.                              */

// NOTE: a state list in braces is part of ONE token definition (see the
// token_def production: state_list? id equal reg_exp look_ahead? semicolon).
// The blank lines and indentation below do not extend a state list to the
// definitions that follow it.

// Package-name component, declared for the `package` state only. It is listed
// ahead of `id`, so by the first-definition rule it takes precedence wherever
// both apply to the same text.
{package}
    pkg_id = letter (letter | digit)*;

// Matching 'Package' in the `normal` state switches the lexer to `package`.
{normal->package}
    package = 'Package';

    // Section keywords. These definitions carry no state list of their own.
    // NOTE(review): SableCC is understood to recognize such tokens in every
    // state (the package name above needs `dot` while in `package`) -- confirm
    // against the tool documentation.
    states = 'States';
    helpers = 'Helpers';
    tokens = 'Tokens';
    ignored = 'Ignored';
    productions = 'Productions';

    // Prefix letters of the T. and P. specifiers (see the specifier production).
    token_specifier = 'T';
    production_specifier = 'P';

    // `dot` separates package-name parts and follows a T/P specifier; `d_dot`
    // is the interval operator inside a set.
    dot = '.';
    d_dot = '..';

// Declared for both states; when matched in `package`, the lexer goes back to
// `normal`. In `normal` there is no transition.
{normal, package->normal}
    semicolon = ';';

    // Punctuation and operators.
    equal = '=';
    l_bkt = '[';
    r_bkt = ']';
    l_par = '(';
    r_par = ')';
    l_brace =  '{';
    r_brace =  '}';
    plus = '+';
    minus = '-';
    q_mark = '?';
    star = '*';
    bar = '|';
    comma = ',';
    slash = '/';
    arrow = '->';
    colon = ':';

    // Identifier: id_part groups joined by single underscores. Uppercase
    // letters, leading/trailing underscores and doubled underscores do not match.
    id = id_part ('_' id_part)*;

    // Character literals: exactly one character between single quotes (the
    // quote character itself is allowed, as in '''), or a character code in
    // decimal or in hexadecimal with a 0x/0X prefix.
    char = ''' not_cr_lf ''';
    dec_char = digit+;
    hex_char = '0' ('x' | 'X') hex_digit+;

    // String literal: one or more characters, none of them a single quote or
    // a line terminator, between single quotes. A one-character string such as
    // 's' ties with `char`, which is listed first.
    string = ''' [not_cr_lf - ''']+ ''';

    // Tokens named after the helpers of the same names; both are listed in
    // the Ignored Tokens section.
    blank = blank;
    comment = comment;

// Syntax of this section (ign_tokens production): the 'Ignored' and 'Tokens'
// keywords, an optional comma-separated id list, then a semicolon.
Ignored Tokens 

/* These tokens are simply ignored by the parser. */

    blank,
    comment;

Productions

/* These are the productions of the grammar. The first production is *
 * used by the implicit start production:                            *
 *   start = (first production) EOF;                                 *
 * ?, * and + have the same meaning as in a regular expression.      *
 * In case a token and a production share the same name, the use of  *
 * P. (for production) or T. (for token) is required.                *
 * Each alternative can be explicitly named by preceding it with a   *
 * name enclosed in braces.                                          *
 * Each alternative element can be explicitly named by preceding it  *
 * with a name enclosed in brackets and followed by a colon.         */


    // Root production. Every section is optional, but the sections that are
    // present must appear in this order. The P. prefixes are needed because
    // package, helpers, states, tokens and productions are also token names.
    grammar =
        P.package? P.helpers? P.states? P.tokens? ign_tokens? P.productions?;

    // Package declaration: the 'Package' keyword followed by the package name.
    package = 
        T.package pkg_name;
        
    // Package name: one pkg_id, any number of ".pkg_id" tails, then the
    // terminating semicolon (which is part of this production).
    pkg_name =
        pkg_id [pkg_ids]:pkg_name_tail* semicolon;

    pkg_name_tail =
        dot pkg_id;

    // Helpers section: the 'Helpers' keyword and at least one definition.
    helpers =
        T.helpers [helper_defs]:helper_def+;

    // A helper definition: name '=' regular expression ';'.
    helper_def =
        id equal reg_exp semicolon;

    // States section: the 'States' keyword, a comma-separated list of state
    // names, and a semicolon.
    states =
        T.states id_list semicolon;

    // Non-empty comma-separated list of identifiers.
    id_list = 
        id [ids]:id_list_tail*;

    id_list_tail =
        comma id;

    // Tokens section: the 'Tokens' keyword and at least one definition.
    tokens =
        T.tokens [token_defs]:token_def+;

    // A token definition. The optional state list and the optional look-ahead
    // both belong to this single definition.
    token_def =
        state_list? id equal reg_exp look_ahead? semicolon;

    // State list: '{' state ['->' state] { ',' state ['->' state] } '}'.
    state_list =
        l_brace id transition? [state_lists]:state_list_tail* r_brace;

    state_list_tail =
        comma id transition?;

    // Transition: '->' followed by a state name.
    transition =
        arrow id;

    // Ignored-tokens section: 'Ignored' 'Tokens', an optional id list, ';'.
    // Since the list is optional, "Ignored Tokens ;" is syntactically valid.
    ign_tokens =
        ignored T.tokens id_list? semicolon;

    // Look-ahead part of a token definition: '/' followed by a regular expression.
    look_ahead =
        slash reg_exp;

    // Regular expression: one or more concatenations separated by '|'.
    reg_exp =
        concat [concats]:reg_exp_tail*;

    reg_exp_tail =
        bar concat;

    // Concatenation: zero or more unary expressions. Because zero is allowed,
    // this syntax accepts an empty regular expression or an empty branch
    // next to a '|'.
    concat =
        [un_exps]:un_exp*;

    // A basic expression with an optional postfix operator (*, ? or +).
    un_exp =
        basic un_op?;

    // Operand of a regular expression: a character, a set, a string literal,
    // an identifier, or a parenthesized regular expression.
    basic =
        {char}    P.char |
        {set}     set |
        {string}  string |
        {id}      id |
        {reg_exp} l_par reg_exp r_par;

    // A character: quoted literal, decimal code, or hexadecimal code.
    // P./T. prefixes are used because `char` names both this production and
    // a token.
    char = 
        {char} T.char | 
        {dec}  dec_char |
        {hex}  hex_char;

    // Bracketed set: either "[basic + basic]" / "[basic - basic]", or an
    // interval "[char .. char]".
    set =
        {operation} l_bkt [left]:basic  bin_op [right]:basic  r_bkt |
        {interval}  l_bkt [left]:P.char d_dot  [right]:P.char r_bkt;

    // Postfix repetition operators.
    un_op = 
        {star}   star |
        {q_mark} q_mark |
        {plus}   plus;

    // Set operators used inside brackets.
    bin_op =
        {plus}  plus |
        {minus} minus;

    // Productions section: the 'Productions' keyword and at least one production.
    productions =
        T.productions [prods]:prod+;

    // A production: name '=' alternatives ';'.
    prod =
        id equal alts semicolon;

    // One or more alternatives separated by '|'.
    alts =
        alt [alts]:alts_tail*;

    alts_tail =
        bar alt;

    // An alternative: an optional {name} followed by zero or more elements,
    // so an empty alternative is syntactically valid. The {ignored} form is
    // the same content wrapped in parentheses.
    // NOTE(review): how the tool treats a parenthesized alternative is not
    // visible from this grammar -- confirm before relying on it.
    alt =
        {parsed} alt_name? [elems]:elem* |
        {ignored} l_par alt_name? [elems]:elem* r_par;
        
    // Alternative name: an identifier enclosed in braces.
    alt_name =
        l_brace id r_brace;

    // An element: optional "[name]:", optional "T." or "P." specifier, the
    // referenced identifier, and an optional *, ? or + operator.
    elem =
        elem_name? specifier? id un_op?;

    // Element name: an identifier enclosed in brackets, followed by a colon.
    elem_name =
        l_bkt id r_bkt colon;

    // Specifier: 'T' or 'P' followed by a dot.
    specifier =
        {token}         token_specifier dot |
        {production} production_specifier dot;
/* This site was last updated by the webmaster on 2007/6/24. */