java parser generator
home
features
documentation
grammars
downloads
site originally designed by ninth ave
/****************************************************************
 * Copyright (c) 2000, Fidel Viegas (viegasfh@hotmail.com).	*
 * All rights reserved.						*
 *								*
 * Please see the LICENSE file for license information.		*
 ****************************************************************/

/****************************************************************
 * This is a grammar for a small subset of Pascal.              *
 * It was implemented as an illustration of how to write a      *
 * compiler in SableCC and Java. 				*	
 ****************************************************************/
 
Package org.sablecc.pascal; // package name

Helpers
  /**
   * Pascal is a case-insensitive language, so every letter that occurs
   * in a reserved word gets a helper matching both of its cases.
   * Instead of writing, for instance,
   *     end = ('e' | 'E') ('n' | 'N') ('d' | 'D')
   * we may write
   *     end = e n d
   * which takes less space and makes the regular expression more
   * readable.
   */
  a = 'a' | 'A' ; // this could also be written as ['a' + 'A']
  b = 'b' | 'B' ; // but I prefer the old lex style
  d = 'd' | 'D' ;
  e = 'e' | 'E' ;
  // Both cases are required here: begin, program and integer all
  // contain this letter, so without 'G' their upper-case spellings
  // would not be recognised as reserved words.
  g = 'g' | 'G' ;
  i = 'i' | 'I' ;
  l = 'l' | 'L' ;
  m = 'm' | 'M' ;
  n = 'n' | 'N' ;
  o = 'o' | 'O' ;
  p = 'p' | 'P' ;
  r = 'r' | 'R' ;
  t = 't' | 'T' ;
  v = 'v' | 'V' ;
  w = 'w' | 'W' ;

  // comment delimiters
  l_curly_bracket = '{' ;
  r_curly_bracket = '}' ;
  // Character codes 32 through 127. Tab (9), lf (10) and cr (13) fall
  // outside this range, so the comment token, which is built from
  // ascii_char, cannot contain tabs or line breaks.
  ascii_char = [32 .. 127] ;

  // letters and digits
  letter = [['a' .. 'z'] + ['A' .. 'Z']];
  digit = ['0' .. '9'] ;

  // un-printable characters (given by character code) and the space
  tab = 9 ;
  cr = 13 ;
  lf = 10 ;
  blank = ' ' ;

Tokens
  // Reserved words. Each one is spelled with the per-letter helpers, so
  // it matches in any mix of upper and lower case.
  // NOTE: these must stay above `identifier`. Every reserved word also
  // matches the identifier pattern, and SableCC resolves a tie between
  // two tokens of equal length in favour of the one declared first.
  end = e n d ;
  div = d i v ; // integer division
  var = v a r ;
  begin = b e g i n ;
  program = p r o g r a m ;
  writeln = w r i t e l n ;
  // I prefer to let the parser do the job
  // of tracking the standard type rather
  // than processing it in the semantic phase
  integer = i n t e g e r ; 

  // arithmetic symbols
  plus = '+' ;
  minus = '-' ;
  mult = '*' ;
  assignop = ':=' ;
    
  // separator symbols
  comma = ',' ;
  colon = ':' ;
  semicolon = ';' ;
  dot = '.' ;
  l_paren = '(' ;
  r_paren = ')' ;

  // identifiers: a letter followed by any number of letters or digits
  identifier = letter (letter | digit)* ;

  // numbers
  number = digit+ ; // integer numbers only

  // comments: '{' ... '}' with no curly bracket in between, so comments
  // do not nest. The body is limited to ascii_char.
  comment = l_curly_bracket [ascii_char - [l_curly_bracket + r_curly_bracket]]*
            r_curly_bracket ;

  // blanks: a single space, line terminator (cr lf, cr or lf) or tab
  blanks = blank | cr lf | cr | lf | tab ;

Ignored Tokens
  // Recognised by the lexer but never handed to the parser.
  comment, blanks ;

Productions

  // A complete program: heading, optional declarations, body and the
  // closing dot.
  program =
    program_heading declarations body dot ;

  // `program` names both a token and a production, so the token has to
  // be selected explicitly with the T. prefix.
  program_heading =
    T.program identifier semicolon ;

  // ---- declarations ----

  // The variable section is optional.
  declarations =
    variables_declaration? ;

  variables_declaration =
    var variables_definition_list ;

  // One or more definitions (left-recursive list).
  variables_definition_list =
      {single}   variables_definition
    | {multiple} variables_definition_list variables_definition ;

  // e.g.  a, b : integer ;
  variables_definition =
    identifier_list colon type semicolon ;

  // One or more identifiers separated by commas.
  identifier_list =
      {single}   identifier
    | {multiple} identifier_list comma identifier ;

  // integer is the only data type allowed
  type =
    integer ;

  // ---- body ----

  body =
    begin statement_sequence end ;

  // ---- statements ----

  // Statements are separated by semicolons.
  statement_sequence =
      {single}   statement
    | {multiple} statement_sequence semicolon statement ;

  // The last alternative is the empty statement.
  statement =
      {writeln}    writeln l_paren expression r_paren
    | {assignment} identifier assignop expression
    | {empty} ;

  // ---- expressions ----

  // Additive level; left-recursive.
  expression =
      {term}  term
    | {plus}  expression plus term
    | {minus} expression minus term ;

  // Multiplicative level; left-recursive.
  term =
      {factor} factor
    | {mult}   term mult factor
    | {div}    term div factor ;

  // Operands: a variable, an integer literal or a parenthesised
  // expression.
  factor =
      {identifier} identifier
    | {number}     number
    | {expression} l_paren expression r_paren ;

// end of grammar.

This site has been last updated by the web master on 2007/6/24.