/*
 * This file is part of the pep8analyser project.
 *
 * Copyright 2013 Alexis Laferrière
 *
 * Inspired from the Nit language grammar by:
 * Copyright 2008-2009 Jean Privat
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This grammar defines the Pep/8 language. */

/*****************************************************************************/
Helpers
/*****************************************************************************/

/* Character classes. Helpers are only building blocks for the tokens below. */
all = [0 .. 0xFF];
lowercase = ['a' .. 'z'];
uppercase = ['A' .. 'Z'];
digit = ['0' .. '9'];

/* NOTE: despite its name, this class also contains the digits and '_'. */
letter = lowercase | uppercase | digit | '_';

tab = 9;
cr = 13;
lf = 10;

/* Any character except the two line-break characters. */
any = [all - [cr + lf]];

/*
 * One element of a string body: either a plain character (anything on the
 * line other than a double quote or a backslash), or a backslash followed by
 * any character.
 * The backslash is removed from the plain set so that the two alternatives
 * are disjoint: a backslash always consumes the character that follows it.
 * Otherwise an escaped backslash right before the closing quote could be
 * re-read as "bare backslash + escaped quote" and the string would run on to
 * a later quote on the same line.
 */
str_char = [[any - '"'] - '\'] | '\' any;

/* cr lf, a lone cr, or a lone lf: takes care of different platforms. */
eol_helper = cr lf | cr | lf;

hex_digit = ['0' .. '9'] | ['A' .. 'F'] | ['a' .. 'f'];

/*****************************************************************************/
States
/*****************************************************************************/

initial;

/*****************************************************************************/
Tokens
/*****************************************************************************/

/*
 * NOTE(review): several tokens overlap (e.g. an all-digit lexeme matches both
 * number and id, "0x1F" matches both hex and id). The declaration order is
 * kept exactly as it was; it presumably decides such ties - confirm against
 * the SableCC lexer rules before reordering anything.
 */

blank = (' ' | tab)+;
eol = eol_helper;

/* Optionally negative decimal integer. */
number = '-'? digit+;

/* Digits are optional before the dot. No production below references float. */
float = digit* '.' digit+;

/*
 * Character literal, three accepted shapes:
 *   - a single character that is neither a quote nor a backslash,
 *   - a backslash followed by any single character,
 *   - a backslash, 'x' and exactly two hexadecimal digits.
 */
char = (''' [[any - '''] - '\'] ''')
     | (''' '\' any ''')
     | (''' '\' 'x' hex_digit hex_digit ''');

string = '"' str_char* '"';

hex = '0' ('x'|'X') hex_digit+;

colon = ':';
comma = ',';

/* A comment starts at ';' and runs to the end of the line (eol excluded). */
comment = ';' any*;

/* Dot directives; every letter is matched case-insensitively. */
tk_byte   = '.' ('B'|'b') ('Y'|'y') ('T'|'t') ('E'|'e');
tk_word   = '.' ('W'|'w') ('O'|'o') ('R'|'r') ('D'|'d');
tk_block  = '.' ('B'|'b') ('L'|'l') ('O'|'o') ('C'|'c') ('K'|'k');
tk_ascii  = '.' ('A'|'a') ('S'|'s') ('C'|'c') ('I'|'i') ('I'|'i');
tk_addrss = '.' ('A'|'a') ('D'|'d') ('D'|'d') ('R'|'r') ('S'|'s') ('S'|'s');
tk_equate = '.' ('E'|'e') ('Q'|'q') ('U'|'u') ('A'|'a') ('T'|'t') ('E'|'e');
tk_burn   = '.' ('B'|'b') ('U'|'u') ('R'|'r') ('N'|'n');

/*
 * ".END" (case-insensitive) together with everything that follows it,
 * line breaks included: the rest of the input becomes part of this token.
 */
end_block = '.' ('E'|'e') ('N'|'n') ('D'|'d') (any | eol_helper)*;

/* One or more 'letter' characters (which include digits and '_'). */
id = letter+;

/*****************************************************************************/
Ignored Tokens
/*****************************************************************************/

blank;

/*****************************************************************************/
Productions
/*****************************************************************************/

/* A listing is any number of lines closed by the end block, which may itself
 * carry a label. */
listing = [lines]:line* label_decl? end_block;

/* Every line ends with eol; label and comment are optional on all kinds. */
line
    = {empty} label_decl? comment? eol
    | {instruction} label_decl? instruction comment? eol
    | {directive} label_decl? directive comment? eol
    ;

label_decl = id colon;

/* The mnemonic is lexed as a plain id; it is not validated by the grammar. */
instruction
    = {unary} id
    | {binary} id operand
    ;

/* operands
 * We will manage which operands are possible with each stmt at a higher level.
 * This will allow better error messages and a cleaner model.
 */
operand
    = {immediate} value
    | {any} value comma id
    ;

value
    = {label} id
    | {number} number
    | {char} char
    | {string} string
    | {hex} hex
    ;

/* Each directive takes exactly one value; the kind of value is not
 * restricted here. */
directive
    = {byte} tk_byte value
    | {word} tk_word value
    | {block} tk_block value
    | {ascii} tk_ascii value
    | {addrss} tk_addrss value
    | {equate} tk_equate value
    | {burn} tk_burn value
    ;