01f8d6eae7e45d4004f9ac23d549b1b5db8b4766
[nit.git] / src / parser / prescc.sh
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2009 Jean Privat <jean@pryen.org>
4 # Copyright 2009 Jean-Sebastien Gelinas <calestar@gmail.com>
5 #
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
9 #
10 # http://www.apache.org/licenses/LICENSE-2.0
11 #
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
17
18 # prescc, a Sablecc preprocessor.
19 #
20 # Synopsis
21 #
22 # Extends a sablecc grammar with parametrized productions.
23 #
24 # Description
25 #
26 # A production named foo~bar~baz semantically corresponds to a production foo with two boolean parameters bar and baz
27 # In fact foo is a family of 4 distinct productions: foo, foo_bar, foo_baz and foo_bar_baz
28 # In a parametrized production with a parameter ~xxx:
29 # * parameters (~xxx) are substituted with _xxx if the parameter is true and removed if the parameter is false
30 # * guarded alternatives (!xxx) are disabled if the parameter is true
31 #
32 # Limitations
33 #
34 # prescc is badly implemented with shell, sed and perl and is not robust.
35 # Users must remember the following:
36 # * parametrized productions MUST be terminated with a line containing only a single semicolon (;)
37 # * parameters (~) and guards (!) in alternatives MUST correspond to a parameter of the enclosing production
38 # * if required, names in transformations MUST contain the full invocation name (with all parameters)
39 # foo bar_x~y~z_t baz {-> New p(foo, bar_x~y~z_t.q)}
40 # * guards do not understand grammar, they just remove the whole line
41 # * The AST MUST start with a line containing only "Abstract Syntax Tree"
42 #
43 # Example of the dangling else implementation:
44 #
45 # stmt~withelse =
46 # 'if' expr 'then' stmt_withelse 'else' stmt~withelse |
47 # !withelse 'if' expr 'then' stmt |
48 # nop
49 # ;
50
51
52 case $# in
53 2);;
54 *) echo "Usage: prescc infile outfile"; exit
55 esac
56
57
58 infile=$1
59 outfile=$2
60 tmpfile=`mktemp "$2.XXXXXX"`
61
62 printf "/* This file is autogenerated, do not modify it */" > "$outfile"
63 cat "$infile" >> "$outfile"
64
65 # The perl code is used to list all the available parameters in the extended grammar
66 for token in `perl -ne 'if (/\~(\w+)/ && !$found{$1}) {print "$1\n"; $found{$1}=1}' "$infile"`
67 do
68 echo "Parameter ~$token"
69 # first, sed starts from first line to the AST line and removes ~xxx and !xxx
70 sed -n -e "
71 1,/^Abstract Syntax Tree/{
72 /^Abstract Syntax Tree/b
73 s/[\~!]$token//g
74 p
75 }
76 " "$outfile" > "$tmpfile"
77 # second, sed clones ~xxx parametrized productions, substitute ~xxx with _xxx and delete !xxx lines
78 sed -n -e "
79 /\~$token/,/;/{
80 s/\~$token/_$token/g
81 /!$token/d
82 p
83 }
84 " "$outfile" >> "$tmpfile"
85 # third, sed continues fron AST line to last line and remove ~xxx and !xxx
86 sed -n -e "
87 /^Abstract Syntax Tree/,\${
88 s/[\~!]$token//g
89 p
90 }
91 " "$outfile" >> "$tmpfile"
92 mv "$tmpfile" "$outfile"
93 done
94