parser: migrate prescc to Perl
[nit.git] / src / parser / prescc.pl
1 #!/usr/bin/perl -w
2 # This file is part of NIT ( http://www.nitlanguage.org ).
3 #
4 # Copyright 2009 Jean Privat <jean@pryen.org>
5 # Copyright 2009 Jean-Sebastien Gelinas <calestar@gmail.com>
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18
19 # prescc, a Sablecc preprocessor.
20 #
21 # Synopsis
22 #
23 # Extends a sablecc grammar with parametrized productions.
24 #
25 # Description
26 #
27 # A production named foo~bar~baz semantically corresponds to a production foo with two boolean parameters bar and baz.
28 # In fact foo is a family of 4 distinct productions: foo, foo_bar, foo_baz and foo_bar_baz
29 # In a parametrized production with a parameter ~xxx:
30 # * parameters (~xxx) are substituted with _xxx if the parameter is true and removed if the parameter is false
31 # * guarded alternatives (!xxx) are disabled if the parameter is true
32 #
33 # Limitations
34 #
35 # prescc is badly implemented in perl and is not robust.
36 # Users must remember the following:
37 # * parametrized productions MUST be terminated with a line containing only a single semicolon (;)
38 # * parameters (~) and guards (!) in alternatives MUST correspond to a parameter of the enclosing production
39 # * if required, names in transformations MUST contain the full invocation name (with all parameters)
40 # foo bar_x~y~z_t baz {-> New p(foo, bar_x~y~z_t.q)}
41 # * guards do not understand grammar, they just remove the whole line
42 # * The AST MUST start with a line containing only "Abstract Syntax Tree"
43 #
44 # Example of the dangling else implementation:
45 #
46 # stmt~withelse =
47 # 'if' expr 'then' stmt_withelse 'else' stmt~withelse |
48 # !withelse 'if' expr 'then' stmt |
49 # nop
50 # ;
51
use strict;
use warnings;

# Return the distinct parameter names of a grammar, in order of first
# appearance.  A parameter is the maximal run of ASCII letters that
# follows a '~'.
#
#   $text - the whole grammar as a single string
sub _list_parameters {
    my ($text) = @_;
    my (@params, %seen);
    while ($text =~ /~([a-zA-Z]+)/g) {
        push @params, $1 unless $seen{$1}++;
    }
    return @params;
}

# Expand one parameter over the grammar and return the new list of lines.
#
#   $token - parameter name, without the leading '~'
#   @lines - grammar lines, each still carrying its newline
#
# The result is made of three consecutive parts:
#   1. every line before the AST line, with ~token and !token removed
#      (the "parameter is false" version of the grammar);
#   2. a clone of every production mentioning ~token, with ~token turned
#      into _token and the lines guarded by !token dropped
#      (the "parameter is true" version);
#   3. the AST line and everything after it, copied verbatim.
sub _expand_parameter {
    my ($token, @lines) = @_;

    my $ast_start = qr/^Abstract Syntax Tree/;

    # Parameter names are maximal runs of letters, so a match must not be
    # followed by another letter.  Without this, expanding ~a would also
    # rewrite the "~a" prefix of an unrelated parameter ~ab.
    my $name      = qr/\Q$token\E(?![a-zA-Z])/;
    my $param     = qr/~$name/;
    my $guard     = qr/!$name/;
    my $any_mark  = qr/[~!]$name/;

    my @expanded;

    # Part 1: grammar up to the AST line, parameter and guard marks removed.
    for my $line (@lines) {
        last if $line =~ $ast_start;
        (my $plain = $line) =~ s/$any_mark//g;
        push @expanded, $plain;
    }

    # Part 2: clone the parametrized productions.  A clone starts on the
    # first line mentioning ~token and ends after the first kept line that
    # contains a semicolon.
    my $in_production = 0;
    for my $line (@lines) {
        $in_production = 1 if $line =~ $param;
        next if !$in_production;
        (my $clone = $line) =~ s/$param/_$token/g;
        next if $clone =~ $guard;    # guarded alternative: disabled when true
        push @expanded, $clone;
        $in_production = 0 if $clone =~ /;/;
    }

    # Part 3: the AST section is copied unchanged.
    my $in_ast = 0;
    for my $line (@lines) {
        $in_ast = 1 if $line =~ $ast_start;
        push @expanded, $line if $in_ast;
    }

    return @expanded;
}

# Expand every parameter of an extended grammar.
#
#   @lines - lines of the extended grammar, newlines included
#
# Returns the lines of the plain grammar.  The parameter list is computed
# once from the input; each parameter is then expanded over the result of
# the previous expansion, and reported on STDERR as it is processed.
sub prescc_expand {
    my (@lines) = @_;
    for my $token (_list_parameters(join "", @lines)) {
        print STDERR "Parameter ~$token\n";
        @lines = _expand_parameter($token, @lines);
    }
    return @lines;
}

# Script entry point: read the grammar from the files named on the command
# line (or from STDIN) and write the expanded grammar to STDOUT.
sub _main {
    my @lines = <>;
    # The banner has no trailing newline: the first grammar line follows it
    # on the same output line.
    print "/* This file is autogenerated, do not modify it */";
    print join("", prescc_expand(@lines));
    return;
}

# Run only when executed as a script, not when loaded by another file.
_main() unless caller;