65608d0fab1ccee577564819407ee842723de8c5
[nit.git] / contrib / nitcc / src / nitcc_parser_gen.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
# Bootstrapping the nitcc parser
#
# Instead of committing a generated parser on each version,
# this program just generates the nitcc_parser using the API of `grammar`
#
# Pros:
#
# - no generated file committed
# - easier to modify and bootstrap
#
# Cons:
#
# - somewhat duplicates the full grammar of nitcc
# - needs an ad-hoc lexer (nitcc_lexer0.nit)
#
30 module nitcc_parser_gen
31
32 import grammar
33
# The grammar that collects every production and token declared in this script.
var g = new Gram

# Top-level production and productions of the lexer section.
var p_gr = new Production("grammar")
var p_lex = new Production("lexer")
var p_exprs = new Production("exprs")
var p_expr = new Production("expression")

# Productions of the regular expressions, one per nesting level.
var p_re = new Production("re")
var p_re1 = new Production("re1")
var p_re2 = new Production("re2")
var p_re3 = new Production("re3")
var p_text = new Production("text")

# Productions of the parser section.
var p_par = new Production("parser")
var p_ign = new Production("ignored")
var p_rej = new Production("rejected")
var p_prods = new Production("prods")
var p_prod = new Production("production")
var p_ptrans_o = new Production("ptrans_o")
var p_alts = new Production("alts")
var p_alt = new Production("alternative")
var p_altid_o = new Production("altid_o")
var p_altid = new Production("altident")
var p_elems = new Production("elems")
var p_elem_list = new Production("elem_list")
var p_elem = new Production("elem")
var p_pri = new Production("priority")

# Register the productions; the registration order is kept as-is
# (`p_gr` first, then the regular expressions, then the rest).
g.prods.add_all([p_gr, p_re, p_re1, p_re2, p_re3, p_text, p_lex, p_exprs, p_expr])
g.prods.add_all([p_par, p_ign, p_rej, p_prods, p_prod, p_ptrans_o, p_alts,
	p_alt, p_altid_o, p_altid, p_elems, p_elem_list, p_elem, p_pri])

# Productions that are registered but never given any alternative here.
# NOTE(review): presumably only declared so that the generated parser knows
# about them; confirm against the users of the generated module.
for extra_name in ["atrans", "elemid", "nelem", "tree_part"] do
	g.prods.add(new Production(extra_name))
end
63
# Grouping and punctuation tokens.
var t_opar = new Token("opar")
var t_cpar = new Token("cpar")
var t_ocur = new Token("ocur")
var t_ccur = new Token("ccur")
var t_pipe = new Token("pipe")
var t_star = new Token("star")
var t_ques = new Token("ques")
var t_plus = new Token("plus")
var t_minus = new Token("minus")
var t_comma = new Token("comma")
var t_colo = new Token("colo")
var t_semi = new Token("semi")
var t_dot = new Token("dot")
var t_eq = new Token("eq")
var t_arrow = new Token("arrow")

# Literal, identifier and generic keyword tokens.
var t_str = new Token("str")
var t_id = new Token("id")
var t_kw = new Token("kw")

# Tokens of the regular-expression operators.
var t_any = new Token("any")
var t_end = new Token("end")
var t_and = new Token("and")
var t_except = new Token("except")
var t_shortest = new Token("shortest")
var t_longest = new Token("longest")

# Tokens of the numeric character literals.
var t_ch_dec = new Token("ch_dec")
var t_ch_hex = new Token("ch_hex")

# Register the tokens in the grammar, in the same order as they are declared.
g.tokens.add_all([t_opar, t_cpar, t_ocur, t_ccur])
g.tokens.add_all([t_pipe, t_star, t_ques, t_plus, t_minus])
g.tokens.add_all([t_comma, t_colo, t_semi, t_dot, t_eq, t_arrow])
g.tokens.add_all([t_str, t_id, t_kw])
g.tokens.add_all([t_any, t_end, t_and, t_except, t_shortest, t_longest])
g.tokens.add_all([t_ch_dec, t_ch_hex])
116
# Rules of the grammar header and of the lexer section.
#
# Each `new_alt` call adds one named alternative to a production, followed by
# its elements in order; `new_alt0` adds an alternative without any element.

# grammar = kw id semi lexer parser
p_gr.new_alt("gr", t_kw, t_id, t_semi, p_lex, p_par)

# lexer = kw exprs
p_lex.new_alt("lex", t_kw, p_exprs)

# exprs = exprs expression | (nothing)
p_exprs.new_alt("exprs_many", p_exprs, p_expr)
p_exprs.new_alt0("exprs_none")

# expression = id eq re semi
p_expr.new_alt("expr", t_id, t_eq, p_re, t_semi)

# re = re pipe re1 | re1
p_re.new_alt("re_alter", p_re, t_pipe, p_re1)
p_re.new_alt("re_re2", p_re1)

# re1 = re1 minus re2 | re1 except re2 | re1 and re2 | re2
#
# The name of each binary alternative matches the token of its operator:
# `re_except` is built on `t_except` and `re_and` on `t_and`.
p_re1.new_alt("re_minus", p_re1, t_minus, p_re2)
p_re1.new_alt("re_except", p_re1, t_except, p_re2)
p_re1.new_alt("re_and", p_re1, t_and, p_re2)
p_re1.new_alt("re_re1", p_re2)

# re2 = re2 re3 | re3
p_re2.new_alt("re_conc", p_re2, p_re3)
p_re2.new_alt("re_re3", p_re3)

# re3 = postfix operators, prefixed and parenthesized forms, then atoms
p_re3.new_alt("re_star", p_re3, t_star)
p_re3.new_alt("re_ques", p_re3, t_ques)
p_re3.new_alt("re_plus", p_re3, t_plus)
p_re3.new_alt("re_shortest", t_shortest, t_opar, p_re, t_cpar)
p_re3.new_alt("re_longest", t_longest, t_opar, p_re, t_cpar)
p_re3.new_alt("re_par", t_opar, p_re, t_cpar)
# A class is written with two consecutive `dot` tokens between two texts.
p_re3.new_alt("re_class", p_text, t_dot, t_dot, p_text)
p_re3.new_alt("re_any", t_any)
p_re3.new_alt("re_end", t_end)
p_re3.new_alt("re_id", t_id)
p_re3.new_alt("re_text", p_text)

# text = str | ch_dec | ch_hex
p_text.new_alt("re_str", t_str)
p_text.new_alt("re_ch_dec", t_ch_dec)
p_text.new_alt("re_ch_hex", t_ch_hex)
152
# Rules of the parser section.
#
# Each `new_alt` call adds one named alternative to a production, followed by
# its elements in order; `new_alt0` adds an alternative without any element.

# parser = kw ignored rejected prods
p_par.new_alt("par", t_kw, p_ign, p_rej, p_prods)

# ignored = kw elem_list semi
p_ign.new_alt("ign", t_kw, p_elem_list, t_semi)

# rejected = kw elem_list semi
p_rej.new_alt("rej", t_kw, p_elem_list, t_semi)

# prods = prods production | (nothing)
p_prods.new_alt("prods_many", p_prods, p_prod)
p_prods.new_alt0("prods_none")

# production = id ptrans_o eq alts semi
p_prod.new_alt("prod", t_id, p_ptrans_o, t_eq, p_alts, t_semi)

# ptrans_o = ocur arrow id ccur | (nothing)
p_ptrans_o.new_alt("ptrans", t_ocur, t_arrow, t_id, t_ccur)
p_ptrans_o.new_alt0("ptrans_none")

# alts = alts pipe alternative | alternative
p_alts.new_alt("alts_many", p_alts, t_pipe, p_alt)
p_alts.new_alt("alts_one", p_alt)

# alternative = altid_o elems
p_alt.new_alt("alt", p_altid_o, p_elems)

# altid_o = (nothing) | altident
p_altid_o.new_alt0("altid_o_none")
p_altid_o.new_alt("altid_o_one", p_altid)

# altident = ocur id colo ccur
p_altid.new_alt("altid", t_ocur, t_id, t_colo, t_ccur)

# elems = elems elem | (nothing)
p_elems.new_alt("elems_many", p_elems, p_elem)
p_elems.new_alt0("elems_none")

# elem_list = elem_list comma elem | elem
p_elem_list.new_alt("elem_list_many", p_elem_list, t_comma, p_elem)
p_elem_list.new_alt("elem_list_one", p_elem)

# elem = id | text | opar alts cpar | elem star | elem ques | elem plus
p_elem.new_alt("elem_id", t_id)
p_elem.new_alt("elem_str", p_text)
p_elem.new_alt("elem_par", t_opar, p_alts, t_cpar)
p_elem.new_alt("elem_star", p_elem, t_star)
p_elem.new_alt("elem_ques", p_elem, t_ques)
p_elem.new_alt("elem_plus", p_elem, t_plus)

# The priority production only gets empty alternatives flagged as `phony`.
for pri_name in ["priority_left", "priority_right", "priority_unary"] do
	p_pri.new_alt0(pri_name).phony = true
end
193
# Build the LR automaton of the grammar (through `lr0`).
var automaton = g.lr0

# Report the size of the automaton and dump it as a dot file.
var dot_path = "nitcc0.lr.dot"
print "LR automaton: {automaton.states.length} states (see {dot_path})"
automaton.to_dot(dot_path)

# Generate the Nit module of the parser.
automaton.gen_to_nit("nitcc_parser.nit", "nitcc")

# Write `nitcc_lexer.nit` as a one-line module importing the ad-hoc lexer.
var lexer_stub = new OFStream.open("nitcc_lexer.nit")
lexer_stub.write("import nitcc_lexer0\n")
lexer_stub.close