Merge: doc: fixed some typos and other misc. corrections
[nit.git] / src / astutil.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Additional features on Nit AST
16 #
17 # Most of these feature require that the precomputation `parentize_tokens`
18 # is run on the root node of a AST.
19 #
20 # These aditionnal features are used either to have a better association between tokens and productions
21 # But also to query how productions and tokens are placed in the lines of code.
22 module astutil
23
24 intrude import parser
25 import html
26
27 redef class ANode
28 # Visit the AST and computes advanced AST attributes on Tokens and Prod
29 # This also force a parent on the detashed tokens
30 fun parentize_tokens
31 do
32 var v = new PTokenVisitor
33 v.work(self)
34 end
35 end
36
37 redef class Prod
38 # The first token of the production in the AST
39 # Computed by `parentize_tokens`
40 var first_token: nullable Token = null
41
42 # The last token of the production in the AST
43 # Computed by `parentize_tokens`
44 var last_token: nullable Token = null
45
46 # Is the production contained in full block of line?
47 # Computed by `parentize_tokens`
48 fun is_block: Bool
49 do
50 if first_token == null or last_token == null then return false
51 #sys.stderr.write "{self}@{location}: start={first_token.is_starting_line} {first_token.inspect}@{first_token.location} ; end={last_token.is_ending_line} {last_token.inspect}@{last_token.location}\n"
52 return first_token.is_starting_line and last_token.is_ending_line
53 end
54
55 # Is the production a part of a single line (without being a block)
56 # Computed by `parentize_tokens`
57 fun is_span: Bool
58 do
59 if first_token == null or last_token == null then return false
60 return not is_block and location.line_start == location.line_end
61 end
62
63 # A XML representation of the AST
64 # Productions and token become elements
65 #
66 # detached tokens and whitespaces are preserved in the XML
67 #
68 # ~~~
69 # import parser_util
70 # var text = "y += foo"
71 # var ast = (new ToolContext).parse_something(text)
72 # assert ast isa AExpr
73 # ast.parentize_tokens
74 # assert ast.to_xml.write_to_string == """<ACallReassignExpr><AQid><TId>y</TId></AQid> <APlusAssignOp><TPluseq>+=</TPluseq></APlusAssignOp> <ACallExpr><AQid><TId>foo</TId></AQid></ACallExpr></ACallReassignExpr>"""
75 # ~~~
76 fun to_xml: HTMLTag
77 do
78 var res = new HTMLTag("AST")
79 var stack = new Array[HTMLTag]
80 var c = first_token
81 while c != null do
82 if c != first_token then res.append(c.blank_before)
83 var sp = c.starting_prods
84 if sp != null then for p in sp do
85 var tag = new HTMLTag(p.class_name)
86 res.add tag
87 stack.add res
88 res = tag
89 end
90 var tag = new HTMLTag(c.class_name)
91 res.add tag
92 tag.append c.text
93 var ep = c.ending_prods
94 if ep != null then for p in ep do
95 res = stack.pop
96 end
97
98 if c == last_token then
99 c = null
100 else
101 c = c.next_token
102 end
103 end
104 assert stack.is_empty
105 return res.children.first
106 end
107 end
108
109 redef class Token
110 # Is self the first AST token on its line in the source
111 # Computed by `parentize_tokens`
112 #
113 # Note, some tokens detached from the AST
114 # may precede `self` even if `is_starting_line` is true
115 # One can use `first_real_token_in_line` to get the real starting token
116 var is_starting_line = false
117
118 # Is self the last AST token on its line in the source
119 # Computed by `parentize_tokens`
120 #
121 # Note, some tokens detached from the AST (like comments)
122 # may follow `self` even if `is_ending_line` is true.
123 # One can use `last_real_token_in_line` to get the real ending token
124 var is_ending_line = false
125
126 # The first real token that starts the line of `self`
127 #
128 # This could return a token that is detached from the AST.
129 # See `first_token_in_line` if a AST token is required.
130 fun first_real_token_in_line: Token
131 do
132 var line = location.line_start
133 var t = self
134 loop
135 var p = t.prev_token
136 if p == null or p.location.line_start != line then
137 return t
138 end
139 t = p
140 end
141 end
142
143 # The first AST token that starts the line of `self`.
144 # May be null is the line contains only detached tokens (only comment)
145 #
146 # Computed by `parentize_tokens`
147 #
148 # ENSURE `result != null implies result.is_starting_line`
149 fun first_token_in_line: nullable Token
150 do
151 return first_real_token_in_line.first_ast_token
152 end
153
154 # The first AST token.
155 # This only work on the `first_real_token_in_line`
156 private var first_ast_token: nullable Token
157
158 # The last read token that ends the line of `self`
159 #
160 # This usually return a detached token lake a TEol or a comment.
161 # See `last_token_in_line` if a AST token is required.
162 fun last_real_token_in_line: Token
163 do
164 var line = location.line_start
165 var t = self
166 loop
167 var p = t.next_token
168 if p == null or p.location.line_start != line then
169 return t
170 end
171 t = p
172 end
173 end
174
175 # The last AST token that starts the line of `self`
176 # May be null is the line contains only detached tokens (only comment)
177 #
178 # Computed by `parentize_tokens`
179 #
180 # ENSURE `result.is_ending_line`
181 fun last_token_in_line: nullable Token
182 do
183 return last_real_token_in_line.last_ast_token
184 end
185
186 # The last AST token.
187 # This only work on the `last_real_token_in_line`
188 private var last_ast_token: nullable Token
189
190 # The productions that starts with `self`, if any
191 # Productions goes from the most general to the most specific
192 #
193 # Computed by `parentize_tokens`
194 var starting_prods: nullable Array[Prod]
195
196 # The productions that ends with `self`, if any
197 # Productions goes from the most specific to the most general
198 #
199 # Computed by `parentize_tokens`
200 var ending_prods: nullable Array[Prod]
201 end
202
203
204 private class PTokenVisitor
205 super Visitor
206
207 var last_token: nullable Token = null
208
209 # productions that need a fisrt token
210 var stack = new Array[Prod]
211
212 fun work(n: ANode)
213 do
214 enter_visit(n)
215 # process remaining detashed tokens
216 var c = last_token
217 if c != null then
218 c.is_ending_line = true
219 c.last_real_token_in_line.last_ast_token = c
220 c = c.next_token
221 end
222 var r = n.root
223 while c != null and c.parent == null do
224 c.parent = r
225 c = c.next_token
226 end
227 end
228
229 redef fun visit(n)
230 do
231 if not n isa Token then
232 assert n isa Prod
233 stack.push(n)
234 n.visit_all(self)
235 if n.first_token == null then
236 # epsilon production, just discard
237 assert stack.pop == n
238 else
239 var t = last_token
240 if t != null then
241 # last token ends the production
242 n.last_token = t
243 if t.ending_prods == null then t.ending_prods = new Array[Prod]
244 t.ending_prods.add n
245 end
246 end
247 return
248 end
249
250 # We have a token, give it to prods that need one
251 if not stack.is_empty then
252 n.starting_prods = new Array[Prod]
253 for p in stack do
254 p.first_token = n
255 n.starting_prods.add p
256 end
257 stack.clear
258 end
259
260 var last_token = last_token
261
262 # n starts a new line
263 if last_token == null or last_token.location.line_start != n.location.line_start then
264 n.is_starting_line = true
265 n.first_real_token_in_line.first_ast_token = n
266
267 end
268 # last_token ended a line
269 if last_token != null and last_token.location.line_start != n.location.line_start then
270 last_token.is_ending_line = true
271 last_token.last_real_token_in_line.last_ast_token = last_token
272 end
273
274 # Get the common parent
275 var p
276 if last_token == null then
277 p = n.root
278 else
279 p = last_token.common_parent(n)
280 end
281 if p isa Prod then
282 # And apply it to detached tokens between `last_token` and `n`
283 var c = n.prev_token
284 while c != null and c.parent == null do
285 c.parent = p
286 c = c.prev_token
287 end
288 end
289
290 self.last_token = n
291 end
292 end