1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Internal algorithm and data structures for the Nit parser
18 intrude import parser_prod
20 # State of the parser automata as stored in the parser stack.
22 # The internal state number
25 # The node stored with the state in the stack
26 var nodes
: nullable Object
29 # The parser of the Nit language.
35 # Stack of pushed states and productions
36 private var stack
= new Array[State]
38 # Position in the stack
39 private var stack_pos
: Int = -1
46 # Do a transition in the automata
47 private fun go_to
(index
: Int): Int
51 var high
= parser_goto
(index
, 0) - 1
54 var middle
= (low
+ high
) / 2
55 var subindex
= middle
* 2 + 1 # +1 because parser_goto(index, 0) is the length
57 var goal
= parser_goto
(index
, subindex
)
60 else if state
> goal
then
63 return parser_goto
(index
, subindex
+1)
67 return parser_goto
(index
, 2) # Default value
70 # Push someting in the state stack
71 private fun push
(numstate
: Int, list_node
: nullable Object)
73 var pos
= _stack_pos
+ 1
75 if pos
< _stack
.length
then
76 var state
= _stack
[pos
]
77 state
._state
= numstate
78 state
._nodes
= list_node
80 _stack
.push
(new State(numstate
, list_node
))
85 private fun state
: Int
87 return _stack
[_stack_pos
]._state
90 # Pop something from the stack state
91 private fun pop
: nullable Object
93 var res
= _stack
[_stack_pos
]._nodes
94 _stack_pos
= _stack_pos
-1
98 # Build and return a full AST.
105 var token
= lexer
.peek
106 if token
isa AError then
107 return new Start(null, token
)
110 var state
= self.state
111 var index
= token
.parser_index
112 var action_type
= parser_action
(state
, 2)
113 var action_value
= parser_action
(state
, 3)
116 var high
= parser_action
(state
, 0) - 1
119 var middle
= (low
+ high
) / 2
120 var subindex
= middle
* 3 + 1 # +1 because parser_action(state, 0) is the length
122 var goal
= parser_action
(state
, subindex
)
125 else if index
> goal
then
128 action_type
= parser_action
(state
, subindex
+1)
129 action_value
= parser_action
(state
, subindex
+2)
134 if action_type
== 0 then # SHIFT
135 push
(action_value
, lexer
.next
)
136 else if action_type
== 1 then # REDUCE
137 _reduce_table
[action_value
].action
(self)
138 else if action_type
== 2 then # ACCEPT
139 var node2
= lexer
.next
142 assert node1
isa AModule
143 var node
= new Start(node1
, node2
)
145 (new ComputeProdLocationVisitor(lexer
.file
.first_token
)).enter_visit
(node
)
147 else if action_type
== 3 then # ERROR
148 # skip injected tokens
149 while not isset token
._location
do token
= lexer
.next
150 var node2
= new AParserError.init_parser_error
("Syntax Error: unexpected {token}.", token
.location
, token
)
151 var node
= new Start(null, node2
)
157 private var reduce_table
: Array[ReduceAction] is noinit
158 private fun build_reduce_table
is abstract
162 # Location on the first token after the start of a production
163 # So outside the production for epsilon production
164 var first_location
: nullable Location
166 # Join the text of all visited tokens
167 fun collect_text
: String
169 var v
= new TextCollectorVisitor
176 # Find location of production nodes
177 # Uses existing token locations to infer location of productions.
178 private class ComputeProdLocationVisitor
181 # The current (or starting) cursor on the token sequence used to collect loose tokens
182 var token
: nullable Token
184 # Currently visited productions that need a first token
185 var need_first_prods
= new Array[Prod]
187 # Already visited epsilon productions that waits something after them
188 var need_after_epsilons
= new Array[Prod]
190 # The last visited token in the current production
191 var last_token
: nullable Token = null
193 redef fun visit
(n
: ANode)
196 # Skip injected tokens
197 if not isset n
._location
then return
199 # Collect loose tokens (not in the AST) and attach them to token in the AST
203 # In order, we have the tokens:
204 # * `lt` the previous visited token in the AST (if any)
205 # * then `cursor` the loose tokens to attach
206 # * then `n` the current visited token in the AST
208 # In the following, we advance `cursor` to add them to `lt.next_looses` or to `n.prev_looses`.
210 var ltl
= lt
.location
.line_end
211 # floating tokens on the same line of a AST-token follows it
212 while cursor
!= null and cursor
!= n
and ltl
== cursor
.location
.line_start
do
213 cursor
.is_loose
= true
214 lt
.next_looses
.add cursor
215 cursor
= cursor
.next_token
218 # other loose tokens precede the next AST-token
219 while cursor
!= null and cursor
!= n
do
220 cursor
.is_loose
= true
221 n
.prev_looses
.add cursor
222 cursor
= cursor
.next_token
227 var loc
= n
._location
230 # Add a first token to productions that need one
231 if not _need_first_prods
.is_empty
then
232 for no
in _need_first_prods
do
233 no
._first_location
= loc
235 _need_first_prods
.clear
238 # Find location for already visited epsilon production that need one
239 if not _need_after_epsilons
.is_empty
then
240 var loco
= new Location(loc
.file
, loc
.line_start
, loc
.line_start
, loc
.column_start
, loc
.column_start
)
241 for no
in _need_after_epsilons
do
244 _need_after_epsilons
.clear
248 _need_first_prods
.add
(n
)
252 var startl
= n
._first_location
253 if startl
!= null then
254 # Non-epsilon production
255 var endl
= _last_token
.location
257 if startl
== endl
then
260 n
.location
= new Location(startl
.file
, startl
.line_start
, endl
.line_end
, startl
.column_start
, endl
.column_end
)
263 if not _need_after_epsilons
.is_empty
then
264 var loc
= new Location(endl
.file
, endl
.line_end
, endl
.line_end
, endl
.column_end
, endl
.column_end
)
265 for no
in _need_after_epsilons
do
266 # Epsilon production that finishes the current non-epsilon production
269 _need_after_epsilons
.clear
272 # Epsilon production in the middle or that finishes a parent non-epsilon production
273 _need_after_epsilons
.add
(n
)
279 private class TextCollectorVisitor
281 var text
: String = ""
284 if n
isa Token then text
+= n
.text
290 # Each reduce action has its own class, this one is the root of the hierarchy.
291 private abstract class ReduceAction
292 fun action
(p
: Parser) is abstract
293 fun concat
(l1
, l2
: Array[Object]): Array[Object]
295 if l1
.is_empty
then return l2
304 # Get `self` as a single identifier.
305 # Return null if not a single identifier.
306 fun as_id
: nullable String
308 if self isa AMethidExpr then
309 return self.collect_text
311 if not self isa ACallExpr then return null
312 if not self.n_expr
isa AImplicitSelfExpr then return null
313 if not self.n_args
.n_exprs
.is_empty
then return null
314 return self.n_qid
.n_id
.text