Merge: Loose Tokens

author Jean Privat <jean@pryen.org>

Thu, 4 Jun 2015 10:34:34 +0000 (06:34 -0400)

committer Jean Privat <jean@pryen.org>

Thu, 4 Jun 2015 10:34:34 +0000 (06:34 -0400)
author Jean Privat <jean@pryen.org>
Thu, 4 Jun 2015 10:34:34 +0000 (06:34 -0400)
committer Jean Privat <jean@pryen.org>
Thu, 4 Jun 2015 10:34:34 +0000 (06:34 -0400)
diff --git a/src/parser/parser_nodes.nit b/src/parser/parser_nodes.nit

index 6ec146f..9131c3e 100644 (file)
--- a/src/parser/parser_nodes.nit
+++ b/src/parser/parser_nodes.nit
@@ -311,6 +311,35 @@ abstract class Token
         # May have disappeared in the AST
         var next_token: nullable Token = null
  
+       # Is `self` a token discarded from the AST?
+       #
+       # Loose tokens are not present in the AST.
+       # It means they were identified by the lexer but were discarded by the parser.
+       # It also means that they are not visited or manipulated by AST-related functions.
+       #
+       # Each loose token is attached to the non-loose token that precedes or follows it.
+       # The rules are the following:
+       #
+       # * tokens that follow a non-loose token on the same line are attached to it.
+       #   See `next_looses`.
+       # * other tokens, i.e. those that precede a non-loose token on the same line or the next one,
+       #   are attached to that token. See `prev_looses`.
+       #
+       # Loose tokens are mostly end of lines (`TEol`) and comments (`TComment`).
+       # Whitespace is ignored by the lexer, so it is not even considered a loose token.
+       # See `blank_before` to get the whitespace that separates tokens.
+       var is_loose = false
+
+       # Loose tokens that precede `self`.
+       #
+       # These tokens start the line or belong to a line with only loose tokens.
+       var prev_looses = new Array[Token] is lazy
+
+       # Loose tokens that follow `self`
+       #
+       # These tokens are on the same line as `self`.
+       var next_looses = new Array[Token] is lazy
+
         # The verbatim blank text between `prev_token` and `self`
         fun blank_before: String
         do
diff --git a/src/parser/parser_work.nit b/src/parser/parser_work.nit

index c2eff11..a344fc7 100644 (file)
--- a/src/parser/parser_work.nit
+++ b/src/parser/parser_work.nit
@@ -141,7 +141,8 @@ class Parser
                                 var node1 = pop
                                 assert node1 isa AModule
                                 var node = new Start(node1, node2)
-                               (new ComputeProdLocationVisitor).enter_visit(node)
+                               node2.parent = node
+                               (new ComputeProdLocationVisitor(lexer.file.first_token)).enter_visit(node)
                                 return node
                         else if action_type == 3 then # ERROR
                                 # skip injected tokens
@@ -176,21 +177,55 @@ end
  # Uses existing token locations to infer location of productions.
  private class ComputeProdLocationVisitor
         super Visitor
+
+       # The current (or starting) cursor on the token sequence used to collect loose tokens
+       var token: nullable Token
+
         # Currently visited productions that need a first token
         var need_first_prods = new Array[Prod]
  
         # Already visited epsilon productions that waits something after them
         var need_after_epsilons = new Array[Prod]
  
-       # Location of the last visited token in the current production
-       var last_location: nullable Location = null
+       # The last visited token in the current production
+       var last_token: nullable Token = null
  
         redef fun visit(n: ANode)
         do
                 if n isa Token then
+                       # Skip injected tokens
                         if not isset n._location then return
+
+                       # Collect loose tokens (not in the AST) and attach them to token in the AST
+                       var cursor = token
+                       if n != cursor then
+                               var lt = last_token
+                               # In order, we have the tokens:
+                               # * `lt` the previous visited token in the AST (if any)
+                               # * then `cursor` the loose tokens to attach
+                               # * then `n` the current visited token in the AST
+
+                               # In the following, we advance `cursor` to add them to `lt.next_looses` or to `n.prev_looses`.
+                               if lt != null then
+                                       var ltl = lt.location.line_end
+                                       # loose tokens on the same line as an AST token follow it
+                                       while cursor != null and cursor != n and ltl == cursor.location.line_start do
+                                               cursor.is_loose = true
+                                               lt.next_looses.add cursor
+                                               cursor = cursor.next_token
+                                       end
+                               end
+                               # other loose tokens precede the next AST-token
+                               while cursor != null and cursor != n do
+                                       cursor.is_loose = true
+                                       n.prev_looses.add cursor
+                                       cursor = cursor.next_token
+                               end
+                       end
+                       token = n.next_token
+
                         var loc = n._location
-                       _last_location = loc
+                       _last_token = n
  
                         # Add a first token to productions that need one
                         if not _need_first_prods.is_empty then
@@ -217,8 +252,7 @@ private class ComputeProdLocationVisitor
                         var startl = n._first_location
                         if startl != null then
                                 # Non-epsilon production
-                               var endl = _last_location
-                               assert endl != null
+                               var endl = _last_token.location
  
                                 if startl == endl then
                                         n.location = startl
author	Jean Privat <jean@pryen.org>
	Thu, 4 Jun 2015 10:34:34 +0000 (06:34 -0400)
committer	Jean Privat <jean@pryen.org>
	Thu, 4 Jun 2015 10:34:34 +0000 (06:34 -0400)
src/parser/parser_nodes.nit		patch \| blob \| history
src/parser/parser_work.nit		patch \| blob \| history