Additional features on Nit AST

Most of these feature require that the precomputation parentize_tokens is run on the root node of a AST.

These aditionnal features are used either to have a better association between tokens and productions But also to query how productions and tokens are placed in the lines of code.

Introduced classes

private class PTokenVisitor

nitc :: PTokenVisitor

Redefined classes

redef abstract class ANode

nitc :: astutil $ ANode

Root of the AST class-hierarchy
redef abstract class Prod

nitc :: astutil $ Prod

Ancestor of all productions
redef abstract class Token

nitc :: astutil $ Token

Ancestor of all tokens

All class definitions

redef abstract class ANode

nitc :: astutil $ ANode

Root of the AST class-hierarchy
private class PTokenVisitor

nitc $ PTokenVisitor

redef abstract class Prod

nitc :: astutil $ Prod

Ancestor of all productions
redef abstract class Token

nitc :: astutil $ Token

Ancestor of all tokens
package_diagram nitc::astutil astutil nitc\>parser\> parser nitc::astutil->nitc\>parser\> html html nitc::astutil->html nitc nitc nitc\>parser\>->nitc ordered_tree ordered_tree nitc\>parser\>->ordered_tree console console nitc\>parser\>->console core core nitc\>parser\>->core html->core template template html->template ...nitc ... ...nitc->nitc ...ordered_tree ... ...ordered_tree->ordered_tree ...console ... ...console->console ...core ... ...core->core ...template ... ...template->template nitc::highlight highlight nitc::highlight->nitc::astutil nitc::pretty pretty nitc::pretty->nitc::astutil nitc::test_parser test_parser nitc::test_parser->nitc::astutil nitc::htmlight htmlight nitc::htmlight->nitc::highlight nitc::md_commands md_commands nitc::md_commands->nitc::highlight nitc::htmlight... ... nitc::htmlight...->nitc::htmlight nitc::md_commands... ... nitc::md_commands...->nitc::md_commands nitc::nitpretty nitpretty nitc::nitpretty->nitc::pretty nitc::nitpretty... ... nitc::nitpretty...->nitc::nitpretty a_star-m a_star-m a_star-m->nitc::test_parser a_star-m... ... a_star-m...->a_star-m

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module console

console :: console

Defines some ANSI Terminal Control Escape Sequences.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module lexer

nitc :: lexer

Lexer and its tokens.
module lexer_work

nitc :: lexer_work

Internal algorithm and data structures for the Nit lexer
module list

core :: list

This module handle double linked lists
module location

nitc :: location

Nit source-file and locations in source-file
module math

core :: math

Mathematical operations
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module ordered_tree

ordered_tree :: ordered_tree

Manipulation and presentation of ordered trees.
module parser_nodes

nitc :: parser_nodes

AST nodes of the Nit language
module parser_prod

nitc :: parser_prod

Production AST nodes full definition.
module parser_work

nitc :: parser_work

Internal algorithm and data structures for the Nit parser
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module tables

nitc :: tables

Module that interfaces the parsing tables.
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module html

html :: html

HTML output facilities
module parser

nitc :: parser

Parser.

Children

module highlight

nitc :: highlight

Highlighting of Nit AST
module pretty

nitc :: pretty

Library used to pretty print Nit code.
module test_parser

nitc :: test_parser

Program used to test the NIT parser

Descendants

module a_star-m

a_star-m

module api

nitc :: api

Components required to build a web server about the nit model.
module api_auth

nitc :: api_auth

module api_base

nitc :: api_base

Base classes used by nitweb.
module api_docdown

nitc :: api_docdown

Nitdoc specific Markdown format handling for Nitweb
module api_feedback

nitc :: api_feedback

Feedback related features
module api_light

nitc :: api_light

Highlight and collect messages from a piece of code
module api_model

nitc :: api_model

module commands_docdown

nitc :: commands_docdown

Doc down related queries
module html_commands

nitc :: html_commands

Render commands results as HTML
module html_model

nitc :: html_model

Translate mentities to html blocks.
module htmlight

nitc :: htmlight

Highlighting of Nit AST with HTML
module json_commands

nitc :: json_commands

Translate command results to json
module json_model

nitc :: json_model

Make model entities Serializable.
module md_commands

nitc :: md_commands

Render commands results as Markdown
module nitcatalog

nitc :: nitcatalog

Basic catalog generator for Nit packages
module nitdoc

nitc :: nitdoc

Generator of static API documentation for the Nit language
module nitlight

nitc :: nitlight

Tool that produces highlighting for Nit programs
module nitpretty

nitc :: nitpretty

module nitweb

nitc :: nitweb

Runs a webserver based on nitcorn that render things from model.
module nitx

nitc :: nitx

nitx, a command tool that displays useful data about Nit code
module static

nitc :: static

Nitdoc generation framework
module static_base

nitc :: static_base

Base entities shared by all the nitdoc code
module static_cards

nitc :: static_cards

Cards templates for the static documentation
module static_html

nitc :: static_html

Render documentation pages as HTML
module static_index

nitc :: static_index

Manage indexing of Nit model for Nitdoc QuickSearch.
module static_structure

nitc :: static_structure

Composes the pages of the static documentation
module term

nitc :: term

module test_highlight

nitc :: test_highlight

Program used to test the Nit highlighter
# Additional features on Nit AST
#
# Most of these feature require that the precomputation `parentize_tokens`
# is run on the root node of a AST.
#
# These aditionnal features are used either to have a better association between tokens and productions
# But also to query how productions and tokens are placed in the lines of code.
module astutil

intrude import parser
import html

redef class ANode
	# Visit the AST and computes advanced AST attributes on Tokens and Prod
	# This also force a parent on the detashed tokens
	fun parentize_tokens
	do
		var v = new PTokenVisitor
		v.work(self)
	end
end

redef class Prod
	# The first token of the production in the AST
	# Computed by `parentize_tokens`
	var first_token: nullable Token = null

	# The last token of the production in the AST
	# Computed by `parentize_tokens`
	var last_token: nullable Token = null

	# Is the production contained in full block of line?
	# Computed by `parentize_tokens`
	fun is_block: Bool
	do
		if first_token == null or last_token == null then return false
		#sys.stderr.write "{self}@{location}: start={first_token.is_starting_line} {first_token.inspect}@{first_token.location} ; end={last_token.is_ending_line} {last_token.inspect}@{last_token.location}\n"
		return first_token.is_starting_line and last_token.is_ending_line
	end

	# Is the production a part of a single line (without being a block)
	# Computed by `parentize_tokens`
	fun is_span: Bool
	do
		if first_token == null or last_token == null then return false
		return not is_block and location.line_start == location.line_end
	end

	# A XML representation of the AST
	# Productions and token become elements
	# 
	# detached tokens and whitespaces are preserved in the XML
	# 
	# ~~~
	# import parser_util
	# var text = "y += foo"
	# var ast = (new ToolContext).parse_something(text)
	# assert ast isa AExpr
	# ast.parentize_tokens
	# assert ast.to_xml.write_to_string == """<ACallReassignExpr><AQid><TId>y</TId></AQid> <APlusAssignOp><TPluseq>+=</TPluseq></APlusAssignOp> <ACallExpr><AQid><TId>foo</TId></AQid></ACallExpr></ACallReassignExpr>"""
	# ~~~
	fun to_xml: HTMLTag
	do
		var res = new HTMLTag("AST")
		var stack = new Array[HTMLTag]
		var c = first_token
		while c != null do
			if c != first_token then res.append(c.blank_before)
			var sp = c.starting_prods
			if sp != null then for p in sp do
				var tag = new HTMLTag(p.class_name)
				res.add tag
				stack.add res
				res = tag
			end
			var tag = new HTMLTag(c.class_name)
			res.add tag
			tag.append c.text
			var ep = c.ending_prods
			if ep != null then for p in ep do
				res = stack.pop
			end

			if c == last_token then
				c = null
			else
				c = c.next_token
			end
		end
		assert stack.is_empty
		return res.children.first
	end
end

redef class Token
	# Is self the first AST token on its line in the source
	# Computed by `parentize_tokens`
	#
	# Note, some tokens detached from the AST
	# may precede `self` even if `is_starting_line` is true
	# One can use `first_real_token_in_line` to get the real starting token
	var is_starting_line = false

	# Is self the last AST token on its line in the source
	# Computed by `parentize_tokens`
	#
	# Note, some tokens detached from the AST (like comments)
	# may follow `self` even if `is_ending_line` is true.
	# One can use `last_real_token_in_line` to get the real ending token
	var is_ending_line = false

	# The first real token that starts the line of `self`
	#
	# This could return a token that is detached from the AST.
	# See `first_token_in_line` if a AST token is required.
	fun first_real_token_in_line: Token
	do
		var line = location.line_start
		var t = self
		loop
			var p = t.prev_token
			if p == null or p.location.line_start != line then
				return t
			end
			t = p
		end
	end

	# The first AST token that starts the line of `self`.
	# May be null is the line contains only detached tokens (only comment)
	#
	# Computed by `parentize_tokens`
	#
	# ENSURE `result != null implies result.is_starting_line`
	fun first_token_in_line: nullable Token
	do
		return first_real_token_in_line.first_ast_token
	end

	# The first AST token.
	# This only work on the `first_real_token_in_line`
	private var first_ast_token: nullable Token

	# The last read token that ends the line of `self`
	#
	# This usually return a detached token lake a TEol or a comment.
	# See `last_token_in_line` if a AST token is required.
	fun last_real_token_in_line: Token
	do
		var line = location.line_start
		var t = self
		loop
			var p = t.next_token
			if p == null or p.location.line_start != line then
				return t
			end
			t = p
		end
	end

	# The last AST token that starts the line of `self`
	# May be null is the line contains only detached tokens (only comment)
	#
	# Computed by `parentize_tokens`
	#
	# ENSURE `result.is_ending_line`
	fun last_token_in_line: nullable Token
	do
		return last_real_token_in_line.last_ast_token
	end

	# The last AST token.
	# This only work on the `last_real_token_in_line`
	private var last_ast_token: nullable Token

	# The productions that starts with `self`, if any
	# Productions goes from the most general to the most specific
	#
	# Computed by `parentize_tokens`
	var starting_prods: nullable Array[Prod]

	# The productions that ends with `self`, if any
	# Productions goes from the most specific to the most general
	#
	# Computed by `parentize_tokens`
	var ending_prods: nullable Array[Prod]
end


private class PTokenVisitor
	super Visitor

	var last_token: nullable Token = null

	# productions that need a fisrt token
	var stack = new Array[Prod]

	fun work(n: ANode)
	do
		enter_visit(n)
		# process remaining detashed tokens
		var c = last_token
		if c != null then
			c.is_ending_line = true
			c.last_real_token_in_line.last_ast_token = c
			c = c.next_token
		end
		var r = n.root
		while c != null and c.parent == null do
			c.parent = r
			c = c.next_token
		end
	end

	redef fun visit(n)
	do
		if not n isa Token then
			assert n isa Prod
			stack.push(n)
			n.visit_all(self)
			if n.first_token == null then
				# epsilon production, just discard
				assert stack.pop == n
			else
				var t = last_token
				if t != null then
					# last token ends the production
					n.last_token = t
					if t.ending_prods == null then t.ending_prods = new Array[Prod]
					t.ending_prods.add n
				end
			end
			return
		end

		# We have a token, give it to prods that need one
		if not stack.is_empty then
			n.starting_prods = new Array[Prod]
			for p in stack do
				p.first_token = n
				n.starting_prods.add p
			end
			stack.clear
		end

		var last_token = last_token

		# n starts a new line
		if last_token == null or last_token.location.line_start != n.location.line_start then
			n.is_starting_line = true
			n.first_real_token_in_line.first_ast_token = n

		end
		# last_token ended a line
		if last_token != null and last_token.location.line_start != n.location.line_start then
			last_token.is_ending_line = true
			last_token.last_real_token_in_line.last_ast_token = last_token
		end

		# Get the common parent
		var p
		if last_token == null then
			p = n.root
		else
			p = last_token.common_parent(n)
		end
		if p isa Prod then
			# And apply it to detached tokens between `last_token` and `n`
			var c = n.prev_token
			while c != null and c.parent == null do
				c.parent = p
				c = c.prev_token
			end
		end

		self.last_token = n
	end
end
src/astutil.nit:15,1--292,3