Runtime library required by parsers and lexers generated by nitcc

Introduced classes

interface DFAState

nitcc_runtime :: DFAState

A state in a lexer automaton generated by nitcc
abstract class LRGoto

nitcc_runtime :: LRGoto

A concrete production in a parser LR automation generated by nitcc
abstract class LRState

nitcc_runtime :: LRState

A state in a parser LR automaton generated by nitcc
abstract class Lexer

nitcc_runtime :: Lexer

A abstract lexer engine generated by nitcc
class NEof

nitcc_runtime :: NEof

The special token for the end of stream
abstract class NError

nitcc_runtime :: NError

A special token used to represent a parser or lexer error
class NLexerError

nitcc_runtime :: NLexerError

A lexer error as a token for the unexpected characted
class NParserError

nitcc_runtime :: NParserError

A parser error linked to a unexpected token
abstract class NProd

nitcc_runtime :: NProd

A production with a specific, named and statically typed children
abstract class NToken

nitcc_runtime :: NToken

A token produced by the lexer and used in a syntactic tree
abstract class Node

nitcc_runtime :: Node

A node of a syntactic tree
class Nodes[T: Node]

nitcc_runtime :: Nodes

A hogeneous sequence of node, used to represent unbounded lists (and + modifier)
abstract class Parser

nitcc_runtime :: Parser

A abstract parser engine generated by nitcc
class Position

nitcc_runtime :: Position

A position into a input stream
abstract class TestParser

nitcc_runtime :: TestParser

All-in-one abstract class to test generated parsers on a given
class TreePrinterVisitor

nitcc_runtime :: TreePrinterVisitor

Print a node (using to_s) on a line and recustively each children indented (with two spaces)
abstract class Visitor

nitcc_runtime :: Visitor

A abstract visitor on syntactic trees generated by nitcc

Redefined classes

redef abstract class Deserializer

nitcc_runtime :: nitcc_runtime $ Deserializer

Abstract deserialization service

All class definitions

interface DFAState

nitcc_runtime $ DFAState

A state in a lexer automaton generated by nitcc
redef abstract class Deserializer

nitcc_runtime :: nitcc_runtime $ Deserializer

Abstract deserialization service
abstract class LRGoto

nitcc_runtime $ LRGoto

A concrete production in a parser LR automation generated by nitcc
abstract class LRState

nitcc_runtime $ LRState

A state in a parser LR automaton generated by nitcc
abstract class Lexer

nitcc_runtime $ Lexer

A abstract lexer engine generated by nitcc
class NEof

nitcc_runtime $ NEof

The special token for the end of stream
abstract class NError

nitcc_runtime $ NError

A special token used to represent a parser or lexer error
class NLexerError

nitcc_runtime $ NLexerError

A lexer error as a token for the unexpected characted
class NParserError

nitcc_runtime $ NParserError

A parser error linked to a unexpected token
abstract class NProd

nitcc_runtime $ NProd

A production with a specific, named and statically typed children
abstract class NToken

nitcc_runtime $ NToken

A token produced by the lexer and used in a syntactic tree
abstract class Node

nitcc_runtime $ Node

A node of a syntactic tree
class Nodes[T: Node]

nitcc_runtime $ Nodes

A hogeneous sequence of node, used to represent unbounded lists (and + modifier)
abstract class Parser

nitcc_runtime $ Parser

A abstract parser engine generated by nitcc
class Position

nitcc_runtime $ Position

A position into a input stream
abstract class TestParser

nitcc_runtime $ TestParser

All-in-one abstract class to test generated parsers on a given
class TreePrinterVisitor

nitcc_runtime $ TreePrinterVisitor

Print a node (using to_s) on a line and recustively each children indented (with two spaces)
abstract class Visitor

nitcc_runtime $ Visitor

A abstract visitor on syntactic trees generated by nitcc
package_diagram nitcc_runtime::nitcc_runtime nitcc_runtime serialization serialization nitcc_runtime::nitcc_runtime->serialization poset poset serialization->poset meta meta serialization->meta json json serialization->json ...poset ... ...poset->poset ...meta ... ...meta->meta ...json ... ...json->json a_star-m a_star-m a_star-m->nitcc_runtime::nitcc_runtime

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module caching

serialization :: caching

Services for caching serialization engines
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module engine_tools

serialization :: engine_tools

Advanced services for serialization engines
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module inspect

serialization :: inspect

Refine Serializable::inspect to show more useful information
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module meta

meta :: meta

Simple user-defined meta-level to manipulate types of instances as object.
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module serialization_core

serialization :: serialization_core

Abstract services to serialize Nit objects to different formats
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module serialization

serialization :: serialization

General serialization services

Children

module a_star-m

a_star-m

# Runtime library required by parsers and lexers generated by nitcc
module nitcc_runtime

import serialization

# A abstract parser engine generated by nitcc
abstract class Parser
	# The list of tokens
	# FIXME: provide something better, like a lexer?
	var tokens = new CircularArray[NToken]

	# Look at the next token
	# Used by generated parsers
	fun peek_token: NToken do return tokens.first

	# Consume the next token
	# Used by generated parsers
	fun get_token: NToken do return tokens.shift

	# Consume the next token and shift to the state `dest`.
	# Used by generated parsers
	fun shift(dest: LRState)
	do
		var t = get_token
		#print "shift {t} -> {dest}"
		node_stack.push t
		state_stack.push state
		state = dest
	end

	# After a reduction on `goto` go to the next state
	# Used by generated parsers
	fun goto(goto: LRGoto)
	do
		#print "reduce from {state} -> {prod}"
		state.goto(self, goto)
	end

	# push a new state on the stack of states (
	# Used by generated parsers
	fun push(dest: LRState)
	do
		#print "push prod {prod} -> {dest}"
		state_stack.push state
		state = dest
	end

	# Pop and return the last node
	# Also pop (and discard) the associated state
	# Used by generated parsers
	fun pop: Node
	do
		var res = node_stack.pop
		state = state_stack.pop
		return res
	end

	# Produce a parse error and stop the parsing
	# Used by generated parsers
	fun parse_error
	do
		var token = peek_token
		#print "* parse error in state {state} on token {token}"
		#print "  expected: {state.error_msg}"
		#print "  node_stack={node_stack.join(", ")}"
		#print "  state_stack={state_stack.join(", ")}"
		node_stack.push(token)
		var error: NError
		if token isa NLexerError then
			error = token
		else
			error = new NParserError
			error.position = token.position
			error.text = token.text
			error.token = token
		end
		error.error_tree.children.add_all(node_stack)
		error.expected = state.error_msg
		node_stack.clear
		node_stack.add error
		stop = true
	end

	# The stating state for parsing
	# Used by generated parsers
	protected fun start_state: LRState is abstract

	# The current state
	# Used by generated parsers
	var state: LRState is noinit

	init
	do
		state = start_state
	end

	# The stack of nodes
	# Used by generated parsers
	var node_stack = new Array[Node]

	# The stack of states
	# Used by generated parsers
	var state_stack = new Array[LRState]

	# Should the parser stop
	# Used by generated parsers
	var stop = true is writable

	# Parse a full sequence of tokens and return a complete syntactic tree
	fun parse: Node
	do
		state = start_state
		state_stack.clear
		node_stack.clear
		stop = false
		while not stop do
			#print "* current state {state}"
			#print "  tokens={tokens.join(" ")}"
			#print "  node_stack={node_stack.join(" ")}"
			#print "  state_stack={state_stack.join(" ")}"
			state.action(self)
		end
		#print "* over"
		return node_stack.first
	end
end

# A state in a parser LR automaton generated by nitcc
# Used by generated parsers
abstract class LRState
	fun action(parser: Parser) is abstract
	fun goto(parser: Parser, goto: LRGoto) is abstract
	fun error_msg: String do return "FIXME"
end

# A concrete production in a parser LR automation generated by nitcc
# Used by generated parsers
abstract class LRGoto
end

###

# A abstract lexer engine generated by nitcc
abstract class Lexer
	# The input stream of characters
	var stream: String

	# The stating state for lexing
	# Used by generated parsers
	protected fun start_state: DFAState is abstract

	# Lexize a stream of characters and return a sequence of tokens
	fun lex: CircularArray[NToken]
	do
		var res = new CircularArray[NToken]
		loop
			var t = next_token
			if t != null then res.add t
			if t isa NEof or t isa NError then break
		end
		return res
	end

	# Cursor current position (in chars, starting from 0)
	var pos_start = 0

	# Cursor current line (starting from 1)
	var line_start = 1

	# Cursor current column (in chars, starting from 1)
	var col_start = 1

	# Move the cursor and return the next token.
	#
	# Returns a `NEof` and the end.
	# Returns `null` if the token is ignored.
	fun next_token: nullable NToken
	do
		var state = start_state
		var pos = pos_start
		var pos_end = pos_start - 1
		var line = line_start
		var line_end = line_start - 1
		var col = col_start
		var col_end = col_start - 1
		var last_state: nullable DFAState = null
		var text = stream
		var length = text.length
		loop
			if state.is_accept then
				pos_end = pos - 1
				line_end = line
				col_end = col
				last_state = state
			end
			var c
			var next
			if pos >= length then
				c = '\0'
				next = null
			else
				c = text.chars[pos]
				next = state.trans(c)
			end
			if next == null then
				var token
				if pos_start < length then
					if last_state == null then
						token = new NLexerError
						var position = new Position(pos_start, pos, line_start, line, col_start, col)
						token.position = position
						token.text = text.substring(pos_start, pos-pos_start+1)
					else if not last_state.is_ignored then
						var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end)
						token = last_state.make_token(position, text)
					else
						token = null
					end
				else
					token = new NEof
					var position = new Position(pos, pos, line, line, col, col)
					token.position = position
					token.text = ""
				end
				pos_start = pos_end + 1
				line_start = line_end
				col_start = col_end

				return token
			end
			state = next
			pos += 1
			col += 1
			if c == '\n' then
				line += 1
				col = 1
			end
		end
	end
end

# A state in a lexer automaton generated by nitcc
# Used by generated lexers
interface DFAState
	fun is_accept: Bool do return false
	fun trans(c: Char): nullable DFAState do return null
	fun make_token(position: Position, source: String): nullable NToken is abstract
	fun is_ignored: Bool do return false
end

###

# A abstract visitor on syntactic trees generated by nitcc
abstract class Visitor
	# The main entry point to visit a node `n`
	# Should not be redefined
	fun enter_visit(n: Node)
	do
		visit(n)
	end

	# The specific implementation of a visit
	#
	# Should be redefined by concrete visitors
	#
	# Should not be called directly (use `enter_visit` instead)
	#
	# By default, the visitor just rescursively visit the children of `n`
	protected fun visit(n: Node)
	do
		n.visit_children(self)
	end
end

# Print a node (using to_s) on a line and recustively each children indented (with two spaces)
class TreePrinterVisitor
	super Visitor
	var writer: Writer
	private var indent = 0
	redef fun visit(n)
	do
		for i in [0..indent[ do writer.write("  ")
		writer.write(n.to_s)
		writer.write("\n")
		indent += 1
		super
		indent -= 1
	end
end

# A position into a input stream
# Used to give position to tokens
class Position
	serialize

	var pos_start: Int
	var pos_end: Int
	var line_start: Int
	var line_end: Int
	var col_start: Int
	var col_end: Int

	redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}"

	# Extract the content from the given source
	fun extract(source: String): String
	do
		return source.substring(pos_start, pos_end-pos_start+1)
	end

	# Get the lines covered by `self` and underline the target columns.
	#
	# This is useful for pretty printing errors or debug the output
	#
	# ~~~
	# var src = "var Foo = new Array[Int]"
	# var pos = new Position(0,0, 1, 1, 5, 8)
	#
	# assert pos.underline(src) == """
	# var Foo = new Array[Int]
	#     ^^^"""
	# ~~~
	fun underline(source: Text): String
	do
		var res = new FlatBuffer

		# All the concerned lines
		var lines = source.split("\n")
		for line in [line_start..line_end] do
			res.append lines[line-1]
			res.append "\n"
		end

		# Cover all columns, no matter their lines
		var col_start = col_start.min(col_end)
		var col_end = self.col_start.max(col_end)

		# "           ^^^^"
		var ptr = " "*(col_start-1).max(0) + "^"*(col_end-col_start)
		res.append ptr

		return res.to_s
	end
end

# A node of a syntactic tree
abstract class Node
	# The name of the node (as used in the grammar file)
	fun node_name: String do return class_name

	# A point of view on the direct children of the node
	fun children: SequenceRead[nullable Node] is abstract

	# A point of view of a depth-first visit of all non-null children
	var depth: Collection[Node] = new DephCollection(self) is lazy

	# Visit all the children of the node with the visitor `v`
	protected fun visit_children(v: Visitor)
	do
		for c in children do if c != null then v.enter_visit(c)
	end

	# The position of the node in the input stream
	var position: nullable Position = null is writable

	# Produce a graphiz file for the syntaxtic tree rooted at `self`.
	fun to_dot(filepath: String)
	do
		var f = new FileWriter.open(filepath)
		f.write("digraph g \{\n")
		f.write("rankdir=BT;\n")

		var a = new Array[NToken]
		to_dot_visitor(f, a)

		f.write("\{ rank=same\n")
		var first = true
		for n in a do
			if first then
				first = false
			else
				f.write("->")
			end
			f.write("n{n.object_id}")
		end
		f.write("[style=invis];\n")
		f.write("\}\n")

		f.write("\}\n")
		f.close
	end

	private fun to_dot_visitor(f: Writer, a: Array[NToken])
	do
		f.write("n{object_id} [label=\"{node_name}\"];\n")
		for x in children do
			if x == null then continue
			f.write("n{x.object_id} -> n{object_id};\n")
			x.to_dot_visitor(f,a )
		end
	end

	redef fun to_s do
		var pos = position
		if pos == null then
			return "{node_name}"
		else
			return "{node_name}@({pos})"
		end
	end
end

private class DephCollection
	super Collection[Node]
	var node: Node
	redef fun iterator do return new DephIterator([node].iterator)
end

private class DephIterator
	super Iterator[Node]

	var stack = new Array[Iterator[nullable Node]]

	init(i: Iterator[nullable Node]) is old_style_init do
		stack.push i
	end

	redef fun is_ok do return not stack.is_empty
	redef fun item do return stack.last.item.as(not null)
	redef fun next
	do
		var i = stack.last
		stack.push i.item.children.iterator
		i.next
		while is_ok do
			if not stack.last.is_ok then
				stack.pop
				continue
			end
			if stack.last.item == null then
				stack.last.next
				continue
			end
			return
		end
	end
end

# A token produced by the lexer and used in a syntactic tree
abstract class NToken
	super Node

	redef fun children do return once new Array[Node]

	redef fun to_dot_visitor(f, a)
	do
		var labe = "{node_name}"
		var pos = position
		if pos != null then labe += "\\n{pos}"
		var text = self.text
		if node_name != "'{text}'" then
			labe += "\\n'{text.escape_to_c}'"
		end
		f.write("n{object_id} [label=\"{labe}\",shape=box];\n")
		a.add(self)
	end

	# The text associated with the token
	var text: String = "" is writable

	redef fun to_s do
		var res = super
		var text = self.text
		if node_name != "'{text}'" then
			res += "='{text.escape_to_c}'"
		end
		return res
	end
end

# The special token for the end of stream
class NEof
	super NToken
end

# A special token used to represent a parser or lexer error
abstract class NError
	super NToken

	# All the partially built tree during parsing (aka the node_stack)
	var error_tree = new Nodes[Node]

	# The things unexpected
	fun unexpected: String is abstract

	# The things expected (if any)
	var expected: nullable String = null

	# The error message,using `expected` and `unexpected`
	fun message: String
	do
		var exp = expected
		var res = "Unexpected {unexpected}"
		if exp != null then res += "; is acceptable instead: {exp}"
		return res
	end
end

# A lexer error as a token for the unexpected characted
class NLexerError
	super NError

	redef fun unexpected do return "character '{text.chars.first}'"
end

# A parser error linked to a unexpected token
class NParserError
	super NError

	# The unexpected token
	var token: nullable NToken = null

	redef fun unexpected
	do
		var res = token.node_name
		var text = token.text
		if not text.is_empty and res != "'{text}'" then
			res += " '{text.escape_to_c}'"
		end
		return res
	end
end

# A hogeneous sequence of node, used to represent unbounded lists (and + modifier)
class Nodes[T: Node]
	super Node
	redef var children: Array[T] = new Array[T]
end

# A production with a specific, named and statically typed children
abstract class NProd
	super Node
	redef var children: SequenceRead[nullable Node] = new NProdChildren(self)

	# The exact number of direct children
	# Used to implement `children` by generated parsers
	fun number_of_children: Int is abstract

	# The specific children got by its index
	# Used to implement `children` by generated parsers
	fun child(x: Int): nullable Node is abstract
end


private class NProdChildren
	super SequenceRead[nullable Node]
	var prod: NProd
	redef fun iterator do return new NProdIterator(prod)
	redef fun length do return prod.number_of_children
	redef fun is_empty do return prod.number_of_children == 0
	redef fun [](i) do return prod.child(i)
end

private class NProdIterator
	super IndexedIterator[nullable Node]
	var prod: NProd
	redef var index = 0
	redef fun is_ok do return index < prod.number_of_children
	redef fun next do index += 1
	redef fun item do return prod.child(index)
end

# All-in-one abstract class to test generated parsers on a given
abstract class TestParser
	# How to get a new lexer on a given stream of character
	fun new_lexer(text: String): Lexer is abstract

	# How to get a new parser
	fun new_parser: Parser is abstract

	# The name of the language (used for generated files)
	fun name: String is abstract

	# Use the class as the main enrty point of the program
	# - parse arguments and options of the command
	# - test the parser (see `work`)
	fun main: Node
	do
		if args.is_empty then
			print "usage {name}_test <filepath> | - | -e <text>"
			exit 0
		end

		var filepath = args.shift
		var text
		if filepath == "-" then
			text = sys.stdin.read_all
		else if filepath == "-e" then
			if args.is_empty then
				print "Error: -e need a text"
				exit 1
			end
			text = args.shift
		else
			var f = new FileReader.open(filepath)
			text = f.read_all
			f.close
		end

		if not args.is_empty then
			print "Error: superfluous arguments."
			exit 1
		end

		return work(text)
	end

	# Produce a full syntactic tree for a given stream of character
	# Produce also statistics and output files
	fun work(text: String): Node
	do
		print "INPUT: {text.length} chars"
		var l = new_lexer(text)
		var tokens = l.lex

		var tokout = "{name}.tokens.out"
		print "TOKEN: {tokens.length} tokens (see {tokout})"

		var f = new FileWriter.open(tokout)
		for t in tokens do
			f.write "{t.to_s}\n"
		end
		f.close

		var p = new_parser
		p.tokens.add_all(tokens)

		var n = p.parse

		var astout = "{name}.ast.out"
		f = new FileWriter.open(astout)
		var tpv = new TreePrinterVisitor(f)
		var astdotout = "{name}.ast.dot"
		if n isa NError then
			print "Syntax error: {n.message}"
			print "ERROR: {n} (see {astout} and {astdotout})"
			tpv.enter_visit(n)
			n = n.error_tree
		else
			print "ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"
		end
		tpv.enter_visit(n)
		n.to_dot(astdotout)
		f.close

		return n
	end
end

lib/nitcc_runtime/nitcc_runtime.nit:15,1--672,3