X-Git-Url: http://nitlanguage.org diff --git a/lib/nitcc_runtime.nit b/lib/nitcc_runtime.nit index b5fb0fb..cfddec3 100644 --- a/lib/nitcc_runtime.nit +++ b/lib/nitcc_runtime.nit @@ -15,11 +15,13 @@ # Runtime library required by parsers and lexers generated by nitcc module nitcc_runtime +import serialization + # A abstract parser engine generated by nitcc abstract class Parser # The list of tokens # FIXME: provide something better, like a lexer? - var tokens = new List[NToken] + var tokens = new CircularArray[NToken] # Look at the next token # Used by generated parsers @@ -76,7 +78,7 @@ abstract class Parser #print " expected: {state.error_msg}" #print " node_stack={node_stack.join(", ")}" #print " state_stack={state_stack.join(", ")}" - node_stack.add(token) + node_stack.push(token) var error: NError if token isa NLexerError then error = token @@ -99,7 +101,7 @@ abstract class Parser # The current state # Used by generated parsers - var state: LRState + var state: LRState is noinit init do @@ -116,7 +118,7 @@ abstract class Parser # Should the parser stop # Used by generated parsers - var stop writable = true + var stop = true is writable # Parse a full sequence of tokens and return a complete syntactic tree fun parse: Node @@ -162,19 +164,39 @@ abstract class Lexer protected fun start_state: DFAState is abstract # Lexize a stream of characters and return a sequence of tokens - fun lex: List[NToken] + fun lex: CircularArray[NToken] + do + var res = new CircularArray[NToken] + loop + var t = next_token + if t != null then res.add t + if t isa NEof or t isa NError then break + end + return res + end + + # Cursor current position (in chars, starting from 0) + var pos_start = 0 + + # Cursor current line (starting from 1) + var line_start = 1 + + # Cursor current column (in chars, starting from 1) + var col_start = 1 + + # Move the cursor and return the next token. + # + # Returns a `NEof` and the end. + # Returns `null` if the token is ignored. + fun next_token: nullable NToken do - var res = new List[NToken] var state = start_state - var pos = 0 - var pos_start = 0 - var pos_end = 0 - var line = 1 - var line_start = 1 - var line_end = 0 - var col = 1 - var col_start = 1 - var col_end = 0 + var pos = pos_start + var pos_end = pos_start - 1 + var line = line_start + var line_end = line_start - 1 + var col = col_start + var col_end = col_start - 1 var last_state: nullable DFAState = null var text = stream var length = text.length @@ -186,44 +208,39 @@ abstract class Lexer last_state = state end var c + var next if pos >= length then c = '\0' + next = null else - c = text[pos] + c = text.chars[pos] + next = state.trans(c) end - var next = state.trans(c) if next == null then + var token if pos_start < length then if last_state == null then - var token = new NLexerError + token = new NLexerError var position = new Position(pos_start, pos, line_start, line, col_start, col) token.position = position token.text = text.substring(pos_start, pos-pos_start+1) - res.add token - break + else if not last_state.is_ignored then + var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end) + token = last_state.make_token(position, text) + else + token = null end - var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end) - var token = last_state.make_token(position, text.substring(pos_start, pos_end-pos_start+1)) - if token != null then res.add(token) - end - if pos >= length then - var token = new NEof + else + token = new NEof var position = new Position(pos, pos, line, line, col, col) token.position = position token.text = "" - res.add token - break end - state = start_state pos_start = pos_end + 1 - pos = pos_start line_start = line_end - line = line_start col_start = col_end - col = col_start - last_state = null - continue + return token end state = next pos += 1 @@ -233,7 +250,6 @@ abstract class Lexer col = 1 end end - return res end end @@ -242,7 +258,8 @@ end interface DFAState fun is_accept: Bool do return false fun trans(c: Char): nullable DFAState do return null - fun make_token(position: Position, text: String): nullable NToken is abstract + fun make_token(position: Position, source: String): nullable NToken is abstract + fun is_ignored: Bool do return false end ### @@ -272,9 +289,8 @@ end # Print a node (using to_s) on a line and recustively each children indented (with two spaces) class TreePrinterVisitor super Visitor - var writer: OStream + var writer: Writer private var indent = 0 - init(writer: OStream) do self.writer = writer redef fun visit(n) do for i in [0..indent[ do writer.write(" ") @@ -289,13 +305,56 @@ end # A position into a input stream # Used to give position to tokens class Position + serialize + var pos_start: Int var pos_end: Int var line_start: Int var line_end: Int var col_start: Int var col_end: Int + redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}" + + # Extract the content from the given source + fun extract(source: String): String + do + return source.substring(pos_start, pos_end-pos_start+1) + end + + # Get the lines covered by `self` and underline the target columns. + # + # This is useful for pretty printing errors or debug the output + # + # ~~~ + # var src = "var Foo = new Array[Int]" + # var pos = new Position(0,0, 1, 1, 5, 8) + # + # assert pos.underline(src) == """ + # var Foo = new Array[Int] + # ^^^""" + # ~~~ + fun underline(source: Text): String + do + var res = new FlatBuffer + + # All the concerned lines + var lines = source.split("\n") + for line in [line_start..line_end] do + res.append lines[line-1] + res.append "\n" + end + + # Cover all columns, no matter their lines + var col_start = col_start.min(col_end) + var col_end = self.col_start.max(col_end) + + # " ^^^^" + var ptr = " "*(col_start-1).max(0) + "^"*(col_end-col_start) + res.append ptr + + return res.to_s + end end # A node of a syntactic tree @@ -307,7 +366,7 @@ abstract class Node fun children: SequenceRead[nullable Node] is abstract # A point of view of a depth-first visit of all non-null children - var depth: Collection[Node] = new DephCollection(self) + var depth: Collection[Node] = new DephCollection(self) is lazy # Visit all the children of the node with the visitor `v` protected fun visit_children(v: Visitor) @@ -316,12 +375,12 @@ abstract class Node end # The position of the node in the input stream - var position: nullable Position writable = null + var position: nullable Position = null is writable # Produce a graphiz file for the syntaxtic tree rooted at `self`. fun to_dot(filepath: String) do - var f = new OFStream.open(filepath) + var f = new FileWriter.open(filepath) f.write("digraph g \{\n") f.write("rankdir=BT;\n") @@ -345,7 +404,7 @@ abstract class Node f.close end - private fun to_dot_visitor(f: OStream, a: Array[NToken]) + private fun to_dot_visitor(f: Writer, a: Array[NToken]) do f.write("n{object_id} [label=\"{node_name}\"];\n") for x in children do @@ -373,11 +432,11 @@ end private class DephIterator super Iterator[Node] - var stack = new List[Iterator[nullable Node]] - init(i: Iterator[nullable Node]) - do - stack.add i + var stack = new Array[Iterator[nullable Node]] + + init(i: Iterator[nullable Node]) is old_style_init do + stack.push i end redef fun is_ok do return not stack.is_empty @@ -421,7 +480,7 @@ abstract class NToken end # The text associated with the token - var text: String writable = "" + var text: String = "" is writable redef fun to_s do var res = super @@ -465,14 +524,15 @@ end class NLexerError super NError - redef fun unexpected do return "character '{text.first}'" + redef fun unexpected do return "character '{text.chars.first}'" end # A parser error linked to a unexpected token class NParserError super NError + # The unexpected token - var token: nullable NToken + var token: nullable NToken = null redef fun unexpected do @@ -488,11 +548,11 @@ end # A hogeneous sequence of node, used to represent unbounded lists (and + modifier) class Nodes[T: Node] super Node - redef var children = new Array[T] + redef var children: Array[T] = new Array[T] end # A production with a specific, named and statically typed children -class NProd +abstract class NProd super Node redef var children: SequenceRead[nullable Node] = new NProdChildren(self) @@ -548,7 +608,7 @@ abstract class TestParser var filepath = args.shift var text if filepath == "-" then - text = stdin.read_all + text = sys.stdin.read_all else if filepath == "-e" then if args.is_empty then print "Error: -e need a text" @@ -556,7 +616,7 @@ abstract class TestParser end text = args.shift else - var f = new IFStream.open(filepath) + var f = new FileReader.open(filepath) text = f.read_all f.close end @@ -580,7 +640,7 @@ abstract class TestParser var tokout = "{name}.tokens.out" print "TOKEN: {tokens.length} tokens (see {tokout})" - var f = new OFStream.open(tokout) + var f = new FileWriter.open(tokout) for t in tokens do f.write "{t.to_s}\n" end @@ -592,7 +652,7 @@ abstract class TestParser var n = p.parse var astout = "{name}.ast.out" - f = new OFStream.open(astout) + f = new FileWriter.open(astout) var tpv = new TreePrinterVisitor(f) var astdotout = "{name}.ast.dot" if n isa NError then