Merge: Nitcc custom lexer
[nit.git] / lib / nitcc_runtime.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Runtime library required by parsers and lexers generated by nitcc
16 module nitcc_runtime
17
# An abstract LR parser engine generated by nitcc
#
# Generated subclasses provide the automaton (`start_state`); this class
# implements the generic shift/reduce machinery driven by `LRState::action`.
abstract class Parser
	# The list of tokens to process, consumed from the front
	# FIXME: provide something better, like a lexer?
	var tokens = new CircularArray[NToken]

	# Look at the next token without consuming it
	# Used by generated parsers
	fun peek_token: NToken do return tokens.first

	# Consume and return the next token
	# Used by generated parsers
	fun get_token: NToken do return tokens.shift

	# Consume the next token and shift to the state `dest`.
	# The token is pushed on `node_stack` and the previous state on `state_stack`.
	# Used by generated parsers
	fun shift(dest: LRState)
	do
		var t = get_token
		#print "shift {t} -> {dest}"
		node_stack.push t
		state_stack.push state
		state = dest
	end

	# After a reduction on `goto`, let the current state select the next state
	# Used by generated parsers
	fun goto(goto: LRGoto)
	do
		#print "reduce from {state} -> {prod}"
		state.goto(self, goto)
	end

	# Push a new state on the stack of states
	# Used by generated parsers
	fun push(dest: LRState)
	do
		#print "push prod {prod} -> {dest}"
		state_stack.push state
		state = dest
	end

	# Pop and return the last node
	# Also pop (and discard) the associated state
	# Used by generated parsers
	fun pop: Node
	do
		var res = node_stack.pop
		state = state_stack.pop
		return res
	end

	# Produce a parse error and stop the parsing
	#
	# The whole `node_stack` (including the offending token) is moved into
	# the `error_tree` of a `NError` node — the token itself when it is
	# already a `NLexerError`, a fresh `NParserError` otherwise.
	# That error node becomes the only node on the stack, so `parse`
	# returns it as its result.
	# Used by generated parsers
	fun parse_error
	do
		var token = peek_token
		#print "* parse error in state {state} on token {token}"
		#print "  expected: {state.error_msg}"
		#print "  node_stack={node_stack.join(", ")}"
		#print "  state_stack={state_stack.join(", ")}"
		node_stack.push(token)
		var error: NError
		if token isa NLexerError then
			error = token
		else
			error = new NParserError
			error.position = token.position
			error.text = token.text
			error.token = token
		end
		error.error_tree.children.add_all(node_stack)
		error.expected = state.error_msg
		node_stack.clear
		node_stack.add error
		stop = true
	end

	# The starting state for parsing
	# Used by generated parsers
	protected fun start_state: LRState is abstract

	# The current state
	# Used by generated parsers
	var state: LRState is noinit

	init
	do
		state = start_state
	end

	# The stack of nodes
	# Used by generated parsers
	var node_stack = new Array[Node]

	# The stack of states
	# Used by generated parsers
	var state_stack = new Array[LRState]

	# Should the parser stop?
	# Set by `parse_error`; reset at the beginning of `parse`.
	# Used by generated parsers
	var stop = true is writable

	# Parse a full sequence of tokens and return a complete syntactic tree
	#
	# On error, the returned node is a `NError` (see `parse_error`).
	fun parse: Node
	do
		state = start_state
		state_stack.clear
		node_stack.clear
		stop = false
		while not stop do
			#print "* current state {state}"
			#print "  tokens={tokens.join(" ")}"
			#print "  node_stack={node_stack.join(" ")}"
			#print "  state_stack={state_stack.join(" ")}"
			state.action(self)
		end
		#print "* over"
		return node_stack.first
	end
end
139
# A state in a parser LR automaton generated by nitcc
# Used by generated parsers
abstract class LRState
	# Perform the action (shift, reduce, accept or error) of this state on `parser`
	fun action(parser: Parser) is abstract
	# After a reduction on `goto`, move `parser` to its next state
	fun goto(parser: Parser, goto: LRGoto) is abstract
	# Human-readable description of what is expected in this state
	fun error_msg: String do return "FIXME"
end
147
# A concrete production in a parser LR automaton generated by nitcc
# Used by generated parsers as the argument of `Parser::goto`
abstract class LRGoto
end
152
153 ###
154
# An abstract lexer engine generated by nitcc
#
# Generated subclasses provide the DFA (`start_state`); this class
# implements the generic longest-match token scanning.
abstract class Lexer
	# The input stream of characters
	var stream: String

	# The starting state for lexing
	# Used by generated lexers
	protected fun start_state: DFAState is abstract

	# Lex the whole stream of characters and return the sequence of tokens
	#
	# Ignored tokens are skipped.
	# Lexing stops after the first `NEof` or `NError` token,
	# which is included in the result.
	fun lex: CircularArray[NToken]
	do
		var res = new CircularArray[NToken]
		loop
			var t = next_token
			if t != null then res.add t
			if t isa NEof or t isa NError then break
		end
		return res
	end

	# Cursor current position (in chars, starting from 0)
	var pos_start = 0

	# Cursor current line (starting from 1)
	var line_start = 1

	# Cursor current column (in chars, starting from 1)
	var col_start = 1

	# Move the cursor and return the next token.
	#
	# Runs the DFA from `start_state`, remembering the last accepting
	# state seen so the longest match is produced.
	# Returns a `NEof` at the end of the stream.
	# Returns a `NLexerError` when no accepting state was reached.
	# Returns `null` if the token is ignored.
	fun next_token: nullable NToken
	do
		var state = start_state
		var pos = pos_start
		var pos_end = pos_start - 1
		var line = line_start
		var line_end = line_start - 1
		var col = col_start
		var col_end = col_start - 1
		var last_state: nullable DFAState = null
		var text = stream
		var length = text.length
		loop
			# Record the last accepting state to implement longest-match.
			if state.is_accept then
				pos_end = pos - 1
				line_end = line
				col_end = col
				last_state = state
			end
			var c
			var next
			if pos >= length then
				c = '\0'
				next = null
			else
				c = text.chars[pos]
				next = state.trans(c)
			end
			if next == null then
				# No transition: emit whatever was recognized so far.
				var token
				if pos_start < length then
					if last_state == null then
						# No accepting state was ever reached: lexer error.
						# The offending characters (at least one) become the error text.
						token = new NLexerError
						var position = new Position(pos_start, pos, line_start, line, col_start, col)
						token.position = position
						token.text = text.substring(pos_start, pos-pos_start+1)
					else if not last_state.is_ignored then
						var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end)
						token = last_state.make_token(position, text)
					else
						# Ignored token (e.g. blanks or comments): skip it.
						token = null
					end
				else
					# The whole stream is consumed: end of file.
					token = new NEof
					var position = new Position(pos, pos, line, line, col, col)
					token.position = position
					token.text = ""
				end
				# Advance the cursor past the recognized token.
				# NOTE(review): on the error and EOF paths `pos_end`/`line_end`/`col_end`
				# may still hold their initial (stale) values; lexing stops on those
				# tokens anyway, but confirm before reusing the lexer afterwards.
				pos_start = pos_end + 1
				line_start = line_end
				col_start = col_end

				return token
			end
			state = next
			pos += 1
			col += 1
			if c == '\n' then
				line += 1
				col = 1
			end
		end
	end
end
253
# A state in a lexer DFA automaton generated by nitcc
# Used by generated lexers
interface DFAState
	# Is `self` an accepting state?
	fun is_accept: Bool do return false
	# The state reached on character `c`, or `null` if there is no transition
	fun trans(c: Char): nullable DFAState do return null
	# Build the token recognized by this accepting state;
	# `position` locates the token in the full `source` text
	fun make_token(position: Position, source: String): nullable NToken is abstract
	# Should tokens accepted by this state be discarded by the lexer?
	fun is_ignored: Bool do return false
end
262
263 ###
264
# An abstract visitor on syntactic trees generated by nitcc
abstract class Visitor
	# Public entry point: dispatch the visit of the node `n`.
	#
	# Clients call this; it should not be redefined.
	fun enter_visit(n: Node) do visit(n)

	# The specific implementation of a visit.
	#
	# Concrete visitors redefine this to implement their behavior.
	# It should not be called directly (use `enter_visit` instead).
	#
	# By default, the children of `n` are recursively visited.
	protected fun visit(n: Node) do n.visit_children(self)
end
286
# Print a node (using `to_s`) on a line and recursively each child, indented
class TreePrinterVisitor
	super Visitor
	# The stream where the tree is written to
	var writer: Writer
	# The current indentation level (number of nesting levels, not characters)
	private var indent = 0
	redef fun visit(n)
	do
		# NOTE(review): the indentation literal below may have lost a character
		# during extraction — confirm against upstream whether it is one or two spaces.
		for i in [0..indent[ do writer.write(" ")
		writer.write(n.to_s)
		writer.write("\n")
		indent += 1
		super
		indent -= 1
	end
end
302
# A position into an input stream
# Used to give a position to tokens
class Position
	# Offset of the first character (in chars, starting from 0)
	var pos_start: Int
	# Offset of the last character (inclusive, see `extract`)
	var pos_end: Int
	# Line of the first character (starting from 1)
	var line_start: Int
	# Line of the last character
	var line_end: Int
	# Column of the first character (in chars, starting from 1)
	var col_start: Int
	# End column
	# NOTE(review): the lexer and `underline` treat this as one-past-the-last
	# column (e.g. cols 5..8 underline 3 characters) — confirm.
	var col_end: Int

	redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}"

	# Extract the content covered by `self` from the given source
	fun extract(source: String): String
	do
		return source.substring(pos_start, pos_end-pos_start+1)
	end

	# Get the lines covered by `self` and underline the target columns.
	#
	# This is useful to pretty print errors or to debug the output
	#
	# ~~~
	# var src = "var Foo = new Array[Int]"
	# var pos = new Position(0,0, 1, 1, 5, 8)
	#
	# assert pos.underline(src) == """
	# var Foo = new Array[Int]
	#     ^^^"""
	# ~~~
	fun underline(source: Text): String
	do
		var res = new FlatBuffer

		# All the concerned lines
		var lines = source.split("\n")
		for line in [line_start..line_end] do
			res.append lines[line-1]
			res.append "\n"
		end

		# Cover all columns, no matter their lines
		# (the locals shadow the attributes of the same names)
		var col_start = col_start.min(col_end)
		var col_end = self.col_start.max(col_end)

		# "    ^^^"
		var ptr = " "*(col_start-1).max(0) + "^"*(col_end-col_start)
		res.append ptr

		return res.to_s
	end
end
355
# A node of a syntactic tree
abstract class Node
	# The name of the node (as used in the grammar file)
	fun node_name: String do return class_name

	# A point of view on the direct children of the node
	fun children: SequenceRead[nullable Node] is abstract

	# A point of view of a depth-first visit of all non-null children
	var depth: Collection[Node] = new DephCollection(self) is lazy

	# Visit all the non-null children of the node with the visitor `v`
	protected fun visit_children(v: Visitor)
	do
		for c in children do if c != null then v.enter_visit(c)
	end

	# The position of the node in the input stream
	var position: nullable Position = null is writable

	# Produce a graphviz file for the syntactic tree rooted at `self`.
	fun to_dot(filepath: String)
	do
		var f = new FileWriter.open(filepath)
		f.write("digraph g \{\n")
		f.write("rankdir=BT;\n")

		var a = new Array[NToken]
		to_dot_visitor(f, a)

		# Chain all the collected tokens with invisible edges on a same rank
		# so they are laid out in their source order.
		f.write("\{ rank=same\n")
		var first = true
		for n in a do
			if first then
				first = false
			else
				f.write("->")
			end
			f.write("n{n.object_id}")
		end
		f.write("[style=invis];\n")
		f.write("\}\n")

		f.write("\}\n")
		f.close
	end

	# Write the dot declaration of `self` and its children into `f`,
	# collecting the visited tokens into `a` (see `to_dot`).
	private fun to_dot_visitor(f: Writer, a: Array[NToken])
	do
		f.write("n{object_id} [label=\"{node_name}\"];\n")
		for x in children do
			if x == null then continue
			f.write("n{x.object_id} -> n{object_id};\n")
			x.to_dot_visitor(f,a )
		end
	end

	redef fun to_s do
		var pos = position
		if pos == null then
			return "{node_name}"
		else
			return "{node_name}@({pos})"
		end
	end
end
422
# Depth-first collection of the non-null nodes of a subtree (see `Node::depth`)
# (sic: `Deph`)
private class DephCollection
	super Collection[Node]
	# The root of the subtree to traverse
	var node: Node
	redef fun iterator do return new DephIterator([node].iterator)
end
428
# Depth-first iterator used by `DephCollection`
#
# Maintains an explicit stack of child iterators instead of recursing.
private class DephIterator
	super Iterator[Node]

	# Iterators of the ancestors of the current item, innermost last
	var stack = new Array[Iterator[nullable Node]]

	init(i: Iterator[nullable Node]) is old_style_init do
		stack.push i
	end

	redef fun is_ok do return not stack.is_empty
	redef fun item do return stack.last.item.as(not null)
	redef fun next
	do
		var i = stack.last
		# First descend into the children of the current item.
		stack.push i.item.children.iterator
		i.next
		# Then pop exhausted iterators and skip null children until the next
		# node is found (or the stack is empty and the iteration is over).
		while is_ok do
			if not stack.last.is_ok then
				stack.pop
				continue
			end
			if stack.last.item == null then
				stack.last.next
				continue
			end
			return
		end
	end
end
458
# A token produced by the lexer and used in a syntactic tree
abstract class NToken
	super Node

	# Tokens are leaves: a shared (`once`) empty list of children
	redef fun children do return once new Array[Node]

	redef fun to_dot_visitor(f, a)
	do
		var labe = "{node_name}"
		var pos = position
		if pos != null then labe += "\\n{pos}"
		var text = self.text
		# Do not repeat the text of anonymous literal tokens like `'='`
		if node_name != "'{text}'" then
			labe += "\\n'{text.escape_to_c}'"
		end
		f.write("n{object_id} [label=\"{labe}\",shape=box];\n")
		a.add(self)
	end

	# The text associated with the token
	var text: String = "" is writable

	redef fun to_s do
		var res = super
		var text = self.text
		# Do not repeat the text of anonymous literal tokens like `'='`
		if node_name != "'{text}'" then
			res += "='{text.escape_to_c}'"
		end
		return res
	end
end
490
# The special token for the end of stream
# Produced by `Lexer::next_token` when the whole stream is consumed
class NEof
	super NToken
end
495
# A special token used to represent a parser or lexer error
abstract class NError
	super NToken

	# All the partially built trees during parsing (aka the `node_stack`)
	var error_tree = new Nodes[Node]

	# The unexpected thing, as a human-readable string
	fun unexpected: String is abstract

	# The things expected instead (if any), as a human-readable string
	var expected: nullable String = null

	# The error message, built from `expected` and `unexpected`
	fun message: String
	do
		var exp = expected
		var res = "Unexpected {unexpected}"
		if exp != null then res += "; is acceptable instead: {exp}"
		return res
	end
end
518
# A lexer error as a token for the unexpected character
class NLexerError
	super NError

	# Describe the first offending character
	# (the lexer always stores at least one character in `text` on error)
	redef fun unexpected do return "character '{text.chars.first}'"
end
525
# A parser error linked to an unexpected token
class NParserError
	super NError

	# The unexpected token, if any
	var token: nullable NToken = null

	redef fun unexpected
	do
		# Guard against a missing token: `token` defaults to `null`,
		# so dereferencing it directly would crash when no token was attached.
		var token = self.token
		if token == null then return "end of text"
		var res = token.node_name
		var text = token.text
		# Do not repeat the text of anonymous literal tokens like `'='`
		if not text.is_empty and res != "'{text}'" then
			res += " '{text.escape_to_c}'"
		end
		return res
	end
end
543
# A homogeneous sequence of nodes, used to represent unbounded lists (and the `+` modifier)
class Nodes[T: Node]
	super Node
	# The children, all of the statically known type `T`
	redef var children: Array[T] = new Array[T]
end
549
# A production with specific, named and statically typed children
abstract class NProd
	super Node
	# A read-only view on the children, backed by `number_of_children` and `child`
	redef var children: SequenceRead[nullable Node] = new NProdChildren(self)

	# The exact number of direct children
	# Used to implement `children` by generated parsers
	fun number_of_children: Int is abstract

	# The specific child got by its index
	# Used to implement `children` by generated parsers
	fun child(x: Int): nullable Node is abstract
end
563
564
# Adapter exposing the `child`/`number_of_children` API of a `NProd` as a `SequenceRead`
private class NProdChildren
	super SequenceRead[nullable Node]
	# The production whose children are exposed
	var prod: NProd
	redef fun iterator do return new NProdIterator(prod)
	redef fun length do return prod.number_of_children
	redef fun is_empty do return prod.number_of_children == 0
	redef fun [](i) do return prod.child(i)
end
573
# Iterator over the children of a `NProd` (see `NProdChildren`)
private class NProdIterator
	super IndexedIterator[nullable Node]
	# The production being iterated
	var prod: NProd
	redef var index = 0
	redef fun is_ok do return index < prod.number_of_children
	redef fun next do index += 1
	redef fun item do return prod.child(index)
end
582
# All-in-one abstract class to test generated parsers on a given input
abstract class TestParser
	# How to get a new lexer on a given stream of characters
	fun new_lexer(text: String): Lexer is abstract

	# How to get a new parser
	fun new_parser: Parser is abstract

	# The name of the language (used to name the generated files)
	fun name: String is abstract

	# Use the class as the main entry point of the program
	# - parse arguments and options of the command
	# - test the parser (see `work`)
	fun main: Node
	do
		if args.is_empty then
			print "usage {name}_test <filepath> | - | -e <text>"
			exit 0
		end

		var filepath = args.shift
		var text
		if filepath == "-" then
			# Read the input from the standard input
			text = sys.stdin.read_all
		else if filepath == "-e" then
			# The input is the next command-line argument
			if args.is_empty then
				print "Error: -e need a text"
				exit 1
			end
			text = args.shift
		else
			# The input is the content of the given file
			var f = new FileReader.open(filepath)
			text = f.read_all
			f.close
		end

		if not args.is_empty then
			print "Error: superfluous arguments."
			exit 1
		end

		return work(text)
	end

	# Produce a full syntactic tree for a given stream of characters
	# Also produce statistics and output files
	# (`{name}.tokens.out`, `{name}.ast.out` and `{name}.ast.dot`)
	fun work(text: String): Node
	do
		print "INPUT: {text.length} chars"
		var l = new_lexer(text)
		var tokens = l.lex

		var tokout = "{name}.tokens.out"
		print "TOKEN: {tokens.length} tokens (see {tokout})"

		# Dump each token on its own line
		var f = new FileWriter.open(tokout)
		for t in tokens do
			f.write "{t.to_s}\n"
		end
		f.close

		var p = new_parser
		p.tokens.add_all(tokens)

		var n = p.parse

		var astout = "{name}.ast.out"
		f = new FileWriter.open(astout)
		var tpv = new TreePrinterVisitor(f)
		var astdotout = "{name}.ast.dot"
		if n isa NError then
			print "Syntax error: {n.message}"
			print "ERROR: {n} (see {astout} and {astdotout})"
			tpv.enter_visit(n)
			# Continue with the partial trees gathered in the error node
			n = n.error_tree
		else
			print "ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"
		end
		tpv.enter_visit(n)
		n.to_dot(astdotout)
		f.close

		return n
	end
end