lib: Update libs to use correctly ascii and code_point
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
index dd18071..795cee0 100644 (file)
@@ -28,6 +28,7 @@ import re2nfa
 class CollectNameVisitor
        super Visitor
 
+       # All the productions
        var nprods = new Array[Nprod]
 
        # Symbol table to associate things (prods and exprs) with their name
@@ -41,19 +42,25 @@ class CollectNameVisitor
        # Read the result of the visit here
        var nfa = new Automaton.empty
 
+       # The current production, used to initialize priorities
+       var prod: nullable Production = null
+
+       # The current priority counter to name transformed productions
+       var pricpt: Int = 0
+
        # Run the semantic analysis of the grammar
        fun start(n: Node)
        do
                # First visit to collect names
                enter_visit(n)
 
-               # Second visit to use collectec names and build rhings
+               # Second visit to use collected names and build things
                var v2 = new CheckNameVisitor(self)
                v2.enter_visit(n)
 
-               # Inline all the ?
+               # Inline all the `?`
                gram.inline(v2.quesizes.values)
-               # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
+               # Inline all the prods suffixed by '_inline' #TODO use a real keyword
                for p in gram.prods do
                        if not p.name.has_suffix("_inline") then continue
                        print "inline {p}"
@@ -62,19 +69,23 @@ class CollectNameVisitor
 
                # Build the NFA automaton
                for t in gram.tokens do
-                       var nexpr = t.nexpr
-                       var nfa2
-                       if nexpr == null then
-                               nfa2 = new Automaton.epsilon
-                               for c in t.text.as(not null) do
-                                       nfa2.concat(new Automaton.atom(c.ascii))
-                               end
-                       else
-                               nfa2 = nexpr.build_nfa
-                       end
+                       var nfa2 = t.build_nfa
                        nfa2.tag_accept(t)
                        nfa.absorb(nfa2)
                end
+
+               if not v2.ignoreds.is_empty then
+                       var ign = new Token("Ignored")
+                       var nfa2 = new Automaton.empty
+                       for t in v2.ignoreds do
+                               assert t isa Token
+                               var nfa3 = t.build_nfa
+                               nfa2.alternate(nfa3)
+                       end
+                       nfa2.tag_accept(ign)
+                       nfa.absorb(nfa2)
+               end
+
        end
 
        redef fun visit(n) do n.accept_collect_prod(self)
@@ -92,17 +103,24 @@ private class CheckNameVisitor
        # The collected element names, for the alternative
        var elems_names = new Array[nullable String]
 
-       # The collected elementname, for the nelem
+       # The collected element names, for the nelem
        var elemname: nullable String = null
 
        # Is the alternative transformed, for the alternative
        var trans = false
 
+       # The current priority class
+       # Used the check, and transform the grammar
+       var pri: nullable Npriority = null
+
+       # Known ignored tokens
+       var ignoreds = new Array[Element]
+
        # Known rejected tokens
        var rejecteds = new Array[Element]
 
        # Pool of elements that are modified with + (reuse them!)
-       private var plusizes = new HashMap[Element, Production]
+       var plusizes = new HashMap[Element, Production]
 
        # Create a + version of an element
        fun plusize(e: Element): Production
@@ -119,7 +137,7 @@ private class CheckNameVisitor
        end
 
        # Pool of elements that are modified with ? (reuse them!)
-       private var quesizes = new HashMap[Element, Production]
+       var quesizes = new HashMap[Element, Production]
 
        # Create a ? version of an element
        fun quesize(e: Element): Production
@@ -138,14 +156,13 @@ private class CheckNameVisitor
        end
 
        # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
-       var nexpr: nullable Nexpr
+       var nexpr: nullable Nexpr = null
 
        # The current production, used to initialize alternatives
-       var prod: nullable Production
+       var prod: nullable Production = null
 
        # The main visitor, used to access the grammar of the symbol table
        var v1: CollectNameVisitor
-       init(v1: CollectNameVisitor) do self.v1 = v1
 
        redef fun visit(n) do n.accept_check_name_visitor(self)
 end
@@ -187,7 +204,7 @@ redef class Nexpr
        # The associated NFA (cached, see `build_nfa`)
        private var nfa: nullable Automaton
 
-       # Build the NFA, possibily building the NFA of required expressions
+       # Build the NFA, possibly building the NFA of required expressions
        # Print an error if there is a circular dependency
        # The result is cached
        fun build_nfa: Automaton do
@@ -241,35 +258,23 @@ redef class Nre_id
 end
 
 redef class Nign
-       # The named element
-       var elem: nullable Element
-
        redef fun accept_check_name_visitor(v) do
-               var id = children[1].as(Nid)
-               var name = id.text
-               if not v.v1.names.has_key(name) then
-                       print "{id.position.to_s} Error: unknown name {name}"
-                       exit(1)
-                       abort
-               end
-               var node = v.v1.names[name]
-               var elem: nullable Element
-               if node isa Nprod then
-                       print "{id.position.to_s} Error: cannot ignore a production"
-                       exit(1)
-                       abort
-               else if node isa Nexpr then
-                       elem = node.token
-                       if elem == null then
-                               elem = new Token("Ignored")
-                               elem.nexpr = node
-                               v.v1.gram.tokens.add(elem)
-                               node.token = elem
+               # Add elements to the ignored list
+               v.elems = v.ignoreds
+               super
+               for e in v.elems do
+                       if e isa Production then
+                               print "Error: cannot ignore {e}, it is a production"
+                               exit(1)
+                               abort
+                       else if e isa Token then
+                               # The token was build and registered during the visit
+                               # So, unregister them, the big Ignored token will be build later
+                               v.v1.gram.tokens.remove(e)
+                       else
+                               abort
                        end
-               else
-                       abort
                end
-               self.elem = elem
        end
 end
 
@@ -293,9 +298,15 @@ redef class Nrej
 end
 
 redef class Nprod
-       # The associated production
+       # The associated main production.
+       # i.e. the last priority class.
        var prod: nullable Production
 
+       # The associated most-priority production.
+       # i.e. the first priority class.
+       # If there is no priority then `sub_prod == prod`.
+       var sub_prod: nullable Production
+
        redef fun accept_collect_prod(v) do
                var id = children.first.as(Nid)
                var name = id.text
@@ -305,11 +316,33 @@ redef class Nprod
                end
                v.names[name] = self
                v.nprods.add(self)
-               prod = new Production(name)
-               v.gram.prods.add(prod.as(not null))
+               var prod = new Production(name)
+               self.prod = prod
+               v.gram.prods.add(prod)
+
+               # Check priority block
+               var pris = children[4]
+               if pris isa Nodes[Npriority] then
+                       var lastpri = pris.children.last
+                       lastpri.is_last = true
+
+                       v.pricpt = pris.children.length
+
+                       # Create a new production for the first priority class
+                       # The main production will be used for the last priority class
+                       var spe = prod
+                       prod = new Production(name + "$0")
+                       prod.spe = spe
+                       v.gram.prods.add(prod)
+               end
+               self.sub_prod = prod
+
+               v.prod = prod
+               super
+               v.prod = null
        end
        redef fun accept_check_name_visitor(v) do
-               v.prod = prod
+               v.prod = sub_prod
                super
                v.prod = null
        end
@@ -342,7 +375,96 @@ redef class Natrans
        end
 end
 
+redef class Npriority
+       # It is the last priority group?
+       var is_last = false
+
+       # The associated production
+       var prod: nullable Production
+
+       # The production in the with the next less priority class.
+       # `null` if there is no priority or if the first priority class.
+       var next: nullable Production
+
+       redef fun accept_collect_prod(v) do
+               var old = v.prod
+               assert old != null
+               var spe = old.spe
+               assert spe != null
+               if is_last then
+                       prod = spe
+               else
+                       v.pricpt -= 1
+                       prod = new Production(spe.name + "${v.pricpt}")
+                       prod.spe = spe
+                       v.gram.prods.add(prod.as(not null))
+               end
+               next = old
+               v.prod = prod
+               super
+
+       end
+       redef fun accept_check_name_visitor(v) do
+               var old = v.prod
+               v.prod = prod
+               v.pri = self
+               super
+
+               # Inject a new alternative that goes to the next less priority class
+               var alt = prod.new_alt2(prod.name + "_" + prod.alts.length.to_s, [next.as(not null)])
+               alt.trans = true
+               alt.codes = [new CodePop]
+
+               v.pri = null
+               v.prod = old
+       end
+
+       # Check and transform `v.elems` for priority
+       private fun check_priority(v: CheckNameVisitor) is abstract
+end
+
+redef class Npriority_left
+       redef fun check_priority(v) do
+               var p = prod.spe or else prod
+               assert p != null
+               if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
+                       print("Error: in a Left priority class, left and right must be the production")
+                       exit(1)
+               end
+               v.elems.first = prod.as(not null)
+               v.elems.last = next.as(not null)
+       end
+end
+
+redef class Npriority_right
+       redef fun check_priority(v) do
+               var p = prod.spe or else prod
+               assert p != null
+               if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
+                       print("Error: in a Right priority class, left and right must be the production")
+                       exit(1)
+               end
+               v.elems.first = next.as(not null)
+               v.elems.last = prod.as(not null)
+       end
+end
+
+redef class Npriority_unary
+       redef fun check_priority(v) do
+               var p = prod.spe or else prod
+               assert p != null
+               if v.elems.length < 2 or (v.elems.first != p and v.elems.last != p) then
+                       print("Error: in a Unary priority class, left or right must be the production")
+                       exit(1)
+               end
+               if v.elems.first == p then v.elems.first = prod.as(not null)
+               if v.elems.last == p then v.elems.last = prod.as(not null)
+       end
+end
+
 redef class Alternative
+       # The short name of the alternative.
+       # Used for errors
        var short_name: nullable String
 end
 
@@ -356,7 +478,12 @@ redef class Nalt
                v.trans = false
                v.elems = new Array[Element]
                v.elems_names = new Array[nullable String]
+
                super
+
+               var pri = v.pri
+               if pri != null then pri.check_priority(v)
+
                var prod = v.prod.as(not null)
                var prodabs = prod.spe
                if prodabs == null then prodabs = prod
@@ -364,7 +491,7 @@ redef class Nalt
                if name == null then
                        if v.trans then
                                name = prod.name + "_" + prod.alts.length.to_s
-                       else if prod.spe == null and prod.alts.is_empty then
+                       else if prod.spe == null and prod.alts.is_empty and pri == null then
                                name = prod.name
                                prod.altone = true
                        else
@@ -381,6 +508,7 @@ redef class Nalt
                        end
                        name = prodabs.name + "_" + name
                end
+
                var alt = prod.new_alt2(name, v.elems)
                alt.short_name = v.altname
                alt.elems_names.add_all(v.elems_names)
@@ -419,13 +547,23 @@ redef class Nelem
        do
                assert self.elem == null
                self.elem = elem
-               if elem isa Token and v.rejecteds.has(elem) then
-                       if pos != null then
-                               print "{pos} Error: {elem.name} is already a rejected token."
-                       else
-                               print "Error: {elem.name} is already a rejected token."
+               if elem isa Token then
+                       if v.ignoreds.has(elem) then
+                               if pos != null then
+                                       print "{pos} Error: {elem.name} is already an ignored token."
+                               else
+                                       print "Error: {elem.name} is already an ignored token."
+                               end
+                               exit(1)
+                       end
+                       if v.rejecteds.has(elem) then
+                               if pos != null then
+                                       print "{pos} Error: {elem.name} is already a rejected token."
+                               else
+                                       print "Error: {elem.name} is already a rejected token."
+                               end
+                               exit(1)
                        end
-                       exit(1)
                end
                v.elems.push(elem)
        end
@@ -436,6 +574,26 @@ redef class Token
        var nexpr: nullable Nexpr
        # The associated text (if any, ie defined in the parser part)
        var text: nullable String
+
+       # Build a NFA according to nexpr or text
+       # Does not tag it!
+       fun build_nfa: Automaton
+       do
+               var nexpr = self.nexpr
+               if nexpr != null then
+                       assert self.text == null
+                       return nexpr.build_nfa
+               end
+               var text = self.text
+               if text != null then
+                       var nfa = new Automaton.epsilon
+                       for c in text.chars do
+                               nfa.concat(new Automaton.atom(c.code_point))
+                       end
+                       return nfa
+               end
+               abort
+       end
 end
 
 redef class Nnelem
@@ -544,3 +702,14 @@ redef class Nelem_plus
                set_elem(v, null, elem)
        end
 end
+
+redef class Ntree_part
+       redef fun accept_collect_prod(v) do
+               print "NOT YET IMPLEMENTED: `Tree` part; it is ignored"
+               # SKIP by doing nothing
+       end
+
+       redef fun accept_check_name_visitor(v) do
+               # SKIP by doing nothing
+       end
+end