lib: Update libs to use correctly ascii and code_point
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
index 9854075..795cee0 100644 (file)
@@ -28,6 +28,7 @@ import re2nfa
 class CollectNameVisitor
        super Visitor
 
+       # All the productions
        var nprods = new Array[Nprod]
 
        # Symbol table to associate things (prods and exprs) with their name
@@ -41,19 +42,25 @@ class CollectNameVisitor
        # Read the result of the visit here
        var nfa = new Automaton.empty
 
+       # The current production, used to initialize priorities
+       var prod: nullable Production = null
+
+       # The current priority counter to name transformed productions
+       var pricpt: Int = 0
+
        # Run the semantic analysis of the grammar
        fun start(n: Node)
        do
                # First visit to collect names
                enter_visit(n)
 
-               # Second visit to use collectec names and build rhings
+               # Second visit to use collected names and build things
                var v2 = new CheckNameVisitor(self)
                v2.enter_visit(n)
 
-               # Inline all the ?
+               # Inline all the `?`
                gram.inline(v2.quesizes.values)
-               # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
+               # Inline all the prods suffixed by '_inline' #TODO use a real keyword
                for p in gram.prods do
                        if not p.name.has_suffix("_inline") then continue
                        print "inline {p}"
@@ -62,21 +69,23 @@ class CollectNameVisitor
 
                # Build the NFA automaton
                for t in gram.tokens do
-                       var nexpr = t.nexpr
-                       var nfa2
-                       if nexpr == null then
-                               nfa2 = new Automaton.epsilon
-                               for c in t.text.as(not null) do
-                                       nfa2.concat(new Automaton.atom(c.ascii))
-                               end
-                       else
-                               nfa2 = nexpr.build_nfa
+                       var nfa2 = t.build_nfa
+                       nfa2.tag_accept(t)
+                       nfa.absorb(nfa2)
+               end
+
+               if not v2.ignoreds.is_empty then
+                       var ign = new Token("Ignored")
+                       var nfa2 = new Automaton.empty
+                       for t in v2.ignoreds do
+                               assert t isa Token
+                               var nfa3 = t.build_nfa
+                               nfa2.alternate(nfa3)
                        end
-                       nfa.states.add_all nfa2.states
-                       nfa.start.add_trans(nfa2.start, null)
-                       for s in nfa2.accept do nfa.add_tag(s, t)
-                       nfa.accept.add_all nfa2.accept
+                       nfa2.tag_accept(ign)
+                       nfa.absorb(nfa2)
                end
+
        end
 
        redef fun visit(n) do n.accept_collect_prod(self)
@@ -94,17 +103,24 @@ private class CheckNameVisitor
        # The collected element names, for the alternative
        var elems_names = new Array[nullable String]
 
-       # The collected elementname, for the nelem
+       # The collected element names, for the nelem
        var elemname: nullable String = null
 
        # Is the alternative transformed, for the alternative
        var trans = false
 
+       # The current priority class
+       # Used the check, and transform the grammar
+       var pri: nullable Npriority = null
+
+       # Known ignored tokens
+       var ignoreds = new Array[Element]
+
        # Known rejected tokens
-       var rejecteds = new Array[Token]
+       var rejecteds = new Array[Element]
 
        # Pool of elements that are modified with + (reuse them!)
-       private var plusizes = new HashMap[Element, Production]
+       var plusizes = new HashMap[Element, Production]
 
        # Create a + version of an element
        fun plusize(e: Element): Production
@@ -121,7 +137,7 @@ private class CheckNameVisitor
        end
 
        # Pool of elements that are modified with ? (reuse them!)
-       private var quesizes = new HashMap[Element, Production]
+       var quesizes = new HashMap[Element, Production]
 
        # Create a ? version of an element
        fun quesize(e: Element): Production
@@ -140,14 +156,13 @@ private class CheckNameVisitor
        end
 
        # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
-       var nexpr: nullable Nexpr
+       var nexpr: nullable Nexpr = null
 
        # The current production, used to initialize alternatives
-       var prod: nullable Production
+       var prod: nullable Production = null
 
        # The main visitor, used to access the grammar of the symbol table
        var v1: CollectNameVisitor
-       init(v1: CollectNameVisitor) do self.v1 = v1
 
        redef fun visit(n) do n.accept_check_name_visitor(self)
 end
@@ -170,6 +185,10 @@ redef class Nexpr
        redef fun accept_collect_prod(v) do
                var id = children.first.as(Nid)
                var name = id.text
+               if v.names.has_key(name) then
+                       print "{id.position.to_s} Error {name} already defined."
+                       exit(1)
+               end
                v.names[name] = self
                self.name = name
        end
@@ -185,7 +204,7 @@ redef class Nexpr
        # The associated NFA (cached, see `build_nfa`)
        private var nfa: nullable Automaton
 
-       # Build the NFA, possibily building the NFA of required expressions
+       # Build the NFA, possibly building the NFA of required expressions
        # Print an error if there is a circular dependency
        # The result is cached
        fun build_nfa: Automaton do
@@ -216,13 +235,13 @@ redef class Nre_id
                var id = children.first.as(Nid)
                var name = id.text
                if not v.v1.names.has_key(name) then
-                       print "{id.position} Error: unknown name {name}"
+                       print "{id.position.to_s} Error: unknown name {name}"
                        exit(1)
                        abort
                end
                var node = v.v1.names[name]
                if node isa Nprod then
-                       print "{id.position} Error: cannot use production {name} in a regular expression"
+                       print "{id.position.to_s} Error: cannot use production {name} in a regular expression"
                        exit(1)
                        abort
                else if not node isa Nexpr then
@@ -239,41 +258,30 @@ redef class Nre_id
 end
 
 redef class Nign
-       # The named element
-       var elem: nullable Element
-
        redef fun accept_check_name_visitor(v) do
-               var id = children[1].as(Nid)
-               var name = id.text
-               if not v.v1.names.has_key(name) then
-                       print "{id.position} Error: unknown name {name}"
-                       exit(1)
-                       abort
-               end
-               var node = v.v1.names[name]
-               var elem: nullable Element
-               if node isa Nprod then
-                       print "{id.position} Error: cannot ignore a production"
-                       exit(1)
-                       abort
-               else if node isa Nexpr then
-                       elem = node.token
-                       if elem == null then
-                               elem = new Token("Ignored")
-                               elem.nexpr = node
-                               v.v1.gram.tokens.add(elem)
-                               node.token = elem
+               # Add elements to the ignored list
+               v.elems = v.ignoreds
+               super
+               for e in v.elems do
+                       if e isa Production then
+                               print "Error: cannot ignore {e}, it is a production"
+                               exit(1)
+                               abort
+                       else if e isa Token then
+                               # The token was build and registered during the visit
+                               # So, unregister them, the big Ignored token will be build later
+                               v.v1.gram.tokens.remove(e)
+                       else
+                               abort
                        end
-               else
-                       abort
                end
-               self.elem = elem
        end
 end
 
 redef class Nrej
        redef fun accept_check_name_visitor(v) do
-               v.elems = new Array[Element]
+               # Add elements to the rejected list
+               v.elems = v.rejecteds
                super
                for e in v.elems do
                        if e isa Production then
@@ -282,8 +290,6 @@ redef class Nrej
                                abort
                        else if e isa Token then
                                # The token was build and registered during the visit
-                               # Just add it to the rejected list
-                               v.rejecteds.add(e)
                        else
                                abort
                        end
@@ -292,19 +298,51 @@ redef class Nrej
 end
 
 redef class Nprod
-       # The associated production
+       # The associated main production.
+       # i.e. the last priority class.
        var prod: nullable Production
 
+       # The associated most-priority production.
+       # i.e. the first priority class.
+       # If there is no priority then `sub_prod == prod`.
+       var sub_prod: nullable Production
+
        redef fun accept_collect_prod(v) do
                var id = children.first.as(Nid)
                var name = id.text
+               if v.names.has_key(name) then
+                       print "{id.position.to_s} Error {name} already defined."
+                       exit(1)
+               end
                v.names[name] = self
                v.nprods.add(self)
-               prod = new Production(name)
-               v.gram.prods.add(prod.as(not null))
+               var prod = new Production(name)
+               self.prod = prod
+               v.gram.prods.add(prod)
+
+               # Check priority block
+               var pris = children[4]
+               if pris isa Nodes[Npriority] then
+                       var lastpri = pris.children.last
+                       lastpri.is_last = true
+
+                       v.pricpt = pris.children.length
+
+                       # Create a new production for the first priority class
+                       # The main production will be used for the last priority class
+                       var spe = prod
+                       prod = new Production(name + "$0")
+                       prod.spe = spe
+                       v.gram.prods.add(prod)
+               end
+               self.sub_prod = prod
+
+               v.prod = prod
+               super
+               v.prod = null
        end
        redef fun accept_check_name_visitor(v) do
-               v.prod = prod
+               v.prod = sub_prod
                super
                v.prod = null
        end
@@ -333,13 +371,103 @@ end
 
 redef class Natrans
        redef fun accept_check_name_visitor(v) do
-               var id = children[2].as(Nid)
-               var name = id.text
-
                v.trans = true
        end
 end
 
+redef class Npriority
+       # It is the last priority group?
+       var is_last = false
+
+       # The associated production
+       var prod: nullable Production
+
+       # The production in the with the next less priority class.
+       # `null` if there is no priority or if the first priority class.
+       var next: nullable Production
+
+       redef fun accept_collect_prod(v) do
+               var old = v.prod
+               assert old != null
+               var spe = old.spe
+               assert spe != null
+               if is_last then
+                       prod = spe
+               else
+                       v.pricpt -= 1
+                       prod = new Production(spe.name + "${v.pricpt}")
+                       prod.spe = spe
+                       v.gram.prods.add(prod.as(not null))
+               end
+               next = old
+               v.prod = prod
+               super
+
+       end
+       redef fun accept_check_name_visitor(v) do
+               var old = v.prod
+               v.prod = prod
+               v.pri = self
+               super
+
+               # Inject a new alternative that goes to the next less priority class
+               var alt = prod.new_alt2(prod.name + "_" + prod.alts.length.to_s, [next.as(not null)])
+               alt.trans = true
+               alt.codes = [new CodePop]
+
+               v.pri = null
+               v.prod = old
+       end
+
+       # Check and transform `v.elems` for priority
+       private fun check_priority(v: CheckNameVisitor) is abstract
+end
+
+redef class Npriority_left
+       redef fun check_priority(v) do
+               var p = prod.spe or else prod
+               assert p != null
+               if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
+                       print("Error: in a Left priority class, left and right must be the production")
+                       exit(1)
+               end
+               v.elems.first = prod.as(not null)
+               v.elems.last = next.as(not null)
+       end
+end
+
+redef class Npriority_right
+       redef fun check_priority(v) do
+               var p = prod.spe or else prod
+               assert p != null
+               if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
+                       print("Error: in a Right priority class, left and right must be the production")
+                       exit(1)
+               end
+               v.elems.first = next.as(not null)
+               v.elems.last = prod.as(not null)
+       end
+end
+
+redef class Npriority_unary
+       redef fun check_priority(v) do
+               var p = prod.spe or else prod
+               assert p != null
+               if v.elems.length < 2 or (v.elems.first != p and v.elems.last != p) then
+                       print("Error: in a Unary priority class, left or right must be the production")
+                       exit(1)
+               end
+               if v.elems.first == p then v.elems.first = prod.as(not null)
+               if v.elems.last == p then v.elems.last = prod.as(not null)
+       end
+end
+
+redef class Alternative
+       # The short name of the alternative.
+       # Used for errors
+       var short_name: nullable String
+end
+
 redef class Nalt
        # The associated alternative
        var alt: nullable Alternative
@@ -350,7 +478,12 @@ redef class Nalt
                v.trans = false
                v.elems = new Array[Element]
                v.elems_names = new Array[nullable String]
+
                super
+
+               var pri = v.pri
+               if pri != null then pri.check_priority(v)
+
                var prod = v.prod.as(not null)
                var prodabs = prod.spe
                if prodabs == null then prodabs = prod
@@ -358,7 +491,7 @@ redef class Nalt
                if name == null then
                        if v.trans then
                                name = prod.name + "_" + prod.alts.length.to_s
-                       else if prod.spe == null and prod.alts.is_empty then
+                       else if prod.spe == null and prod.alts.is_empty and pri == null then
                                name = prod.name
                                prod.altone = true
                        else
@@ -375,7 +508,9 @@ redef class Nalt
                        end
                        name = prodabs.name + "_" + name
                end
+
                var alt = prod.new_alt2(name, v.elems)
+               alt.short_name = v.altname
                alt.elems_names.add_all(v.elems_names)
                self.alt = alt
                if v.trans then
@@ -391,6 +526,15 @@ redef class Naltid
                var id = children[1].as(Nid)
                var name = id.text
                v.altname = name
+               var prod = v.prod.as(not null)
+               var prodabs = prod.spe
+               if prodabs == null then prodabs = prod
+               for x in prodabs.alts do
+                       if x.short_name == name then
+                               print "{id.position.to_s} Error: already an alternative named {name}"
+                               exit(1)
+                       end
+               end
        end
 end
 
@@ -399,22 +543,30 @@ redef class Nelem
        var elem: nullable Element
 
        # Set the element and check things
-       fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
+       private fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
        do
                assert self.elem == null
                self.elem = elem
-               v.elems.push(elem)
-               if elem isa Token and v.rejecteds.has(elem) then
-                       if pos != null then
-                               print "{pos} Error: {elem.name} is already a rejected token."
-                       else
-                               print "Error: {elem.name} is already a rejected token."
+               if elem isa Token then
+                       if v.ignoreds.has(elem) then
+                               if pos != null then
+                                       print "{pos} Error: {elem.name} is already an ignored token."
+                               else
+                                       print "Error: {elem.name} is already an ignored token."
+                               end
+                               exit(1)
+                       end
+                       if v.rejecteds.has(elem) then
+                               if pos != null then
+                                       print "{pos} Error: {elem.name} is already a rejected token."
+                               else
+                                       print "Error: {elem.name} is already a rejected token."
+                               end
+                               exit(1)
                        end
-                       exit(1)
                end
-
+               v.elems.push(elem)
        end
-
 end
 
 redef class Token
@@ -422,6 +574,26 @@ redef class Token
        var nexpr: nullable Nexpr
        # The associated text (if any, ie defined in the parser part)
        var text: nullable String
+
+       # Build a NFA according to nexpr or text
+       # Does not tag it!
+       fun build_nfa: Automaton
+       do
+               var nexpr = self.nexpr
+               if nexpr != null then
+                       assert self.text == null
+                       return nexpr.build_nfa
+               end
+               var text = self.text
+               if text != null then
+                       var nfa = new Automaton.epsilon
+                       for c in text.chars do
+                               nfa.concat(new Automaton.atom(c.code_point))
+                       end
+                       return nfa
+               end
+               abort
+       end
 end
 
 redef class Nnelem
@@ -444,6 +616,12 @@ redef class Nelemid
        do
                var id = children[1].as(Nid)
                var name = id.text
+               for n2 in v.elems_names do
+                       if name == n2 then
+                               print "{id.position.to_s} Error: already an element named {name}."
+                               exit(1)
+                       end
+               end
                v.elemname = name
        end
 end
@@ -453,7 +631,7 @@ redef class Nelem_id
                var id = children.first.as(Nid)
                var name = id.text
                if not v.v1.names.has_key(name) then
-                       print "{id.position} Error: unknown name {name}"
+                       print "{id.position.to_s} Error: unknown name {name}"
                        exit(1)
                        abort
                end
@@ -480,7 +658,7 @@ end
 
 redef class Nelem_str
        redef fun accept_check_name_visitor(v) do
-               var str = children.first.as(Nstr)
+               var str = children.first.children.first.as(NToken)
                var text = str.value
                var name = str.text
                var elem: nullable Element
@@ -524,3 +702,14 @@ redef class Nelem_plus
                set_elem(v, null, elem)
        end
 end
+
+redef class Ntree_part
+       redef fun accept_collect_prod(v) do
+               print "NOT YET IMPLEMENTED: `Tree` part; it is ignored"
+               # SKIP by doing nothing
+       end
+
+       redef fun accept_check_name_visitor(v) do
+               # SKIP by doing nothing
+       end
+end