nitcc: add `Prefixes` operator
[nit.git] / contrib / nitcc / src / re2nfa.nit
index dc43daa..12cd600 100644 (file)
@@ -24,17 +24,21 @@ redef class Node
                print inspect
                abort
        end
+
+       # The real value of the string
+       #
+       # This default implementation prints `inspect` and aborts;
+       # node classes that actually carry a value redefine it.
+       fun value: String do
+               print inspect
+               abort
+       end
 end
 
 redef class Nstr
-       # The real value of the string
-       fun value: String do return text.substring(1, text.length-2).unescape_nit
+       # `text` without its first and last characters (presumably the
+       # delimiting quotes -- TODO confirm), passed through `unescape_nit`.
+       redef fun value: String do return text.substring(1, text.length-2).unescape_nit
        redef fun make_rfa: Automaton
        do
                var a = new Automaton.epsilon
-               var val
-               for c in self.value do
-                       var b = new Automaton.atom(c.ascii)
+               for c in self.value.chars do
+                       var b = new Automaton.atom(c.code_point)
                        a.concat(b)
                end
                return a
@@ -42,11 +46,19 @@ redef class Nstr
 end
 
+# A character written as a decimal number
+# (NOTE(review): the first character of `text` is skipped, presumably a prefix marker -- confirm against the grammar).
 redef class Nch_dec
-       # The real value of the char
-       fun value: String do return text.substring_from(1).to_i.ascii.to_s
+       # Skip the first character of `text`, parse the rest with `to_i`,
+       # and convert the number with `code_point.to_s`.
+       redef fun value: String do return text.substring_from(1).to_i.code_point.to_s
+       # Automaton made by `Automaton.atom` from the code point of the
+       # first character of `value`.
        redef fun make_rfa: Automaton
        do
-               var a = new Automaton.atom(self.value.first.ascii)
+               var a = new Automaton.atom(self.value.chars.first.code_point)
+               return a
+       end
+end
+
+# A character written as a hexadecimal number
+# (NOTE(review): the first two characters of `text` are skipped, presumably a prefix marker -- confirm against the grammar).
+redef class Nch_hex
+       # Skip the first two characters of `text`, parse the rest with `to_hex`,
+       # and convert the number with `code_point.to_s`.
+       redef fun value: String do return text.substring_from(2).to_hex.code_point.to_s
+       # Automaton made by `Automaton.atom` from the code point of the
+       # first character of `value`.
+       redef fun make_rfa: Automaton
+       do
+               var a = new Automaton.atom(self.value.chars.first.code_point)
                return a
        end
 end
@@ -76,8 +88,17 @@ redef class Nre_minus
                var b = children[2].make_rfa.to_dfa
                for t in b.start.outs do
                        if not t.to.outs.is_empty then
-                               print "Not Yet Implemented Error: '-' only works on single char"
-                               exit(1)
+                               # `b` is not a single char, so just use except
+                               # "a - b == a Except (Any* b Any*)"
+                               var any1 = new Automaton.cla(0, null)
+                               any1.close
+                               var any2 = new Automaton.cla(0, null)
+                               any2.close
+                               var b2 = any1
+                               b2.concat(b)
+                               b2.concat(any2)
+                               var c = a.except(b2)
+                               return c
                        end
                        a.minus_sym(t.symbol.as(not null))
                end
@@ -85,6 +106,14 @@ redef class Nre_minus
        end
 end
 
+# Regular expression that uses the `End` token (not supported yet)
+redef class Nre_end
+       # Print a "NOT YET IMPLEMENTED" notice prefixed with the position of
+       # the first child, then fall back to `Automaton.epsilon`.
+       #
+       # The program is not stopped: the notice is only printed.
+       redef fun make_rfa
+       do
+               print "{children.first.position.to_s}: NOT YET IMPLEMENTED: token `End`; replaced with an empty string"
+               return new Automaton.epsilon
+       end
+end
+
 redef class Nre_and
        redef fun make_rfa
        do
@@ -115,25 +144,8 @@ redef class Nre_except
+       # Build the automata of the left operand (`children[0]`) and of the
+       # right operand (`children[2]`), then return `a.except(b)`.
        redef fun make_rfa
        do
                var a = children[0].make_rfa
-               var ta = new Token("1")
-               a.tag_accept(ta)
                var b = children[2].make_rfa
-               var tb = new Token("2")
-               b.tag_accept(tb)
-
-               var c = new Automaton.empty
-               c.absorb(a)
-               c.absorb(b)
-               c = c.to_dfa
-               c.accept.clear
-               for s in c.retrotags[ta] do
-                       if not c.tags[s].has(tb) then
-                               c.accept.add(s)
-                       end
-               end
-               c.clear_tag(ta)
-               c.clear_tag(tb)
-               return c
+               return a.except(b)
        end
 end
 
@@ -161,6 +173,16 @@ redef class Nre_longest
        end
 end
 
+# The `Prefixes` operator
+redef class Nre_prefixes
+       # Build the automaton of the operand (`children[2]`), `trim` it, then
+       # add every remaining state to the accept states.
+       #
+       # NOTE(review): `trim` presumably drops the states that cannot lead to
+       # an accept state, so that only real prefixes are accepted -- confirm
+       # against `Automaton::trim`.
+       redef fun make_rfa
+       do
+               var a = children[2].make_rfa
+               a.trim
+               a.accept.add_all a.states
+               return a
+       end
+end
+
 redef class Nre_conc
        redef fun make_rfa
        do
@@ -208,14 +230,28 @@ end
+# A class of characters given by two bounds (`children[0]` and `children[3]`)
 redef class Nre_class
+       # Automaton made by `Automaton.cla` from the code points of the two bounds.
+       #
+       # Each bound is the `value` of the first child of the corresponding node.
+       # If a bound is not exactly one character long, an error is printed
+       # and the program exits with status 1.
        redef fun make_rfa: Automaton
        do
-               var c1 = children[0].as(Nstr).value
-               var c2 = children[3].as(Nstr).value
+               var c1 = children[0].children[0].value
+               var c2 = children[3].children[0].value
                if c1.length != 1 or c2.length != 1 then
-                       print "Classes only works on single char"
+                       print "Classes expect a single char"
+                       exit(1)
+                       # NOTE(review): presumably never reached after `exit(1)`; likely
+                       # kept so the branch is seen as terminating -- confirm.
+                       abort
+               end
+               var a = new Automaton.cla(c1.chars.first.code_point, c2.chars.first.code_point)
+               return a
+       end
+end
+
+# A class of characters given by a single bound (`children[0]`)
+redef class Nre_openclass
+       # Automaton made by `Automaton.cla` from the code point of the bound
+       # and `null` as second argument
+       # (NOTE(review): `null` presumably means "no upper bound" -- confirm against `Automaton::cla`).
+       #
+       # If the bound is not exactly one character long, an error is printed
+       # and the program exits with status 1.
+       redef fun make_rfa: Automaton
+       do
+               var c1 = children[0].children[0].value
+               if c1.length != 1 then
+                       print "Classes expect a single char"
                        exit(1)
                        abort
                end
-               var a = new Automaton.cla(c1.first.ascii, c2.first.ascii)
+               var a = new Automaton.cla(c1.chars.first.code_point, null)
                return a
        end
 end