Merge: Added contributing guidelines and link from readme
[nit.git] / lib / core / re.nit
index 99421f8..0ac109c 100644 (file)
@@ -22,7 +22,7 @@
 module re
 
 import text
-intrude import text::flat
+import text::flat
 import gc
 import error
 
@@ -183,7 +183,7 @@ class Regex
        # Cache of a single `regmatch_t` to prevent many calls to `malloc`
        private var native_match: NativeMatchArray is lazy do
                native_match_is_init = true
-               return new NativeMatchArray.malloc(native.re_nsub+1)
+               return new NativeMatchArray.malloc(native.as(not null).re_nsub+1)
        end
 
        private var native_match_is_init = false
@@ -272,6 +272,9 @@ class Regex
 
        private fun get_error(errcode: Int): String
        do
+               var native = native
+               assert native != null
+
                # An error occurred: most likely out of memory, but any error code is handled
                var error_cstr = native.regerror(errcode)
 
@@ -291,6 +294,9 @@ class Regex
                var comp_res = compile
                assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+               var native = native
+               assert native != null
+
                # Actually execute
                var eflags = gather_eflags
                var res = native.regexec_match_only(text.to_cstring, eflags)
@@ -320,6 +326,9 @@ class Regex
                var comp_res = compile
                assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+               var native = native
+               assert native != null
+
                # Actually execute
                text = text.to_s
                var cstr = text.substring_from(from).to_cstring
@@ -346,39 +355,48 @@ class Regex
        #     assert "el+o".to_re.search_in("hello world", 0).from == 1
        #     assert "l+".to_re.search_in("hello world", 3).from == 3
        #     assert "z".to_re.search_in("hello world", 0) == null
-       redef fun search_in(text, from)
+       #     assert "cd(e)".to_re.search_in("abcdef", 2)[1].to_s == "e"
+       redef fun search_in(text, charfrom)
        do
                assert not optimize_has
 
                var comp_res = compile
                assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+               var native = native
+               assert native != null
+
                # Actually execute
-               text = text.to_s
-               var sub = text.substring_from(from)
-               var cstr = sub.to_cstring
-               var bstr = new FlatString.full(cstr, sub.bytelen, 0, sub.bytelen - 1, text.length - from)
+               var cstr = text.to_cstring
+               var rets = cstr.to_s_with_length(text.bytelen)
+               var bytefrom = cstr.char_to_byte_index_cached(charfrom, 0, 0)
+               var subcstr = cstr.fast_cstring(bytefrom)
                var eflags = gather_eflags
                var native_match = self.native_match
 
                var nsub = native.re_nsub
-               var res = native.regexec(cstr, nsub+1, native_match, eflags)
+               var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
 
                # Found one?
                if res == 0 then
-                       var bso = bstr.byte_to_char_index(native_match.rm_so)
-                       var ln = bstr.byte_to_char_index(native_match.rm_eo - native_match.rm_so - 1)
-                       var match = new Match(text,
-                               from + bso,
-                               ln + 1)
+                       var bfrom = native_match.rm_so + bytefrom
+                       var bto = native_match.rm_eo - 1 + bytefrom
+                       var cpos = cstr.byte_to_char_index_cached(bfrom, charfrom, bytefrom)
+                       var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
+                       var match = new Match(rets, cpos, len)
+                       var subs = match.subs
 
                        # Add sub expressions
                        for i in [1 .. nsub] do
-                               bso = bstr.byte_to_char_index(native_match[i].rm_so)
-                               ln = bstr.byte_to_char_index(native_match[i].rm_eo - native_match[i].rm_so - 1)
-                               match.subs.add new Match( text,
-                                       bso ,
-                                       ln + 1)
+                               if native_match[i].rm_so < 0 then
+                                       subs.add null
+                                       continue
+                               end
+                               var sub_bfrom = native_match[i].rm_so + bytefrom
+                               var sub_bto = native_match[i].rm_eo - 1 + bytefrom
+                               var sub_cpos = cstr.byte_to_char_index_cached(sub_bfrom, cpos, bfrom)
+                               var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
+                               subs.add(new Match(rets, sub_cpos, sub_len))
                        end
 
                        return match
@@ -404,35 +422,48 @@ class Regex
                var comp_res = compile
                assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+               var native = native
+               assert native != null
+
                # Actually execute
-               text = text.to_s
                var cstr = text.to_cstring
+               var subcstr = cstr
+               var rets = cstr.to_s_with_length(text.bytelen)
                var eflags = gather_eflags
                var eflags_or_notbol = eflags | flag_notbol
                var native_match = self.native_match
                var matches = new Array[Match]
 
                var nsub = native.re_nsub
-               var res = native.regexec(cstr, nsub+1, native_match, eflags)
-               var d = 0
+               var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
+               var bytesub = 0
+               var charsub = 0
                while res == 0 do
-                       var match = new Match(text,
-                               d + native_match.rm_so,
-                               native_match.rm_eo - native_match.rm_so)
+                       var bfrom = native_match.rm_so + bytesub
+                       var bto = native_match.rm_eo - 1 + bytesub
+                       var cstart = cstr.byte_to_char_index_cached(bfrom, charsub, bytesub)
+                       var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
+                       var match = new Match(rets, cstart, len)
                        matches.add match
+                       var subs = match.subs
 
                        # Add sub expressions
-                       for i in [1..nsub] do
-                               match.subs.add new Match( text,
-                                       d + native_match[i].rm_so,
-                                       native_match[i].rm_eo - native_match[i].rm_so)
+                       for i in [1 .. nsub] do
+                               if native_match[i].rm_so < 0 then
+                                       subs.add null
+                                       continue
+                               end
+                               var sub_bfrom = native_match[i].rm_so + bytesub
+                               var sub_bto = native_match[i].rm_eo - 1 + bytesub
+                               var sub_cstart = cstr.byte_to_char_index_cached(sub_bfrom, cstart, bfrom)
+                               var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
+                               subs.add(new Match(rets, sub_cstart, sub_len))
                        end
 
-                       if d == native_match.rm_eo then
-                               d += 1
-                       else d = d + native_match.rm_eo
-                       cstr = cstr.substring_from(native_match.rm_eo)
-                       res = native.regexec(cstr, nsub+1, native_match, eflags_or_notbol)
+                       bytesub = bto + 1
+                       charsub = cstart + len
+                       subcstr = cstr.fast_cstring(bytesub)
+                       res = native.regexec(subcstr, nsub + 1, native_match, eflags_or_notbol)
                end
 
                # No more match?
@@ -456,7 +487,7 @@ redef class Match
        # assert match.subs.length == 1
        # assert match.subs.first.to_s == "d eee"
        # ~~~
-       var subs = new Array[Match] is lazy
+       var subs = new Array[nullable Match] is lazy
 
        # Get the `n`th expression in this match
        #
@@ -471,7 +502,7 @@ redef class Match
        # assert match[0].to_s == "c d eee f"
        # assert match[1].to_s == "d eee"
        # ~~~
-       fun [](n: Int): Match do
+       fun [](n: Int): nullable Match do
                if n == 0 then return self
                assert n > 0 and n <= subs.length
                return subs[n-1]