X-Git-Url: http://nitlanguage.org

diff --git a/lib/core/re.nit b/lib/core/re.nit
index eb74893..0ac109c 100644
--- a/lib/core/re.nit
+++ b/lib/core/re.nit
@@ -22,6 +22,7 @@
 module re
 
 import text
+import text::flat
 import gc
 import error
 
@@ -152,11 +153,11 @@ class Regex
 	# Ignore case when matching letters
 	var ignore_case = false is writable
 
-	# Optimize `self` for `is_in` and `String::has`, but do not support searches
+	# Optimize `self` for `String::has` and `is_in`, but do not support searches
 	#
-	# If `true`, `self` cannont be used with `String::search_all`, `String::replace`
+	# If `true`, `self` cannot be used with `String::search_all`, `String::replace`
 	# or `String::split`.
-	var optimize_is_in = false is writable
+	var optimize_has = false is writable
 
 	# Treat a newline in string as dividing string into multiple lines
 	#
@@ -182,7 +183,7 @@ class Regex
 	# Cache of a single `regmatch_t` to prevent many calls to `malloc`
 	private var native_match: NativeMatchArray is lazy do
 		native_match_is_init = true
-		return new NativeMatchArray.malloc(native.re_nsub+1)
+		return new NativeMatchArray.malloc(native.as(not null).re_nsub+1)
 	end
 
 	private var native_match_is_init = false
@@ -207,7 +208,7 @@ class Regex
 		var cflags = 0
 		if extended then cflags |= flag_extended
 		if ignore_case then cflags |= flag_icase
-		if optimize_is_in then cflags |= flag_nosub
+		if optimize_has then cflags |= flag_nosub
 		if newline then cflags |= flag_newline
 
 		var native = self.native
@@ -271,6 +272,9 @@ class Regex
 
 	private fun get_error(errcode: Int): String
 	do
+		var native = native
+		assert native != null
+
 		# Error, should be out of memory but we cover any possible error anyway
 		var error_cstr = native.regerror(errcode)
 
@@ -290,6 +294,9 @@ class Regex
 		var comp_res = compile
 		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+		var native = native
+		assert native != null
+
 		# Actually execute
 		var eflags = gather_eflags
 		var res = native.regexec_match_only(text.to_cstring, eflags)
@@ -306,7 +313,7 @@ class Regex
 		abort
 	end
 
-	# require: not optimize_is_in
+	# require: not optimize_has
 	#
 	#     assert "l".to_re.search_index_in("hello world", 0) == 2
 	#     assert "el+o".to_re.search_index_in("hello world", 0) == 1
@@ -314,11 +321,14 @@ class Regex
 	#     assert "z".to_re.search_index_in("hello world", 0) == -1
 	redef fun search_index_in(text, from)
 	do
-		assert not optimize_is_in
+		assert not optimize_has
 
 		var comp_res = compile
 		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+		var native = native
+		assert native != null
+
 		# Actually execute
 		text = text.to_s
 		var cstr = text.substring_from(from).to_cstring
@@ -339,39 +349,54 @@ class Regex
 		abort
 	end
 
-	# require: not optimize_is_in
+	# require: not optimize_has
 	#
 	#     assert "l".to_re.search_in("hello world", 0).from == 2
 	#     assert "el+o".to_re.search_in("hello world", 0).from == 1
 	#     assert "l+".to_re.search_in("hello world", 3).from == 3
 	#     assert "z".to_re.search_in("hello world", 0) == null
-	redef fun search_in(text, from)
+	#     assert "cd(e)".to_re.search_in("abcdef", 2)[1].to_s == "e"
+	redef fun search_in(text, charfrom)
 	do
-		assert not optimize_is_in
+		assert not optimize_has
 
 		var comp_res = compile
 		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+		var native = native
+		assert native != null
+
 		# Actually execute
-		text = text.to_s
-		var cstr = text.substring_from(from).to_cstring
+		var cstr = text.to_cstring
+		var rets = cstr.to_s_with_length(text.bytelen)
+		var bytefrom = cstr.char_to_byte_index_cached(charfrom, 0, 0)
+		var subcstr = cstr.fast_cstring(bytefrom)
 		var eflags = gather_eflags
 		var native_match = self.native_match
 
 		var nsub = native.re_nsub
-		var res = native.regexec(cstr, nsub+1, native_match, eflags)
+		var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
 
 		# Found one?
 		if res == 0 then
-			var match = new Match(text,
-				from + native_match.rm_so,
-				native_match.rm_eo - native_match.rm_so)
+			var bfrom = native_match.rm_so + bytefrom
+			var bto = native_match.rm_eo - 1 + bytefrom
+			var cpos = cstr.byte_to_char_index_cached(bfrom, charfrom, bytefrom)
+			var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
+			var match = new Match(rets, cpos, len)
+			var subs = match.subs
 
 			# Add sub expressions
-			for i in [1..nsub] do
-				match.subs.add new Match( text,
-					native_match[i].rm_so,
-					native_match[i].rm_eo - native_match[i].rm_so)
+			for i in [1 .. nsub] do
+				if native_match[i].rm_so < 0 then
+					subs.add null
+					continue
+				end
+				var sub_bfrom = native_match[i].rm_so + bytefrom
+				var sub_bto = native_match[i].rm_eo - 1 + bytefrom
+				var sub_cpos = cstr.byte_to_char_index_cached(sub_bfrom, cpos, bfrom)
+				var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
+				subs.add(new Match(rets, sub_cpos, sub_len))
 			end
 
 			return match
@@ -386,46 +411,59 @@ class Regex
 		abort
 	end
 
-	# require: not optimize_is_in
+	# require: not optimize_has
 	#
 	#     assert "ab".to_re.search_all_in("abbab").join(", ") == "ab, ab"
 	#     assert "b+".to_re.search_all_in("abbabaabbbbbcab").join(", ") == "bb, b, bbbbb, b"
 	redef fun search_all_in(text)
 	do
-		assert not optimize_is_in
+		assert not optimize_has
 
 		var comp_res = compile
 		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
 
+		var native = native
+		assert native != null
+
 		# Actually execute
-		text = text.to_s
 		var cstr = text.to_cstring
+		var subcstr = cstr
+		var rets = cstr.to_s_with_length(text.bytelen)
 		var eflags = gather_eflags
 		var eflags_or_notbol = eflags | flag_notbol
 		var native_match = self.native_match
 		var matches = new Array[Match]
 
 		var nsub = native.re_nsub
-		var res = native.regexec(cstr, nsub+1, native_match, eflags)
-		var d = 0
+		var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
+		var bytesub = 0
+		var charsub = 0
 		while res == 0 do
-			var match = new Match(text,
-				d + native_match.rm_so,
-				native_match.rm_eo - native_match.rm_so)
+			var bfrom = native_match.rm_so + bytesub
+			var bto = native_match.rm_eo - 1 + bytesub
+			var cstart = cstr.byte_to_char_index_cached(bfrom, charsub, bytesub)
+			var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
+			var match = new Match(rets, cstart, len)
 			matches.add match
+			var subs = match.subs
 
 			# Add sub expressions
-			for i in [1..nsub] do
-				match.subs.add new Match( text,
-					d + native_match[i].rm_so,
-					native_match[i].rm_eo - native_match[i].rm_so)
+			for i in [1 .. nsub] do
+				if native_match[i].rm_so < 0 then
+					subs.add null
+					continue
+				end
+				var sub_bfrom = native_match[i].rm_so + bytesub
+				var sub_bto = native_match[i].rm_eo - 1 + bytesub
+				var sub_cstart = cstr.byte_to_char_index_cached(sub_bfrom, cstart, bfrom)
+				var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
+				subs.add(new Match(rets, sub_cstart, sub_len))
 			end
 
-			if d == native_match.rm_eo then
-				d += 1
-			else d = d + native_match.rm_eo
-			cstr = cstr.substring_from(native_match.rm_eo)
-			res = native.regexec(cstr, nsub+1, native_match, eflags_or_notbol)
+			bytesub = bto + 1
+			charsub = cstart + len
+			subcstr = cstr.fast_cstring(bytesub)
+			res = native.regexec(subcstr, nsub + 1, native_match, eflags_or_notbol)
 		end
 
 		# No more match?
@@ -449,7 +487,7 @@ redef class Match
 	# assert match.subs.length == 1
 	# assert match.subs.first.to_s == "d eee"
 	# ~~~
-	var subs = new Array[Match] is lazy
+	var subs = new Array[nullable Match] is lazy
 
 	# Get the `n`th expression in this match
 	#
@@ -464,7 +502,7 @@ redef class Match
 	# assert match[0].to_s == "c d eee f"
 	# assert match[1].to_s == "d eee"
 	# ~~~
-	fun [](n: Int): Match do
+	fun [](n: Int): nullable Match do
 		if n == 0 then return self
 		assert n > 0 and n <= subs.length
 		return subs[n-1]