module re
import text
+import text::flat
import gc
import error
# Allocate the storage for one C `regex_t` with `malloc` and use it as `self`
#
# NOTE(review): the pointer returned by `malloc` is not checked for `NULL` here.
new malloc `{ return malloc(sizeof(regex_t)); `}
# Compile the regular expression `regex` into a form that is suitable for subsequent `regexec` searches
#
# Thin wrapper: forwards `self`, `regex` and `cflags` to the C function `regcomp`
# and returns its integer status unchanged.
- fun regcomp(regex: NativeString, cflags: Int): Int `{
+ fun regcomp(regex: CString, cflags: Int): Int `{
return regcomp(self, regex, cflags);
`}
#
# `nmatch` and `pmatch` are used to provide information regarding the location of any matches.
# `eflags` may be the bitwise-or of one or both of `flag_notbol` and `flag_noteol`.
#
# All arguments are forwarded as is to the C function `regexec`,
# whose integer status is returned unchanged.
- fun regexec(string: NativeString, nmatch: Int, pmatch: NativeMatchArray, eflags: Int): Int `{
+ fun regexec(string: CString, nmatch: Int, pmatch: NativeMatchArray, eflags: Int): Int `{
return regexec(self, string, nmatch, pmatch, eflags);
`}
# Match `string` against the precompiled pattern buffer of `self`, do not locate matches
#
# `eflags` may be the bitwise-or of one or both of `flag_notbol` and `flag_noteol`.
#
# Calls the C function `regexec` with `nmatch` set to 0 and `pmatch` set to `NULL`,
# and returns its integer status unchanged.
- fun regexec_match_only(string: NativeString, eflags: Int): Int `{
+ fun regexec_match_only(string: CString, eflags: Int): Int `{
return regexec(self, string, 0, NULL, eflags);
`}
# Call the C function `regfree` on `self`
#
# NOTE(review): only `regfree` is called here; the `regex_t` storage itself is
# not passed to `free` by this method — confirm where it is released.
fun regfree `{ regfree(self); `}
# Turn the error codes that can be returned by both `regcomp` and `regexec` into error message strings
- fun regerror(errcode: Int): NativeString `{
+ fun regerror(errcode: Int): CString `{
size_t len = regerror(errcode, self, NULL, 0);
char *message = malloc(len);
regerror(errcode, self, message, len);
# Get the entry at `index`, computed with C pointer arithmetic as `self + index`
#
# No bounds check is performed here.
fun [](index: Int): NativeMatchArray `{ return self + index; `}
end
# Refinement of the native string class adding a private pointer-offset helper
#
# `substring_from` returns the C pointer `self + index`: nothing is copied and
# `index` is not checked against the length of the string.
# NOTE(review): presumably the native type is `char*`, so `index` counts bytes — confirm.
-redef extern class NativeString
- private fun substring_from(index: Int): NativeString `{ return self + index; `}
+redef extern class CString
+ private fun substring_from(index: Int): CString `{ return self + index; `}
end
redef class Text
# Ignore case when matching letters
var ignore_case = false is writable
- # Optimize `self` for `is_in` and `String::has`, but do not support searches
+ # Optimize `self` for `String::has` and `is_in`, but do not support searches
#
# If `true`, `self` cannot be used with `String::search_all`, `String::replace`
# or `String::split`.
- var optimize_is_in = false is writable
+ var optimize_has = false is writable
# Treat a newline in string as dividing string into multiple lines
#
# Cache of a single `regmatch_t` to prevent many calls to `malloc`
#
# Built lazily on first access: `re_nsub + 1`, read from `native`, is passed
# to `NativeMatchArray.malloc`.
# The first access also sets `native_match_is_init` to `true`.
private var native_match: NativeMatchArray is lazy do
native_match_is_init = true
- return new NativeMatchArray.malloc(native.re_nsub+1)
+ return new NativeMatchArray.malloc(native.as(not null).re_nsub+1)
end
private var native_match_is_init = false
var cflags = 0
if extended then cflags |= flag_extended
if ignore_case then cflags |= flag_icase
- if optimize_is_in then cflags |= flag_nosub
+ if optimize_has then cflags |= flag_nosub
if newline then cflags |= flag_newline
var native = self.native
var error_cstr = native.regerror(res)
# We leave it to the lib to decide how to allocate the string that we keep
- var error_str = error_cstr.to_s_with_copy
+ var error_str = error_cstr.to_s
error_cstr.free
return new Error(error_str)
private fun get_error(errcode: Int): String
do
+ var native = native
+ assert native != null
+
# Error, should be out of memory but we cover any possible error anyway
var error_cstr = native.regerror(errcode)
# We leave it to the lib to decide how to allocate the string that we keep
- var error_str = error_cstr.to_s_with_copy
+ var error_str = error_cstr.to_s
error_cstr.free
return error_str
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
+ var native = native
+ assert native != null
+
# Actually execute
var eflags = gather_eflags
var res = native.regexec_match_only(text.to_cstring, eflags)
abort
end
- # require: not optimize_is_in
+ # require: not optimize_has
#
# assert "l".to_re.search_index_in("hello world", 0) == 2
# assert "el+o".to_re.search_index_in("hello world", 0) == 1
# assert "z".to_re.search_index_in("hello world", 0) == -1
redef fun search_index_in(text, from)
do
- assert not optimize_is_in
+ assert not optimize_has
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
+ var native = native
+ assert native != null
+
# Actually execute
text = text.to_s
var cstr = text.substring_from(from).to_cstring
abort
end
- # require: not optimize_is_in
+ # require: not optimize_has
#
# assert "l".to_re.search_in("hello world", 0).from == 2
# assert "el+o".to_re.search_in("hello world", 0).from == 1
# assert "l+".to_re.search_in("hello world", 3).from == 3
# assert "z".to_re.search_in("hello world", 0) == null
- redef fun search_in(text, from)
+ # assert "cd(e)".to_re.search_in("abcdef", 2)[1].to_s == "e"
+ redef fun search_in(text, charfrom)
do
- assert not optimize_is_in
+ assert not optimize_has
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
+ var native = native
+ assert native != null
+
# Actually execute
- text = text.to_s
- var cstr = text.substring_from(from).to_cstring
+ var cstr = text.to_cstring
+ var rets = cstr.to_s_unsafe(text.byte_length, copy=false)
+ var bytefrom = cstr.char_to_byte_index_cached(charfrom, 0, 0)
+ var subcstr = cstr.fast_cstring(bytefrom)
var eflags = gather_eflags
var native_match = self.native_match
var nsub = native.re_nsub
- var res = native.regexec(cstr, nsub+1, native_match, eflags)
+ var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
# Found one?
if res == 0 then
- var match = new Match(text,
- from + native_match.rm_so,
- native_match.rm_eo - native_match.rm_so)
+ var bfrom = native_match.rm_so + bytefrom
+ var bto = native_match.rm_eo - 1 + bytefrom
+ var cpos = cstr.byte_to_char_index_cached(bfrom, charfrom, bytefrom)
+ var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
+ var match = new Match(rets, cpos, len)
+ var subs = match.subs
# Add sub expressions
- for i in [1..nsub] do
- match.subs.add new Match( text,
- native_match[i].rm_so,
- native_match[i].rm_eo - native_match[i].rm_so)
+ for i in [1 .. nsub] do
+ if native_match[i].rm_so < 0 then
+ subs.add null
+ continue
+ end
+ var sub_bfrom = native_match[i].rm_so + bytefrom
+ var sub_bto = native_match[i].rm_eo - 1 + bytefrom
+ var sub_cpos = cstr.byte_to_char_index_cached(sub_bfrom, cpos, bfrom)
+ var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
+ subs.add(new Match(rets, sub_cpos, sub_len))
end
return match
abort
end
- # require: not optimize_is_in
+ # require: not optimize_has
#
# assert "ab".to_re.search_all_in("abbab").join(", ") == "ab, ab"
# assert "b+".to_re.search_all_in("abbabaabbbbbcab").join(", ") == "bb, b, bbbbb, b"
redef fun search_all_in(text)
do
- assert not optimize_is_in
+ assert not optimize_has
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
+ var native = native
+ assert native != null
+
# Actually execute
- text = text.to_s
var cstr = text.to_cstring
+ var subcstr = cstr
+ var rets = cstr.to_s_unsafe(text.byte_length, copy=false)
var eflags = gather_eflags
var eflags_or_notbol = eflags | flag_notbol
var native_match = self.native_match
var matches = new Array[Match]
var nsub = native.re_nsub
- var res = native.regexec(cstr, nsub+1, native_match, eflags)
- var d = 0
+ var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
+ var bytesub = 0
+ var charsub = 0
while res == 0 do
- var match = new Match(text,
- d + native_match.rm_so,
- native_match.rm_eo - native_match.rm_so)
+ var bfrom = native_match.rm_so + bytesub
+ var bto = native_match.rm_eo - 1 + bytesub
+ var cstart = cstr.byte_to_char_index_cached(bfrom, charsub, bytesub)
+ var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
+ var match = new Match(rets, cstart, len)
matches.add match
+ var subs = match.subs
# Add sub expressions
- for i in [1..nsub] do
- match.subs.add new Match( text,
- d + native_match[i].rm_so,
- native_match[i].rm_eo - native_match[i].rm_so)
+ for i in [1 .. nsub] do
+ if native_match[i].rm_so < 0 then
+ subs.add null
+ continue
+ end
+ var sub_bfrom = native_match[i].rm_so + bytesub
+ var sub_bto = native_match[i].rm_eo - 1 + bytesub
+ var sub_cstart = cstr.byte_to_char_index_cached(sub_bfrom, cstart, bfrom)
+ var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
+ subs.add(new Match(rets, sub_cstart, sub_len))
end
- if d == native_match.rm_eo then
- d += 1
- else d = d + native_match.rm_eo
- cstr = cstr.substring_from(native_match.rm_eo)
- res = native.regexec(cstr, nsub+1, native_match, eflags_or_notbol)
+ bytesub = bto + 1
+ charsub = cstart + len
+ subcstr = cstr.fast_cstring(bytesub)
+ res = native.regexec(subcstr, nsub + 1, native_match, eflags_or_notbol)
end
# No more match?
# assert match.subs.length == 1
# assert match.subs.first.to_s == "d eee"
# ~~~
- var subs = new Array[Match] is lazy
+ var subs = new Array[nullable Match] is lazy
# Get the `n`th expression in this match
#
# assert match[0].to_s == "c d eee f"
# assert match[1].to_s == "d eee"
# ~~~
- fun [](n: Int): Match do
+ fun [](n: Int): nullable Match do
if n == 0 then return self
assert n > 0 and n <= subs.length
return subs[n-1]