var bfrom = native_match.rm_so + bytefrom
var bto = native_match.rm_eo - 1 + bytefrom
var cpos = cstr.byte_to_char_index_cached(bfrom, charfrom, bytefrom)
- var len = cstr.utf8_length(bfrom, bto)
+ var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
var match = new Match(rets, cpos, len)
var subs = match.subs
var sub_bfrom = native_match[i].rm_so + bytefrom
var sub_bto = native_match[i].rm_eo - 1 + bytefrom
var sub_cpos = cstr.byte_to_char_index_cached(sub_bfrom, cpos, bfrom)
- var sub_len = cstr.utf8_length(sub_bfrom, sub_bto)
+ var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
subs.add(new Match(rets, sub_cpos, sub_len))
end
var bfrom = native_match.rm_so + bytesub
var bto = native_match.rm_eo - 1 + bytesub
var cstart = cstr.byte_to_char_index_cached(bfrom, charsub, bytesub)
- var len = cstr.utf8_length(bfrom, bto)
+ var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
var match = new Match(rets, cstart, len)
matches.add match
var subs = match.subs
var sub_bfrom = native_match[i].rm_so + bytesub
var sub_bto = native_match[i].rm_eo - 1 + bytesub
var sub_cstart = cstr.byte_to_char_index_cached(sub_bfrom, cstart, bfrom)
- var sub_len = cstr.utf8_length(sub_bfrom, sub_bto)
+ var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
subs.add(new Match(rets, sub_cstart, sub_len))
end
return endpos
end
- # Number of UTF-8 characters in `self` between positions `from` and `to`
- fun utf8_length(from, to: Int): Int do
+ # Number of UTF-8 characters in `self` found in the `bytelen` bytes starting at byte index `from`
+ #
+ # Returns 0 when `bytelen` is zero or negative.
+ # A character that starts inside the range but whose bytes extend past its end
+ # is still counted as one character.
+ fun utf8_length(from, bytelen: Int): Int do
var st = from
- var lst = to
var ln = 0
- while st <= lst do
- st += length_of_char_at(st)
+ while bytelen > 0 do
+ # Fast path: while at least 4 bytes remain, test 4 bytes in one step.
+ # The mask 0x80808080 has only the top bit of each of its 4 bytes set, so a
+ # zero result means none of the 4 bytes has its high bit set; they are then
+ # counted as 4 one-byte characters.
+ # NOTE(review): presumably `fetch_4_chars(st)` packs the 4 bytes at `st` into one integer -- confirm.
+ while bytelen >= 4 do
+ var i = fetch_4_chars(st)
+ if i & 0x80808080 != 0 then break
+ bytelen -= 4
+ st += 4
+ ln += 4
+ end
+ # The fast path may have consumed every remaining byte.
+ if bytelen == 0 then break
+ # Slow path: step over exactly one character, whatever its byte length.
+ var cln = length_of_char_at(st)
+ st += cln
ln += 1
+ bytelen -= cln
end
return ln
end