# assert "\\ud800\\udfd3".from_utf16_escape == '𐏓'
# assert "\\u00e8".from_utf16_escape == 'è'
# assert "\\u3042".from_utf16_escape == 'あ'
- fun from_utf16_escape: Char do
- var ln = length
- if ln != 6 and ln != 12 then return 0xFFFD.code_point
- var cphi = substring(2, 4).to_hex
- if cphi < 0xD800 then return cphi.code_point
- if cphi > 0xDFFF then return cphi.code_point
- if cphi > 0xDBFF then return 0xFFFD.code_point
- var cp = 0
- cp += (cphi - 0xD800) << 10
- var cplo = substring(8, 4).to_hex
+ fun from_utf16_escape(pos, ln: nullable Int): Char do
+ if pos == null then pos = 0 # default: decode from the start of `self`
+ if ln == null then ln = length - pos # default: everything from `pos` to the end of `self`
+ if ln < 6 then return 0xFFFD.code_point # too short to hold one `\uXXXX` escape: answer U+FFFD
+ var cp = from_utf16_digit(pos + 2) # the 4 hex digits that follow the 2-character `\u` prefix
+ if cp < 0xD800 then return cp.code_point # below the surrogate range: already a full code point
+ if cp > 0xDFFF then return cp.code_point # above the surrogate range: already a full code point
+ if cp > 0xDBFF then return 0xFFFD.code_point # 0xDC00..0xDFFF: a low surrogate that comes first is invalid
+ if ln == 6 then return 0xFFFD.code_point # high surrogate, but no room for a second escape
+ if ln < 12 then return 0xFFFD.code_point # NOTE(review): this test already covers `ln == 6` above, which looks redundant
+ cp <<= 16 # move the first unit to the upper 16 bits so both units fit in one Int
+ cp += from_utf16_digit(pos + 8) # digits of the second escape go in the lower 16 bits
+ var cplo = cp & 0xFFFF # extract the second unit again to validate it below
if cplo < 0xDC00 then return 0xFFFD.code_point
if cplo > 0xDFFF then return 0xFFFD.code_point
- cp += cplo - 0xDC00
- cp += 0x10000
- return cp.code_point
+ return cp.from_utf16_surr.code_point # NOTE(review): `from_utf16_surr` is not defined in this chunk; presumably it combines the packed surrogate pair — confirm
+ end
+
+ # Returns the value of the 4 hexadecimal digits of `self` starting at `pos` (0 if `pos` is null)
+ #
+ # var s = "\\ud800\\udfd3"
+ # assert s.from_utf16_digit(2) == 0xD800
+ # assert s.from_utf16_digit(8) == 0xDFD3
+ fun from_utf16_digit(pos: nullable Int): Int do
+ if pos == null then pos = 0 # default: read from the start of `self`
+ return to_hex(pos, 4) # delegates to `to_hex` with start `pos` and a length of 4
end
# Encode `self` to percent (or URL) encoding
return s.plain_to_s
end
+ # Return the Levenshtein distance between `self` and `other` (each insert, delete or change costs 1)
+ #
+ # ~~~
+ # assert "abcd".levenshtein_distance("abcd") == 0
+ # assert "".levenshtein_distance("abcd") == 4
+ # assert "abcd".levenshtein_distance("") == 4
+ # assert "abcd".levenshtein_distance("xyz") == 4
+ # assert "abcd".levenshtein_distance("xbdy") == 3
+ # ~~~
+ fun levenshtein_distance(other: String): Int
+ do
+ var slen = self.length
+ var olen = other.length
+
+ # fast cases: an empty side costs the length of the other side; equal strings cost 0
+ if slen == 0 then return olen
+ if olen == 0 then return slen
+ if self == other then return 0
+
+ # previous row of distances: entry `j` is the distance to the first `j` chars of `other`
+ var v0 = new Array[Int].with_capacity(olen+1)
+
+ # current row of distances; NOTE(review): both rows are filled by `[]=` in increasing index order, presumably relying on `[]=` accepting index == length — confirm
+ var v1 = new Array[Int].with_capacity(olen+1)
+
+ for j in [0..olen] do
+ # first row: building the first `j` chars of `other` from nothing takes `j` inserts
+ v0[j] = j
+ end
+
+ for i in [0..slen[ do
+
+ # first entry: reducing the first `i + 1` chars of `self` to nothing takes `i + 1` deletes
+ v1[0] = i + 1
+
+ for j in [0..olen[ do
+ # left neighbour in the current row + 1 (usually described as inserting `other[j]`)
+ var cost1 = v1[j] + 1
+ # entry above in the previous row + 1 (usually described as deleting `self[i]`)
+ var cost2 = v0[j + 1] + 1
+ # diagonal entry: same char cost (+0)
+ var cost3 = v0[j]
+ # differing chars: change cost (+1)
+ if self[i] != other[j] then cost3 += 1
+ # keep the cheapest of the three
+ v1[j+1] = cost1.min(cost2).min(cost3)
+ end
+
+ # Swap rows:
+ # * v1 becomes v0 in the next iteration
+ # * old v0 is reused as the new v1 (its entries are overwritten)
+ var tmp = v1
+ v1 = v0
+ v0 = tmp
+ end
+
+ return v0[olen] # after the final swap, the last computed row is in v0
+ end
+
# Copies `n` bytes from `self` at `src_offset` into `dest` starting at `dest_offset`
#
# Basically a high-level synonym of NativeString::copy_to