# assert "\\ud800\\udfd3".from_utf16_escape == '𐏓'
# assert "\\u00e8".from_utf16_escape == 'è'
# assert "\\u3042".from_utf16_escape == 'あ'
- fun from_utf16_escape: Char do
- var ln = length
- if ln != 6 and ln != 12 then return 0xFFFD.code_point
- var cphi = substring(2, 4).to_hex
- if cphi < 0xD800 then return cphi.code_point
- if cphi > 0xDFFF then return cphi.code_point
- if cphi > 0xDBFF then return 0xFFFD.code_point
- var cp = 0
- cp += (cphi - 0xD800) << 10
- var cplo = substring(8, 4).to_hex
+ fun from_utf16_escape(pos, ln: nullable Int): Char do
+ if pos == null then pos = 0 # default: start at the beginning of the string
+ if ln == null then ln = length - pos # default: everything from `pos` to the end
+ if ln < 6 then return 0xFFFD.code_point # too short to hold one 6-char `\uXXXX` escape
+ var cp = from_utf16_digit(pos + 2) # skips 2 chars, assumed to be `\u` (not checked here)
+ if cp < 0xD800 then return cp.code_point # below the surrogate range: the unit is the code point
+ if cp > 0xDFFF then return cp.code_point # above the surrogate range: the unit is the code point
+ if cp > 0xDBFF then return 0xFFFD.code_point # 0xDC00..0xDFFF: low surrogate with no preceding high one
+ if ln == 6 then return 0xFFFD.code_point # high surrogate but only one escape available
+ if ln < 12 then return 0xFFFD.code_point # NOTE(review): also covers the `ln == 6` case above
+ cp <<= 16 # move the high surrogate to the upper 16 bits
+ cp += from_utf16_digit(pos + 8) # NOTE(review): chars at pos+6 and pos+7 are not checked to be `\u`
+ var cplo = cp & 0xFFFF # lower 16 bits: the second code unit just read
if cplo < 0xDC00 then return 0xFFFD.code_point
if cplo > 0xDFFF then return 0xFFFD.code_point
- cp += cplo - 0xDC00
- cp += 0x10000
- return cp.code_point
+ return cp.from_utf16_surr.code_point # NOTE(review): from_utf16_surr is defined elsewhere; presumably decodes the packed high/low pair, confirm
+ end
+
+ # Returns the numeric value of the 4 hexadecimal digits starting at `pos` (the `XXXX` part of a `\uXXXX` escape)
+ #
+ # var s = "\\ud800\\udfd3"
+ # assert s.from_utf16_digit(2) == 0xD800
+ # assert s.from_utf16_digit(8) == 0xDFD3
+ fun from_utf16_digit(pos: nullable Int): Int do
+ if pos == null then pos = 0 # no position given: read from the start of the string
+ return to_hex(pos, 4) # NOTE(review): to_hex(pos, ln) is defined elsewhere; presumably parses `ln` chars from `pos` as hexadecimal, confirm
end
# Encode `self` to percent (or URL) encoding
protected fun json_to_nit_string: String do
var res = new FlatBuffer.with_capacity(bytelen)
var i = 0
- while i < self.length do
+ var ln = self.length
+ while i < ln do
var char = self[i]
if char == '\\' then
i += 1
else if char == 't' then
char = '\t'
else if char == 'u' then
- var code = substring(i + 1, 4)
- var hx = code.to_hex
- if hx >= 0xD800 and hx <= 0xDFFF then
- var lostr = substring(i + 7, 4)
- if lostr.length < 4 then
- hx = 0xFFFD
+ var u16_esc = from_utf16_digit(i + 1)
+ char = u16_esc.code_point
+ if char.is_surrogate and i + 10 < ln then
+ if self[i + 5] == '\\' and self[i + 6] == 'u' then
+ u16_esc <<= 16
+ u16_esc += from_utf16_digit(i + 7)
+ char = u16_esc.from_utf16_surr.code_point
+ i += 6
else
- hx <<= 16
- hx += lostr.to_hex
- hx = hx.from_utf16_surr
+ char = 0xFFFD.code_point
end
- i += 6
end
i += 4
- char = hx.code_point
end
# `"`, `/` or `\` => Keep `char` as-is.
end