From ff1b3e86e3b5cfadf68a3d8374df7793a09eb285 Mon Sep 17 00:00:00 2001
From: Lucas Bajolet
Date: Tue, 20 Oct 2015 13:47:49 -0400
Subject: [PATCH] lib/core: Optimized `html_escape` for FlatText variants

Signed-off-by: Lucas Bajolet
---
 lib/core/text/flat.nit | 98 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit
index 6cf1584..c89b222 100644
--- a/lib/core/text/flat.nit
+++ b/lib/core/text/flat.nit
@@ -85,6 +85,104 @@ redef class FlatText
 		return ns_i
 	end
 
+	# By escaping `self` to HTML, how many more bytes will be needed ?
+	fun chars_to_html_escape: Int do
+		var its = _items
+		var max = last_byte
+		var pos = first_byte
+		var endlen = 0
+		while pos <= max do
+			var c = its[pos]
+			if c == 0x3Cu8 then
+				endlen += 3 # `<` is rewritten as 4 bytes by `html_escape`: 3 more than the original
+			else if c == 0x3Eu8 then
+				endlen += 3 # `>` is rewritten as 4 bytes: 3 more than the original
+			else if c == 0x26u8 then
+				endlen += 4 # `&` is rewritten as 5 bytes: 4 more than the original
+			else if c == 0x22u8 then
+				endlen += 4 # `"` is rewritten as 5 bytes: 4 more than the original
+			else if c == 0x27u8 then
+				endlen += 4 # `'` is rewritten as 5 bytes: 4 more than the original
+			else if c == 0x2Fu8 then
+				endlen += 4 # `/` is rewritten as 5 bytes: 4 more than the original
+			end
+			pos += 1
+		end
+		return endlen
+	end
+
+	redef fun html_escape
+	do
+		var extra = chars_to_html_escape
+		if extra == 0 then return to_s # no byte needs escaping
+		var its = _items
+		var max = last_byte
+		var pos = first_byte
+		var nlen = extra + _bytelen # size in bytes of the escaped output
+		var nits = new NativeString(nlen)
+		var outpos = 0
+		while pos <= max do
+			var c = its[pos]
+			# Special codes:
+			# Some HTML characters are used as meta-data, they need
+			# to be replaced by an HTML-escaped equivalent, written
+			# byte by byte into `nits`; any other byte is copied as is.
+			# * 0x3C (<) => &lt;
+			# * 0x3E (>) => &gt;
+			# * 0x26 (&) => &amp;
+			# * 0x22 (") => &#34;
+			# * 0x27 (') => &#39;
+			# * 0x2F (/) => &#47;
+			if c == 0x3Cu8 then
+				nits[outpos] = 0x26u8
+				nits[outpos + 1] = 0x6Cu8
+				nits[outpos + 2] = 0x74u8
+				nits[outpos + 3] = 0x3Bu8
+				outpos += 4
+			else if c == 0x3Eu8 then
+				nits[outpos] = 0x26u8
+				nits[outpos + 1] = 0x67u8
+				nits[outpos + 2] = 0x74u8
+				nits[outpos + 3] = 0x3Bu8
+				outpos += 4
+			else if c == 0x26u8 then
+				nits[outpos] = 0x26u8
+				nits[outpos + 1] = 0x61u8
+				nits[outpos + 2] = 0x6Du8
+				nits[outpos + 3] = 0x70u8
+				nits[outpos + 4] = 0x3Bu8
+				outpos += 5
+			else if c == 0x22u8 then
+				nits[outpos] = 0x26u8
+				nits[outpos + 1] = 0x23u8
+				nits[outpos + 2] = 0x33u8
+				nits[outpos + 3] = 0x34u8
+				nits[outpos + 4] = 0x3Bu8
+				outpos += 5
+			else if c == 0x27u8 then
+				nits[outpos] = 0x26u8
+				nits[outpos + 1] = 0x23u8
+				nits[outpos + 2] = 0x33u8
+				nits[outpos + 3] = 0x39u8
+				nits[outpos + 4] = 0x3Bu8
+				outpos += 5
+			else if c == 0x2Fu8 then
+				nits[outpos] = 0x26u8
+				nits[outpos + 1] = 0x23u8
+				nits[outpos + 2] = 0x34u8
+				nits[outpos + 3] = 0x37u8
+				nits[outpos + 4] = 0x3Bu8
+				outpos += 5
+			else
+				nits[outpos] = c
+				outpos += 1
+			end
+			pos += 1
+		end
+		var s = new FlatString.with_infos(nits, nlen, 0, nlen - 1)
+		return s
+	end
+
 	# By escaping `self` to C, how many more bytes will be needed ?
 	#
 	# This enables a double-optimization in `escape_to_c` since if this
-- 
1.7.9.5