lib/core: Added an optimized `to_hex` function to `FlatText`
[nit.git] / lib / core / text / flat.nit
index 52de988..b2336e6 100644 (file)
@@ -36,18 +36,20 @@ end
 
 redef class FlatText
 
-       private fun first_byte: Int do return 0
+       # Index in `_items` of the first byte of `self`
+       protected fun first_byte: Int do return 0
 
-       private fun last_byte: Int do return _bytelen - 1
+       # Index in `_items` of the last byte of `self`
+       protected fun last_byte: Int do return _bytelen - 1
 
        # Cache of the latest position (char) explored in the string
-       private var position: Int = 0
+       var position: Int = 0
 
        # Cached position (bytes) in the NativeString underlying the String
-       private var bytepos: Int = 0
+       var bytepos: Int = 0
 
        # Index of the character `index` in `_items`
-       private fun char_to_byte_index(index: Int): Int do
+       fun char_to_byte_index(index: Int): Int do
                var ln = length
                assert index >= 0
                assert index < ln
@@ -85,12 +87,110 @@ redef class FlatText
                return ns_i
        end
 
+       # Number of extra bytes needed to store the HTML-escaped form of `self`.
+       #
+       # `<` and `>` become 4-byte entities (3 extra bytes each); `&`, `"`,
+       # `'` and `/` become 5-byte entities (4 extra bytes each).
+       #
+       # Used by `html_escape` to size its output buffer; a result of 0
+       # means that `self` contains nothing to escape.
+       fun chars_to_html_escape: Int do
+               var bytes = _items
+               var stop = last_byte
+               var extra = 0
+               var i = first_byte
+               while i <= stop do
+                       var b = bytes[i]
+                       if b == 0x3Cu8 or b == 0x3Eu8 then
+                               # `&lt;` / `&gt;`
+                               extra += 3
+                       else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
+                               # `&amp;` / `&#34;` / `&#39;` / `&#47;`
+                               extra += 4
+                       end
+                       i += 1
+               end
+               return extra
+       end
+
+       redef fun html_escape
+       do
+               # Nothing to escape: `to_s` is returned and no escaped copy is built
+               var extra = chars_to_html_escape
+               if extra == 0 then return to_s
+               var its = _items
+               var max = last_byte
+               var pos = first_byte
+               # `extra` is the growth in bytes, so `nits` is allocated once at its final size
+               var nlen = extra + _bytelen
+               var nits = new NativeString(nlen)
+               var outpos = 0
+               while pos <= max do
+                       var c = its[pos]
+                       # HTML meta-characters are replaced by an entity, written byte by byte:
+                       #
+                       # * 0x3C (<) => &lt;
+                       # * 0x3E (>) => &gt;
+                       # * 0x26 (&) => &amp;
+                       # * 0x22 (") => &#34;
+                       # * 0x27 (') => &#39;
+                       # * 0x2F (/) => &#47;
+                       if c == 0x3Cu8 then
+                               nits[outpos] = 0x26u8
+                               nits[outpos + 1] = 0x6Cu8
+                               nits[outpos + 2] = 0x74u8
+                               nits[outpos + 3] = 0x3Bu8
+                               outpos += 4
+                       else if c == 0x3Eu8 then
+                               nits[outpos] = 0x26u8
+                               nits[outpos + 1] = 0x67u8
+                               nits[outpos + 2] = 0x74u8
+                               nits[outpos + 3] = 0x3Bu8
+                               outpos += 4
+                       else if c == 0x26u8 then
+                               nits[outpos] = 0x26u8
+                               nits[outpos + 1] = 0x61u8
+                               nits[outpos + 2] = 0x6Du8
+                               nits[outpos + 3] = 0x70u8
+                               nits[outpos + 4] = 0x3Bu8
+                               outpos += 5
+                       else if c == 0x22u8 then
+                               nits[outpos] = 0x26u8
+                               nits[outpos + 1] = 0x23u8
+                               nits[outpos + 2] = 0x33u8
+                               nits[outpos + 3] = 0x34u8
+                               nits[outpos + 4] = 0x3Bu8
+                               outpos += 5
+                       else if c == 0x27u8 then
+                               nits[outpos] = 0x26u8
+                               nits[outpos + 1] = 0x23u8
+                               nits[outpos + 2] = 0x33u8
+                               nits[outpos + 3] = 0x39u8
+                               nits[outpos + 4] = 0x3Bu8
+                               outpos += 5
+                       else if c == 0x2Fu8 then
+                               nits[outpos] = 0x26u8
+                               nits[outpos + 1] = 0x23u8
+                               nits[outpos + 2] = 0x34u8
+                               nits[outpos + 3] = 0x37u8
+                               nits[outpos + 4] = 0x3Bu8
+                               outpos += 5
+                       else
+                               nits[outpos] = c
+                               outpos += 1
+                       end
+                       pos += 1
+               end
+               var s = new FlatString.with_infos(nits, nlen, 0, nlen - 1)
+               return s
+       end
+
        # By escaping `self` to C, how many more bytes will be needed ?
        #
        # This enables a double-optimization in `escape_to_c` since if this
        # method returns 0, then `self` does not need escaping and can be
        # returned as-is
-       protected fun chars_to_escape_to_c: Int do
+       fun chars_to_escape_to_c: Int do
                var its = _items
                var max = last_byte
                var pos = first_byte
@@ -178,6 +278,23 @@ redef class FlatText
        end
 
        redef fun [](index) do return _items.char_at(char_to_byte_index(index))
+
+       # Integer value of the `ln` hexadecimal digits found from char `pos` (by default, all of `self`).
+       # An empty range yields 0 rather than failing the bounds `assert`s of `char_to_byte_index`.
+       #
+       #     assert "ff".to_hex == 255
+       redef fun to_hex(pos, ln) do
+               var res = 0
+               if pos == null then pos = 0
+               if ln == null then ln = length - pos
+               if ln <= 0 then return res
+               pos = char_to_byte_index(pos)
+               for i in [pos .. pos + ln[ do
+                       res <<= 4
+                       res += _items[i].ascii.from_hex
+               end
+               return res
+       end
 end
 
 # Immutable strings of characters.
@@ -200,6 +317,14 @@ class FlatString
                return _items.utf8_length(_first_byte, _last_byte)
        end
 
+       # Copy of the bytes of `self` followed by a terminating 0 byte (`is lazy` attribute)
+       redef var to_cstring is lazy do
+               var cstr = new NativeString(_bytelen + 1)
+               _items.copy_to(cstr, _bytelen, _first_byte, 0)
+               cstr[_bytelen] = 0u8
+               return cstr
+       end
+
        redef fun reversed
        do
                var b = new FlatBuffer.with_capacity(_bytelen + 1)
@@ -304,19 +429,9 @@ class FlatString
                _bytepos = from
        end
 
-       redef fun to_cstring do
-               if real_items != null then return real_items.as(not null)
-               var blen = _bytelen
-               var new_items = new NativeString(blen + 1)
-               _items.copy_to(new_items, blen, _first_byte, 0)
-               new_items[blen] = 0u8
-               real_items = new_items
-               return new_items
-       end
-
        redef fun ==(other)
        do
-               if not other isa FlatString then return super
+               if not other isa FlatText then return super
 
                if self.object_id == other.object_id then return true
 
@@ -325,7 +440,7 @@ class FlatString
                if other._bytelen != my_length then return false
 
                var my_index = _first_byte
-               var its_index = other._first_byte
+               var its_index = other.first_byte
 
                var last_iteration = my_index + my_length
 
@@ -343,29 +458,32 @@ class FlatString
 
        redef fun <(other)
        do
-               if not other isa FlatString then return super
+               if not other isa FlatText then return super
 
                if self.object_id == other.object_id then return false
 
-               var my_length = self._bytelen
-               var its_length = other._bytelen
+               var myits = _items
+               var itsits = other._items
 
-               var max = if my_length < its_length then my_length else its_length
+               var mbt = _bytelen
+               var obt = other.bytelen
 
-               var myits = self.bytes
-               var itsits = other.bytes
+               var minln = if mbt < obt then mbt else obt
+               var mst = _first_byte
+               var ost = other.first_byte
 
-               for i in [0 .. max[ do
-                       var my_curr_char = myits[i]
-                       var its_curr_char = itsits[i]
+               for i in [0 .. minln[ do
+                       var my_curr_char = myits[mst]
+                       var its_curr_char = itsits[ost]
 
-                       if my_curr_char != its_curr_char then
-                               if my_curr_char < its_curr_char then return true
-                               return false
-                       end
+                       if my_curr_char > its_curr_char then return false
+                       if my_curr_char < its_curr_char then return true
+
+                       mst += 1
+                       ost += 1
                end
 
-               return my_length < its_length
+               return mbt < obt
        end
 
        redef fun +(o) do
@@ -437,11 +555,6 @@ private class FlatStringCharReverseIterator
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
-       do
-               init(tgt, pos)
-       end
-
        redef fun is_ok do return curr_pos >= 0
 
        redef fun item do return target[curr_pos]
@@ -457,14 +570,11 @@ private class FlatStringCharIterator
 
        var target: FlatString
 
-       var max: Int
+       var max: Int is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
-       do
-               init(tgt, tgt.length - 1, pos)
-       end
+       init do max = target.length - 1
 
        redef fun is_ok do return curr_pos <= max
 
@@ -483,9 +593,9 @@ private class FlatStringCharView
 
        redef fun [](index) do return target[index]
 
-       redef fun iterator_from(start) do return new FlatStringCharIterator.with_pos(target, start)
+       redef fun iterator_from(start) do return new FlatStringCharIterator(target, start)
 
-       redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator.with_pos(target, start)
+       redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator(target, start)
 
 end
 
@@ -494,13 +604,15 @@ private class FlatStringByteReverseIterator
 
        var target: FlatString
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
+       init
        do
-               init(tgt, tgt._items, pos + tgt._first_byte)
+               var tgt = target
+               target_items = tgt._items
+               curr_pos += tgt._first_byte
        end
 
        redef fun is_ok do return curr_pos >= target._first_byte
@@ -518,13 +630,15 @@ private class FlatStringByteIterator
 
        var target: FlatString
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
+       init
        do
-               init(tgt, tgt._items, pos + tgt._first_byte)
+               var tgt = target
+               target_items = tgt._items
+               curr_pos += tgt._first_byte
        end
 
        redef fun is_ok do return curr_pos <= target._last_byte
@@ -553,9 +667,9 @@ private class FlatStringByteView
                return target._items[ind]
        end
 
-       redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
+       redef fun iterator_from(start) do return new FlatStringByteIterator(target, start)
 
-       redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator.with_pos(target, start)
+       redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator(target, start)
 
 end
 
@@ -582,6 +696,9 @@ class FlatBuffer
 
        private var capacity = 0
 
+       # Real items, used as cache for when to_cstring is called
+       private var real_items: NativeString is noinit
+
        redef fun fast_cstring do return _items.fast_cstring(0)
 
        redef fun substrings do return new FlatSubstringsIter(self)
@@ -702,7 +819,7 @@ class FlatBuffer
                        real_items = new_native
                        is_dirty = false
                end
-               return real_items.as(not null)
+               return real_items
        end
 
        # Create a new empty string.
@@ -779,18 +896,15 @@ class FlatBuffer
                assert count >= 0
                if from < 0 then from = 0
                if (from + count) > length then count = length - from
-               if count != 0 then
-                       var its = _items
-                       var bytefrom = its.char_to_byte_index(from)
-                       var byteto = its.char_to_byte_index(count + from - 1)
-                       byteto += its.char_at(byteto).u8char_len - 1
-                       var byte_length = byteto - bytefrom + 1
-                       var r_items = new NativeString(byte_length)
-                       its.copy_to(r_items, byte_length, bytefrom, 0)
-                       return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
-               else
-                       return new Buffer
-               end
+               if count <= 0 then return new Buffer
+               var its = _items
+               var bytefrom = its.char_to_byte_index(from)
+               var byteto = its.char_to_byte_index(count + from - 1)
+               byteto += its.char_at(byteto).u8char_len - 1
+               var byte_length = byteto - bytefrom + 1
+               var r_items = new NativeString(byte_length)
+               its.copy_to(r_items, byte_length, bytefrom, 0)
+               return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
        end
 
        redef fun reverse
@@ -828,14 +942,11 @@ private class FlatBufferByteReverseIterator
 
        var target: FlatBuffer
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, tgt._items, pos)
-       end
+       init do target_items = target._items
 
        redef fun index do return curr_pos
 
@@ -854,9 +965,9 @@ private class FlatBufferByteView
 
        redef fun [](index) do return target._items[index]
 
-       redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
+       redef fun iterator_from(pos) do return new FlatBufferByteIterator(target, pos)
 
-       redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
+       redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator(target, pos)
 
 end
 
@@ -865,14 +976,11 @@ private class FlatBufferByteIterator
 
        var target: FlatBuffer
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, tgt._items, pos)
-       end
+       init do target_items = target._items
 
        redef fun index do return curr_pos
 
@@ -891,11 +999,6 @@ private class FlatBufferCharReverseIterator
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, pos)
-       end
-
        redef fun index do return curr_pos
 
        redef fun is_ok do return curr_pos >= 0
@@ -945,9 +1048,9 @@ private class FlatBufferCharView
                for i in s do target.add i
        end
 
-       redef fun iterator_from(pos) do return new FlatBufferCharIterator.with_pos(target, pos)
+       redef fun iterator_from(pos) do return new FlatBufferCharIterator(target, pos)
 
-       redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator.with_pos(target, pos)
+       redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator(target, pos)
 
 end
 
@@ -956,14 +1059,11 @@ private class FlatBufferCharIterator
 
        var target: FlatBuffer
 
-       var max: Int
+       var max: Int is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, tgt.length - 1, pos)
-       end
+       init do max = target.length - 1
 
        redef fun index do return curr_pos
 
@@ -1002,7 +1102,7 @@ redef class NativeString
                copy_to(new_self, length, 0, 0)
                var str = new FlatString.with_infos(new_self, length, 0, length - 1)
                new_self[length] = 0u8
-               str.real_items = new_self
+               str.to_cstring = new_self
                return str
        end
 
@@ -1035,7 +1135,7 @@ redef class NativeString
                        end
                        var ok_c: Bool
                        var c = char_at(pos)
-                       var cp = c.ascii
+                       var cp = c.code_point
                        if nxst == 1 then
                                ok_c = cp >= 0 and cp <= 0x7F
                        else if nxst == 2 then
@@ -1147,8 +1247,9 @@ redef class Array[E]
        do
                var l = length
                if l == 0 then return ""
-               if l == 1 then if self[0] == null then return "" else return self[0].to_s
-               var its = _items
+               var its = _items.as(not null)
+               var first = its[0]
+               if l == 1 then if first == null then return "" else return first.to_s
                var na = new NativeArray[String](l)
                var i = 0
                var sl = 0