lib/core: Have Bytes::to_s and stream use the new and improved to_s_with_length
authorLucas Bajolet <r4pass@hotmail.com>
Fri, 11 Sep 2015 15:36:03 +0000 (11:36 -0400)
committerLucas Bajolet <r4pass@hotmail.com>
Fri, 11 Sep 2015 15:36:03 +0000 (11:36 -0400)
Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>

lib/core/bytes.nit
lib/core/stream.nit
lib/core/text/flat.nit

index 59c4c5f..332efc2 100644 (file)
@@ -146,80 +146,13 @@ class Bytes
        redef fun to_s do
                persisted = true
                var b = self
-               if not is_utf8 then
-                       b = clean_utf8
-                       persisted = false
-               end
-               return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
+               var r = b.items.to_s_with_length(length)
+               if r != items then persisted = false
+               return r
        end
 
        redef fun iterator do return new BytesIterator.with_buffer(self)
 
-       # Is the byte collection valid UTF-8 ?
-       fun is_utf8: Bool do
-               var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-               var lobounds = once [0, 0x80, 0x800, 0x10000]
-               var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-               var pos = 0
-               var len = length
-               var mits = items
-               while pos < len do
-                       var nxst = mits.length_of_char_at(pos)
-                       var charst_index = (nxst - 1) * 2
-                       if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-                               var c = mits.char_at(pos)
-                               var cp = c.ascii
-                               if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-                                       if cp >= 0xD800 and cp <= 0xDFFF or
-                                          cp == 0xFFFE or cp == 0xFFFF then return false
-                               else
-                                       return false
-                               end
-                       else
-                               return false
-                       end
-                       pos += nxst
-               end
-               return true
-       end
-
-       # Cleans the bytes of `self` to be UTF-8 compliant
-       private fun clean_utf8: Bytes do
-               var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-               var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
-               var lobounds = once [0, 0x80, 0x800, 0x10000]
-               var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-               var pos = 0
-               var len = length
-               var ret = new Bytes.with_capacity(len)
-               var mits = items
-               while pos < len do
-                       var nxst = mits.length_of_char_at(pos)
-                       var charst_index = (nxst - 1) * 2
-                       if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-                               var c = mits.char_at(pos)
-                               var cp = c.ascii
-                               if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-                                       if cp >= 0xD800 and cp <= 0xDFFF or
-                                          cp == 0xFFFE or cp == 0xFFFF then
-                                               ret.append badchar
-                                               pos += 1
-                                       else
-                                               var pend = pos + nxst
-                                               for i in [pos .. pend[ do ret.add mits[i]
-                                               pos += nxst
-                                       end
-                               else
-                                       ret.append badchar
-                                       pos += 1
-                               end
-                       else
-                               ret.append badchar
-                               pos += 1
-                       end
-               end
-               return ret
-       end
 end
 
 private class BytesIterator
index 2db319a..4b1e826 100644 (file)
@@ -173,12 +173,13 @@ abstract class Reader
        # ~~~
        fun read_all: String do
                var s = read_all_bytes
-               if not s.is_utf8 then s = s.clean_utf8
                var slen = s.length
                if slen == 0 then return ""
                var rets = ""
                var pos = 0
-               var sits = s.items
+               var str = s.items.clean_utf8(slen)
+               slen = str.bytelen
+               var sits = str.items
                var remsp = slen
                while pos < slen do
                        # The 129 size was decided more or less arbitrarily
index 7eebc08..2d4a942 100644 (file)
@@ -985,8 +985,7 @@ redef class NativeString
        redef fun to_s_with_length(length): FlatString
        do
                assert length >= 0
-               var str = new FlatString.with_infos(self, length, 0, length - 1)
-               return str
+               return clean_utf8(length)
        end
 
        redef fun to_s_full(bytelen, unilen) do
@@ -997,6 +996,8 @@ redef class NativeString
        redef fun to_s_with_copy: FlatString
        do
                var length = cstring_length
+               var r = clean_utf8(length)
+               if r.items != self then return r
                var new_self = new NativeString(length + 1)
                copy_to(new_self, length, 0, 0)
                var str = new FlatString.with_infos(new_self, length, 0, length - 1)