lib/core: Improved the speed of `clean_utf8`
author: Lucas Bajolet <r4pass@hotmail.com>
Fri, 18 Dec 2015 20:40:47 +0000 (15:40 -0500)
committer: Lucas Bajolet <r4pass@hotmail.com>
Tue, 29 Dec 2015 04:49:28 +0000 (23:49 -0500)
Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>

lib/core/text/flat.nit

index 22707d0..bedf03c 100644 (file)
@@ -1101,8 +1101,23 @@ redef class NativeString
                var end_length = len
                var pos = 0
                var chr_ln = 0
-               while pos < len do
+               var rem = len
+               while rem > 0 do
+                       while rem >= 4 do
+                               var i = fetch_4_chars(pos)
+                               if i & 0x80808080 != 0 then break
+                               pos += 4
+                               chr_ln += 4
+                               rem -= 4
+                       end
+                       if rem == 0 then break
                        var b = self[pos]
+                       if b & 0x80u8 == 0x00u8 then
+                               pos += 1
+                               chr_ln += 1
+                               rem -= 1
+                               continue
+                       end
                        var nxst = length_of_char_at(pos)
                        var ok_st: Bool
                        if nxst == 1 then
@@ -1119,6 +1134,7 @@ redef class NativeString
                                replacements.add pos
                                end_length += 2
                                pos += 1
+                               rem -= 1
                                chr_ln += 1
                                continue
                        end
@@ -1141,9 +1157,12 @@ redef class NativeString
                                end_length += 2
                                pos += 1
                                chr_ln += 1
+                               rem -= 1
                                continue
                        end
-                       pos += c.u8char_len
+                       var clen = c.u8char_len
+                       pos += clen
+                       rem -= clen
                        chr_ln += 1
                end
                var ret = self