From: Jean Privat Date: Tue, 24 May 2016 23:12:29 +0000 (-0400) Subject: Merge: Added `copy_from` service to `NativeString` X-Git-Url: http://nitlanguage.org?hp=-c Merge: Added `copy_from` service to `NativeString` Usability PR, since I got tired of handling indexes and different structures manually, this method abstracts the job of copying the content of a `Text` to a `NativeString` object. Pull-Request: #2062 Reviewed-by: Jean Privat Reviewed-by: Alexis Laferrière --- 23903dc4840e6bb35aab9e51bfe7a1ae5f97d4db diff --combined lib/core/text/abstract_text.nit index c9d1279,a372cfc..d7dfa48 --- a/lib/core/text/abstract_text.nit +++ b/lib/core/text/abstract_text.nit @@@ -1514,42 -1514,6 +1514,42 @@@ abstract class Buffe # In Buffers, the internal sequence of character is mutable # Thus, `chars` can be used to modify the buffer. redef fun chars: Sequence[Char] is abstract + + # Appends `length` chars from `s` starting at index `from` + # + # ~~~nit + # var b = new Buffer + # b.append_substring("abcde", 1, 2) + # assert b == "bc" + # b.append_substring("vwxyz", 2, 3) + # assert b == "bcxyz" + # b.append_substring("ABCDE", 4, 300) + # assert b == "bcxyzE" + # b.append_substring("VWXYZ", 400, 1) + # assert b == "bcxyzE" + # ~~~ + fun append_substring(s: Text, from, length: Int) do + if from < 0 then + length += from + from = 0 + end + var ln = s.length + if (length + from) > ln then length = ln - from + if length <= 0 then return + append_substring_impl(s, from, length) + end + + # Unsafe version of `append_substring` for performance + # + # NOTE: Use only if sure about `from` and `length`, no checks + # or bound recalculation is done + fun append_substring_impl(s: Text, from, length: Int) do + var pos = from + for i in [0 .. length[ do + self.add s[pos] + pos += 1 + end + end end # View for chars on Buffer objects, extends Sequence @@@ -1791,18 -1755,6 +1791,18 @@@ redef class Cha return cp >= 0xD800 and cp <= 0xDFFF end + # Is `self` a UTF-16 high surrogate ? 
+ fun is_hi_surrogate: Bool do + var cp = code_point + return cp >= 0xD800 and cp <= 0xDBFF + end + + # Is `self` a UTF-16 low surrogate ? + fun is_lo_surrogate: Bool do + var cp = code_point + return cp >= 0xDC00 and cp <= 0xDFFF + end + # Length of `self` in a UTF-8 String fun u8char_len: Int do var c = self.code_point @@@ -2160,6 -2112,11 +2160,11 @@@ redef class NativeStrin # SEE: `abstract_text::Text` for more info on the difference # between `Text::bytelen` and `Text::length`. fun to_s_full(bytelen, unilen: Int): String is abstract + + # Copies the content of `src` to `self` + # + # NOTE: `self` must be large enough to withhold `src.bytelen` bytes + fun fill_from(src: Text) do src.copy_to_native(self, src.bytelen, 0, 0) end redef class NativeArray[E] diff --combined lib/core/text/flat.nit index 2fa0bcb,3d94394..9e70321 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@@ -369,6 -369,10 +369,10 @@@ redef class FlatTex end return res end + + redef fun copy_to_native(dst, n, src_off, dst_off) do + _items.copy_to(dst, n, first_byte + src_off, dst_off) + end end # Immutable strings of characters. 
@@@ -411,7 -415,7 +415,7 @@@ abstract class FlatStrin if from < 0 then count += from - if count < 0 then return "" + if count <= 0 then return "" from = 0 end @@@ -666,15 -670,15 +670,15 @@@ private class ASCIIFlatStrin end redef fun substring(from, count) do + var ln = _length + if count <= 0 then return "" + if (count + from) > ln then count = ln - from if count <= 0 then return "" - if from < 0 then count += from - if count < 0 then return "" + if count <= 0 then return "" from = 0 end - var ln = _length - if (count + from) > ln then count = ln - from return new ASCIIFlatString.full_data(_items, count, from + _first_byte, count) end @@@ -924,10 -928,7 +928,10 @@@ class FlatBuffe is_dirty = true _bytelen = 0 _length = 0 - if written then reset + if written then + _capacity = 16 + reset + end end redef fun empty do return new Buffer @@@ -1052,21 -1053,6 +1056,21 @@@ return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count) end + redef fun append_substring_impl(s, from, length) do + if length <= 0 then return + if not s isa FlatText then + super + return + end + var bytest = s.char_to_byte_index(from) + var bytend = s.char_to_byte_index(from + length - 1) + var btln = bytend - bytest + 1 + enlarge(btln + _bytelen) + s._items.copy_to(_items, btln, bytest, _bytelen) + _bytelen += btln + _length += length + end + redef fun reverse do written = false @@@ -1369,26 -1355,37 +1373,26 @@@ redef class NativeStrin # # Very unsafe, make sure to have room for this char prior to calling this function. 
private fun set_char_at(pos: Int, c: Char) do - if c.code_point < 128 then - self[pos] = c.code_point.to_b + var cp = c.code_point + if cp < 128 then + self[pos] = cp.to_b return end var ln = c.u8char_len - native_set_char(pos, c, ln) - end - - private fun native_set_char(pos: Int, c: Char, ln: Int) `{ - char* dst = self + pos; - switch(ln){ - case 1: - dst[0] = c; - break; - case 2: - dst[0] = 0xC0 | ((c & 0x7C0) >> 6); - dst[1] = 0x80 | (c & 0x3F); - break; - case 3: - dst[0] = 0xE0 | ((c & 0xF000) >> 12); - dst[1] = 0x80 | ((c & 0xFC0) >> 6); - dst[2] = 0x80 | (c & 0x3F); - break; - case 4: - dst[0] = 0xF0 | ((c & 0x1C0000) >> 18); - dst[1] = 0x80 | ((c & 0x3F000) >> 12); - dst[2] = 0x80 | ((c & 0xFC0) >> 6); - dst[3] = 0x80 | (c & 0x3F); - break; - } - `} + if ln == 2 then + self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b + self[pos + 1] = (0x80 | (cp & 0x3F)).to_b + else if ln == 3 then + self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b + self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b + self[pos + 2] = (0x80 | (cp & 0x3F)).to_b + else if ln == 4 then + self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b + self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b + self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b + self[pos + 3] = (0x80 | (cp & 0x3F)).to_b + end + end end redef class Int