X-Git-Url: http://nitlanguage.org diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit index 2533323..06cdcfe 100644 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@ -36,16 +36,16 @@ end redef class FlatText - # First byte of the NativeString + # First byte of the CString protected fun first_byte: Int do return 0 - # Last byte of the NativeString - protected fun last_byte: Int do return first_byte + _bytelen - 1 + # Last byte of the CString + protected fun last_byte: Int do return first_byte + _byte_length - 1 # Cache of the latest position (char) explored in the string var position: Int = 0 - # Cached position (bytes) in the NativeString underlying the String + # Cached position (bytes) in the CString underlying the String var bytepos: Int = 0 # Index of the character `index` in `_items` @@ -113,15 +113,15 @@ redef class FlatText var endlen = 0 while pos <= max do var c = its[pos] - if c == 0x3Cu8 then + if c == b'<' then endlen += 3 - else if c == 0x3Eu8 then + else if c == b'>' then endlen += 3 - else if c == 0x26u8 then + else if c == b'&' then endlen += 4 - else if c == 0x22u8 then + else if c == b'"' then endlen += 4 - else if c == 0x27u8 then + else if c == b'\'' then endlen += 4 else if c == 0x2Fu8 then endlen += 4 @@ -138,60 +138,53 @@ redef class FlatText var its = _items var max = last_byte var pos = first_byte - var nlen = extra + _bytelen - var nits = new NativeString(nlen) + var nlen = extra + _byte_length + var nits = new CString(nlen) var outpos = 0 while pos <= max do var c = its[pos] # Special codes: # Some HTML characters are used as meta-data, they need # to be replaced by an HTML-Escaped equivalent - # - # * 0x3C (<) => < - # * 0x3E (>) => > - # * 0x26 (&) => & - # * 0x22 (") => " - # * 0x27 (') => ' - # * 0x2F (/) => / - if c == 0x3Cu8 then - nits[outpos] = 0x26u8 - nits[outpos + 1] = 0x6Cu8 - nits[outpos + 2] = 0x74u8 - nits[outpos + 3] = 0x3Bu8 + if c == b'<' then + nits[outpos] = b'&' + nits[outpos + 1] = b'l' + nits[outpos + 2] = b't' + nits[outpos + 3] = b';' outpos += 4 - else if c == 0x3Eu8 then - nits[outpos] = 0x26u8 - nits[outpos + 1] = 0x67u8 - nits[outpos + 2] = 0x74u8 - nits[outpos + 3] = 0x3Bu8 + else if c == b'>' then + nits[outpos] = b'&' + nits[outpos + 1] = b'g' + nits[outpos + 2] = b't' + nits[outpos + 3] = b';' outpos += 4 - else if c == 0x26u8 then - nits[outpos] = 0x26u8 - nits[outpos + 1] = 0x61u8 - nits[outpos + 2] = 0x6Du8 - nits[outpos + 3] = 0x70u8 - nits[outpos + 4] = 0x3Bu8 + else if c == b'&' then + nits[outpos] = b'&' + nits[outpos + 1] = b'a' + nits[outpos + 2] = b'm' + nits[outpos + 3] = b'p' + nits[outpos + 4] = b';' outpos += 5 - else if c == 0x22u8 then - nits[outpos] = 0x26u8 - nits[outpos + 1] = 0x23u8 - nits[outpos + 2] = 0x33u8 - nits[outpos + 3] = 0x34u8 - nits[outpos + 4] = 0x3Bu8 + else if c == b'"' then + nits[outpos] = b'&' + nits[outpos + 1] = b'#' + nits[outpos + 2] = b'3' + nits[outpos + 3] = b'4' + nits[outpos + 4] = b';' outpos += 5 - else if c == 0x27u8 then - nits[outpos] = 0x26u8 - nits[outpos + 1] = 0x23u8 - nits[outpos + 2] = 0x33u8 - nits[outpos + 3] = 0x39u8 - nits[outpos + 4] = 0x3Bu8 + else if c == b'\'' then + nits[outpos] = b'&' + nits[outpos + 1] = b'#' + nits[outpos + 2] = b'3' + nits[outpos + 3] = b'9' + nits[outpos + 4] = b';' outpos += 5 else if c == 0x2Fu8 then - nits[outpos] = 0x26u8 - nits[outpos + 1] = 0x23u8 - nits[outpos + 2] = 0x34u8 - nits[outpos + 3] = 0x37u8 - nits[outpos + 4] = 0x3Bu8 + nits[outpos] = b'&' + nits[outpos + 1] = b'#' + nits[outpos + 2] = b'4' + nits[outpos + 3] = b'7' + nits[outpos + 4] = b';' outpos += 5 else nits[outpos] = c @@ -215,16 +208,32 @@ redef class FlatText var req_esc = 0 while pos <= max do var c = its[pos] - if c == 0x0Au8 then + if c == b'\n' then req_esc += 1 - else if c == 0x09u8 then + else if c == b'\t' then req_esc += 1 - else if c == 0x22u8 then + else if c == b'"' then req_esc += 1 - else if c == 0x27u8 then + else if c == b'\'' then req_esc += 1 - else if c == 0x5Cu8 then + else if c == b'\\' then req_esc += 1 + else if c == 0x3Fu8 then + var j = pos + 1 + if j < length then + var next = its[j] + # We ignore `??'` because it will be escaped as `??\'`. + if + next == 0x21u8 or + next == 0x28u8 or + next == 0x29u8 or + next == 0x2Du8 or + next == 0x2Fu8 or + next == 0x3Cu8 or + next == 0x3Du8 or + next == 0x3Eu8 + then req_esc += 1 + end else if c < 32u8 then req_esc += 3 end @@ -238,8 +247,8 @@ redef class FlatText if ln_extra == 0 then return self.to_s var its = _items var max = last_byte - var nlen = _bytelen + ln_extra - var nns = new NativeString(nlen) + var nlen = _byte_length + ln_extra + var nns = new CString(nlen) var pos = first_byte var opos = 0 while pos <= max do @@ -260,31 +269,52 @@ redef class FlatText # * 0x22 => \" # * 0x27 => \' # * 0x5C => \\ - if c == 0x09u8 then - nns[opos] = 0x5Cu8 - nns[opos + 1] = 0x74u8 + if c == b'\t' then + nns[opos] = b'\\' + nns[opos + 1] = b't' opos += 2 - else if c == 0x0Au8 then - nns[opos] = 0x5Cu8 - nns[opos + 1] = 0x6Eu8 + else if c == b'\n' then + nns[opos] = b'\\' + nns[opos + 1] = b'n' opos += 2 - else if c == 0x22u8 then - nns[opos] = 0x5Cu8 - nns[opos + 1] = 0x22u8 + else if c == b'"' then + nns[opos] = b'\\' + nns[opos + 1] = b'"' opos += 2 - else if c == 0x27u8 then - nns[opos] = 0x5Cu8 - nns[opos + 1] = 0x27u8 + else if c == b'\'' then + nns[opos] = b'\\' + nns[opos + 1] = b'\'' opos += 2 - else if c == 0x5Cu8 then - nns[opos] = 0x5Cu8 - nns[opos + 1] = 0x5Cu8 + else if c == b'\\' then + nns[opos] = b'\\' + nns[opos + 1] = b'\\' opos += 2 + else if c == 0x3Fu8 then + var j = pos + 1 + if j < length then + var next = its[j] + # We ignore `??'` because it will be escaped as `??\'`. + if + next == 0x21u8 or + next == 0x28u8 or + next == 0x29u8 or + next == 0x2Du8 or + next == 0x2Fu8 or + next == 0x3Cu8 or + next == 0x3Du8 or + next == 0x3Eu8 + then + nns[opos] = 0x5Cu8 + opos += 1 + end + end + nns[opos] = 0x3Fu8 + opos += 1 else if c < 32u8 then - nns[opos] = 0x5Cu8 - nns[opos + 1] = 0x30u8 - nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8 - nns[opos + 3] = (c & 0x07u8) + 0x30u8 + nns[opos] = b'\\' + nns[opos + 1] = b'0' + nns[opos + 2] = ((c & 0x38u8) >> 3) + b'0' + nns[opos + 3] = (c & 0x07u8) + b'0' opos += 4 else nns[opos] = c @@ -292,7 +322,7 @@ redef class FlatText end pos += 1 end - return nns.to_s_unsafe(nlen) + return nns.to_s_unsafe(nlen, copy=false, clean=false) end redef fun [](index) do @@ -369,6 +399,10 @@ redef class FlatText end return res end + + redef fun copy_to_native(dst, n, src_off, dst_off) do + _items.copy_to(dst, n, first_byte + src_off, dst_off) + end end # Immutable strings of characters. @@ -379,20 +413,20 @@ abstract class FlatString # Index at which `self` begins in `_items`, inclusively redef var first_byte is noinit - redef var chars = new FlatStringCharView(self) is lazy + redef fun chars do return new FlatStringCharView(self) - redef var bytes = new FlatStringByteView(self) is lazy + redef fun bytes do return new FlatStringByteView(self) - redef var to_cstring is lazy do - var blen = _bytelen - var new_items = new NativeString(blen + 1) + redef fun to_cstring do + var blen = _byte_length + var new_items = new CString(blen + 1) _items.copy_to(new_items, blen, _first_byte, 0) new_items[blen] = 0u8 return new_items end redef fun reversed do - var b = new FlatBuffer.with_capacity(_bytelen + 1) + var b = new FlatBuffer.with_capacity(_byte_length + 1) var i = _length - 1 while i >= 0 do b.add self.fetch_char_at(i) @@ -411,7 +445,7 @@ abstract class FlatString if from < 0 then count += from - if count < 0 then return "" + if count <= 0 then return "" from = 0 end @@ -448,7 +482,7 @@ abstract class FlatString redef fun to_upper do - var outstr = new FlatBuffer.with_capacity(self._bytelen + 1) + var outstr = new FlatBuffer.with_capacity(self._byte_length + 1) var mylen = _length var pos = 0 @@ -463,7 +497,7 @@ abstract class FlatString redef fun to_lower do - var outstr = new FlatBuffer.with_capacity(self._bytelen + 1) + var outstr = new FlatBuffer.with_capacity(self._byte_length + 1) var mylen = _length var pos = 0 @@ -489,21 +523,21 @@ abstract class FlatString # # `_items` will be used as is, without copy, to retrieve the characters of the string. # Aliasing issues is the responsibility of the caller. - private new with_infos(items: NativeString, bytelen, from: Int) + private new with_infos(items: CString, byte_length, from: Int) do - var len = items.utf8_length(from, bytelen) - if bytelen == len then return new ASCIIFlatString.full_data(items, bytelen, from, len) - return new UnicodeFlatString.full_data(items, bytelen, from, len) + var len = items.utf8_length(from, byte_length) + if byte_length == len then return new ASCIIFlatString.full_data(items, byte_length, from, len) + return new UnicodeFlatString.full_data(items, byte_length, from, len) end # Low-level creation of a new string with all the data. # # `_items` will be used as is, without copy, to retrieve the characters of the string. # Aliasing issues is the responsibility of the caller. - private new full(items: NativeString, bytelen, from, length: Int) + private new full(items: CString, byte_length, from, length: Int) do - if bytelen == length then return new ASCIIFlatString.full_data(items, bytelen, from, length) - return new UnicodeFlatString.full_data(items, bytelen, from, length) + if byte_length == length then return new ASCIIFlatString.full_data(items, byte_length, from, length) + return new UnicodeFlatString.full_data(items, byte_length, from, length) end redef fun ==(other) @@ -512,9 +546,9 @@ abstract class FlatString if self.object_id == other.object_id then return true - var my_length = _bytelen + var my_length = _byte_length - if other._bytelen != my_length then return false + if other._byte_length != my_length then return false var my_index = _first_byte var its_index = other.first_byte @@ -542,8 +576,8 @@ abstract class FlatString var myits = _items var itsits = other._items - var mbt = _bytelen - var obt = other.bytelen + var mbt = _byte_length + var obt = other.byte_length var minln = if mbt < obt then mbt else obt var mst = _first_byte @@ -565,15 +599,15 @@ abstract class FlatString redef fun +(o) do var s = o.to_s - var slen = s.bytelen - var mlen = _bytelen + var slen = s.byte_length + var mlen = _byte_length var nlen = mlen + slen var mits = _items var mifrom = _first_byte if s isa FlatText then var sits = s._items var sifrom = s.first_byte - var ns = new NativeString(nlen + 1) + var ns = new CString(nlen + 1) mits.copy_to(ns, mlen, mifrom, 0) sits.copy_to(ns, slen, sifrom, mlen) return new FlatString.full(ns, nlen, 0, _length + o.length) @@ -583,21 +617,21 @@ abstract class FlatString end redef fun *(i) do - var mybtlen = _bytelen - var new_bytelen = mybtlen * i + var mybtlen = _byte_length + var new_byte_length = mybtlen * i var mylen = _length var newlen = mylen * i var its = _items var fb = _first_byte - var ns = new NativeString(new_bytelen + 1) - ns[new_bytelen] = 0u8 + var ns = new CString(new_byte_length + 1) + ns[new_byte_length] = 0u8 var offset = 0 while i > 0 do its.copy_to(ns, mybtlen, fb, offset) offset += mybtlen i -= 1 end - return new FlatString.full(ns, new_bytelen, 0, newlen) + return new FlatString.full(ns, new_byte_length, 0, newlen) end redef fun hash @@ -628,10 +662,10 @@ end private class UnicodeFlatString super FlatString - init full_data(items: NativeString, bytelen, from, length: Int) do + init full_data(items: CString, byte_length, from, length: Int) do self._items = items self._length = length - self._bytelen = bytelen + self._byte_length = byte_length _first_byte = from _bytepos = from end @@ -641,7 +675,7 @@ private class UnicodeFlatString if from <= 0 then return self var c = char_to_byte_index(from) var st = c - _first_byte - var fln = bytelen - st + var fln = byte_length - st return new FlatString.full(items, fln, c, _length - from) end end @@ -652,34 +686,34 @@ end private class ASCIIFlatString super FlatString - init full_data(items: NativeString, bytelen, from, length: Int) do + init full_data(items: CString, byte_length, from, length: Int) do self._items = items self._length = length - self._bytelen = bytelen + self._byte_length = byte_length _first_byte = from _bytepos = from end redef fun [](idx) do - assert idx < _bytelen and idx >= 0 + assert idx < _byte_length and idx >= 0 return _items[idx + _first_byte].ascii end redef fun substring(from, count) do + var ln = _length + if count <= 0 then return "" + if (count + from) > ln then count = ln - from if count <= 0 then return "" - if from < 0 then count += from - if count < 0 then return "" + if count <= 0 then return "" from = 0 end - var ln = _length - if (count + from) > ln then count = ln - from return new ASCIIFlatString.full_data(_items, count, from + _first_byte, count) end redef fun reversed do - var b = new FlatBuffer.with_capacity(_bytelen + 1) + var b = new FlatBuffer.with_capacity(_byte_length + 1) var i = _length - 1 while i >= 0 do b.add self[i] @@ -754,7 +788,7 @@ private class FlatStringByteReverseIterator var target: FlatString - var target_items: NativeString is noautoinit + var target_items: CString is noautoinit var curr_pos: Int @@ -780,7 +814,7 @@ private class FlatStringByteIterator var target: FlatString - var target_items: NativeString is noautoinit + var target_items: CString is noautoinit var curr_pos: Int @@ -811,7 +845,7 @@ private class FlatStringByteView # Check that the index (+ _first_byte) is not larger than last_byte # In other terms, if the index is valid var target = _target - assert index >= 0 and index < target._bytelen + assert index >= 0 and index < target._byte_length var ind = index + target._first_byte return target._items[ind] end @@ -833,44 +867,37 @@ class FlatBuffer super FlatText super Buffer - redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy - - redef var bytes = new FlatBufferByteView(self) is lazy - - private var char_cache: Int = -1 + redef fun chars do return new FlatBufferCharView(self) - private var byte_cache: Int = -1 + redef fun bytes do return new FlatBufferByteView(self) private var capacity = 0 - # Real items, used as cache for when to_cstring is called - private var real_items: NativeString is noinit - redef fun fast_cstring do return _items.fast_cstring(0) redef fun substrings do return new FlatSubstringsIter(self) - # Re-copies the `NativeString` into a new one and sets it as the new `Buffer` + # Re-copies the `CString` into a new one and sets it as the new `Buffer` # # This happens when an operation modifies the current `Buffer` and # the Copy-On-Write flag `written` is set at true. private fun reset do - var nns = new NativeString(capacity) - if _bytelen != 0 then _items.copy_to(nns, _bytelen, 0, 0) + var nns = new CString(capacity) + if _byte_length != 0 then _items.copy_to(nns, _byte_length, 0, 0) _items = nns written = false end # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from` # - # Internal only, does not modify _bytelen or length, this is the caller's responsability + # Internal only, does not modify _byte_length or length, this is the caller's responsability private fun rshift_bytes(from: Int, len: Int) do var oit = _items var nit = _items - var bt = _bytelen + var bt = _byte_length if bt + len > capacity then capacity = capacity * 2 + 2 - nit = new NativeString(capacity) + nit = new CString(capacity) oit.copy_to(nit, 0, 0, from) end oit.copy_to(nit, bt - from, from, from + len) @@ -878,17 +905,16 @@ class FlatBuffer # Shifts the content of the buffer by `len` bytes to the left, starting at `from` # - # Internal only, does not modify _bytelen or length, this is the caller's responsability + # Internal only, does not modify _byte_length or length, this is the caller's responsability private fun lshift_bytes(from: Int, len: Int) do var it = _items - it.copy_to(it, _bytelen - from, from, from - len) + it.copy_to(it, _byte_length - from, from, from - len) end redef fun []=(index, item) do assert index >= 0 and index <= _length if written then reset - is_dirty = true if index == _length then add item return @@ -904,27 +930,60 @@ class FlatBuffer else if size_diff < 0 then lshift_bytes(ip + clen, -size_diff) end - _bytelen += size_diff + _byte_length += size_diff it.set_char_at(ip, item) end + redef fun insert(s, pos) do + assert pos >= 0 and pos <= length + if pos == length then + append s + return + end + var slen = s.byte_length + enlarge(byte_length + slen) + var it = _items + var shpos = it.char_to_byte_index(pos) + rshift_bytes(shpos, slen) + s.copy_to_native(it, slen, 0, shpos) + length += s.length + byte_length += slen + end + + redef fun insert_char(c, pos) do + assert pos >= 0 and pos <= length + if pos == length then + add c + return + end + var clen = c.u8char_len + enlarge(byte_length + clen) + var it = _items + var shpos = it.char_to_byte_index(pos) + rshift_bytes(shpos, clen) + it.set_char_at(shpos, c) + length += 1 + byte_length += clen + end + redef fun add(c) do if written then reset - is_dirty = true var clen = c.u8char_len - var bt = _bytelen + var bt = _byte_length enlarge(bt + clen) _items.set_char_at(bt, c) - _bytelen += clen + _byte_length += clen _length += 1 end redef fun clear do - is_dirty = true - if written then reset - _bytelen = 0 + _byte_length = 0 _length = 0 + if written then + _capacity = 16 + reset + end end redef fun empty do return new Buffer @@ -938,8 +997,8 @@ class FlatBuffer # The COW flag can be set at false here, since # it does a copy of the current `Buffer` written = false - var bln = _bytelen - var a = new NativeString(c) + var bln = _byte_length + var a = new CString(c) if bln > 0 then var it = _items if bln > 0 then it.copy_to(a, bln, 0, 0) @@ -951,22 +1010,18 @@ class FlatBuffer redef fun to_s do written = true - var bln = _bytelen - if bln == 0 then _items = new NativeString(1) + var bln = _byte_length + if bln == 0 then _items = new CString(1) return new FlatString.full(_items, bln, 0, _length) end redef fun to_cstring do - if is_dirty then - var bln = _bytelen - var new_native = new NativeString(bln + 1) - new_native[bln] = 0u8 - if _length > 0 then _items.copy_to(new_native, bln, 0, 0) - real_items = new_native - is_dirty = false - end - return real_items + var bln = _byte_length + var new_native = new CString(bln + 1) + new_native[bln] = 0u8 + if _length > 0 then _items.copy_to(new_native, bln, 0, 0) + return new_native end # Create a new empty string. @@ -979,52 +1034,46 @@ class FlatBuffer # # If `_items` is shared, `written` should be set to true after the creation # so that a modification will do a copy-on-write. - private init with_infos(items: NativeString, capacity, bytelen, length: Int) + private init with_infos(items: CString, capacity, byte_length, length: Int) do self._items = items self.capacity = capacity - self._bytelen = bytelen + self._byte_length = byte_length self._length = length end # Create a new string copied from `s`. init from(s: Text) do - _items = new NativeString(s.bytelen) - if s isa FlatText then - _items = s._items - else - for i in substrings do i.as(FlatString)._items.copy_to(_items, i._bytelen, 0, 0) - end - _bytelen = s.bytelen + _items = new CString(s.byte_length) + for i in s.substrings do i._items.copy_to(_items, i._byte_length, first_byte, 0) + _byte_length = s.byte_length _length = s.length - _capacity = _bytelen - written = true + _capacity = _byte_length end # Create a new empty string with a given capacity. init with_capacity(cap: Int) do assert cap >= 0 - _items = new NativeString(cap + 1) + _items = new CString(cap) capacity = cap - _bytelen = 0 + _byte_length = 0 end redef fun append(s) do if s.is_empty then return - is_dirty = true - var sl = s.bytelen - var nln = _bytelen + sl + var sl = s.byte_length + var nln = _byte_length + sl enlarge(nln) if s isa FlatText then - s._items.copy_to(_items, sl, s.first_byte, _bytelen) + s._items.copy_to(_items, sl, s.first_byte, _byte_length) else for i in s.substrings do append i return end - _bytelen = nln + _byte_length = nln _length += s.length end @@ -1049,11 +1098,40 @@ class FlatBuffer var byteto = its.char_to_byte_index(count + from - 1) byteto += its.char_at(byteto).u8char_len - 1 var byte_length = byteto - bytefrom + 1 - var r_items = new NativeString(byte_length) + var r_items = new CString(byte_length) its.copy_to(r_items, byte_length, bytefrom, 0) return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count) end + redef fun append_substring_impl(s, from, length) do + if length <= 0 then return + if not s isa FlatText then + super + return + end + var sits = s._items + var bytest = s.char_to_byte_index(from) + var bytend = s.char_to_byte_index(from + length - 1) + var btln = bytend - bytest + sits.char_at(bytend).u8char_len + enlarge(btln + _byte_length) + sits.copy_to(_items, btln, bytest, _byte_length) + _byte_length += btln + _length += length + end + + redef fun remove_at(p, len) do + if len == null then len = 1 + if len == 0 then return + var its = _items + var bst = char_to_byte_index(p) + var bend = char_to_byte_index(p + len - 1) + bend += its.char_at(bend).u8char_len + var blen = bend - bst + lshift_bytes(bend, bend - bst) + byte_length -= blen + length -= len + end + redef fun reverse do written = false @@ -1064,7 +1142,7 @@ class FlatBuffer redef fun times(repeats) do - var bln = _bytelen + var bln = _byte_length var x = new FlatString.full(_items, bln, 0, _length) for i in [1 .. repeats[ do append(x) @@ -1089,7 +1167,7 @@ private class FlatBufferByteReverseIterator var target: FlatBuffer - var target_items: NativeString is noautoinit + var target_items: CString is noautoinit var curr_pos: Int @@ -1123,7 +1201,7 @@ private class FlatBufferByteIterator var target: FlatBuffer - var target_items: NativeString is noautoinit + var target_items: CString is noautoinit var curr_pos: Int @@ -1131,7 +1209,7 @@ private class FlatBufferByteIterator redef fun index do return curr_pos - redef fun is_ok do return curr_pos < target._bytelen + redef fun is_ok do return curr_pos < target._byte_length redef fun item do return target_items[curr_pos] @@ -1222,43 +1300,49 @@ private class FlatBufferCharIterator end -redef class NativeString - redef fun to_s - do - return to_s_with_length(cstring_length) - end +redef class CString - redef fun to_s_with_length(length) - do - assert length >= 0 - return clean_utf8(length) - end + # Get a `String` from the data at `self` copied into Nit memory + # + # Require: `self` is a null-terminated string. + redef fun to_s do return to_s_unsafe - redef fun to_s_full(bytelen, unilen) do - return new FlatString.full(self, bytelen, 0, unilen) - end + # Get a `String` from `byte_length` bytes at `self` copied into Nit memory + # + # The string is cleaned. + fun to_s_with_length(byte_length: Int): String do return to_s_unsafe(byte_length) - redef fun to_s_unsafe(len) do - if len == null then len = cstring_length - return new FlatString.with_infos(self, len, 0) - end + redef fun to_s_unsafe(byte_length, char_length, copy, clean) + do + byte_length = byte_length or else cstring_length + clean = clean or else true + copy = copy or else true + + # Clean? + var str = null + if clean then + str = clean_utf8(byte_length) + char_length = str.length + else + char_length = char_length or else utf8_length(0, byte_length) + end - redef fun to_s_with_copy do return to_s_with_copy_and_length(cstring_length) + # Copy? (if not already copied by `clean_utf8`) + if copy and (str == null or str.items == self) then + var new_cstr = new CString(byte_length + 1) + copy_to(new_cstr, byte_length, 0, 0) + new_cstr[byte_length] = 0u8 + str = new FlatString.full(new_cstr, byte_length, 0, char_length) + end + + if str == null then + str = new FlatString.full(self, byte_length, 0, char_length) + end - # Get a `String` from `length` bytes at `self` copied into Nit memory - fun to_s_with_copy_and_length(length: Int): String - do - var r = clean_utf8(length) - if r.items != self then return r - var new_self = new NativeString(length + 1) - copy_to(new_self, length, 0, 0) - var str = new FlatString.with_infos(new_self, length, 0) - new_self[length] = 0u8 - str.to_cstring = new_self return str end - # Cleans a NativeString if necessary + # Cleans a CString if necessary fun clean_utf8(len: Int): FlatString do var replacements: nullable Array[Int] = null var end_length = len @@ -1268,7 +1352,7 @@ redef class NativeString while rem > 0 do while rem >= 4 do var i = fetch_4_chars(pos) - if i & 0x80808080 != 0 then break + if i & 0x80808080u32 != 0u32 then break pos += 4 chr_ln += 4 rem -= 4 @@ -1330,7 +1414,7 @@ redef class NativeString end var ret = self if end_length != len then - ret = new NativeString(end_length) + ret = new CString(end_length) var old_repl = 0 var off = 0 var repls = replacements.as(not null) @@ -1356,37 +1440,26 @@ redef class NativeString # # Very unsafe, make sure to have room for this char prior to calling this function. private fun set_char_at(pos: Int, c: Char) do - if c.code_point < 128 then - self[pos] = c.code_point.to_b + var cp = c.code_point + if cp < 128 then + self[pos] = cp.to_b return end var ln = c.u8char_len - native_set_char(pos, c, ln) - end - - private fun native_set_char(pos: Int, c: Char, ln: Int) `{ - char* dst = self + pos; - switch(ln){ - case 1: - dst[0] = c; - break; - case 2: - dst[0] = 0xC0 | ((c & 0x7C0) >> 6); - dst[1] = 0x80 | (c & 0x3F); - break; - case 3: - dst[0] = 0xE0 | ((c & 0xF000) >> 12); - dst[1] = 0x80 | ((c & 0xFC0) >> 6); - dst[2] = 0x80 | (c & 0x3F); - break; - case 4: - dst[0] = 0xF0 | ((c & 0x1C0000) >> 18); - dst[1] = 0x80 | ((c & 0x3F000) >> 12); - dst[2] = 0x80 | ((c & 0xFC0) >> 6); - dst[3] = 0x80 | (c & 0x3F); - break; - } - `} + if ln == 2 then + self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b + self[pos + 1] = (0x80 | (cp & 0x3F)).to_b + else if ln == 3 then + self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b + self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b + self[pos + 2] = (0x80 | (cp & 0x3F)).to_b + else if ln == 4 then + self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b + self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b + self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b + self[pos + 3] = (0x80 | (cp & 0x3F)).to_b + end + end end redef class Int @@ -1400,7 +1473,7 @@ redef class Int if self == 1 then return "1" var nslen = int_to_s_len - var ns = new NativeString(nslen + 1) + var ns = new CString(nslen + 1) ns[nslen] = 0u8 native_int_to_s(ns, nslen + 1) return new FlatString.full(ns, nslen, 0, nslen) @@ -1428,25 +1501,25 @@ redef class Array[E] continue end var tmp = itsi.to_s - sl += tmp.bytelen + sl += tmp.byte_length na[mypos] = tmp i += 1 mypos += 1 end - var ns = new NativeString(sl + 1) + var ns = new CString(sl + 1) ns[sl] = 0u8 i = 0 var off = 0 while i < mypos do var tmp = na[i] if tmp isa FlatString then - var tpl = tmp._bytelen + var tpl = tmp._byte_length tmp._items.copy_to(ns, tpl, tmp._first_byte, off) off += tpl else for j in tmp.substrings do var s = j.as(FlatString) - var slen = s._bytelen + var slen = s._byte_length s._items.copy_to(ns, slen, s._first_byte, off) off += slen end @@ -1466,24 +1539,24 @@ redef class NativeArray[E] var sl = 0 var mypos = 0 while i < l do - sl += na[i].bytelen + sl += na[i].byte_length i += 1 mypos += 1 end - var ns = new NativeString(sl + 1) + var ns = new CString(sl + 1) ns[sl] = 0u8 i = 0 var off = 0 while i < mypos do var tmp = na[i] if tmp isa FlatString then - var tpl = tmp._bytelen + var tpl = tmp._byte_length tmp._items.copy_to(ns, tpl, tmp._first_byte, off) off += tpl else for j in tmp.substrings do var s = j.as(FlatString) - var slen = s._bytelen + var slen = s._byte_length s._items.copy_to(ns, slen, s._first_byte, off) off += slen end