1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
32 redef fun is_ok
do return tgt
!= null
34 redef fun next
do tgt
= null
39 # Index of the first byte of `self` in the NativeString; this base implementation returns 0
40 protected fun first_byte
: Int do return 0
42 # Index of the last byte of `self` in the NativeString, i.e. `first_byte + _bytelen - 1`
43 protected fun last_byte
: Int do return first_byte
+ _bytelen
- 1
45 # Cache of the latest position (char) explored in the string
48 # Cached position (bytes) in the NativeString underlying the String
51 # Index of the character `index` in `_items`
52 fun char_to_byte_index
(index
: Int): Int do
53 var dpos
= index
- _position
58 if its
[b
] & 0x80u
8 == 0x00u
8 then
61 b
+= its
.length_of_char_at
(b
)
68 b
= its
.find_beginning_of_char_at
(b
- 1)
73 if dpos
== 0 then return b
77 # Find best insertion point
78 var delta_begin
= index
79 var delta_end
= (ln
- 1) - index
80 var delta_cache
= (pos
- index
).abs
83 if delta_cache
< min
then min
= delta_cache
84 if delta_end
< min
then min
= delta_end
89 if min
== delta_cache
then
92 else if min
== delta_begin
then
96 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
100 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
108 # By escaping `self` to HTML, how many more bytes will be needed ?
109 fun chars_to_html_escape
: Int do
118 else if c
== b
'>' then
120 else if c
== b
'&' then
122 else if c
== b
'"' then
124 else if c
== b
'\'' then
126 else if c == 0x2Fu8 then
134 redef fun html_escape
136 var extra = chars_to_html_escape
137 if extra == 0 then return to_s
141 var nlen = extra + _bytelen
142 var nits = new NativeString(nlen)
147 # Some HTML characters are used as meta-data, they need
148 # to be replaced by an HTML-Escaped equivalent
151 nits[outpos + 1] = b'l
'
152 nits[outpos + 2] = b't
'
153 nits[outpos + 3] = b';'
155 else if c == b'>' then
157 nits[outpos + 1] = b'g
'
158 nits[outpos + 2] = b't
'
159 nits[outpos + 3] = b';'
161 else if c == b'&' then
163 nits[outpos + 1] = b'a
'
164 nits[outpos + 2] = b'm
'
165 nits[outpos + 3] = b'p
'
166 nits[outpos + 4] = b';'
168 else if c == b'"' then
170 nits[outpos + 1] = b'#'
171 nits[outpos + 2] = b'3'
172 nits[outpos + 3] = b'4'
173 nits[outpos + 4] = b';'
175 else if c == b'\'' then
177 nits[outpos + 1] = b'#'
178 nits[outpos + 2] = b'3'
179 nits[outpos + 3] = b'9'
180 nits[outpos + 4] = b';'
182 else if c == 0x2Fu8 then
184 nits[outpos + 1] = b'#'
185 nits[outpos + 2] = b'4'
186 nits[outpos + 3] = b'7'
187 nits[outpos + 4] = b';'
195 var s = new FlatString.with_infos(nits, nlen, 0)
199 # By escaping `self` to C, how many more bytes will be needed ?
201 # This enables a double-optimization in `escape_to_c` since if this
202 # method returns 0, then `self` does not need escaping and can be
204 fun chars_to_escape_to_c: Int do
213 else if c == b'\t' then
215 else if c == b'"' then
217 else if c == b'\
'' then
219 else if c
== b
'\\' then
221 else if c
< 32u8
then
229 redef fun escape_to_c
do
230 var ln_extra
= chars_to_escape_to_c
231 if ln_extra
== 0 then return self.to_s
234 var nlen
= _bytelen
+ ln_extra
235 var nns
= new NativeString(nlen
)
242 # Any byte with value < 32 is a control character
243 # All their uses will be replaced by their octal
246 # There are two exceptions however:
251 # Aside from the code points above, the following are:
260 else if c
== b
'\n' then
264 else if c
== b
'"' then
268 else if c
== b
'\'' then
270 nns[opos + 1] = b'\
''
272 else if c
== b
'\\' then
274 nns
[opos
+ 1] = b
'\\'
276 else if c
< 32u8
then
279 nns
[opos
+ 2] = ((c
& 0x38u
8) >> 3) + b
'0'
280 nns
[opos
+ 3] = (c
& 0x07u
8) + b
'0'
288 return nns
.to_s_unsafe
(nlen
)
291 redef fun [](index
) do
295 # * ~70% want the next char
296 # * ~23% want the previous
297 # * ~7% want the same char
299 # So it makes sense to shortcut early. And early is here.
300 var dpos
= index
- _position
302 if dpos
== 1 and index
< len
- 1 then
305 if c
& 0x80u
8 == 0x00u
8 then
306 # We want the next, and current is easy.
307 # So next is easy to find!
311 # The rest will be done by `dpos==0` below.
314 else if dpos
== -1 and index
> 1 then
317 if c
& 0x80u
8 == 0x00u
8 then
318 # We want the previous, and it is easy.
327 # We know what we want (+0 or +1) just get it now!
330 if c
& 0x80u
8 == 0x00u
8 then return c
.ascii
331 return items
.char_at
(b
)
334 assert index
>= 0 and index
< len
335 return fetch_char_at
(index
)
338 # Gets a `Char` at `index` in `self`
340 # WARNING: Use at your own risks as no bound-checking is done
341 fun fetch_char_at
(index
: Int): Char do
342 var i
= char_to_byte_index
(index
)
345 if b
& 0x80u
8 == 0x00u
8 then return b
.ascii
346 return items
.char_at
(i
)
349 # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
351 # assert "ff".to_hex == 255
352 redef fun to_hex
(pos
, ln
) do
354 if pos
== null then pos
= 0
355 if ln
== null then ln
= length
- pos
356 pos
= char_to_byte_index
(pos
)
359 for i
in [pos
.. max
[ do
361 res
+= its
[i
].ascii
.from_hex
367 # Immutable strings of characters.
368 abstract class FlatString
372 # Index at which `self` begins in `_items`, inclusively
373 redef var first_byte
is noinit
375 redef var chars
= new FlatStringCharView(self) is lazy
377 redef var bytes
= new FlatStringByteView(self) is lazy
379 redef var to_cstring
is lazy
do
381 var new_items
= new NativeString(blen
+ 1)
382 _items
.copy_to
(new_items
, blen
, _first_byte
, 0)
383 new_items
[blen
] = 0u8
387 redef fun reversed
do
388 var b
= new FlatBuffer.with_capacity
(_bytelen
+ 1)
391 b
.add
self.fetch_char_at
(i
)
394 var s
= b
.to_s
.as(FlatString)
395 s
._length
= self._length
399 redef fun fast_cstring
do return _items
.fast_cstring
(_first_byte
)
401 redef fun substring
(from
, count
)
403 if count
<= 0 then return ""
407 if count
< 0 then return ""
412 if (count
+ from
) > ln
then count
= ln
- from
413 if count
<= 0 then return ""
414 var end_index
= from
+ count
- 1
415 return substring_impl
(from
, count
, end_index
)
418 private fun substring_impl
(from
, count
, end_index
: Int): String do
419 var cache
= _position
420 var dfrom
= (cache
- from
).abs
421 var dend
= (end_index
- from
).abs
426 bytefrom
= char_to_byte_index
(from
)
427 byteto
= char_to_byte_index
(end_index
)
429 byteto
= char_to_byte_index
(end_index
)
430 bytefrom
= char_to_byte_index
(from
)
434 byteto
+= its
.length_of_char_at
(byteto
) - 1
436 var s
= new FlatString.full
(its
, byteto
- bytefrom
+ 1, bytefrom
, count
)
440 redef fun empty
do return "".as(FlatString)
444 var outstr
= new FlatBuffer.with_capacity
(self._bytelen
+ 1)
450 outstr
.add
(chars
[pos
].to_upper
)
459 var outstr
= new FlatBuffer.with_capacity
(self._bytelen
+ 1)
465 outstr
.add
(chars
[pos
].to_lower
)
474 for i
in chars
do i
.output
477 ##################################################
478 # String Specific Methods #
479 ##################################################
481 # Low-level creation of a new string with minimal data.
483 # `_items` will be used as is, without copy, to retrieve the characters of the string.
484 # Aliasing issues are the responsibility of the caller.
485 private new with_infos
(items
: NativeString, bytelen
, from
: Int)
487 var len
= items
.utf8_length
(from
, bytelen
)
488 if bytelen
== len
then return new ASCIIFlatString.full_data
(items
, bytelen
, from
, len
)
489 return new UnicodeFlatString.full_data
(items
, bytelen
, from
, len
)
492 # Low-level creation of a new string with all the data.
494 # `_items` will be used as is, without copy, to retrieve the characters of the string.
495 # Aliasing issues are the responsibility of the caller.
496 private new full
(items
: NativeString, bytelen
, from
, length
: Int)
498 if bytelen
== length
then return new ASCIIFlatString.full_data
(items
, bytelen
, from
, length
)
499 return new UnicodeFlatString.full_data
(items
, bytelen
, from
, length
)
504 if not other
isa FlatText then return super
506 if self.object_id
== other
.object_id
then return true
508 var my_length
= _bytelen
510 if other
._bytelen
!= my_length
then return false
512 var my_index
= _first_byte
513 var its_index
= other
.first_byte
515 var last_iteration
= my_index
+ my_length
517 var its_items
= other
._items
518 var my_items
= self._items
520 while my_index
< last_iteration
do
521 if my_items
[my_index
] != its_items
[its_index
] then return false
531 if not other
isa FlatText then return super
533 if self.object_id
== other
.object_id
then return false
536 var itsits
= other
._items
539 var obt
= other
.bytelen
541 var minln
= if mbt
< obt
then mbt
else obt
542 var mst
= _first_byte
543 var ost
= other
.first_byte
545 for i
in [0 .. minln
[ do
546 var my_curr_char
= myits
[mst
]
547 var its_curr_char
= itsits
[ost
]
549 if my_curr_char
> its_curr_char
then return false
550 if my_curr_char
< its_curr_char
then return true
563 var nlen
= mlen
+ slen
565 var mifrom
= _first_byte
566 if s
isa FlatText then
568 var sifrom
= s
.first_byte
569 var ns
= new NativeString(nlen
+ 1)
570 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
571 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
572 return new FlatString.full
(ns
, nlen
, 0, _length
+ o
.length
)
579 var mybtlen
= _bytelen
580 var new_bytelen
= mybtlen
* i
582 var newlen
= mylen
* i
585 var ns
= new NativeString(new_bytelen
+ 1)
586 ns
[new_bytelen
] = 0u8
589 its
.copy_to
(ns
, mybtlen
, fb
, offset
)
593 return new FlatString.full
(ns
, new_bytelen
, 0, newlen
)
598 if hash_cache
== null then
599 # djb2 hash algorithm
603 var my_items
= _items
607 h
= (h
<< 5) + h
+ my_items
[i
].to_i
614 return hash_cache
.as(not null)
617 redef fun substrings
do return new FlatSubstringsIter(self)
620 # Regular Nit UTF-8 strings
621 private class UnicodeFlatString
624 init full_data
(items
: NativeString, bytelen
, from
, length
: Int) do
626 self._length
= length
627 self._bytelen
= bytelen
632 redef fun substring_from
(from
) do
633 if from
>= self._length
then return empty
634 if from
<= 0 then return self
635 var c
= char_to_byte_index
(from
)
636 var st
= c
- _first_byte
637 var fln
= bytelen
- st
638 return new FlatString.full
(items
, fln
, c
, _length
- from
)
642 # Special cases of String where all the characters are ASCII-based
644 # Optimizes access operations to O(1) complexity.
645 private class ASCIIFlatString
648 init full_data
(items
: NativeString, bytelen
, from
, length
: Int) do
650 self._length
= length
651 self._bytelen
= bytelen
657 assert idx
< _bytelen
and idx
>= 0
658 return _items
[idx
+ _first_byte
].ascii
661 redef fun substring
(from
, count
) do
662 if count
<= 0 then return ""
666 if count
< 0 then return ""
670 if (count
+ from
) > ln
then count
= ln
- from
671 return new ASCIIFlatString.full_data
(_items
, count
, from
+ _first_byte
, count
)
674 redef fun reversed
do
675 var b
= new FlatBuffer.with_capacity
(_bytelen
+ 1)
681 var s
= b
.to_s
.as(FlatString)
685 redef fun char_to_byte_index
(index
) do return index
+ _first_byte
687 redef fun substring_impl
(from
, count
, end_index
) do
688 return new ASCIIFlatString.full_data
(_items
, count
, from
+ _first_byte
, count
)
691 redef fun fetch_char_at
(i
) do return _items
[i
+ _first_byte
].ascii
694 private class FlatStringCharReverseIterator
695 super IndexedIterator[Char]
697 var target
: FlatString
701 redef fun is_ok
do return curr_pos
>= 0
703 redef fun item
do return target
[curr_pos
]
705 redef fun next
do curr_pos
-= 1
707 redef fun index
do return curr_pos
711 private class FlatStringCharIterator
712 super IndexedIterator[Char]
714 var target
: FlatString
716 var max
: Int is noautoinit
720 init do max
= target
._length
- 1
722 redef fun is_ok
do return curr_pos
<= max
724 redef fun item
do return target
[curr_pos
]
726 redef fun next
do curr_pos
+= 1
728 redef fun index
do return curr_pos
732 private class FlatStringCharView
735 redef type SELFTYPE: FlatString
737 redef fun [](index
) do return target
[index
]
739 redef fun iterator_from
(start
) do return new FlatStringCharIterator(target
, start
)
741 redef fun reverse_iterator_from
(start
) do return new FlatStringCharReverseIterator(target
, start
)
745 private class FlatStringByteReverseIterator
746 super IndexedIterator[Byte]
748 var target
: FlatString
750 var target_items
: NativeString is noautoinit
757 target_items
= tgt
._items
758 curr_pos
+= tgt
._first_byte
761 redef fun is_ok
do return curr_pos
>= target
._first_byte
763 redef fun item
do return target_items
[curr_pos
]
765 redef fun next
do curr_pos
-= 1
767 redef fun index
do return curr_pos
- target
._first_byte
771 private class FlatStringByteIterator
772 super IndexedIterator[Byte]
774 var target
: FlatString
776 var target_items
: NativeString is noautoinit
783 target_items
= tgt
._items
784 curr_pos
+= tgt
._first_byte
787 redef fun is_ok
do return curr_pos
<= target
.last_byte
789 redef fun item
do return target_items
[curr_pos
]
791 redef fun next
do curr_pos
+= 1
793 redef fun index
do return curr_pos
- target
._first_byte
797 private class FlatStringByteView
800 redef type SELFTYPE: FlatString
804 # Check that the index (+ _first_byte) is not larger than last_byte
805 # In other terms, if the index is valid
807 assert index
>= 0 and index
< target
._bytelen
808 var ind
= index
+ target
._first_byte
809 return target
._items
[ind
]
812 redef fun iterator_from
(start
) do return new FlatStringByteIterator(target
, start
)
814 redef fun reverse_iterator_from
(start
) do return new FlatStringByteReverseIterator(target
, start
)
819 redef new do return new FlatBuffer
821 redef new with_cap
(i
) do return new FlatBuffer.with_capacity
(i
)
824 # Mutable strings of characters.
829 redef var chars
: Sequence[Char] = new FlatBufferCharView(self) is lazy
831 redef var bytes
= new FlatBufferByteView(self) is lazy
833 private var char_cache
: Int = -1
835 private var byte_cache
: Int = -1
837 private var capacity
= 0
839 # Real items, used as cache for when to_cstring is called
840 private var real_items
: NativeString is noinit
842 redef fun fast_cstring
do return _items
.fast_cstring
(0)
844 redef fun substrings
do return new FlatSubstringsIter(self)
846 # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
848 # This happens when an operation modifies the current `Buffer` and
849 # the Copy-On-Write flag `written` is set to true.
851 var nns
= new NativeString(capacity
)
852 if _bytelen
!= 0 then _items
.copy_to
(nns
, _bytelen
, 0, 0)
857 # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
859 # Internal only, does not modify _bytelen or length, this is the caller's responsibility
860 private fun rshift_bytes
(from
: Int, len
: Int) do
864 if bt
+ len
> capacity
then
865 capacity
= capacity
* 2 + 2
866 nit
= new NativeString(capacity
)
867 oit
.copy_to
(nit
, 0, 0, from
)
869 oit
.copy_to
(nit
, bt
- from
, from
, from
+ len
)
872 # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
874 # Internal only, does not modify _bytelen or length, this is the caller's responsibility
875 private fun lshift_bytes
(from
: Int, len
: Int) do
877 it
.copy_to
(it
, _bytelen
- from
, from
, from
- len
)
880 redef fun []=(index
, item
)
882 assert index
>= 0 and index
<= _length
883 if written
then reset
885 if index
== _length
then
890 var ip
= it
.char_to_byte_index
(index
)
891 var c
= it
.char_at
(ip
)
892 var clen
= c
.u8char_len
893 var itemlen
= item
.u8char_len
894 var size_diff
= itemlen
- clen
895 if size_diff
> 0 then
896 rshift_bytes
(ip
+ clen
, size_diff
)
897 else if size_diff
< 0 then
898 lshift_bytes
(ip
+ clen
, -size_diff
)
900 _bytelen
+= size_diff
901 it
.set_char_at
(ip
, item
)
906 if written
then reset
908 var clen
= c
.u8char_len
911 _items
.set_char_at
(bt
, c
)
920 if written
then reset
923 redef fun empty
do return new Buffer
925 redef fun enlarge
(cap
)
928 if cap
<= c
then return
929 if c
<= 16 then c
= 16
930 while c
<= cap
do c
= c
* 2
931 # The COW flag can be set to false here, since
932 # it does a copy of the current `Buffer`
935 var a
= new NativeString(c
)
938 if bln
> 0 then it
.copy_to
(a
, bln
, 0, 0)
948 if bln
== 0 then _items
= new NativeString(1)
949 return new FlatString.full
(_items
, bln
, 0, _length
)
956 var new_native
= new NativeString(bln
+ 1)
957 new_native
[bln
] = 0u8
958 if _length
> 0 then _items
.copy_to
(new_native
, bln
, 0, 0)
959 real_items
= new_native
965 # Create a new empty string.
968 # Low-level creation of a new buffer with given data.
970 # `_items` will be used as is, without copy, to store the characters of the buffer.
971 # Aliasing issues are the responsibility of the caller.
973 # If `_items` is shared, `written` should be set to true after the creation
974 # so that a modification will do a copy-on-write.
975 private init with_infos
(items
: NativeString, capacity
, bytelen
, length
: Int)
978 self.capacity
= capacity
979 self._bytelen
= bytelen
980 self._length
= length
983 # Create a new string copied from `s`.
986 _items
= new NativeString(s
.bytelen
)
987 if s
isa FlatText then
990 for i
in substrings
do i
.as(FlatString)._items
.copy_to
(_items
, i
._bytelen
, 0, 0)
998 # Create a new empty string with a given capacity.
999 init with_capacity
(cap
: Int)
1002 _items
= new NativeString(cap
)
1009 if s
.is_empty
then return
1012 var nln
= _bytelen
+ sl
1014 if s
isa FlatText then
1015 s
._items
.copy_to
(_items
, sl
, s
.first_byte
, _bytelen
)
1017 for i
in s
.substrings
do append i
1024 # Copies `len` characters of `self`, starting at `start`, into `dest` starting at `new_start`
1025 fun copy
(start
: Int, len
: Int, dest
: Buffer, new_start
: Int)
1027 var self_chars
= self.chars
1028 var dest_chars
= dest
.chars
1029 for i
in [0..len-1
] do
1030 dest_chars
[new_start
+i
] = self_chars
[start
+i
]
1034 redef fun substring
(from
, count
)
1037 if from
< 0 then from
= 0
1038 if (from
+ count
) > _length
then count
= _length
- from
1039 if count
<= 0 then return new Buffer
1041 var bytefrom
= its
.char_to_byte_index
(from
)
1042 var byteto
= its
.char_to_byte_index
(count
+ from
- 1)
1043 byteto
+= its
.char_at
(byteto
).u8char_len
- 1
1044 var byte_length
= byteto
- bytefrom
+ 1
1045 var r_items
= new NativeString(byte_length
)
1046 its
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
1047 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
, count
)
1053 var ns
= new FlatBuffer.with_capacity
(capacity
)
1054 for i
in chars
.reverse_iterator
do ns
.add i
1058 redef fun times
(repeats
)
1061 var x
= new FlatString.full
(_items
, bln
, 0, _length
)
1062 for i
in [1 .. repeats
[ do
1069 if written
then reset
1070 for i
in [0 .. _length
[ do self[i
] = self[i
].to_upper
1075 if written
then reset
1076 for i
in [0 .. _length
[ do self[i
] = self[i
].to_lower
1080 private class FlatBufferByteReverseIterator
1081 super IndexedIterator[Byte]
1083 var target
: FlatBuffer
1085 var target_items
: NativeString is noautoinit
1089 init do target_items
= target
._items
1091 redef fun index
do return curr_pos
1093 redef fun is_ok
do return curr_pos
>= 0
1095 redef fun item
do return target_items
[curr_pos
]
1097 redef fun next
do curr_pos
-= 1
1101 private class FlatBufferByteView
1102 super BufferByteView
1104 redef type SELFTYPE: FlatBuffer
1106 redef fun [](index
) do return target
._items
[index
]
1108 redef fun iterator_from
(pos
) do return new FlatBufferByteIterator(target
, pos
)
1110 redef fun reverse_iterator_from
(pos
) do return new FlatBufferByteReverseIterator(target
, pos
)
1114 private class FlatBufferByteIterator
1115 super IndexedIterator[Byte]
1117 var target
: FlatBuffer
1119 var target_items
: NativeString is noautoinit
1123 init do target_items
= target
._items
1125 redef fun index
do return curr_pos
1127 redef fun is_ok
do return curr_pos
< target
._bytelen
1129 redef fun item
do return target_items
[curr_pos
]
1131 redef fun next
do curr_pos
+= 1
1135 private class FlatBufferCharReverseIterator
1136 super IndexedIterator[Char]
1138 var target
: FlatBuffer
1142 redef fun index
do return curr_pos
1144 redef fun is_ok
do return curr_pos
>= 0
1146 redef fun item
do return target
[curr_pos
]
1148 redef fun next
do curr_pos
-= 1
1152 private class FlatBufferCharView
1153 super BufferCharView
1155 redef type SELFTYPE: FlatBuffer
1157 redef fun [](index
) do return target
[index
]
1159 redef fun []=(index
, item
)
1161 assert index
>= 0 and index
<= length
1162 if index
== length
then
1166 target
[index
] = item
1179 fun enlarge
(cap
: Int)
1186 var s_length
= s
.length
1187 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
._length
)
1188 for i
in s
do target
.add i
1191 redef fun iterator_from
(pos
) do return new FlatBufferCharIterator(target
, pos
)
1193 redef fun reverse_iterator_from
(pos
) do return new FlatBufferCharReverseIterator(target
, pos
)
1197 private class FlatBufferCharIterator
1198 super IndexedIterator[Char]
1200 var target
: FlatBuffer
1202 var max
: Int is noautoinit
1206 init do max
= target
._length
- 1
1208 redef fun index
do return curr_pos
1210 redef fun is_ok
do return curr_pos
<= max
1212 redef fun item
do return target
[curr_pos
]
1214 redef fun next
do curr_pos
+= 1
1218 redef class NativeString
1221 return to_s_with_length
(cstring_length
)
1224 redef fun to_s_with_length
(length
)
1227 return clean_utf8
(length
)
1230 redef fun to_s_full
(bytelen
, unilen
) do
1231 return new FlatString.full
(self, bytelen
, 0, unilen
)
1234 redef fun to_s_unsafe
(len
) do
1235 if len
== null then len
= cstring_length
1236 return new FlatString.with_infos
(self, len
, 0)
1239 redef fun to_s_with_copy
do return to_s_with_copy_and_length
(cstring_length
)
1241 # Get a `String` from `length` bytes at `self` copied into Nit memory
1242 fun to_s_with_copy_and_length
(length
: Int): String
1244 var r
= clean_utf8
(length
)
1245 if r
.items
!= self then return r
1246 var new_self
= new NativeString(length
+ 1)
1247 copy_to
(new_self
, length
, 0, 0)
1248 var str
= new FlatString.with_infos
(new_self
, length
, 0)
1249 new_self
[length
] = 0u8
1250 str
.to_cstring
= new_self
1254 # Cleans a NativeString if necessary
1255 fun clean_utf8
(len
: Int): FlatString do
1256 var replacements
: nullable Array[Int] = null
1257 var end_length
= len
1263 var i
= fetch_4_chars
(pos
)
1264 if i
& 0x80808080 != 0 then break
1269 if rem
== 0 then break
1271 if b
& 0x80u
8 == 0x00u
8 then
1277 var nxst
= length_of_char_at
(pos
)
1280 ok_st
= b
& 0x80u
8 == 0u8
1281 else if nxst
== 2 then
1282 ok_st
= b
& 0xE0u
8 == 0xC0u
8
1283 else if nxst
== 3 then
1284 ok_st
= b
& 0xF0u
8 == 0xE0u
8
1286 ok_st
= b
& 0xF8u
8 == 0xF0u
8
1289 if replacements
== null then replacements
= new Array[Int]
1290 replacements
.add pos
1298 var c
= char_at
(pos
)
1299 var cp
= c
.code_point
1301 ok_c
= cp
>= 0 and cp
<= 0x7F
1302 else if nxst
== 2 then
1303 ok_c
= cp
>= 0x80 and cp
<= 0x7FF
1304 else if nxst
== 3 then
1305 ok_c
= cp
>= 0x800 and cp
<= 0xFFFF
1306 ok_c
= ok_c
and not (cp
>= 0xD800 and cp
<= 0xDFFF) and cp
!= 0xFFFE and cp
!= 0xFFFF
1308 ok_c
= cp
>= 0x10000 and cp
<= 0x10FFFF
1311 if replacements
== null then replacements
= new Array[Int]
1312 replacements
.add pos
1319 var clen
= c
.u8char_len
1325 if end_length
!= len
then
1326 ret
= new NativeString(end_length
)
1329 var repls
= replacements
.as(not null)
1330 var r
= repls
.items
.as(not null)
1331 var imax
= repls
.length
1332 for i
in [0 .. imax
[ do
1334 var chkln
= repl_pos
- old_repl
1335 copy_to
(ret
, chkln
, old_repl
, off
)
1338 ret
[off
+ 1] = 0xBFu
8
1339 ret
[off
+ 2] = 0xBDu
8
1340 old_repl
= repl_pos
+ 1
1343 copy_to
(ret
, len
- old_repl
, old_repl
, off
)
1345 return new FlatString.full
(ret
, end_length
, 0, chr_ln
)
1348 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
1350 # Very unsafe, make sure to have room for this char prior to calling this function.
1351 private fun set_char_at
(pos
: Int, c
: Char) do
1352 if c
.code_point
< 128 then
1353 self[pos
] = c
.code_point
.to_b
1356 var ln
= c
.u8char_len
1357 native_set_char
(pos
, c
, ln
)
1360 private fun native_set_char
(pos
: Int, c
: Char, ln
: Int) `{
1361 char* dst = self + pos;
1367 dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
1368 dst[1] = 0x80 | (c & 0x3F);
1371 dst[0] = 0xE0 | ((c & 0xF000) >> 12);
1372 dst[1] = 0x80 | ((c & 0xFC0) >> 6);
1373 dst[2] = 0x80 | (c & 0x3F);
1376 dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
1377 dst[1] = 0x80 | ((c & 0x3F000) >> 12);
1378 dst[2] = 0x80 | ((c & 0xFC0) >> 6);
1379 dst[3] = 0x80 | (c & 0x3F);
1386 # Returns a displayable, signed, base-10 representation of the integer
1388 # assert 1.to_s == "1"
1389 # assert (-123).to_s == "-123"
1391 # Fast case for common numbers
1392 if self == 0 then return "0"
1393 if self == 1 then return "1"
1395 var nslen
= int_to_s_len
1396 var ns
= new NativeString(nslen
+ 1)
1398 native_int_to_s
(ns
, nslen
+ 1)
1399 return new FlatString.full
(ns
, nslen
, 0, nslen
)
1403 redef class Array[E
]
1405 # Fast implementation
1406 redef fun plain_to_s
1409 if l
== 0 then return ""
1410 var its
= _items
.as(not null)
1412 if l
== 1 then if first
== null then return "" else return first
.to_s
1413 var na
= new NativeArray[String](l
)
1419 if itsi
== null then
1429 var ns
= new NativeString(sl
+ 1)
1435 if tmp
isa FlatString then
1436 var tpl
= tmp
._bytelen
1437 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1440 for j
in tmp
.substrings
do
1441 var s
= j
.as(FlatString)
1442 var slen
= s
._bytelen
1443 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1449 return new FlatString.with_infos
(ns
, sl
, 0)
1453 redef class NativeArray[E
]
1454 redef fun native_to_s
do
1455 assert self isa NativeArray[String]
1466 var ns
= new NativeString(sl
+ 1)
1472 if tmp
isa FlatString then
1473 var tpl
= tmp
._bytelen
1474 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1477 for j
in tmp
.substrings
do
1478 var s
= j
.as(FlatString)
1479 var slen
= s
._bytelen
1480 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1486 return new FlatString.with_infos
(ns
, sl
, 0)
1490 redef class Map[K
,V
]
1491 redef fun join
(sep
, couple_sep
)
1493 if is_empty
then return ""
1495 var s
= new Buffer # Result
1501 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1503 # Concat other _items
1509 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")