1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
# True while `tgt` has not been cleared by `next`.
redef fun is_ok do
	return tgt != null
end
# Consumes the only element by clearing `tgt`.
redef fun next do
	tgt = null
end
# Index of the first byte of the text in the underlying CString
#
# This base implementation always answers 0.
protected fun first_byte: Int do
	return 0
end
# Index of the last byte of the text in the underlying CString
#
# Computed as `first_byte + _byte_length - 1`.
protected fun last_byte: Int do
	var start = first_byte
	return start + _byte_length - 1
end
45 # Cache of the latest position (char) explored in the string
48 # Cached position (bytes) in the CString underlying the String
51 # Index of the character `index` in `_items`
52 fun char_to_byte_index
(index
: Int): Int do
53 var dpos
= index
- _position
58 if its
[b
] & 0x80 == 0x00 then
61 b
+= its
.length_of_char_at
(b
)
68 b
= its
.find_beginning_of_char_at
(b
- 1)
73 if dpos
== 0 then return b
77 # Find best insertion point
78 var delta_begin
= index
79 var delta_end
= (ln
- 1) - index
80 var delta_cache
= (pos
- index
).abs
83 if delta_cache
< min
then min
= delta_cache
84 if delta_end
< min
then min
= delta_end
89 if min
== delta_cache
then
92 else if min
== delta_begin
then
96 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
100 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
108 # By escaping `self` to HTML, how many more bytes will be needed ?
109 fun chars_to_html_escape
: Int do
118 else if c
== u
'>' then
120 else if c
== u
'&' then
122 else if c
== u
'"' then
124 else if c
== u
'\'' then
126 else if c == 0x2F then
134 redef fun html_escape
136 var extra = chars_to_html_escape
137 if extra == 0 then return to_s
141 var nlen = extra + _byte_length
142 var nits = new CString(nlen)
147 # Some HTML characters are used as meta-data, they need
148 # to be replaced by an HTML-Escaped equivalent
151 nits[outpos + 1] = u'l
'
152 nits[outpos + 2] = u't
'
153 nits[outpos + 3] = u';'
155 else if c == u'>' then
157 nits[outpos + 1] = u'g
'
158 nits[outpos + 2] = u't
'
159 nits[outpos + 3] = u';'
161 else if c == u'&' then
163 nits[outpos + 1] = u'a
'
164 nits[outpos + 2] = u'm
'
165 nits[outpos + 3] = u'p
'
166 nits[outpos + 4] = u';'
168 else if c == u'"' then
170 nits[outpos + 1] = u'#'
171 nits[outpos + 2] = u'3'
172 nits[outpos + 3] = u'4'
173 nits[outpos + 4] = u';'
175 else if c == u'\'' then
177 nits[outpos + 1] = u'#'
178 nits[outpos + 2] = u'3'
179 nits[outpos + 3] = u'9'
180 nits[outpos + 4] = u';'
182 else if c == u'/' then
184 nits[outpos + 1] = u'#'
185 nits[outpos + 2] = u'4'
186 nits[outpos + 3] = u'7'
187 nits[outpos + 4] = u';'
195 var s = new FlatString.with_infos(nits, nlen, 0)
199 # By escaping `self` to C, how many more bytes will be needed ?
201 # This enables a double-optimization in `escape_to_c` since if this
202 # method returns 0, then `self` does not need escaping and can be
204 fun chars_to_escape_to_c: Int do
213 else if c == u'\t' then
215 else if c == u'"' then
217 else if c == u'\
'' then
219 else if c
== u
'\\' then
221 else if c
== u
'?' then
225 # We ignore `??'` because it will be escaped as `??\'`.
245 redef fun escape_to_c
do
246 var ln_extra
= chars_to_escape_to_c
247 if ln_extra
== 0 then return self.to_s
250 var nlen
= _byte_length
+ ln_extra
251 var nns
= new CString(nlen
)
258 # Any byte with value < 32 is a control character
259 # All their uses will be replaced by their octal
262 # There are two exceptions however:
267 # Aside from the code points above, the following are:
276 else if c
== u
'\n' then
280 else if c
== u
'"' then
284 else if c
== u
'\'' then
286 nns[opos + 1] = u'\
''
288 else if c
== u
'\\' then
290 nns
[opos
+ 1] = u
'\\'
292 else if c
== u
'?' then
296 # We ignore `??'` because it will be escaped as `??\'`.
316 nns
[opos
+ 2] = ((c
& 0x38) >> 3) + u
'0'
317 nns
[opos
+ 3] = (c
& 0x07) + u
'0'
325 return nns
.to_s_unsafe
(nlen
, copy
=false, clean
=false)
328 redef fun [](index
) do
332 # * ~70% want the next char
333 # * ~23% want the previous
334 # * ~7% want the same char
336 # So it makes sense to shortcut early. And early is here.
337 var dpos
= index
- _position
339 if dpos
== 1 and index
< len
- 1 then
342 if c
& 0x80 == 0x00 then
343 # We want the next, and current is easy.
344 # So next is easy to find!
348 # The rest will be done by `dpos==0` below.
351 else if dpos
== -1 and index
> 1 then
354 if c
& 0x80 == 0x00 then
355 # We want the previous, and it is easy.
364 # We know what we want (+0 or +1) just get it now!
367 if c
& 0x80 == 0x00 then return c
.code_point
368 return items
.char_at
(b
)
371 assert index
>= 0 and index
< len
372 return fetch_char_at
(index
)
375 # Gets a `Char` at `index` in `self`
377 # WARNING: Use at your own risk, as no bounds checking is done
378 fun fetch_char_at
(index
: Int): Char do
379 var i
= char_to_byte_index
(index
)
382 if b
& 0x80 == 0x00 then return b
.code_point
383 return items
.char_at
(i
)
386 # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
388 # assert "ff".to_hex == 255
389 redef fun to_hex
(pos
, ln
) do
391 if pos
== null then pos
= 0
392 if ln
== null then ln
= length
- pos
393 pos
= char_to_byte_index
(pos
)
396 for i
in [pos
.. max
[ do
398 res
+= its
[i
].code_point
.from_hex
403 redef fun copy_to_native
(dst
, n
, src_off
, dst_off
) do
404 _items
.copy_to
(dst
, n
, first_byte
+ src_off
, dst_off
)
408 # Immutable strings of characters.
409 abstract class FlatString
413 # Index at which `self` begins in `_items`, inclusively
414 redef var first_byte
is noinit
# Character view over `self`
redef fun chars do
	var view = new FlatStringCharView(self)
	return view
end
# Byte view over `self`
redef fun bytes do
	var view = new FlatStringByteView(self)
	return view
end
420 redef fun to_cstring
do
421 var blen
= _byte_length
422 var new_items
= new CString(blen
+ 1)
423 _items
.copy_to
(new_items
, blen
, _first_byte
, 0)
428 redef fun reversed
do
429 var b
= new FlatBuffer.with_capacity
(_byte_length
+ 1)
432 b
.add
self.fetch_char_at
(i
)
435 var s
= b
.to_s
.as(FlatString)
436 s
._length
= self._length
# Delegates to `_items.fast_cstring`, starting at `_first_byte`.
redef fun fast_cstring do
	var start = _first_byte
	return _items.fast_cstring(start)
end
442 redef fun substring
(from
, count
)
444 if count
<= 0 then return ""
448 if count
<= 0 then return ""
453 if (count
+ from
) > ln
then count
= ln
- from
454 if count
<= 0 then return ""
455 var end_index
= from
+ count
- 1
456 return substring_impl
(from
, count
, end_index
)
459 private fun substring_impl
(from
, count
, end_index
: Int): String do
460 var cache
= _position
461 var dfrom
= (cache
- from
).abs
462 var dend
= (end_index
- from
).abs
467 bytefrom
= char_to_byte_index
(from
)
468 byteto
= char_to_byte_index
(end_index
)
470 byteto
= char_to_byte_index
(end_index
)
471 bytefrom
= char_to_byte_index
(from
)
475 byteto
+= its
.length_of_char_at
(byteto
) - 1
477 var s
= new FlatString.full
(its
, byteto
- bytefrom
+ 1, bytefrom
, count
)
# The empty string literal, cast to `FlatString`
redef fun empty do
	var res = ""
	return res.as(FlatString)
end
485 var outstr
= new FlatBuffer.with_capacity
(self._byte_length
+ 1)
491 outstr
.add
(chars
[pos
].to_upper
)
500 var outstr
= new FlatBuffer.with_capacity
(self._byte_length
+ 1)
506 outstr
.add
(chars
[pos
].to_lower
)
515 for i
in chars
do i
.output
518 ##################################################
519 # String Specific Methods #
520 ##################################################
522 # Low-level creation of a new string with minimal data.
524 # `_items` will be used as is, without copy, to retrieve the characters of the string.
525 # Aliasing issues are the responsibility of the caller.
526 private new with_infos
(items
: CString, byte_length
, from
: Int)
528 var len
= items
.utf8_length
(from
, byte_length
)
529 if byte_length
== len
then return new ASCIIFlatString.full_data
(items
, byte_length
, from
, len
)
530 return new UnicodeFlatString.full_data
(items
, byte_length
, from
, len
)
533 # Low-level creation of a new string with all the data.
535 # `_items` will be used as is, without copy, to retrieve the characters of the string.
536 # Aliasing issues are the responsibility of the caller.
537 private new full
(items
: CString, byte_length
, from
, length
: Int)
539 if byte_length
== length
then return new ASCIIFlatString.full_data
(items
, byte_length
, from
, length
)
540 return new UnicodeFlatString.full_data
(items
, byte_length
, from
, length
)
545 if not other
isa FlatText then return super
547 if self.object_id
== other
.object_id
then return true
549 var my_length
= _byte_length
551 if other
._byte_length
!= my_length
then return false
553 var my_index
= _first_byte
554 var its_index
= other
.first_byte
556 var last_iteration
= my_index
+ my_length
558 var its_items
= other
._items
559 var my_items
= self._items
561 while my_index
< last_iteration
do
562 if my_items
[my_index
] != its_items
[its_index
] then return false
572 if not other
isa FlatText then return super
574 if self.object_id
== other
.object_id
then return false
577 var itsits
= other
._items
579 var mbt
= _byte_length
580 var obt
= other
.byte_length
582 var minln
= if mbt
< obt
then mbt
else obt
583 var mst
= _first_byte
584 var ost
= other
.first_byte
586 for i
in [0 .. minln
[ do
587 var my_curr_char
= myits
[mst
]
588 var its_curr_char
= itsits
[ost
]
590 if my_curr_char
> its_curr_char
then return false
591 if my_curr_char
< its_curr_char
then return true
602 var slen
= s
.byte_length
603 var mlen
= _byte_length
604 var nlen
= mlen
+ slen
606 var mifrom
= _first_byte
607 if s
isa FlatText then
609 var sifrom
= s
.first_byte
610 var ns
= new CString(nlen
+ 1)
611 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
612 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
613 return new FlatString.full
(ns
, nlen
, 0, _length
+ o
.length
)
620 var mybtlen
= _byte_length
621 var new_byte_length
= mybtlen
* i
623 var newlen
= mylen
* i
626 var ns
= new CString(new_byte_length
+ 1)
627 ns
[new_byte_length
] = 0
630 its
.copy_to
(ns
, mybtlen
, fb
, offset
)
634 return new FlatString.full
(ns
, new_byte_length
, 0, newlen
)
639 if hash_cache
== null then
640 # djb2 hash algorithm
644 var my_items
= _items
648 h
= (h
<< 5) + h
+ my_items
[i
].to_i
655 return hash_cache
.as(not null)
# Iterator over the substrings of `self`: a `FlatSubstringsIter` built on `self`
redef fun substrings do
	var iter = new FlatSubstringsIter(self)
	return iter
end
661 # Regular Nit UTF-8 strings
662 private class UnicodeFlatString
665 init full_data
(items
: CString, byte_length
, from
, length
: Int) do
667 self._length
= length
668 self._byte_length
= byte_length
673 redef fun substring_from
(from
) do
674 if from
>= self._length
then return empty
675 if from
<= 0 then return self
676 var c
= char_to_byte_index
(from
)
677 var st
= c
- _first_byte
678 var fln
= byte_length
- st
679 return new FlatString.full
(items
, fln
, c
, _length
- from
)
683 # Special cases of String where all the characters are ASCII-based
685 # Optimizes access operations to O(1) complexity.
686 private class ASCIIFlatString
689 init full_data
(items
: CString, byte_length
, from
, length
: Int) do
691 self._length
= length
692 self._byte_length
= byte_length
698 assert idx
< _byte_length
and idx
>= 0
699 return _items
[idx
+ _first_byte
].code_point
702 redef fun substring
(from
, count
) do
704 if count
<= 0 then return ""
705 if (count
+ from
) > ln
then count
= ln
- from
706 if count
<= 0 then return ""
709 if count
<= 0 then return ""
712 return new ASCIIFlatString.full_data
(_items
, count
, from
+ _first_byte
, count
)
715 redef fun reversed
do
716 var b
= new FlatBuffer.with_capacity
(_byte_length
+ 1)
722 var s
= b
.to_s
.as(FlatString)
# Byte index of the character at `index`: `index` shifted by `_first_byte`
redef fun char_to_byte_index(index) do
	var offset = _first_byte
	return index + offset
end
728 redef fun substring_impl
(from
, count
, end_index
) do
729 return new ASCIIFlatString.full_data
(_items
, count
, from
+ _first_byte
, count
)
# Reads the byte at `i + _first_byte` in `_items` and answers its code point as a `Char`
redef fun fetch_char_at(i) do
	var byte = _items[i + _first_byte]
	return byte.code_point
end
735 private class FlatStringCharReverseIterator
736 super IndexedIterator[Char]
738 var target
: FlatString
# Valid while the cursor has not gone below index 0
redef fun is_ok do
	return curr_pos >= 0
end
# Character of `target` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target[pos]
end
# Moves the cursor one character backwards
redef fun next do
	curr_pos -= 1
end
# Current cursor position
redef fun index do
	return curr_pos
end
752 private class FlatStringCharIterator
753 super IndexedIterator[Char]
755 var target
: FlatString
757 var max
: Int is noautoinit
# Records the index of the last character of `target` in `max`
init do
	var last = target._length - 1
	max = last
end
# Valid while the cursor has not passed `max`
redef fun is_ok do
	return curr_pos <= max
end
# Character of `target` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target[pos]
end
# Moves the cursor one character forwards
redef fun next do
	curr_pos += 1
end
# Current cursor position
redef fun index do
	return curr_pos
end
773 private class FlatStringCharView
776 redef type SELFTYPE: FlatString
# Character at `index`, delegated to `target`
redef fun [](index) do
	return target[index]
end
# Forward character iterator over `target`, beginning at `start`
redef fun iterator_from(start) do
	var iter = new FlatStringCharIterator(target, start)
	return iter
end
# Backward character iterator over `target`, beginning at `start`
redef fun reverse_iterator_from(start) do
	var iter = new FlatStringCharReverseIterator(target, start)
	return iter
end
786 private class FlatStringByteReverseIterator
787 super IndexedIterator[Int]
789 var target
: FlatString
791 var target_items
: CString is noautoinit
798 target_items
= tgt
._items
799 curr_pos
+= tgt
._first_byte
# Valid while the cursor has not gone below the first byte of `target`
redef fun is_ok do
	var lower = target._first_byte
	return curr_pos >= lower
end
# Byte of `target_items` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target_items[pos]
end
# Moves the cursor one byte backwards
redef fun next do
	curr_pos -= 1
end
# Cursor position relative to the first byte of `target`
redef fun index do
	var base = target._first_byte
	return curr_pos - base
end
812 private class FlatStringByteIterator
813 super IndexedIterator[Int]
815 var target
: FlatString
817 var target_items
: CString is noautoinit
824 target_items
= tgt
._items
825 curr_pos
+= tgt
._first_byte
# Valid while the cursor has not passed the last byte of `target`
redef fun is_ok do
	var upper = target.last_byte
	return curr_pos <= upper
end
# Byte of `target_items` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target_items[pos]
end
# Moves the cursor one byte forwards
redef fun next do
	curr_pos += 1
end
# Cursor position relative to the first byte of `target`
redef fun index do
	var base = target._first_byte
	return curr_pos - base
end
838 private class FlatStringByteView
841 redef type SELFTYPE: FlatString
845 # Check that the index (+ _first_byte) is not larger than last_byte
846 # In other terms, if the index is valid
848 assert index
>= 0 and index
< target
._byte_length
849 var ind
= index
+ target
._first_byte
850 return target
._items
[ind
]
# Forward byte iterator over `target`, beginning at `start`
redef fun iterator_from(start) do
	var iter = new FlatStringByteIterator(target, start)
	return iter
end
# Backward byte iterator over `target`, beginning at `start`
redef fun reverse_iterator_from(start) do
	var iter = new FlatStringByteReverseIterator(target, start)
	return iter
end
# Default factory: answers a fresh `FlatBuffer`
redef new do
	return new FlatBuffer
end
# Factory answering a `FlatBuffer` built by `with_capacity(i)`
redef new with_cap(i) do
	return new FlatBuffer.with_capacity(i)
end
865 # Mutable strings of characters.
# Character view over `self`
redef fun chars do
	var view = new FlatBufferCharView(self)
	return view
end
# Byte view over `self`
redef fun bytes do
	var view = new FlatBufferByteView(self)
	return view
end
874 private var capacity
= 0
# Delegates to `_items.fast_cstring`, starting at byte 0.
redef fun fast_cstring do
	return _items.fast_cstring(0)
end
# Iterator over the substrings of `self`: a `FlatSubstringsIter` built on `self`
redef fun substrings do
	var iter = new FlatSubstringsIter(self)
	return iter
end
880 # Re-copies the `CString` into a new one and sets it as the new `Buffer`
882 # This happens when an operation modifies the current `Buffer` and
883 # the Copy-On-Write flag `written` is set to true.
885 var nns
= new CString(capacity
)
886 if _byte_length
!= 0 then _items
.copy_to
(nns
, _byte_length
, 0, 0)
891 # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
893 # Internal only: does not modify _byte_length or length; this is the caller's responsibility
894 private fun rshift_bytes
(from
: Int, len
: Int) do
897 var bt
= _byte_length
898 if bt
+ len
> capacity
then
899 capacity
= capacity
* 2 + 2
900 nit
= new CString(capacity
)
901 oit
.copy_to
(nit
, 0, 0, from
)
903 oit
.copy_to
(nit
, bt
- from
, from
, from
+ len
)
906 # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
908 # Internal only: does not modify _byte_length or length; this is the caller's responsibility
909 private fun lshift_bytes
(from
: Int, len
: Int) do
911 it
.copy_to
(it
, _byte_length
- from
, from
, from
- len
)
914 redef fun []=(index
, item
)
916 assert index
>= 0 and index
<= _length
917 if written
then reset
918 if index
== _length
then
923 var ip
= it
.char_to_byte_index
(index
)
924 var c
= it
.char_at
(ip
)
925 var clen
= c
.u8char_len
926 var itemlen
= item
.u8char_len
927 var size_diff
= itemlen
- clen
928 if size_diff
> 0 then
929 rshift_bytes
(ip
+ clen
, size_diff
)
930 else if size_diff
< 0 then
931 lshift_bytes
(ip
+ clen
, -size_diff
)
933 _byte_length
+= size_diff
934 it
.set_char_at
(ip
, item
)
937 redef fun insert
(s
, pos
) do
938 assert pos
>= 0 and pos
<= length
939 if pos
== length
then
943 var slen
= s
.byte_length
944 enlarge
(byte_length
+ slen
)
946 var shpos
= it
.char_to_byte_index
(pos
)
947 rshift_bytes
(shpos
, slen
)
948 s
.copy_to_native
(it
, slen
, 0, shpos
)
953 redef fun insert_char
(c
, pos
) do
954 assert pos
>= 0 and pos
<= length
955 if pos
== length
then
959 var clen
= c
.u8char_len
960 enlarge
(byte_length
+ clen
)
962 var shpos
= it
.char_to_byte_index
(pos
)
963 rshift_bytes
(shpos
, clen
)
964 it
.set_char_at
(shpos
, c
)
971 if written
then reset
972 var clen
= c
.u8char_len
973 var bt
= _byte_length
975 _items
.set_char_at
(bt
, c
)
# A freshly created, empty `Buffer`
redef fun empty do
	var res = new Buffer
	return res
end
991 redef fun enlarge
(cap
)
994 if cap
<= c
then return
995 if c
<= 16 then c
= 16
996 while c
<= cap
do c
= c
* 2
997 # The COW flag can be set to false here, since
998 # it does a copy of the current `Buffer`
1000 var bln
= _byte_length
1001 var a
= new CString(c
)
1004 if bln
> 0 then it
.copy_to
(a
, bln
, 0, 0)
1013 var bln
= _byte_length
1014 if bln
== 0 then _items
= new CString(1)
1015 return new FlatString.full
(_items
, bln
, 0, _length
)
1018 redef fun to_cstring
1020 var bln
= _byte_length
1021 var new_native
= new CString(bln
+ 1)
1023 if _length
> 0 then _items
.copy_to
(new_native
, bln
, 0, 0)
1027 # Create a new empty string.
1030 # Low-level creation of a new buffer with given data.
1032 # `_items` will be used as is, without copy, to store the characters of the buffer.
1033 # Aliasing issues are the responsibility of the caller.
1035 # If `_items` is shared, `written` should be set to true after the creation
1036 # so that a modification will do a copy-on-write.
1037 private init with_infos
(items
: CString, capacity
, byte_length
, length
: Int)
1040 self.capacity
= capacity
1041 self._byte_length
= byte_length
1042 self._length
= length
1045 # Create a new string copied from `s`.
1048 _items
= new CString(s
.byte_length
)
1049 for i
in s
.substrings
do i
._items
.copy_to
(_items
, i
._byte_length
, first_byte
, 0)
1050 _byte_length
= s
.byte_length
1052 _capacity
= _byte_length
1055 # Create a new empty string with a given capacity.
1056 init with_capacity
(cap
: Int)
1059 _items
= new CString(cap
)
1066 if s
.is_empty
then return
1067 var sl
= s
.byte_length
1068 var nln
= _byte_length
+ sl
1070 if s
isa FlatText then
1071 s
._items
.copy_to
(_items
, sl
, s
.first_byte
, _byte_length
)
1073 for i
in s
.substrings
do append i
1080 # Copies the content of self in `dest`
1081 fun copy
(start
: Int, len
: Int, dest
: Buffer, new_start
: Int)
1083 var self_chars
= self.chars
1084 var dest_chars
= dest
.chars
1085 for i
in [0..len-1
] do
1086 dest_chars
[new_start
+i
] = self_chars
[start
+i
]
1090 redef fun substring
(from
, count
)
1093 if from
< 0 then from
= 0
1094 if (from
+ count
) > _length
then count
= _length
- from
1095 if count
<= 0 then return new Buffer
1097 var bytefrom
= its
.char_to_byte_index
(from
)
1098 var byteto
= its
.char_to_byte_index
(count
+ from
- 1)
1099 byteto
+= its
.char_at
(byteto
).u8char_len
- 1
1100 var byte_length
= byteto
- bytefrom
+ 1
1101 var r_items
= new CString(byte_length
)
1102 its
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
1103 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
, count
)
1106 redef fun append_substring_impl
(s
, from
, length
) do
1107 if length
<= 0 then return
1108 if not s
isa FlatText then
1113 var bytest
= s
.char_to_byte_index
(from
)
1114 var bytend
= s
.char_to_byte_index
(from
+ length
- 1)
1115 var btln
= bytend
- bytest
+ sits
.char_at
(bytend
).u8char_len
1116 enlarge
(btln
+ _byte_length
)
1117 sits
.copy_to
(_items
, btln
, bytest
, _byte_length
)
1118 _byte_length
+= btln
1122 redef fun remove_at
(p
, len
) do
1123 if len
== null then len
= 1
1124 if len
== 0 then return
1126 var bst
= char_to_byte_index
(p
)
1127 var bend
= char_to_byte_index
(p
+ len
- 1)
1128 bend
+= its
.char_at
(bend
).u8char_len
1129 var blen
= bend
- bst
1130 lshift_bytes
(bend
, bend
- bst
)
1138 var ns
= new FlatBuffer.with_capacity
(capacity
)
1139 for i
in chars
.reverse_iterator
do ns
.add i
1143 redef fun times
(repeats
)
1145 var bln
= _byte_length
1146 var x
= new FlatString.full
(_items
, bln
, 0, _length
)
1147 for i
in [1 .. repeats
[ do
1154 if written
then reset
1155 for i
in [0 .. _length
[ do self[i
] = self[i
].to_upper
1160 if written
then reset
1161 for i
in [0 .. _length
[ do self[i
] = self[i
].to_lower
1165 private class FlatBufferByteReverseIterator
1166 super IndexedIterator[Int]
1168 var target
: FlatBuffer
1170 var target_items
: CString is noautoinit
# Caches the `_items` of `target` in `target_items`
init do
	var its = target._items
	target_items = its
end
# Current cursor position
redef fun index do
	return curr_pos
end
# Valid while the cursor has not gone below index 0
redef fun is_ok do
	return curr_pos >= 0
end
# Byte of `target_items` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target_items[pos]
end
# Moves the cursor one byte backwards
redef fun next do
	curr_pos -= 1
end
1186 private class FlatBufferByteView
1187 super BufferByteView
1189 redef type SELFTYPE: FlatBuffer
# Byte at `index`, read directly from the `_items` of `target`
redef fun [](index) do
	var its = target._items
	return its[index]
end
# Forward byte iterator over `target`, beginning at `pos`
redef fun iterator_from(pos) do
	var iter = new FlatBufferByteIterator(target, pos)
	return iter
end
# Backward byte iterator over `target`, beginning at `pos`
redef fun reverse_iterator_from(pos) do
	var iter = new FlatBufferByteReverseIterator(target, pos)
	return iter
end
1199 private class FlatBufferByteIterator
1200 super IndexedIterator[Int]
1202 var target
: FlatBuffer
1204 var target_items
: CString is noautoinit
# Caches the `_items` of `target` in `target_items`, but only when they are already set
init do
	if isset target._items then
		target_items = target._items
	end
end
# Current cursor position
redef fun index do
	return curr_pos
end
# Valid while the cursor is strictly below the byte length of `target`
redef fun is_ok do
	var limit = target._byte_length
	return curr_pos < limit
end
# Byte of `target_items` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target_items[pos]
end
# Moves the cursor one byte forwards
redef fun next do
	curr_pos += 1
end
1220 private class FlatBufferCharReverseIterator
1221 super IndexedIterator[Char]
1223 var target
: FlatBuffer
# Current cursor position
redef fun index do
	return curr_pos
end
# Valid while the cursor has not gone below index 0
redef fun is_ok do
	return curr_pos >= 0
end
# Character of `target` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target[pos]
end
# Moves the cursor one character backwards
redef fun next do
	curr_pos -= 1
end
1237 private class FlatBufferCharView
1238 super BufferCharView
1240 redef type SELFTYPE: FlatBuffer
# Character at `index`, delegated to `target`
redef fun [](index) do
	return target[index]
end
1244 redef fun []=(index
, item
)
1246 assert index
>= 0 and index
<= length
1247 if index
== length
then
1251 target
[index
] = item
1264 fun enlarge
(cap
: Int)
1271 var s_length
= s
.length
1272 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
._length
)
1273 for i
in s
do target
.add i
# Forward character iterator over `target`, beginning at `pos`
redef fun iterator_from(pos) do
	var iter = new FlatBufferCharIterator(target, pos)
	return iter
end
# Backward character iterator over `target`, beginning at `pos`
redef fun reverse_iterator_from(pos) do
	var iter = new FlatBufferCharReverseIterator(target, pos)
	return iter
end
1282 private class FlatBufferCharIterator
1283 super IndexedIterator[Char]
1285 var target
: FlatBuffer
1287 var max
: Int is noautoinit
# Records the index of the last character of `target` in `max`
init do
	var last = target._length - 1
	max = last
end
# Current cursor position
redef fun index do
	return curr_pos
end
# Valid while the cursor has not passed `max`
redef fun is_ok do
	return curr_pos <= max
end
# Character of `target` at the current cursor position
redef fun item do
	var pos = curr_pos
	return target[pos]
end
# Moves the cursor one character forwards
redef fun next do
	curr_pos += 1
end
# Builds a `String` from the data at `self`, copied into Nit memory
#
# Require: `self` is a null-terminated string.
#
# Simply calls `to_s_unsafe` without arguments.
redef fun to_s do
	return to_s_unsafe
end
# Builds a `String` from the first `byte_length` bytes at `self`, copied into Nit memory
#
# The string is cleaned.
#
# Calls `to_s_unsafe` with only `byte_length` supplied.
fun to_s_with_length(byte_length: Int): String do
	return to_s_unsafe(byte_length)
end
1315 redef fun to_s_unsafe
(byte_length
, char_length
, copy
, clean
)
1317 byte_length
= byte_length
or else cstring_length
1318 clean
= clean
or else true
1319 copy
= copy
or else true
1324 str
= clean_utf8
(byte_length
)
1325 char_length
= str
.length
1327 char_length
= char_length
or else utf8_length
(0, byte_length
)
1330 # Copy? (if not already copied by `clean_utf8`)
1331 if copy
and (str
== null or str
.items
== self) then
1332 var new_cstr
= new CString(byte_length
+ 1)
1333 copy_to
(new_cstr
, byte_length
, 0, 0)
1334 new_cstr
[byte_length
] = 0
1335 str
= new FlatString.full
(new_cstr
, byte_length
, 0, char_length
)
1339 str
= new FlatString.full
(self, byte_length
, 0, char_length
)
1345 # Cleans a CString if necessary
1346 fun clean_utf8
(len
: Int): FlatString do
1347 var replacements
: nullable Array[Int] = null
1348 var end_length
= len
1354 var i
= fetch_4_chars
(pos
)
1355 if i
& 0x80808080u
32 != 0u32
then break
1360 if rem
== 0 then break
1362 if b
& 0x80 == 0x00 then
1368 var nxst
= length_of_char_at
(pos
)
1371 ok_st
= b
& 0x80 == 0
1372 else if nxst
== 2 then
1373 ok_st
= b
& 0xE0 == 0xC0
1374 else if nxst
== 3 then
1375 ok_st
= b
& 0xF0 == 0xE0
1377 ok_st
= b
& 0xF8 == 0xF0
1380 if replacements
== null then replacements
= new Array[Int]
1381 replacements
.add pos
1389 var c
= char_at
(pos
)
1390 var cp
= c
.code_point
1392 ok_c
= cp
>= 0 and cp
<= 0x7F
1393 else if nxst
== 2 then
1394 ok_c
= cp
>= 0x80 and cp
<= 0x7FF
1395 else if nxst
== 3 then
1396 ok_c
= cp
>= 0x800 and cp
<= 0xFFFF
1397 ok_c
= ok_c
and not (cp
>= 0xD800 and cp
<= 0xDFFF) and cp
!= 0xFFFE and cp
!= 0xFFFF
1399 ok_c
= cp
>= 0x10000 and cp
<= 0x10FFFF
1402 if replacements
== null then replacements
= new Array[Int]
1403 replacements
.add pos
1410 var clen
= c
.u8char_len
1416 if end_length
!= len
then
1417 ret
= new CString(end_length
)
1420 var repls
= replacements
.as(not null)
1421 var r
= repls
.items
.as(not null)
1422 var imax
= repls
.length
1423 for i
in [0 .. imax
[ do
1425 var chkln
= repl_pos
- old_repl
1426 copy_to
(ret
, chkln
, old_repl
, off
)
1431 old_repl
= repl_pos
+ 1
1434 copy_to
(ret
, len
- old_repl
, old_repl
, off
)
1436 return new FlatString.full
(ret
, end_length
, 0, chr_ln
)
1439 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
1441 # Very unsafe, make sure to have room for this char prior to calling this function.
1442 private fun set_char_at
(pos
: Int, c
: Char) do
1443 var cp
= c
.code_point
1448 var ln
= c
.u8char_len
1450 self[pos
] = 0xC0 | ((cp
& 0x7C0) >> 6)
1451 self[pos
+ 1] = 0x80 | (cp
& 0x3F)
1452 else if ln
== 3 then
1453 self[pos
] = 0xE0 | ((cp
& 0xF000) >> 12)
1454 self[pos
+ 1] = 0x80 | ((cp
& 0xFC0) >> 6)
1455 self[pos
+ 2] = 0x80 | (cp
& 0x3F)
1456 else if ln
== 4 then
1457 self[pos
] = 0xF0 | ((cp
& 0x1C0000) >> 18)
1458 self[pos
+ 1] = 0x80 | ((cp
& 0x3F000) >> 12)
1459 self[pos
+ 2] = 0x80 | ((cp
& 0xFC0) >> 6)
1460 self[pos
+ 3] = 0x80 | (cp
& 0x3F)
1466 # Returns a displayable, signed, base-10 representation of the integer
1468 # assert 1.to_s == "1"
1469 # assert (-123).to_s == "-123"
1471 # Fast case for common numbers
1472 if self == 0 then return "0"
1473 if self == 1 then return "1"
1475 var nslen
= int_to_s_len
1476 var ns
= new CString(nslen
+ 1)
1478 native_int_to_s
(ns
, nslen
+ 1)
1479 return new FlatString.full
(ns
, nslen
, 0, nslen
)
1483 redef class Array[E
]
1485 # Fast implementation
1486 redef fun plain_to_s
1489 if l
== 0 then return ""
1490 var its
= _items
.as(not null)
1492 if l
== 1 then if first
== null then return "" else return first
.to_s
1493 var na
= new NativeArray[String](l
)
1499 if itsi
== null then
1504 sl
+= tmp
.byte_length
1509 var ns
= new CString(sl
+ 1)
1515 if tmp
isa FlatString then
1516 var tpl
= tmp
._byte_length
1517 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1520 for j
in tmp
.substrings
do
1521 var s
= j
.as(FlatString)
1522 var slen
= s
._byte_length
1523 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1529 return new FlatString.with_infos
(ns
, sl
, 0)
1533 redef class NativeArray[E
]
1534 redef fun native_to_s
do
1535 assert self isa NativeArray[String]
1542 sl
+= na
[i
].byte_length
1546 var ns
= new CString(sl
+ 1)
1552 if tmp
isa FlatString then
1553 var tpl
= tmp
._byte_length
1554 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1557 for j
in tmp
.substrings
do
1558 var s
= j
.as(FlatString)
1559 var slen
= s
._byte_length
1560 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1566 return new FlatString.with_infos
(ns
, sl
, 0)
1570 redef class Map[K
,V
]
1571 redef fun join
(sep
, couple_sep
)
1573 if is_empty
then return ""
1575 var s
= new Buffer # Result
1581 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1583 # Concat other _items
1589 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")