1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
# The iterator is valid as long as `tgt` is not null.
32 redef fun is_ok
do return tgt
!= null
# Advancing sets `tgt` to null; `is_ok` (which tests `tgt != null`) is false afterwards.
34 redef fun next
do tgt
= null
# Index of the first byte of the text; this implementation always returns 0.
39 private fun first_byte
: Int do return 0
# Index of the last byte of the text, computed as `_bytelen - 1`.
41 private fun last_byte
: Int do return _bytelen
- 1
43 # Cache of the latest position (char) explored in the string
44 private var position
: Int = 0
46 # Cached position (bytes) in the NativeString underlying the String
47 private var bytepos
: Int = 0
49 # Index of the character `index` in `_items`
50 private fun char_to_byte_index
(index
: Int): Int do
56 # Find best insertion point
57 var delta_begin
= index
58 var delta_end
= (ln
- 1) - index
59 var delta_cache
= (pos
- index
).abs
63 if delta_cache
< min
then min
= delta_cache
64 if delta_end
< min
then min
= delta_end
69 if min
== delta_begin
then
72 else if min
== delta_cache
then
76 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
80 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
88 # By escaping `self` to C, how many more bytes will be needed?
90 # This enables a double-optimization in `escape_to_c` since if this
91 # method returns 0, then `self` does not need escaping and can be
93 protected fun chars_to_escape_to_c
: Int do
102 else if c
== 0x09u
8 then
104 else if c
== 0x22u
8 then
106 else if c
== 0x27u
8 then
108 else if c
== 0x5Cu
8 then
110 else if c
< 32u8
then
118 redef fun escape_to_c
do
119 var ln_extra
= chars_to_escape_to_c
120 if ln_extra
== 0 then return self.to_s
123 var nlen
= _bytelen
+ ln_extra
124 var nns
= new NativeString(nlen
)
131 # Any byte with value < 32 is a control character
132 # All their uses will be replaced by their octal
135 # There are two exceptions however:
140 # Aside from the code points above, the following are:
147 nns
[opos
+ 1] = 0x74u
8
149 else if c
== 0x0Au
8 then
151 nns
[opos
+ 1] = 0x6Eu
8
153 else if c
== 0x22u
8 then
155 nns
[opos
+ 1] = 0x22u
8
157 else if c
== 0x27u
8 then
159 nns
[opos
+ 1] = 0x27u
8
161 else if c
== 0x5Cu
8 then
163 nns
[opos
+ 1] = 0x5Cu
8
165 else if c
< 32u8
then
167 nns
[opos
+ 1] = 0x30u
8
168 nns
[opos
+ 2] = ((c
& 0x38u
8) >> 3) + 0x30u
8
169 nns
[opos
+ 3] = (c
& 0x07u
8) + 0x30u
8
177 return nns
.to_s_with_length
(nlen
)
180 private fun byte_to_char_index
(index
: Int): Int do
186 # Find best insertion point
187 var delta_begin
= index
188 var delta_end
= (ln
- 1) - index
189 var delta_cache
= (pos
- index
).abs
190 var min
= delta_begin
193 if delta_cache
< min
then min
= delta_cache
194 if delta_end
< min
then min
= delta_end
199 if min
== delta_begin
then
202 else if min
== delta_cache
then
206 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
210 my_i
= its
.byte_to_char_index_cached
(index
, my_i
, ns_i
)
# Character at char position `index`.
#
# `index` is first translated to a byte offset with `char_to_byte_index`,
# and the char is then read from `_items` with `char_at` at that offset.
218 redef fun [](index
) do return _items
.char_at
(char_to_byte_index
(index
))
221 # Immutable strings of characters.
226 # Index at which `self` begins in `_items`, inclusively
227 redef var first_byte
is noinit
229 # Index at which `self` ends in `_items`, inclusively
230 redef var last_byte
is noinit
232 redef var chars
= new FlatStringCharView(self) is lazy
234 redef var bytes
= new FlatStringByteView(self) is lazy
236 redef var length
is lazy
do
237 if _bytelen
== 0 then return 0
243 st
+= its
.length_of_char_at
(st
)
251 var b
= new FlatBuffer.with_capacity
(_bytelen
+ 1)
252 for i
in [length
- 1 .. 0].step
(-1) do
255 var s
= b
.to_s
.as(FlatString)
256 s
.length
= self.length
# Delegates to `_items.fast_cstring`, passing `_first_byte`.
#
# NOTE(review): the argument is presumably the byte offset at which the
# returned C string starts - confirm against `NativeString::fast_cstring`.
260 redef fun fast_cstring
do return _items
.fast_cstring
(_first_byte
)
262 redef fun substring
(from
, count
)
268 if count
< 0 then count
= 0
272 if (count
+ from
) > length
then count
= length
- from
273 if count
<= 0 then return ""
274 var end_index
= from
+ count
- 1
276 var bytefrom
= char_to_byte_index
(from
)
277 var byteto
= char_to_byte_index
(end_index
)
279 byteto
+= its
.length_of_char_at
(byteto
) - 1
281 var s
= new FlatString.full
(its
, byteto
- bytefrom
+ 1, bytefrom
, byteto
, count
)
# Returns the empty string literal `""`, cast to `FlatString`.
285 redef fun empty
do return "".as(FlatString)
289 var outstr
= new FlatBuffer.with_capacity
(self._bytelen
+ 1)
295 outstr
.add
(chars
[pos
].to_upper
)
304 var outstr
= new FlatBuffer.with_capacity
(self._bytelen
+ 1)
310 outstr
.add
(chars
[pos
].to_lower
)
319 for i
in chars
do i
.output
322 ##################################################
323 # String Specific Methods #
324 ##################################################
326 # Low-level creation of a new string with minimal data.
328 # `_items` will be used as is, without copy, to retrieve the characters of the string.
329 # Aliasing issues are the responsibility of the caller.
330 private init with_infos
(items
: NativeString, bytelen
, from
, to
: Int)
333 self._bytelen
= bytelen
339 # Low-level creation of a new string with all the data.
341 # `_items` will be used as is, without copy, to retrieve the characters of the string.
342 # Aliasing issues are the responsibility of the caller.
343 private init full
(items
: NativeString, bytelen
, from
, to
, length
: Int)
347 self._bytelen
= bytelen
353 redef fun to_cstring
do
354 if real_items
!= null then return real_items
.as(not null)
356 var new_items
= new NativeString(blen
+ 1)
357 _items
.copy_to
(new_items
, blen
, _first_byte
, 0)
358 new_items
[blen
] = 0u8
359 real_items
= new_items
365 if not other
isa FlatString then return super
367 if self.object_id
== other
.object_id
then return true
369 var my_length
= _bytelen
371 if other
._bytelen
!= my_length
then return false
373 var my_index
= _first_byte
374 var its_index
= other
._first_byte
376 var last_iteration
= my_index
+ my_length
378 var its_items
= other
._items
379 var my_items
= self._items
381 while my_index
< last_iteration
do
382 if my_items
[my_index
] != its_items
[its_index
] then return false
392 if not other
isa FlatString then return super
394 if self.object_id
== other
.object_id
then return false
396 var my_length
= self._bytelen
397 var its_length
= other
._bytelen
399 var max
= if my_length
< its_length
then my_length
else its_length
401 var myits
= self.bytes
402 var itsits
= other
.bytes
404 for i
in [0 .. max
[ do
405 var my_curr_char
= myits
[i
]
406 var its_curr_char
= itsits
[i
]
408 if my_curr_char
!= its_curr_char
then
409 if my_curr_char
< its_curr_char
then return true
414 return my_length
< its_length
421 var nlen
= mlen
+ slen
423 var mifrom
= _first_byte
424 if s
isa FlatText then
426 var sifrom
= s
.first_byte
427 var ns
= new NativeString(nlen
+ 1)
428 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
429 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
430 return new FlatString.full
(ns
, nlen
, 0, nlen
- 1, length
+ o
.length
)
437 var mybtlen
= _bytelen
438 var new_bytelen
= mybtlen
* i
440 var newlen
= mylen
* i
443 var ns
= new NativeString(new_bytelen
+ 1)
444 ns
[new_bytelen
] = 0u8
447 its
.copy_to
(ns
, mybtlen
, fb
, offset
)
451 return new FlatString.full
(ns
, new_bytelen
, 0, new_bytelen
- 1, newlen
)
457 if hash_cache
== null then
458 # djb2 hash algorithm
462 var my_items
= _items
466 h
= (h
<< 5) + h
+ my_items
[i
].to_i
473 return hash_cache
.as(not null)
# Returns a new `FlatSubstringsIter` built on `self`.
476 redef fun substrings
do return new FlatSubstringsIter(self)
479 private class FlatStringCharReverseIterator
480 super IndexedIterator[Char]
482 var target
: FlatString
486 init with_pos
(tgt
: FlatString, pos
: Int)
# Valid while `curr_pos` has not gone below 0 (this iterator moves backwards).
491 redef fun is_ok
do return curr_pos
>= 0
# Element of `target` at position `curr_pos`.
493 redef fun item
do return target
[curr_pos
]
# Moves one position towards the start by decrementing `curr_pos`.
495 redef fun next
do curr_pos
-= 1
# Current index, which is `curr_pos` itself.
497 redef fun index
do return curr_pos
501 private class FlatStringCharIterator
502 super IndexedIterator[Char]
504 var target
: FlatString
510 init with_pos
(tgt
: FlatString, pos
: Int)
512 init(tgt
, tgt
.length
- 1, pos
)
# Valid while `curr_pos` has not passed `max` (inclusive upper bound).
515 redef fun is_ok
do return curr_pos
<= max
# Element of `target` at position `curr_pos`.
517 redef fun item
do return target
[curr_pos
]
# Moves one position forward by incrementing `curr_pos`.
519 redef fun next
do curr_pos
+= 1
# Current index, which is `curr_pos` itself.
521 redef fun index
do return curr_pos
525 private class FlatStringCharView
528 redef type SELFTYPE: FlatString
# Forwards the access to `target[index]`.
530 redef fun [](index
) do return target
[index
]
# Returns a `FlatStringCharIterator` built with `with_pos` on `target` and `start`.
532 redef fun iterator_from
(start
) do return new FlatStringCharIterator.with_pos
(target
, start
)
# Returns a `FlatStringCharReverseIterator` built with `with_pos` on `target` and `start`.
534 redef fun reverse_iterator_from
(start
) do return new FlatStringCharReverseIterator.with_pos
(target
, start
)
538 private class FlatStringByteReverseIterator
539 super IndexedIterator[Byte]
541 var target
: FlatString
543 var target_items
: NativeString
547 init with_pos
(tgt
: FlatString, pos
: Int)
549 init(tgt
, tgt
._items
, pos
+ tgt
._first_byte
)
# Valid while `curr_pos` has not gone below `target._first_byte`.
552 redef fun is_ok
do return curr_pos
>= target
._first_byte
# Byte stored in `target_items` at `curr_pos`.
554 redef fun item
do return target_items
[curr_pos
]
# Moves one byte towards the start by decrementing `curr_pos`.
556 redef fun next
do curr_pos
-= 1
# Index relative to the string: `curr_pos` minus `target._first_byte`.
#
# `curr_pos` indexes `target_items` directly (see `item`), so the offset of
# the string's first byte is subtracted.
558 redef fun index
do return curr_pos
- target
._first_byte
562 private class FlatStringByteIterator
563 super IndexedIterator[Byte]
565 var target
: FlatString
567 var target_items
: NativeString
571 init with_pos
(tgt
: FlatString, pos
: Int)
573 init(tgt
, tgt
._items
, pos
+ tgt
._first_byte
)
# Valid while `curr_pos` has not passed `target._last_byte` (inclusive bound).
576 redef fun is_ok
do return curr_pos
<= target
._last_byte
# Byte stored in `target_items` at `curr_pos`.
578 redef fun item
do return target_items
[curr_pos
]
# Moves one byte forward by incrementing `curr_pos`.
580 redef fun next
do curr_pos
+= 1
# Index relative to the string: `curr_pos` minus `target._first_byte`.
#
# `curr_pos` indexes `target_items` directly (see `item`), so the offset of
# the string's first byte is subtracted.
582 redef fun index
do return curr_pos
- target
._first_byte
586 private class FlatStringByteView
589 redef type SELFTYPE: FlatString
593 # Check that the index (+ _first_byte) is not larger than _last_byte
594 # In other words, check that the index is valid
596 var target
= self.target
597 var ind
= index
+ target
._first_byte
598 assert ind
<= target
._last_byte
599 return target
._items
[ind
]
# Returns a `FlatStringByteIterator` built with `with_pos` on `target` and `start`.
602 redef fun iterator_from
(start
) do return new FlatStringByteIterator.with_pos
(target
, start
)
# Returns a `FlatStringByteReverseIterator` built with `with_pos` on `target` and `start`.
604 redef fun reverse_iterator_from
(start
) do return new FlatStringByteReverseIterator.with_pos
(target
, start
)
# The default factory returns a new `FlatBuffer`.
609 redef new do return new FlatBuffer
# Factory that forwards `i` to `FlatBuffer.with_capacity`.
611 redef new with_cap
(i
) do return new FlatBuffer.with_capacity
(i
)
614 # Mutable strings of characters.
619 redef var chars
: Sequence[Char] = new FlatBufferCharView(self) is lazy
621 redef var bytes
= new FlatBufferByteView(self) is lazy
625 private var char_cache
: Int = -1
627 private var byte_cache
: Int = -1
629 private var capacity
= 0
# Delegates to `_items.fast_cstring`, passing 0.
#
# NOTE(review): the argument is presumably the starting byte offset - confirm
# against `NativeString::fast_cstring`.
631 redef fun fast_cstring
do return _items
.fast_cstring
(0)
# Returns a new `FlatSubstringsIter` built on `self`.
633 redef fun substrings
do return new FlatSubstringsIter(self)
635 # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
637 # This happens when an operation modifies the current `Buffer` and
638 # the Copy-On-Write flag `written` is set to true.
640 var nns
= new NativeString(capacity
)
641 if _bytelen
!= 0 then _items
.copy_to
(nns
, _bytelen
, 0, 0)
646 # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
648 # Internal only, does not modify _bytelen or length, this is the caller's responsibility
649 private fun rshift_bytes
(from
: Int, len
: Int) do
653 if bt
+ len
> capacity
then
654 capacity
= capacity
* 2 + 2
655 nit
= new NativeString(capacity
)
656 oit
.copy_to
(nit
, 0, 0, from
)
658 oit
.copy_to
(nit
, bt
- from
, from
, from
+ len
)
661 # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
663 # Internal only, does not modify _bytelen or length, this is the caller's responsibility
664 private fun lshift_bytes
(from
: Int, len
: Int) do
666 it
.copy_to
(it
, _bytelen
- from
, from
, from
- len
)
669 redef fun []=(index
, item
)
671 assert index
>= 0 and index
<= length
672 if written
then reset
674 if index
== length
then
679 var ip
= it
.char_to_byte_index
(index
)
680 var c
= it
.char_at
(ip
)
681 var clen
= c
.u8char_len
682 var itemlen
= item
.u8char_len
683 var size_diff
= itemlen
- clen
684 if size_diff
> 0 then
685 rshift_bytes
(ip
+ clen
, size_diff
)
686 else if size_diff
< 0 then
687 lshift_bytes
(ip
+ clen
, -size_diff
)
689 _bytelen
+= size_diff
691 it
.set_char_at
(ip
, item
)
696 if written
then reset
698 var clen
= c
.u8char_len
701 _items
.set_char_at
(bt
, c
)
708 if written
then reset
# Returns a freshly created `Buffer`.
713 redef fun empty
do return new Buffer
715 redef fun enlarge
(cap
)
718 if cap
<= c
then return
719 while c
<= cap
do c
= c
* 2 + 2
720 # The COW flag can be set to false here, since
721 # it does a copy of the current `Buffer`
724 var a
= new NativeString(c
+1)
727 if bln
> 0 then it
.copy_to
(a
, bln
, 0, 0)
737 if bln
== 0 then _items
= new NativeString(1)
738 return new FlatString.full
(_items
, bln
, 0, bln
- 1, length
)
745 var new_native
= new NativeString(bln
+ 1)
746 new_native
[bln
] = 0u8
747 if length
> 0 then _items
.copy_to
(new_native
, bln
, 0, 0)
748 real_items
= new_native
751 return real_items
.as(not null)
754 # Create a new empty string.
757 # Low-level creation of a new buffer with the given data.
759 # `_items` will be used as is, without copy, to store the characters of the buffer.
760 # Aliasing issues are the responsibility of the caller.
762 # If `_items` is shared, `written` should be set to true after the creation
763 # so that a modification will do a copy-on-write.
764 private init with_infos
(items
: NativeString, capacity
, bytelen
, length
: Int)
767 self.capacity
= capacity
768 self._bytelen
= bytelen
772 # Create a new string copied from `s`.
775 _items
= new NativeString(s
.bytelen
)
776 if s
isa FlatText then
779 for i
in substrings
do i
.as(FlatString)._items
.copy_to
(_items
, i
._bytelen
, 0, 0)
787 # Create a new empty string with a given capacity.
788 init with_capacity
(cap
: Int)
791 _items
= new NativeString(cap
+ 1)
798 if s
.is_empty
then return
801 var nln
= _bytelen
+ sl
803 if s
isa FlatText then
804 s
._items
.copy_to
(_items
, sl
, s
.first_byte
, _bytelen
)
806 for i
in s
.substrings
do append i
813 # Copies the content of self in `dest`
814 fun copy
(start
: Int, len
: Int, dest
: Buffer, new_start
: Int)
816 var self_chars
= self.chars
817 var dest_chars
= dest
.chars
818 for i
in [0..len-1
] do
819 dest_chars
[new_start
+i
] = self_chars
[start
+i
]
823 redef fun substring
(from
, count
)
826 if from
< 0 then from
= 0
827 if (from
+ count
) > length
then count
= length
- from
830 var bytefrom
= its
.char_to_byte_index
(from
)
831 var byteto
= its
.char_to_byte_index
(count
+ from
- 1)
832 byteto
+= its
.char_at
(byteto
).u8char_len
- 1
833 var byte_length
= byteto
- bytefrom
+ 1
834 var r_items
= new NativeString(byte_length
)
835 its
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
836 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
, count
)
845 var ns
= new FlatBuffer.with_capacity
(capacity
)
846 for i
in chars
.reverse_iterator
do ns
.add i
850 redef fun times
(repeats
)
853 var x
= new FlatString.full
(_items
, bln
, 0, bln
- 1, length
)
854 for i
in [1 .. repeats
[ do
861 if written
then reset
862 for i
in [0 .. length
[ do self[i
] = self[i
].to_upper
867 if written
then reset
868 for i
in [0 .. length
[ do self[i
] = self[i
].to_lower
872 private class FlatBufferByteReverseIterator
873 super IndexedIterator[Byte]
875 var target
: FlatBuffer
877 var target_items
: NativeString
881 init with_pos
(tgt
: FlatBuffer, pos
: Int)
883 init(tgt
, tgt
._items
, pos
)
# Current index, which is `curr_pos` itself.
886 redef fun index
do return curr_pos
# Valid while `curr_pos` has not gone below 0 (this iterator moves backwards).
888 redef fun is_ok
do return curr_pos
>= 0
# Byte stored in `target_items` at `curr_pos`.
890 redef fun item
do return target_items
[curr_pos
]
# Moves one byte towards the start by decrementing `curr_pos`.
892 redef fun next
do curr_pos
-= 1
896 private class FlatBufferByteView
899 redef type SELFTYPE: FlatBuffer
# Byte at `index`, read directly from `target._items`.
#
# NOTE(review): no bounds check against the buffer's byte length is done
# here - confirm that callers guarantee a valid `index`.
901 redef fun [](index
) do return target
._items
[index
]
# Returns a `FlatBufferByteIterator` built with `with_pos` on `target` and `pos`.
903 redef fun iterator_from
(pos
) do return new FlatBufferByteIterator.with_pos
(target
, pos
)
# Returns a `FlatBufferByteReverseIterator` built with `with_pos` on `target` and `pos`.
905 redef fun reverse_iterator_from
(pos
) do return new FlatBufferByteReverseIterator.with_pos
(target
, pos
)
909 private class FlatBufferByteIterator
910 super IndexedIterator[Byte]
912 var target
: FlatBuffer
914 var target_items
: NativeString
918 init with_pos
(tgt
: FlatBuffer, pos
: Int)
920 init(tgt
, tgt
._items
, pos
)
# Current index, which is `curr_pos` itself.
923 redef fun index
do return curr_pos
# Valid while `curr_pos` is strictly below `target._bytelen`.
925 redef fun is_ok
do return curr_pos
< target
._bytelen
# Byte stored in `target_items` at `curr_pos`.
927 redef fun item
do return target_items
[curr_pos
]
# Moves one byte forward by incrementing `curr_pos`.
929 redef fun next
do curr_pos
+= 1
933 private class FlatBufferCharReverseIterator
934 super IndexedIterator[Char]
936 var target
: FlatBuffer
940 init with_pos
(tgt
: FlatBuffer, pos
: Int)
# Current index, which is `curr_pos` itself.
945 redef fun index
do return curr_pos
# Valid while `curr_pos` has not gone below 0 (this iterator moves backwards).
947 redef fun is_ok
do return curr_pos
>= 0
# Element of `target` at position `curr_pos`.
949 redef fun item
do return target
[curr_pos
]
# Moves one position towards the start by decrementing `curr_pos`.
951 redef fun next
do curr_pos
-= 1
955 private class FlatBufferCharView
958 redef type SELFTYPE: FlatBuffer
# Forwards the access to `target[index]`.
960 redef fun [](index
) do return target
[index
]
962 redef fun []=(index
, item
)
964 assert index
>= 0 and index
<= length
965 if index
== length
then
982 fun enlarge
(cap
: Int)
989 var s_length
= s
.length
990 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
.length
)
991 for i
in s
do target
.add i
# Returns a `FlatBufferCharIterator` built with `with_pos` on `target` and `pos`.
994 redef fun iterator_from
(pos
) do return new FlatBufferCharIterator.with_pos
(target
, pos
)
# Returns a `FlatBufferCharReverseIterator` built with `with_pos` on `target` and `pos`.
996 redef fun reverse_iterator_from
(pos
) do return new FlatBufferCharReverseIterator.with_pos
(target
, pos
)
1000 private class FlatBufferCharIterator
1001 super IndexedIterator[Char]
1003 var target
: FlatBuffer
1009 init with_pos
(tgt
: FlatBuffer, pos
: Int)
1011 init(tgt
, tgt
.length
- 1, pos
)
# Current index, which is `curr_pos` itself.
1014 redef fun index
do return curr_pos
# Valid while `curr_pos` has not passed `max` (inclusive upper bound).
1016 redef fun is_ok
do return curr_pos
<= max
# Element of `target` at position `curr_pos`.
1018 redef fun item
do return target
[curr_pos
]
# Moves one position forward by incrementing `curr_pos`.
1020 redef fun next
do curr_pos
+= 1
1024 redef class NativeString
1027 return to_s_with_length
(cstring_length
)
1030 # Returns `self` as a String of `length`.
1031 redef fun to_s_with_length
(length
): FlatString
1034 var str
= new FlatString.with_infos
(self, length
, 0, length
- 1)
1038 redef fun to_s_full
(bytelen
, unilen
) do
1039 return new FlatString.full
(self, bytelen
, 0, bytelen
- 1, unilen
)
1042 # Returns `self` as a new String.
1043 redef fun to_s_with_copy
: FlatString
1045 var length
= cstring_length
1046 var new_self
= new NativeString(length
+ 1)
1047 copy_to
(new_self
, length
, 0, 0)
1048 var str
= new FlatString.with_infos
(new_self
, length
, 0, length
- 1)
1049 new_self
[length
] = 0u8
1050 str
.real_items
= new_self
1054 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
1056 # Very unsafe, make sure to have room for this char prior to calling this function.
1057 private fun set_char_at
(pos
: Int, c
: Char) do
1058 var ln
= c
.u8char_len
1059 native_set_char
(pos
, c
, ln
)
1062 private fun native_set_char
(pos
: Int, c
: Char, ln
: Int) `{
1063 char* dst = self + pos;
1069 dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
1070 dst[1] = 0x80 | (c & 0x3F);
1073 dst[0] = 0xE0 | ((c & 0xF000) >> 12);
1074 dst[1] = 0x80 | ((c & 0xFC0) >> 6);
1075 dst[2] = 0x80 | (c & 0x3F);
1078 dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
1079 dst[1] = 0x80 | ((c & 0x3F000) >> 12);
1080 dst[2] = 0x80 | ((c & 0xFC0) >> 6);
1081 dst[3] = 0x80 | (c & 0x3F);
1088 redef fun to_base
(base
, signed
)
1090 var l
= digit_count
(base
)
1091 var s
= new FlatBuffer.from
(" " * l
)
1092 fill_buffer
(s
, base
, signed
)
1096 # Returns the displayable, signed, base-10 representation of the integer
1098 # assert 1.to_s == "1"
1099 # assert (-123).to_s == "-123"
1101 # Fast case for common numbers
1102 if self == 0 then return "0"
1103 if self == 1 then return "1"
1105 var nslen
= int_to_s_len
1106 var ns
= new NativeString(nslen
+ 1)
1108 native_int_to_s
(ns
, nslen
+ 1)
1109 return new FlatString.full
(ns
, nslen
, 0, nslen
- 1, nslen
)
1113 redef class Array[E
]
1115 # Fast implementation
1116 redef fun plain_to_s
1119 if l
== 0 then return ""
1120 if l
== 1 then if self[0] == null then return "" else return self[0].to_s
1122 var na
= new NativeArray[String](l
)
1128 if itsi
== null then
1138 var ns
= new NativeString(sl
+ 1)
1144 if tmp
isa FlatString then
1145 var tpl
= tmp
._bytelen
1146 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1149 for j
in tmp
.substrings
do
1150 var s
= j
.as(FlatString)
1151 var slen
= s
._bytelen
1152 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1158 return ns
.to_s_with_length
(sl
)
1162 redef class NativeArray[E
]
1163 redef fun native_to_s
do
1164 assert self isa NativeArray[String]
1175 var ns
= new NativeString(sl
+ 1)
1181 if tmp
isa FlatString then
1182 var tpl
= tmp
._bytelen
1183 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1186 for j
in tmp
.substrings
do
1187 var s
= j
.as(FlatString)
1188 var slen
= s
._bytelen
1189 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1195 return ns
.to_s_with_length
(sl
)
1199 redef class Map[K
,V
]
1200 redef fun join
(sep
, couple_sep
)
1202 if is_empty
then return ""
1204 var s
= new Buffer # Result
1210 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1212 # Concat other _items
1218 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")