1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
32 redef fun is_ok
do return tgt
!= null
34 redef fun next
do tgt
= null
39 private fun first_byte
: Int do return 0
41 private fun last_byte
: Int do return bytelen
- 1
43 # Cache of the latest position (char) explored in the string
46 # Cached position (bytes) in the NativeString underlying the String
47 var bytepos
: Int = first_byte
is lateinit
49 # Index of the character `index` in `items`
50 private fun char_to_byte_index
(index
: Int): Int do
55 # Find best insertion point
56 var delta_begin
= index
57 var delta_end
= (ln
- 1) - index
58 var delta_cache
= (position
- index
).abs
62 if delta_cache
< min
then min
= delta_cache
63 if delta_end
< min
then min
= delta_end
68 if min
== delta_begin
then
71 else if min
== delta_cache
then
75 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
79 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
87 # By escaping `self` to C, how many more bytes will be needed ?
89 # This enables a double-optimization in `escape_to_c` since if this
90 # method returns 0, then `self` does not need escaping and can be
92 protected fun chars_to_escape_to_c
: Int do
101 else if c
== 0x09u
8 then
103 else if c
== 0x22u
8 then
105 else if c
== 0x27u
8 then
107 else if c
== 0x5Cu
8 then
109 else if c
< 32u8
then
117 redef fun escape_to_c
do
118 var ln_extra
= chars_to_escape_to_c
119 if ln_extra
== 0 then return self.to_s
122 var nlen
= _bytelen
+ ln_extra
123 var nns
= new NativeString(nlen
)
130 # Any byte with value < 32 is a control character
131 # All their uses will be replaced by their octal
134 # There are two exceptions however:
139 # Aside from the code points above, the following are:
146 nns
[opos
+ 1] = 0x74u
8
148 else if c
== 0x0Au
8 then
150 nns
[opos
+ 1] = 0x6Eu
8
152 else if c
== 0x22u
8 then
154 nns
[opos
+ 1] = 0x22u
8
156 else if c
== 0x27u
8 then
158 nns
[opos
+ 1] = 0x27u
8
160 else if c
== 0x5Cu
8 then
162 nns
[opos
+ 1] = 0x5Cu
8
164 else if c
< 32u8
then
166 nns
[opos
+ 1] = 0x30u
8
167 nns
[opos
+ 2] = ((c
& 0x38u
8) >> 3) + 0x30u
8
168 nns
[opos
+ 3] = (c
& 0x07u
8) + 0x30u
8
176 return nns
.to_s_with_length
(nlen
)
179 private fun byte_to_char_index
(index
: Int): Int do
182 assert index
< bytelen
184 # Find best insertion point
185 var delta_begin
= index
186 var delta_end
= (ln
- 1) - index
187 var delta_cache
= (bytepos
- index
).abs
188 var min
= delta_begin
191 if delta_cache
< min
then min
= delta_cache
192 if delta_end
< min
then min
= delta_end
197 if min
== delta_begin
then
200 else if min
== delta_cache
then
204 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
208 my_i
= its
.byte_to_char_index_cached
(index
, my_i
, ns_i
)
# Character located at char position `index`.
#
# The char position is first translated to a byte offset with
# `char_to_byte_index`, then the character is read from `items` at that offset.
216 redef fun [](index
) do return items
.char_at
(char_to_byte_index
(index
))
219 # Immutable strings of characters.
224 # Index at which `self` begins in `items`, inclusively
225 redef var first_byte
is noinit
227 # Index at which `self` ends in `items`, inclusively
228 redef var last_byte
is noinit
230 redef var chars
= new FlatStringCharView(self) is lazy
232 redef var bytes
= new FlatStringByteView(self) is lazy
234 redef var length
is lazy
do
235 if bytelen
== 0 then return 0
241 st
+= its
.length_of_char_at
(st
)
249 var b
= new FlatBuffer.with_capacity
(bytelen
+ 1)
250 for i
in [length
- 1 .. 0].step
(-1) do
253 var s
= b
.to_s
.as(FlatString)
254 s
.length
= self.length
258 redef fun fast_cstring
do return items
.fast_cstring
(first_byte
)
260 redef fun substring
(from
, count
)
266 if count
< 0 then count
= 0
270 if (count
+ from
) > length
then count
= length
- from
271 if count
<= 0 then return ""
272 var end_index
= from
+ count
- 1
274 var bytefrom
= char_to_byte_index
(from
)
275 var byteto
= char_to_byte_index
(end_index
)
276 byteto
+= items
.length_of_char_at
(byteto
) - 1
278 var s
= new FlatString.full
(items
, byteto
- bytefrom
+ 1, bytefrom
, byteto
, count
)
282 redef fun empty
do return "".as(FlatString)
286 var outstr
= new FlatBuffer.with_capacity
(self.bytelen
+ 1)
292 outstr
.add
(chars
[pos
].to_upper
)
301 var outstr
= new FlatBuffer.with_capacity
(self.bytelen
+ 1)
307 outstr
.add
(chars
[pos
].to_lower
)
316 for i
in chars
do i
.output
319 ##################################################
320 # String Specific Methods #
321 ##################################################
323 # Low-level creation of a new string with minimal data.
325 # `items` will be used as is, without copy, to retrieve the characters of the string.
326 # Aliasing issues are the responsibility of the caller.
327 private init with_infos
(items
: NativeString, bytelen
, from
, to
: Int)
330 self.bytelen
= bytelen
335 # Low-level creation of a new string with all the data.
337 # `items` will be used as is, without copy, to retrieve the characters of the string.
338 # Aliasing issues are the responsibility of the caller.
339 private init full
(items
: NativeString, bytelen
, from
, to
, length
: Int)
343 self.bytelen
= bytelen
# C-string view of `self`.
#
# When `real_items` is already set it is answered as is. Otherwise the
# `bytelen` bytes of `items` starting at `first_byte` are copied into a fresh
# `NativeString` of `bytelen + 1` bytes, a terminating 0 byte is written at
# index `bytelen`, and the copy is stored in `real_items`.
348 redef fun to_cstring
do
# Already materialized: reuse the stored copy
349 if real_items
!= null then return real_items
.as(not null)
# One extra byte for the terminating 0
350 var new_items
= new NativeString(bytelen
+ 1)
# `copy_to` arguments: destination, byte count, source offset, destination offset
351 self.items
.copy_to
(new_items
, bytelen
, first_byte
, 0)
352 new_items
[bytelen
] = 0u8
# Keep the copy so that the next call takes the early return above
353 real_items
= new_items
359 if not other
isa FlatString then return super
361 if self.object_id
== other
.object_id
then return true
363 var my_length
= bytelen
365 if other
.bytelen
!= my_length
then return false
367 var my_index
= first_byte
368 var its_index
= other
.first_byte
370 var last_iteration
= my_index
+ my_length
372 var itsitems
= other
.items
373 var myitems
= self.items
375 while my_index
< last_iteration
do
376 if myitems
[my_index
] != itsitems
[its_index
] then return false
386 if not other
isa FlatString then return super
388 if self.object_id
== other
.object_id
then return false
390 var my_length
= self.bytelen
391 var its_length
= other
.bytelen
393 var max
= if my_length
< its_length
then my_length
else its_length
395 var myits
= self.bytes
396 var itsits
= other
.bytes
398 for i
in [0 .. max
[ do
399 var my_curr_char
= myits
[i
]
400 var its_curr_char
= itsits
[i
]
402 if my_curr_char
!= its_curr_char
then
403 if my_curr_char
< its_curr_char
then return true
408 return my_length
< its_length
415 var nlen
= mlen
+ slen
417 var mifrom
= first_byte
418 if s
isa FlatText then
420 var sifrom
= s
.first_byte
421 var ns
= new NativeString(nlen
+ 1)
422 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
423 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
424 return new FlatString.full
(ns
, nlen
, 0, nlen
- 1, length
+ o
.length
)
431 var mybtlen
= bytelen
432 var new_bytelen
= mybtlen
* i
434 var newlen
= mylen
* i
435 var ns
= new NativeString(new_bytelen
+ 1)
436 ns
[new_bytelen
] = 0u8
439 items
.copy_to
(ns
, bytelen
, first_byte
, offset
)
443 return new FlatString.full
(ns
, new_bytelen
, 0, new_bytelen
- 1, newlen
)
449 if hash_cache
== null then
450 # djb2 hash algorithm
456 while i
<= last_byte
do
457 h
= (h
<< 5) + h
+ myitems
[i
].to_i
464 return hash_cache
.as(not null)
467 redef fun substrings
do return new FlatSubstringsIter(self)
470 private class FlatStringCharReverseIterator
471 super IndexedIterator[Char]
473 var target
: FlatString
477 init with_pos
(tgt
: FlatString, pos
: Int)
482 redef fun is_ok
do return curr_pos
>= 0
484 redef fun item
do return target
[curr_pos
]
486 redef fun next
do curr_pos
-= 1
488 redef fun index
do return curr_pos
492 private class FlatStringCharIterator
493 super IndexedIterator[Char]
495 var target
: FlatString
501 init with_pos
(tgt
: FlatString, pos
: Int)
503 init(tgt
, tgt
.length
- 1, pos
)
506 redef fun is_ok
do return curr_pos
<= max
508 redef fun item
do return target
[curr_pos
]
510 redef fun next
do curr_pos
+= 1
512 redef fun index
do return curr_pos
516 private class FlatStringCharView
519 redef type SELFTYPE: FlatString
521 redef fun [](index
) do return target
[index
]
523 redef fun iterator_from
(start
) do return new FlatStringCharIterator.with_pos
(target
, start
)
525 redef fun reverse_iterator_from
(start
) do return new FlatStringCharReverseIterator.with_pos
(target
, start
)
529 private class FlatStringByteReverseIterator
530 super IndexedIterator[Byte]
532 var target
: FlatString
534 var target_items
: NativeString
538 init with_pos
(tgt
: FlatString, pos
: Int)
540 init(tgt
, tgt
.items
, pos
+ tgt
.first_byte
)
543 redef fun is_ok
do return curr_pos
>= target
.first_byte
545 redef fun item
do return target_items
[curr_pos
]
547 redef fun next
do curr_pos
-= 1
549 redef fun index
do return curr_pos
- target
.first_byte
# Forward iterator over the bytes of a `FlatString`.
#
# NOTE(review): the declaration of `curr_pos` is not visible in this extract;
# from its uses below it is a byte offset into `target_items`.
553 private class FlatStringByteIterator
554 super IndexedIterator[Byte]
# String whose bytes are iterated
556 var target
: FlatString
# Byte storage that `item` reads from (`with_pos` passes `tgt.items`)
558 var target_items
: NativeString
# Iterator on `tgt` starting at byte `pos`, counted from the start of `tgt`.
#
# `tgt.first_byte` is added to `pos` to obtain an offset inside `tgt.items`.
# NOTE(review): this third argument presumably initializes `curr_pos` - confirm.
562 init with_pos
(tgt
: FlatString, pos
: Int)
564 init(tgt
, tgt
.items
, pos
+ tgt
.first_byte
)
# Valid as long as the cursor has not moved past `target.last_byte`
567 redef fun is_ok
do return curr_pos
<= target
.last_byte
# Byte of `target_items` under the cursor
569 redef fun item
do return target_items
[curr_pos
]
# Advance the cursor by one byte
571 redef fun next
do curr_pos
+= 1
# Cursor position relative to `target.first_byte`
573 redef fun index
do return curr_pos
- target
.first_byte
577 private class FlatStringByteView
580 redef type SELFTYPE: FlatString
584 # Check that the index (+ first_byte) is not larger than last_byte
585 # In other terms, if the index is valid
587 var target
= self.target
588 assert (index
+ target
.first_byte
) <= target
.last_byte
589 return target
.items
[index
+ target
.first_byte
]
592 redef fun iterator_from
(start
) do return new FlatStringByteIterator.with_pos
(target
, start
)
594 redef fun reverse_iterator_from
(start
) do return new FlatStringByteReverseIterator.with_pos
(target
, start
)
599 redef new do return new FlatBuffer
601 redef new with_cap
(i
) do return new FlatBuffer.with_capacity
(i
)
604 # Mutable strings of characters.
609 redef var chars
: Sequence[Char] = new FlatBufferCharView(self) is lazy
611 redef var bytes
= new FlatBufferByteView(self) is lazy
613 redef var bytelen
= 0
617 private var char_cache
: Int = -1
619 private var byte_cache
: Int = -1
621 private var capacity
= 0
623 redef fun fast_cstring
do return items
.fast_cstring
(0)
625 redef fun substrings
do return new FlatSubstringsIter(self)
627 # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
629 # This happens when an operation modifies the current `Buffer` and
630 # the Copy-On-Write flag `written` is set to true.
632 var nns
= new NativeString(capacity
)
633 items
.copy_to
(nns
, bytelen
, 0, 0)
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# Internal only, does not modify bytelen or length, this is the caller's responsibility.
#
# When `bytelen + len` does not fit in `capacity`, `capacity` is grown, a new
# `NativeString` of that size is allocated, the bytes located before `from`
# are carried over to it, and it becomes the new `items`.
private fun rshift_bytes(from: Int, len: Int) do
	var src = items
	var dst = src
	if bytelen + len > capacity then
		# Grow until the shifted content fits
		while bytelen + len > capacity do capacity = capacity * 2 + 2
		dst = new NativeString(capacity)
		# `copy_to` takes (dest, length, from, to): carry over the `from`
		# leading bytes, which the shift leaves in place, to the start of `dst`.
		src.copy_to(dst, from, 0, 0)
		# Publish the new storage, otherwise the shifted content is lost
		# while `capacity` already pretends that the room exists.
		items = dst
	end
	# Move the tail `[from, bytelen[` so that it starts `len` bytes further.
	src.copy_to(dst, bytelen - from, from, from + len)
end
# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify bytelen or length, this is the caller's responsibility.
private fun lshift_bytes(from: Int, len: Int) do
	var storage = items
	# Number of bytes located from `from` up to the end of the content
	var tail_len = bytelen - from
	# In-place move of that tail so that it starts `len` bytes earlier
	storage.copy_to(storage, tail_len, from, from - len)
end
659 redef fun []=(index
, item
)
661 assert index
>= 0 and index
<= length
662 if written
then reset
664 if index
== length
then
668 var ip
= items
.char_to_byte_index
(index
)
669 var c
= items
.char_at
(ip
)
670 var clen
= c
.u8char_len
671 var itemlen
= item
.u8char_len
672 var size_diff
= itemlen
- clen
673 if size_diff
> 0 then
674 rshift_bytes
(ip
+ clen
, size_diff
)
675 else if size_diff
< 0 then
676 lshift_bytes
(ip
+ clen
, -size_diff
)
680 items
.set_char_at
(ip
, item
)
685 if written
then reset
687 var clen
= c
.u8char_len
688 enlarge
(bytelen
+ clen
)
689 items
.set_char_at
(bytelen
, c
)
696 if written
then reset
701 redef fun empty
do return new Buffer
703 redef fun enlarge
(cap
)
706 if cap
<= c
then return
707 while c
<= cap
do c
= c
* 2 + 2
708 # The COW flag can be set at false here, since
709 # it does a copy of the current `Buffer`
711 var a
= new NativeString(c
+1)
712 if bytelen
> 0 then items
.copy_to
(a
, bytelen
, 0, 0)
720 if bytelen
== 0 then items
= new NativeString(1)
721 return new FlatString.full
(items
, bytelen
, 0, bytelen
- 1, length
)
727 var new_native
= new NativeString(bytelen
+ 1)
728 new_native
[bytelen
] = 0u8
729 if length
> 0 then items
.copy_to
(new_native
, bytelen
, 0, 0)
730 real_items
= new_native
733 return real_items
.as(not null)
736 # Create a new empty string.
739 # Low-level creation of a new buffer with given data.
741 # `items` will be used as is, without copy, to store the characters of the buffer.
742 # Aliasing issues are the responsibility of the caller.
744 # If `items` is shared, `written` should be set to true after the creation
745 # so that a modification will do a copy-on-write.
746 private init with_infos
(items
: NativeString, capacity
, bytelen
, length
: Int)
749 self.capacity
= capacity
750 self.bytelen
= bytelen
754 # Create a new string copied from `s`.
757 items
= new NativeString(s
.bytelen
)
758 if s
isa FlatText then
761 for i
in substrings
do i
.as(FlatString).items
.copy_to
(items
, i
.bytelen
, 0, 0)
769 # Create a new empty string with a given capacity.
770 init with_capacity
(cap
: Int)
773 items
= new NativeString(cap
+ 1)
780 if s
.is_empty
then return
783 enlarge
(bytelen
+ sl
)
784 if s
isa FlatText then
785 s
.items
.copy_to
(items
, sl
, s
.first_byte
, bytelen
)
787 for i
in s
.substrings
do append i
# Copies `len` characters of `self`, read from position `start`,
# into `dest`, written from position `new_start`.
#
# Characters are transferred one by one, in increasing order, through the
# `chars` views of both buffers. Nothing is copied when `len` is not positive.
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src = chars
	var dst = dest.chars
	var offset = 0
	while offset < len do
		dst[new_start + offset] = src[start + offset]
		offset += 1
	end
end
804 redef fun substring
(from
, count
)
807 if from
< 0 then from
= 0
808 if (from
+ count
) > length
then count
= length
- from
810 var bytefrom
= items
.char_to_byte_index
(from
)
811 var byteto
= items
.char_to_byte_index
(count
+ from
- 1)
812 byteto
+= items
.char_at
(byteto
).u8char_len
- 1
813 var byte_length
= byteto
- bytefrom
+ 1
814 var r_items
= new NativeString(byte_length
)
815 items
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
816 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
, count
)
825 var ns
= new FlatBuffer.with_capacity
(capacity
)
826 for i
in chars
.reverse_iterator
do ns
.add i
830 redef fun times
(repeats
)
832 var x
= new FlatString.full
(items
, bytelen
, 0, bytelen
- 1, length
)
833 for i
in [1 .. repeats
[ do
840 if written
then reset
841 for i
in [0 .. length
[ do self[i
] = self[i
].to_upper
846 if written
then reset
847 for i
in [0 .. length
[ do self[i
] = self[i
].to_lower
851 private class FlatBufferByteReverseIterator
852 super IndexedIterator[Byte]
854 var target
: FlatBuffer
856 var target_items
: NativeString
860 init with_pos
(tgt
: FlatBuffer, pos
: Int)
862 init(tgt
, tgt
.items
, pos
)
865 redef fun index
do return curr_pos
867 redef fun is_ok
do return curr_pos
>= 0
869 redef fun item
do return target_items
[curr_pos
]
871 redef fun next
do curr_pos
-= 1
875 private class FlatBufferByteView
878 redef type SELFTYPE: FlatBuffer
880 redef fun [](index
) do return target
.items
[index
]
882 redef fun iterator_from
(pos
) do return new FlatBufferByteIterator.with_pos
(target
, pos
)
884 redef fun reverse_iterator_from
(pos
) do return new FlatBufferByteReverseIterator.with_pos
(target
, pos
)
888 private class FlatBufferByteIterator
889 super IndexedIterator[Byte]
891 var target
: FlatBuffer
893 var target_items
: NativeString
897 init with_pos
(tgt
: FlatBuffer, pos
: Int)
899 init(tgt
, tgt
.items
, pos
)
902 redef fun index
do return curr_pos
904 redef fun is_ok
do return curr_pos
< target
.bytelen
906 redef fun item
do return target_items
[curr_pos
]
908 redef fun next
do curr_pos
+= 1
912 private class FlatBufferCharReverseIterator
913 super IndexedIterator[Char]
915 var target
: FlatBuffer
919 init with_pos
(tgt
: FlatBuffer, pos
: Int)
924 redef fun index
do return curr_pos
926 redef fun is_ok
do return curr_pos
>= 0
928 redef fun item
do return target
[curr_pos
]
930 redef fun next
do curr_pos
-= 1
934 private class FlatBufferCharView
937 redef type SELFTYPE: FlatBuffer
939 redef fun [](index
) do return target
[index
]
941 redef fun []=(index
, item
)
943 assert index
>= 0 and index
<= length
944 if index
== length
then
961 fun enlarge
(cap
: Int)
968 var s_length
= s
.length
969 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
.length
)
970 for i
in s
do target
.add i
973 redef fun iterator_from
(pos
) do return new FlatBufferCharIterator.with_pos
(target
, pos
)
975 redef fun reverse_iterator_from
(pos
) do return new FlatBufferCharReverseIterator.with_pos
(target
, pos
)
979 private class FlatBufferCharIterator
980 super IndexedIterator[Char]
982 var target
: FlatBuffer
988 init with_pos
(tgt
: FlatBuffer, pos
: Int)
990 init(tgt
, tgt
.length
- 1, pos
)
993 redef fun index
do return curr_pos
995 redef fun is_ok
do return curr_pos
<= max
997 redef fun item
do return target
[curr_pos
]
999 redef fun next
do curr_pos
+= 1
1003 redef class NativeString
1006 return to_s_with_length
(cstring_length
)
1009 # Returns `self` as a String of `length`.
1010 redef fun to_s_with_length
(length
): FlatString
1013 var str
= new FlatString.with_infos
(self, length
, 0, length
- 1)
# Builds a `FlatString` over `self` covering the bytes `0` to `bytelen - 1`,
# with `unilen` given as its length.
redef fun to_s_full(bytelen, unilen) do
	var last_index = bytelen - 1
	return new FlatString.full(self, bytelen, 0, last_index, unilen)
end
1021 # Returns `self` as a new String.
1022 redef fun to_s_with_copy
: FlatString
1024 var length
= cstring_length
1025 var new_self
= new NativeString(length
+ 1)
1026 copy_to
(new_self
, length
, 0, 0)
1027 var str
= new FlatString.with_infos
(new_self
, length
, 0, length
- 1)
1028 new_self
[length
] = 0u8
1029 str
.real_items
= new_self
# Writes `c`, encoded in UTF-8, into the bytes of `self` starting at position `pos`
#
# Very unsafe: the caller must make sure there is room for this char before calling.
# The encoded size is `c.u8char_len`; the actual write is delegated to `native_set_char`.
private fun set_char_at(pos: Int, c: Char) do native_set_char(pos, c, c.u8char_len)
1041 private fun native_set_char
(pos
: Int, c
: Char, ln
: Int) `{
1042 char* dst = self + pos;
1048 dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
1049 dst[1] = 0x80 | (c & 0x3F);
1052 dst[0] = 0xE0 | ((c & 0xF000) >> 12);
1053 dst[1] = 0x80 | ((c & 0xFC0) >> 6);
1054 dst[2] = 0x80 | (c & 0x3F);
1057 dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
1058 dst[1] = 0x80 | ((c & 0x3F000) >> 12);
1059 dst[2] = 0x80 | ((c & 0xFC0) >> 6);
1060 dst[3] = 0x80 | (c & 0x3F);
1067 redef fun to_base
(base
, signed
)
1069 var l
= digit_count
(base
)
1070 var s
= new FlatBuffer.from
(" " * l
)
1071 fill_buffer
(s
, base
, signed
)
1075 # return displayable int in base 10 and signed
1077 # assert 1.to_s == "1"
1078 # assert (-123).to_s == "-123"
1080 # Fast case for common numbers
1081 if self == 0 then return "0"
1082 if self == 1 then return "1"
1084 var nslen
= int_to_s_len
1085 var ns
= new NativeString(nslen
+ 1)
1087 native_int_to_s
(ns
, nslen
+ 1)
1088 return new FlatString.full
(ns
, nslen
, 0, nslen
- 1, nslen
)
1092 redef class Array[E
]
1094 # Fast implementation
1095 redef fun plain_to_s
1098 if l
== 0 then return ""
1099 if l
== 1 then if self[0] == null then return "" else return self[0].to_s
1101 var na
= new NativeArray[String](l
)
1107 if itsi
== null then
1117 var ns
= new NativeString(sl
+ 1)
1123 if tmp
isa FlatString then
1124 var tpl
= tmp
.bytelen
1125 tmp
.items
.copy_to
(ns
, tpl
, tmp
.first_byte
, off
)
1128 for j
in tmp
.substrings
do
1129 var s
= j
.as(FlatString)
1130 var slen
= s
.bytelen
1131 s
.items
.copy_to
(ns
, slen
, s
.first_byte
, off
)
1137 return ns
.to_s_with_length
(sl
)
1141 redef class NativeArray[E
]
1142 redef fun native_to_s
do
1143 assert self isa NativeArray[String]
1154 var ns
= new NativeString(sl
+ 1)
1160 if tmp
isa FlatString then
1161 var tpl
= tmp
.bytelen
1162 tmp
.items
.copy_to
(ns
, tpl
, tmp
.first_byte
, off
)
1165 for j
in tmp
.substrings
do
1166 var s
= j
.as(FlatString)
1167 var slen
= s
.bytelen
1168 s
.items
.copy_to
(ns
, slen
, s
.first_byte
, off
)
1174 return ns
.to_s_with_length
(sl
)
1178 redef class Map[K
,V
]
1179 redef fun join
(sep
, couple_sep
)
1181 if is_empty
then return ""
1183 var s
= new Buffer # Result
1189 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1191 # Concat other items
1197 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")