1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
# The iterator stays valid as long as a target is still held.
redef fun is_ok
do
	var current = tgt
	return current != null
end
# Advancing drops the target, so `is_ok` (which tests `tgt != null`) turns false.
redef fun next
do
	tgt = null
end
# Byte offset at which the text starts in `items`; `0` by default.
private fun first_byte: Int
do
	return 0
end
# Byte offset of the final byte of the text, computed as `bytelen - 1`.
private fun last_byte: Int
do
	var total = bytelen
	return total - 1
end
43 # Cache of the latest position (char) explored in the string
46 # Cached position (bytes) in the NativeString underlying the String
#
# Defaults to `first_byte`. `byte_to_char_index` measures the distance
# between this cached offset and the requested index (`delta_cache`).
47 var bytepos
: Int = first_byte
is lateinit
49 # Index of the character `index` in `items`
50 private fun char_to_byte_index
(index
: Int): Int do
55 # Find best insertion point
56 var delta_begin
= index
57 var delta_end
= (ln
- 1) - index
58 var delta_cache
= (position
- index
).abs
62 if delta_cache
< min
then min
= delta_cache
63 if delta_end
< min
then min
= delta_end
68 if min
== delta_begin
then
71 else if min
== delta_cache
then
75 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
79 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
87 private fun byte_to_char_index
(index
: Int): Int do
90 assert index
< bytelen
92 # Find best insertion point
93 var delta_begin
= index
94 var delta_end
= (ln
- 1) - index
95 var delta_cache
= (bytepos
- index
).abs
99 if delta_cache
< min
then min
= delta_cache
100 if delta_end
< min
then min
= delta_end
105 if min
== delta_begin
then
108 else if min
== delta_cache
then
112 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
116 my_i
= its
.byte_to_char_index_cached
(index
, my_i
, ns_i
)
# Character at char-index `index`.
#
# The char index is first translated to a byte offset with
# `char_to_byte_index`, then the character is decoded from `items`.
redef fun [](index)
do
	var native = items
	var byte_index = char_to_byte_index(index)
	return native.char_at(byte_index)
end
127 # Immutable strings of characters.
132 # Index at which `self` begins in `items`, inclusively
# NOTE(review): declared `noinit`; presumably assigned by the named
# constructors (`with_infos`, `full`) whose bodies are not fully visible here.
133 redef var first_byte
is noinit
135 # Index at which `self` ends in `items`, inclusively
136 redef var last_byte
is noinit
# Character view over `self`, created on first access (`is lazy`).
138 redef var chars
= new FlatStringCharView(self) is lazy
# Byte view over `self`, created on first access (`is lazy`).
140 redef var bytes
= new FlatStringByteView(self) is lazy
142 redef var length
is lazy
do
143 if bytelen
== 0 then return 0
149 st
+= its
.length_of_char_at
(st
)
157 var b
= new FlatBuffer.with_capacity
(bytelen
+ 1)
158 for i
in [length
- 1 .. 0].step
(-1) do
161 var s
= b
.to_s
.as(FlatString)
162 s
.length
= self.length
# C string view of `items` starting at `first_byte`.
redef fun fast_cstring
do
	var native = items
	var start = first_byte
	return native.fast_cstring(start)
end
168 redef fun substring
(from
, count
)
174 if count
< 0 then count
= 0
178 if (count
+ from
) > length
then count
= length
- from
179 if count
<= 0 then return ""
180 var end_index
= from
+ count
- 1
182 var bytefrom
= char_to_byte_index
(from
)
183 var byteto
= char_to_byte_index
(end_index
)
184 byteto
+= items
.length_of_char_at
(byteto
) - 1
186 var s
= new FlatString.full
(items
, byteto
- bytefrom
+ 1, bytefrom
, byteto
, count
)
# The empty string literal, cast to `FlatString`.
redef fun empty
do
	var nothing = ""
	return nothing.as(FlatString)
end
194 var outstr
= new FlatBuffer.with_capacity
(self.bytelen
+ 1)
200 outstr
.add
(chars
[pos
].to_upper
)
209 var outstr
= new FlatBuffer.with_capacity
(self.bytelen
+ 1)
215 outstr
.add
(chars
[pos
].to_lower
)
224 for i
in chars
do i
.output
227 ##################################################
228 # String Specific Methods #
229 ##################################################
231 # Low-level creation of a new string with minimal data.
233 # `items` will be used as is, without copy, to retrieve the characters of the string.
234 # Aliasing issues are the responsibility of the caller.
235 private init with_infos
(items
: NativeString, bytelen
, from
, to
: Int)
238 self.bytelen
= bytelen
243 # Low-level creation of a new string with all the data.
245 # `items` will be used as is, without copy, to retrieve the characters of the string.
246 # Aliasing issues are the responsibility of the caller.
247 private init full
(items
: NativeString, bytelen
, from
, to
, length
: Int)
251 self.bytelen
= bytelen
256 redef fun to_cstring
do
257 if real_items
!= null then return real_items
.as(not null)
258 var new_items
= new NativeString(bytelen
+ 1)
259 self.items
.copy_to
(new_items
, bytelen
, first_byte
, 0)
260 new_items
[bytelen
] = 0u8
261 real_items
= new_items
267 if not other
isa FlatString then return super
269 if self.object_id
== other
.object_id
then return true
271 var my_length
= bytelen
273 if other
.bytelen
!= my_length
then return false
275 var my_index
= first_byte
276 var its_index
= other
.first_byte
278 var last_iteration
= my_index
+ my_length
280 var itsitems
= other
.items
281 var myitems
= self.items
283 while my_index
< last_iteration
do
284 if myitems
[my_index
] != itsitems
[its_index
] then return false
294 if not other
isa FlatString then return super
296 if self.object_id
== other
.object_id
then return false
298 var my_length
= self.bytelen
299 var its_length
= other
.bytelen
301 var max
= if my_length
< its_length
then my_length
else its_length
303 var myits
= self.bytes
304 var itsits
= other
.bytes
306 for i
in [0 .. max
[ do
307 var my_curr_char
= myits
[i
]
308 var its_curr_char
= itsits
[i
]
310 if my_curr_char
!= its_curr_char
then
311 if my_curr_char
< its_curr_char
then return true
316 return my_length
< its_length
323 var nlen
= mlen
+ slen
325 var mifrom
= first_byte
326 if s
isa FlatText then
328 var sifrom
= s
.first_byte
329 var ns
= new NativeString(nlen
+ 1)
330 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
331 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
332 return new FlatString.full
(ns
, nlen
, 0, nlen
- 1, length
+ o
.length
)
339 var mybtlen
= bytelen
340 var new_bytelen
= mybtlen
* i
342 var newlen
= mylen
* i
343 var ns
= new NativeString(new_bytelen
+ 1)
344 ns
[new_bytelen
] = 0u8
347 items
.copy_to
(ns
, bytelen
, first_byte
, offset
)
351 return new FlatString.full
(ns
, new_bytelen
, 0, new_bytelen
- 1, newlen
)
357 if hash_cache
== null then
358 # djb2 hash algorithm
364 while i
<= last_byte
do
365 h
= (h
<< 5) + h
+ myitems
[i
].to_i
372 return hash_cache
.as(not null)
# Iterator over the substrings of `self`, built from a `FlatSubstringsIter` on `self`.
redef fun substrings
do
	var iter = new FlatSubstringsIter(self)
	return iter
end
378 private class FlatStringCharReverseIterator
379 super IndexedIterator[Char]
381 var target
: FlatString
385 init with_pos
(tgt
: FlatString, pos
: Int)
# Valid while the cursor has not moved before index 0.
redef fun is_ok
do
	var cursor = curr_pos
	return cursor >= 0
end
# Character of `target` under the cursor.
redef fun item
do
	var str = target
	return str[curr_pos]
end
# Step the cursor one character backwards.
redef fun next
do
	curr_pos = curr_pos - 1
end
# Current char index, i.e. the cursor itself.
redef fun index
do
	return curr_pos
end
400 private class FlatStringCharIterator
401 super IndexedIterator[Char]
403 var target
: FlatString
409 init with_pos
(tgt
: FlatString, pos
: Int)
411 init(tgt
, tgt
.length
- 1, pos
)
# Valid while the cursor is at or before `max`.
redef fun is_ok
do
	var limit = max
	return curr_pos <= limit
end
# Character of `target` under the cursor.
redef fun item
do
	var str = target
	return str[curr_pos]
end
# Step the cursor one character forwards.
redef fun next
do
	curr_pos = curr_pos + 1
end
# Current char index, i.e. the cursor itself.
redef fun index
do
	return curr_pos
end
424 private class FlatStringCharView
427 redef type SELFTYPE: FlatString
# Character at `index`, delegated to the viewed string.
redef fun [](index)
do
	var str = target
	return str[index]
end
# Forward char iterator over `target`, positioned at `start`.
redef fun iterator_from(start)
do
	var iter = new FlatStringCharIterator.with_pos(target, start)
	return iter
end
# Backward char iterator over `target`, positioned at `start`.
redef fun reverse_iterator_from(start)
do
	var iter = new FlatStringCharReverseIterator.with_pos(target, start)
	return iter
end
437 private class FlatStringByteReverseIterator
438 super IndexedIterator[Byte]
440 var target
: FlatString
442 var target_items
: NativeString
446 init with_pos
(tgt
: FlatString, pos
: Int)
448 init(tgt
, tgt
.items
, pos
+ tgt
.first_byte
)
# Valid while the cursor has not moved before the first byte of `target`.
redef fun is_ok
do
	var lower = target.first_byte
	return curr_pos >= lower
end
# Byte of `target_items` under the cursor.
redef fun item
do
	var native = target_items
	return native[curr_pos]
end
# Step the cursor one byte backwards.
redef fun next
do
	curr_pos = curr_pos - 1
end
# Cursor position relative to the first byte of `target`.
redef fun index
do
	var base = target.first_byte
	return curr_pos - base
end
461 private class FlatStringByteIterator
462 super IndexedIterator[Byte]
464 var target
: FlatString
466 var target_items
: NativeString
470 init with_pos
(tgt
: FlatString, pos
: Int)
472 init(tgt
, tgt
.items
, pos
+ tgt
.first_byte
)
# Valid while the cursor is at or before the last byte of `target`.
redef fun is_ok
do
	var upper = target.last_byte
	return curr_pos <= upper
end
# Byte of `target_items` under the cursor.
redef fun item
do
	var native = target_items
	return native[curr_pos]
end
# Step the cursor one byte forwards.
redef fun next
do
	curr_pos = curr_pos + 1
end
# Cursor position relative to the first byte of `target`.
redef fun index
do
	var base = target.first_byte
	return curr_pos - base
end
485 private class FlatStringByteView
488 redef type SELFTYPE: FlatString
492 # Check that the index (+ first_byte) is not larger than last_byte
493 # In other terms, if the index is valid
495 var target
= self.target
496 assert (index
+ target
.first_byte
) <= target
.last_byte
497 return target
.items
[index
+ target
.first_byte
]
# Forward byte iterator over `target`, positioned at `start`.
redef fun iterator_from(start)
do
	var iter = new FlatStringByteIterator.with_pos(target, start)
	return iter
end
# Backward byte iterator over `target`, positioned at `start`.
redef fun reverse_iterator_from(start)
do
	var iter = new FlatStringByteReverseIterator.with_pos(target, start)
	return iter
end
# The default factory yields a `FlatBuffer`.
redef new
do
	return new FlatBuffer
end
# Factory with an initial capacity `i`, forwarded to `FlatBuffer.with_capacity`.
redef new with_cap(i)
do
	return new FlatBuffer.with_capacity(i)
end
512 # Mutable strings of characters.
# Character view over `self`, created on first access (`is lazy`).
517 redef var chars
: Sequence[Char] = new FlatBufferCharView(self) is lazy
# Byte view over `self`, created on first access (`is lazy`).
519 redef var bytes
= new FlatBufferByteView(self) is lazy
# Number of bytes in use; a new buffer starts at 0.
521 redef var bytelen
= 0
# NOTE(review): initialised to -1; presumably "no cached char position" — no reader is visible here.
525 private var char_cache
: Int = -1
# NOTE(review): initialised to -1; presumably "no cached byte position" — no reader is visible here.
527 private var byte_cache
: Int = -1
# Size used when allocating `items`; `rshift_bytes` grows it when
# `bytelen + len` would exceed it.
529 private var capacity
= 0
# C string view of `items` starting at byte 0.
redef fun fast_cstring
do
	var native = items
	return native.fast_cstring(0)
end
# Iterator over the substrings of `self`, built from a `FlatSubstringsIter` on `self`.
redef fun substrings
do
	var iter = new FlatSubstringsIter(self)
	return iter
end
535 # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
537 # This happens when an operation modifies the current `Buffer` and
538 # the Copy-On-Write flag `written` is set to true.
540 var nns
= new NativeString(capacity
)
541 items
.copy_to
(nns
, bytelen
, 0, 0)
546 # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
548 # Internal only, does not modify bytelen or length, this is the caller's responsibility
549 private fun rshift_bytes
(from
: Int, len
: Int) do
552 if bytelen
+ len
> capacity
then
553 capacity
= capacity
* 2 + 2
554 nit
= new NativeString(capacity
)
555 oit
.copy_to
(nit
, 0, 0, from
)
557 oit
.copy_to
(nit
, bytelen
- from
, from
, from
+ len
)
560 # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
562 # Internal only, does not modify bytelen or length, this is the caller's responsibility
563 private fun lshift_bytes
(from
: Int, len
: Int) do
564 items
.copy_to
(items
, bytelen
- from
, from
, from
- len
)
567 redef fun []=(index
, item
)
569 assert index
>= 0 and index
<= length
570 if written
then reset
572 if index
== length
then
576 var ip
= items
.char_to_byte_index
(index
)
577 var c
= items
.char_at
(ip
)
578 var clen
= c
.u8char_len
579 var itemlen
= item
.u8char_len
580 var size_diff
= itemlen
- clen
581 if size_diff
> 0 then
582 rshift_bytes
(ip
+ clen
, size_diff
)
583 else if size_diff
< 0 then
584 lshift_bytes
(ip
+ clen
, -size_diff
)
588 items
.set_char_at
(ip
, item
)
593 if written
then reset
595 var clen
= c
.u8char_len
596 enlarge
(bytelen
+ clen
)
597 items
.set_char_at
(bytelen
, c
)
604 if written
then reset
# A fresh buffer obtained from the `Buffer` factory.
redef fun empty
do
	var fresh = new Buffer
	return fresh
end
611 redef fun enlarge
(cap
)
614 if cap
<= c
then return
615 while c
<= cap
do c
= c
* 2 + 2
616 # The COW flag can be set to false here, since
617 # it does a copy of the current `Buffer`
619 var a
= new NativeString(c
+1)
620 if bytelen
> 0 then items
.copy_to
(a
, bytelen
, 0, 0)
628 if bytelen
== 0 then items
= new NativeString(1)
629 return new FlatString.with_infos
(items
, bytelen
, 0, bytelen
- 1)
635 var new_native
= new NativeString(bytelen
+ 1)
636 new_native
[bytelen
] = 0u8
637 if length
> 0 then items
.copy_to
(new_native
, bytelen
, 0, 0)
638 real_items
= new_native
641 return real_items
.as(not null)
644 # Create a new empty string.
647 # Low-level creation of a new buffer with given data.
649 # `items` will be used as is, without copy, to store the characters of the buffer.
650 # Aliasing issues are the responsibility of the caller.
652 # If `items` is shared, `written` should be set to true after the creation
653 # so that a modification will do a copy-on-write.
654 private init with_infos
(items
: NativeString, capacity
, bytelen
, length
: Int)
657 self.capacity
= capacity
658 self.bytelen
= bytelen
662 # Create a new string copied from `s`.
665 items
= new NativeString(s
.bytelen
)
666 if s
isa FlatText then
669 for i
in substrings
do i
.as(FlatString).items
.copy_to
(items
, i
.bytelen
, 0, 0)
677 # Create a new empty string with a given capacity.
678 init with_capacity
(cap
: Int)
681 items
= new NativeString(cap
+ 1)
688 if s
.is_empty
then return
691 enlarge
(bytelen
+ sl
)
692 if s
isa FlatText then
693 s
.items
.copy_to
(items
, sl
, s
.first_byte
, bytelen
)
695 for i
in s
.substrings
do append i
702 # Copies the content of self in `dest`
703 fun copy
(start
: Int, len
: Int, dest
: Buffer, new_start
: Int)
705 var self_chars
= self.chars
706 var dest_chars
= dest
.chars
707 for i
in [0..len-1
] do
708 dest_chars
[new_start
+i
] = self_chars
[start
+i
]
712 redef fun substring
(from
, count
)
715 if from
< 0 then from
= 0
716 if (from
+ count
) > length
then count
= length
- from
718 var bytefrom
= items
.char_to_byte_index
(from
)
719 var byteto
= items
.char_to_byte_index
(count
+ from
- 1)
720 byteto
+= items
.char_at
(byteto
).u8char_len
- 1
721 var byte_length
= byteto
- bytefrom
+ 1
722 var r_items
= new NativeString(byte_length
)
723 items
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
724 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
, count
)
733 var ns
= new FlatBuffer.with_capacity
(capacity
)
734 for i
in chars
.reverse_iterator
do ns
.add i
738 redef fun times
(repeats
)
740 var x
= new FlatString.with_infos
(items
, bytelen
, 0, bytelen
- 1)
741 for i
in [1 .. repeats
[ do
748 if written
then reset
749 for i
in [0 .. length
[ do self[i
] = self[i
].to_upper
754 if written
then reset
755 for i
in [0 .. length
[ do self[i
] = self[i
].to_lower
759 private class FlatBufferByteReverseIterator
760 super IndexedIterator[Byte]
762 var target
: FlatBuffer
764 var target_items
: NativeString
768 init with_pos
(tgt
: FlatBuffer, pos
: Int)
770 init(tgt
, tgt
.items
, pos
)
# Current byte index, i.e. the cursor itself.
redef fun index
do
	return curr_pos
end
# Valid while the cursor has not moved before byte 0.
redef fun is_ok
do
	var cursor = curr_pos
	return cursor >= 0
end
# Byte of `target_items` under the cursor.
redef fun item
do
	var native = target_items
	return native[curr_pos]
end
# Step the cursor one byte backwards.
redef fun next
do
	curr_pos = curr_pos - 1
end
783 private class FlatBufferByteView
786 redef type SELFTYPE: FlatBuffer
# Byte at offset `index` of the viewed buffer.
#
# `items` may be allocated larger than the content (`capacity` versus
# `bytelen`), so the index is checked against `bytelen` rather than
# silently reading bytes that are not part of the buffer.
redef fun [](index)
do
	var buf = target
	assert index >= 0 and index < buf.bytelen
	return buf.items[index]
end
# Forward byte iterator over `target`, positioned at `pos`.
redef fun iterator_from(pos)
do
	var iter = new FlatBufferByteIterator.with_pos(target, pos)
	return iter
end
# Backward byte iterator over `target`, positioned at `pos`.
redef fun reverse_iterator_from(pos)
do
	var iter = new FlatBufferByteReverseIterator.with_pos(target, pos)
	return iter
end
796 private class FlatBufferByteIterator
797 super IndexedIterator[Byte]
799 var target
: FlatBuffer
801 var target_items
: NativeString
805 init with_pos
(tgt
: FlatBuffer, pos
: Int)
807 init(tgt
, tgt
.items
, pos
)
# Current byte index, i.e. the cursor itself.
redef fun index
do
	return curr_pos
end
# Valid while the cursor is strictly below the byte length of `target`.
redef fun is_ok
do
	var used = target.bytelen
	return curr_pos < used
end
# Byte of `target_items` under the cursor.
redef fun item
do
	var native = target_items
	return native[curr_pos]
end
# Step the cursor one byte forwards.
redef fun next
do
	curr_pos = curr_pos + 1
end
820 private class FlatBufferCharReverseIterator
821 super IndexedIterator[Char]
823 var target
: FlatBuffer
827 init with_pos
(tgt
: FlatBuffer, pos
: Int)
# Current char index, i.e. the cursor itself.
redef fun index
do
	return curr_pos
end
# Valid while the cursor has not moved before index 0.
redef fun is_ok
do
	var cursor = curr_pos
	return cursor >= 0
end
# Character of `target` under the cursor.
redef fun item
do
	var buf = target
	return buf[curr_pos]
end
# Step the cursor one character backwards.
redef fun next
do
	curr_pos = curr_pos - 1
end
842 private class FlatBufferCharView
845 redef type SELFTYPE: FlatBuffer
# Character at `index`, delegated to the viewed buffer.
redef fun [](index)
do
	var buf = target
	return buf[index]
end
849 redef fun []=(index
, item
)
851 assert index
>= 0 and index
<= length
852 if index
== length
then
869 fun enlarge
(cap
: Int)
876 var s_length
= s
.length
877 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
.length
)
878 for i
in s
do target
.add i
# Forward char iterator over `target`, positioned at `pos`.
redef fun iterator_from(pos)
do
	var iter = new FlatBufferCharIterator.with_pos(target, pos)
	return iter
end
# Backward char iterator over `target`, positioned at `pos`.
redef fun reverse_iterator_from(pos)
do
	var iter = new FlatBufferCharReverseIterator.with_pos(target, pos)
	return iter
end
887 private class FlatBufferCharIterator
888 super IndexedIterator[Char]
890 var target
: FlatBuffer
896 init with_pos
(tgt
: FlatBuffer, pos
: Int)
898 init(tgt
, tgt
.length
- 1, pos
)
# Current char index, i.e. the cursor itself.
redef fun index
do
	return curr_pos
end
# Valid while the cursor is at or before `max`.
redef fun is_ok
do
	var limit = max
	return curr_pos <= limit
end
# Character of `target` under the cursor.
redef fun item
do
	var buf = target
	return buf[curr_pos]
end
# Step the cursor one character forwards.
redef fun next
do
	curr_pos = curr_pos + 1
end
911 redef class NativeString
914 return to_s_with_length
(cstring_length
)
917 # Returns `self` as a String of `length`.
918 redef fun to_s_with_length
(length
): FlatString
921 var str
= new FlatString.with_infos
(self, length
, 0, length
- 1)
925 # Returns `self` as a new String.
926 redef fun to_s_with_copy
: FlatString
928 var length
= cstring_length
929 var new_self
= new NativeString(length
+ 1)
930 copy_to
(new_self
, length
, 0, 0)
931 var str
= new FlatString.with_infos
(new_self
, length
, 0, length
- 1)
932 new_self
[length
] = 0u8
933 str
.real_items
= new_self
937 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
939 # Very unsafe, make sure to have room for this char prior to calling this function.
940 private fun set_char_at
(pos
: Int, c
: Char) do
941 var ln
= c
.u8char_len
942 native_set_char
(pos
, c
, ln
)
945 private fun native_set_char
(pos
: Int, c
: Char, ln
: Int) `{
946 char* dst = self + pos;
952 dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
953 dst[1] = 0x80 | (c & 0x3F);
956 dst[0] = 0xE0 | ((c & 0xF000) >> 12);
957 dst[1] = 0x80 | ((c & 0xFC0) >> 6);
958 dst[2] = 0x80 | (c & 0x3F);
961 dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
962 dst[1] = 0x80 | ((c & 0x3F000) >> 12);
963 dst[2] = 0x80 | ((c & 0xFC0) >> 6);
964 dst[3] = 0x80 | (c & 0x3F);
971 redef fun to_base
(base
, signed
)
973 var l
= digit_count
(base
)
974 var s
= new FlatBuffer.from
(" " * l
)
975 fill_buffer
(s
, base
, signed
)
979 # return displayable int in base 10 and signed
981 # assert 1.to_s == "1"
982 # assert (-123).to_s == "-123"
984 # Fast case for common numbers
985 if self == 0 then return "0"
986 if self == 1 then return "1"
988 var nslen
= int_to_s_len
989 var ns
= new NativeString(nslen
+ 1)
991 native_int_to_s
(ns
, nslen
+ 1)
992 return new FlatString.full
(ns
, nslen
, 0, nslen
- 1, nslen
)
998 # Fast implementation
1002 if l
== 0 then return ""
1003 if l
== 1 then if self[0] == null then return "" else return self[0].to_s
1005 var na
= new NativeArray[String](l
)
1011 if itsi
== null then
1021 var ns
= new NativeString(sl
+ 1)
1027 if tmp
isa FlatString then
1028 var tpl
= tmp
.bytelen
1029 tmp
.items
.copy_to
(ns
, tpl
, tmp
.first_byte
, off
)
1032 for j
in tmp
.substrings
do
1033 var s
= j
.as(FlatString)
1034 var slen
= s
.bytelen
1035 s
.items
.copy_to
(ns
, slen
, s
.first_byte
, off
)
1041 return ns
.to_s_with_length
(sl
)
1045 redef class NativeArray[E
]
1046 redef fun native_to_s
do
1047 assert self isa NativeArray[String]
1058 var ns
= new NativeString(sl
+ 1)
1064 if tmp
isa FlatString then
1065 var tpl
= tmp
.bytelen
1066 tmp
.items
.copy_to
(ns
, tpl
, tmp
.first_byte
, off
)
1069 for j
in tmp
.substrings
do
1070 var s
= j
.as(FlatString)
1071 var slen
= s
.bytelen
1072 s
.items
.copy_to
(ns
, slen
, s
.first_byte
, off
)
1078 return ns
.to_s_with_length
(sl
)
1082 redef class Map[K
,V
]
1083 redef fun join
(sep
, couple_sep
)
1085 if is_empty
then return ""
1087 var s
= new Buffer # Result
1093 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1095 # Concat other items
1101 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")