1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
# True as long as `tgt` has not been cleared, i.e. it is still non-null.
redef fun is_ok
do
	return tgt != null
end
# Clears `tgt`; `is_ok` tests `tgt` against null, so it is false afterwards.
redef fun next
do
	tgt = null
end
37 # Immutable strings of characters.
42 # Index at which `self` begins in `items`, inclusively
43 private var first_byte
: Int is noinit
45 # Index at which `self` ends in `items`, inclusively
46 private var last_byte
: Int is noinit
48 redef var chars
= new FlatStringCharView(self) is lazy
50 redef var bytes
= new FlatStringByteView(self) is lazy
52 # Cache of the latest position (char) explored in the string
54 # Cached position (bytes) in the NativeString underlying the String
55 var bytepos
: Int = first_byte
is lateinit
57 redef var length
is lazy
do
58 if bytelen
== 0 then return 0
64 st
+= its
.length_of_char_at
(st
)
# Character located at position `index`.
#
# The character index is first translated to a byte offset through
# `char_to_byte_index`, then the character is read from `items` at that offset.
redef fun [](index)
do
	var byte_offset = char_to_byte_index(index)
	return items.char_at(byte_offset)
end
72 # Index of the character `index` in `items`
73 private fun char_to_byte_index
(index
: Int): Int do
78 # Find best insertion point
79 var delta_begin
= index
80 var delta_end
= (ln
- 1) - index
81 var delta_cache
= (position
- index
).abs
85 if delta_cache
< min
then min
= delta_cache
86 if delta_end
< min
then min
= delta_end
91 if min
== delta_begin
then
94 else if min
== delta_cache
then
98 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
102 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
112 var b
= new FlatBuffer.with_capacity
(bytelen
+ 1)
113 for i
in [length
- 1 .. 0].step
(-1) do
116 var s
= b
.to_s
.as(FlatString)
117 s
.length
= self.length
# Result of `items.fast_cstring`, asked to start at `first_byte`.
redef fun fast_cstring
do
	return items.fast_cstring(first_byte)
end
123 redef fun substring
(from
, count
)
129 if count
< 0 then count
= 0
133 if (count
+ from
) > length
then count
= length
- from
134 if count
<= 0 then return ""
135 var end_index
= from
+ count
- 1
137 var bytefrom
= char_to_byte_index
(from
)
138 var byteto
= char_to_byte_index
(end_index
)
139 byteto
+= items
.length_of_char_at
(byteto
) - 1
141 var s
= new FlatString.full
(items
, byteto
- bytefrom
+ 1, bytefrom
, byteto
, count
)
# The empty string literal, cast to `FlatString`.
redef fun empty
do
	var blank = ""
	return blank.as(FlatString)
end
149 var outstr
= new FlatBuffer.with_capacity
(self.bytelen
+ 1)
155 outstr
.add
(chars
[pos
].to_upper
)
164 var outstr
= new FlatBuffer.with_capacity
(self.bytelen
+ 1)
170 outstr
.add
(chars
[pos
].to_lower
)
179 for i
in chars
do i
.output
182 ##################################################
183 # String Specific Methods #
184 ##################################################
186 # Low-level creation of a new string with minimal data.
188 # `items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues are the responsibility of the caller.
190 private init with_infos
(items
: NativeString, bytelen
, from
, to
: Int)
193 self.bytelen
= bytelen
198 # Low-level creation of a new string with all the data.
200 # `items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues are the responsibility of the caller.
202 private init full
(items
: NativeString, bytelen
, from
, to
, length
: Int)
206 self.bytelen
= bytelen
211 redef fun to_cstring
do
212 if real_items
!= null then return real_items
.as(not null)
213 var new_items
= new NativeString(bytelen
+ 1)
214 self.items
.copy_to
(new_items
, bytelen
, first_byte
, 0)
215 new_items
[bytelen
] = 0u8
216 real_items
= new_items
222 if not other
isa FlatString then return super
224 if self.object_id
== other
.object_id
then return true
226 var my_length
= bytelen
228 if other
.bytelen
!= my_length
then return false
230 var my_index
= first_byte
231 var its_index
= other
.first_byte
233 var last_iteration
= my_index
+ my_length
235 var itsitems
= other
.items
236 var myitems
= self.items
238 while my_index
< last_iteration
do
239 if myitems
[my_index
] != itsitems
[its_index
] then return false
249 if not other
isa FlatString then return super
251 if self.object_id
== other
.object_id
then return false
253 var my_length
= self.bytelen
254 var its_length
= other
.bytelen
256 var max
= if my_length
< its_length
then my_length
else its_length
258 var myits
= self.bytes
259 var itsits
= other
.bytes
261 for i
in [0 .. max
[ do
262 var my_curr_char
= myits
[i
]
263 var its_curr_char
= itsits
[i
]
265 if my_curr_char
!= its_curr_char
then
266 if my_curr_char
< its_curr_char
then return true
271 return my_length
< its_length
278 var nlen
= mlen
+ slen
280 var mifrom
= first_byte
281 if s
isa FlatText then
283 var sifrom
= s
.as(FlatString).first_byte
284 var ns
= new NativeString(nlen
+ 1)
285 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
286 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
287 return new FlatString.full
(ns
, nlen
, 0, nlen
- 1, length
+ o
.length
)
294 var mybtlen
= bytelen
295 var new_bytelen
= mybtlen
* i
297 var newlen
= mylen
* i
298 var ns
= new NativeString(new_bytelen
+ 1)
299 ns
[new_bytelen
] = 0u8
302 items
.copy_to
(ns
, bytelen
, first_byte
, offset
)
306 return new FlatString.full
(ns
, new_bytelen
, 0, new_bytelen
- 1, newlen
)
312 if hash_cache
== null then
313 # djb2 hash algorithm
319 while i
<= last_byte
do
320 h
= h
.lshift
(5) + h
+ myitems
[i
].to_i
327 return hash_cache
.as(not null)
# Substrings iterator: a new `FlatSubstringsIter` built on `self`.
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
333 private class FlatStringCharReverseIterator
334 super IndexedIterator[Char]
336 var target
: FlatString
340 init with_pos
(tgt
: FlatString, pos
: Int)
# The iteration is valid as long as `curr_pos` is not negative.
redef fun is_ok
do
	var pos = curr_pos
	return pos >= 0
end
# Character of `target` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target[pos]
end
# Steps one position backwards by decrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos - 1
end
# Current position of the iterator, i.e. `curr_pos`.
redef fun index
do
	return curr_pos
end
355 private class FlatStringCharIterator
356 super IndexedIterator[Char]
358 var target
: FlatString
364 init with_pos
(tgt
: FlatString, pos
: Int)
366 init(tgt
, tgt
.length
- 1, pos
)
# The iteration is valid as long as `curr_pos` does not exceed `max`.
redef fun is_ok
do
	var pos = curr_pos
	return pos <= max
end
# Character of `target` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target[pos]
end
# Steps one position forward by incrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos + 1
end
# Current position of the iterator, i.e. `curr_pos`.
redef fun index
do
	return curr_pos
end
379 private class FlatStringCharView
382 redef type SELFTYPE: FlatString
# Character at `index`, read through the `[]` operator of `target`.
redef fun [](index)
do
	return target[index]
end
# New `FlatStringCharIterator` on `target`, created with `start` as its position.
redef fun iterator_from(start)
do
	return new FlatStringCharIterator.with_pos(target, start)
end
# New `FlatStringCharReverseIterator` on `target`, created with `start` as its position.
redef fun reverse_iterator_from(start)
do
	return new FlatStringCharReverseIterator.with_pos(target, start)
end
392 private class FlatStringByteReverseIterator
393 super IndexedIterator[Byte]
395 var target
: FlatString
397 var target_items
: NativeString
401 init with_pos
(tgt
: FlatString, pos
: Int)
403 init(tgt
, tgt
.items
, pos
+ tgt
.first_byte
)
# The iteration is valid as long as `curr_pos` has not moved below `target.first_byte`.
redef fun is_ok
do
	var pos = curr_pos
	return pos >= target.first_byte
end
# Byte of `target_items` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target_items[pos]
end
# Steps one byte backwards by decrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos - 1
end
# Position relative to the string: `curr_pos` minus `target.first_byte`.
redef fun index
do
	var pos = curr_pos
	return pos - target.first_byte
end
416 private class FlatStringByteIterator
417 super IndexedIterator[Byte]
419 var target
: FlatString
421 var target_items
: NativeString
425 init with_pos
(tgt
: FlatString, pos
: Int)
427 init(tgt
, tgt
.items
, pos
+ tgt
.first_byte
)
# The iteration is valid as long as `curr_pos` has not moved past `target.last_byte`.
redef fun is_ok
do
	var pos = curr_pos
	return pos <= target.last_byte
end
# Byte of `target_items` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target_items[pos]
end
# Steps one byte forward by incrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos + 1
end
# Position relative to the string: `curr_pos` minus `target.first_byte`.
redef fun index
do
	var pos = curr_pos
	return pos - target.first_byte
end
440 private class FlatStringByteView
443 redef type SELFTYPE: FlatString
447 # Check that the index (+ first_byte) is not larger than last_byte
448 # In other terms, if the index is valid
450 var target
= self.target
451 assert (index
+ target
.first_byte
) <= target
.last_byte
452 return target
.items
[index
+ target
.first_byte
]
# New `FlatStringByteIterator` on `target`, created with `start` as its position.
redef fun iterator_from(start)
do
	return new FlatStringByteIterator.with_pos(target, start)
end
# New `FlatStringByteReverseIterator` on `target`, created with `start` as its position.
redef fun reverse_iterator_from(start)
do
	return new FlatStringByteReverseIterator.with_pos(target, start)
end
# Default factory: answers with a freshly created `FlatBuffer`.
redef new do
	return new FlatBuffer
end
# Factory taking `i`: answers with a `FlatBuffer` created by `with_capacity(i)`.
redef new with_cap(i) do
	return new FlatBuffer.with_capacity(i)
end
467 # Mutable strings of characters.
472 redef var chars
: Sequence[Char] = new FlatBufferCharView(self) is lazy
474 redef var bytes
: Sequence[Byte] = new FlatBufferByteView(self) is lazy
476 redef var bytelen
= 0
481 if max
== 0 then return 0
486 pos
+= its
.length_of_char_at
(pos
)
492 private var capacity
= 0
# Result of `items.fast_cstring`, asked to start at offset 0.
redef fun fast_cstring
do
	return items.fast_cstring(0)
end
# Substrings iterator: a new `FlatSubstringsIter` built on `self`.
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
498 # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
500 # This happens when an operation modifies the current `Buffer` and
# the Copy-On-Write flag `written` is set to true.
503 var nns
= new NativeString(capacity
)
504 items
.copy_to
(nns
, bytelen
, 0, 0)
509 # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
# Internal only, does not modify bytelen or length, this is the caller's responsibility
512 private fun rshift_bytes
(from
: Int, len
: Int) do
515 if bytelen
+ len
> capacity
then
516 capacity
= capacity
* 2 + 2
517 nit
= new NativeString(capacity
)
518 oit
.copy_to
(nit
, 0, 0, from
)
520 oit
.copy_to
(nit
, bytelen
- from
, from
, from
+ len
)
523 # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
# Internal only, does not modify bytelen or length, this is the caller's responsibility
526 private fun lshift_bytes
(from
: Int, len
: Int) do
527 items
.copy_to
(items
, bytelen
- from
, from
, from
- len
)
532 assert i
< length
and i
>= 0
533 return items
.char_at
(items
.char_to_byte_index
(i
))
536 redef fun []=(index
, item
)
538 assert index
>= 0 and index
<= length
539 if written
then reset
541 if index
== length
then
545 var ip
= items
.char_to_byte_index
(index
)
546 var c
= items
.char_at
(ip
)
547 var clen
= c
.u8char_len
548 var itemlen
= item
.u8char_len
549 var size_diff
= itemlen
- clen
550 if size_diff
> 0 then
551 rshift_bytes
(ip
+ clen
, size_diff
)
552 else if size_diff
< 0 then
553 lshift_bytes
(ip
+ clen
, -size_diff
)
556 items
.set_char_at
(ip
, item
)
561 if written
then reset
563 var clen
= c
.u8char_len
564 enlarge
(bytelen
+ clen
)
565 items
.set_char_at
(bytelen
, c
)
569 private fun add_byte
(b
: Byte) do
570 if written
then reset
574 # FIXME: Might trigger errors
580 if written
then reset
# A fresh buffer obtained from `new Buffer`.
redef fun empty
do
	return new Buffer
end
586 redef fun enlarge
(cap
)
589 if cap
<= c
then return
590 while c
<= cap
do c
= c
* 2 + 2
# The COW flag can be set to false here, since
592 # it does a copy of the current `Buffer`
594 var a
= new NativeString(c
+1)
595 if bytelen
> 0 then items
.copy_to
(a
, bytelen
, 0, 0)
603 if bytelen
== 0 then items
= new NativeString(1)
604 return new FlatString.with_infos
(items
, bytelen
, 0, bytelen
- 1)
610 var new_native
= new NativeString(bytelen
+ 1)
611 new_native
[bytelen
] = 0u8
612 if length
> 0 then items
.copy_to
(new_native
, bytelen
, 0, 0)
613 real_items
= new_native
616 return real_items
.as(not null)
619 # Create a new empty string.
# Low-level creation of a new buffer with given data.
624 # `items` will be used as is, without copy, to store the characters of the buffer.
# Aliasing issues are the responsibility of the caller.
627 # If `items` is shared, `written` should be set to true after the creation
628 # so that a modification will do a copy-on-write.
629 private init with_infos
(items
: NativeString, capacity
, bytelen
: Int)
632 self.capacity
= capacity
633 self.bytelen
= bytelen
636 # Create a new string copied from `s`.
639 items
= new NativeString(s
.bytelen
)
640 if s
isa FlatText then
643 for i
in substrings
do i
.as(FlatString).items
.copy_to
(items
, i
.bytelen
, 0, 0)
650 # Create a new empty string with a given capacity.
651 init with_capacity
(cap
: Int)
654 items
= new NativeString(cap
+ 1)
661 if s
.is_empty
then return
664 enlarge
(bytelen
+ sl
)
665 if s
isa FlatString then
666 s
.items
.copy_to
(items
, sl
, s
.first_byte
, bytelen
)
667 else if s
isa FlatBuffer then
668 s
.items
.copy_to
(items
, sl
, 0, bytelen
)
670 for i
in s
.substrings
do append i
676 # Copies the content of self in `dest`
677 fun copy
(start
: Int, len
: Int, dest
: Buffer, new_start
: Int)
679 var self_chars
= self.chars
680 var dest_chars
= dest
.chars
681 for i
in [0..len-1
] do
682 dest_chars
[new_start
+i
] = self_chars
[start
+i
]
686 redef fun substring
(from
, count
)
689 if from
< 0 then from
= 0
690 if (from
+ count
) > length
then count
= length
- from
692 var bytefrom
= items
.char_to_byte_index
(from
)
693 var byteto
= items
.char_to_byte_index
(count
+ from
- 1)
694 byteto
+= items
.char_at
(byteto
).u8char_len
- 1
695 var byte_length
= byteto
- bytefrom
+ 1
696 var r_items
= new NativeString(byte_length
)
697 items
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
698 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
)
707 var ns
= new FlatBuffer.with_capacity
(capacity
)
708 for i
in chars
.reverse_iterator
do ns
.add i
712 redef fun times
(repeats
)
714 var x
= new FlatString.with_infos
(items
, bytelen
, 0, bytelen
- 1)
715 for i
in [1 .. repeats
[ do
722 if written
then reset
723 for i
in [0 .. length
[ do self[i
] = self[i
].to_upper
728 if written
then reset
729 for i
in [0 .. length
[ do self[i
] = self[i
].to_lower
733 private class FlatBufferByteReverseIterator
734 super IndexedIterator[Byte]
736 var target
: FlatBuffer
738 var target_items
: NativeString
742 init with_pos
(tgt
: FlatBuffer, pos
: Int)
744 init(tgt
, tgt
.items
, pos
)
# Current position of the iterator, i.e. `curr_pos`.
redef fun index
do
	return curr_pos
end
# The iteration is valid as long as `curr_pos` is not negative.
redef fun is_ok
do
	var pos = curr_pos
	return pos >= 0
end
# Byte of `target_items` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target_items[pos]
end
# Steps one byte backwards by decrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos - 1
end
757 private class FlatBufferByteView
760 redef type SELFTYPE: FlatBuffer
# Byte stored at `index` in the `items` of `target`.
redef fun [](index)
do
	var bytes = target.items
	return bytes[index]
end
764 redef fun []=(index
, item
)
766 assert index
>= 0 and index
<= target
.bytelen
767 if index
== target
.bytelen
then
771 target
.items
[index
] = item
779 fun enlarge
(cap
: Int)
786 var s_length
= s
.length
787 if target
.capacity
< (target
.length
+ s_length
) then enlarge
(s_length
+ target
.length
)
788 var pos
= target
.length
789 var its
= target
.items
794 target
.length
+= s
.length
# New `FlatBufferByteIterator` on `target`, created with `pos` as its position.
redef fun iterator_from(pos)
do
	return new FlatBufferByteIterator.with_pos(target, pos)
end
# New `FlatBufferByteReverseIterator` on `target`, created with `pos` as its position.
redef fun reverse_iterator_from(pos)
do
	return new FlatBufferByteReverseIterator.with_pos(target, pos)
end
803 private class FlatBufferByteIterator
804 super IndexedIterator[Byte]
806 var target
: FlatBuffer
808 var target_items
: NativeString
812 init with_pos
(tgt
: FlatBuffer, pos
: Int)
814 init(tgt
, tgt
.items
, pos
)
# Current position of the iterator, i.e. `curr_pos`.
redef fun index
do
	return curr_pos
end
# The iteration is valid as long as `curr_pos` is strictly below `target.bytelen`.
redef fun is_ok
do
	var pos = curr_pos
	return pos < target.bytelen
end
# Byte of `target_items` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target_items[pos]
end
# Steps one byte forward by incrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos + 1
end
827 private class FlatBufferCharReverseIterator
828 super IndexedIterator[Char]
830 var target
: FlatBuffer
834 init with_pos
(tgt
: FlatBuffer, pos
: Int)
# Current position of the iterator, i.e. `curr_pos`.
redef fun index
do
	return curr_pos
end
# The iteration is valid as long as `curr_pos` is not negative.
redef fun is_ok
do
	var pos = curr_pos
	return pos >= 0
end
# Character of `target` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target[pos]
end
# Steps one position backwards by decrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos - 1
end
849 private class FlatBufferCharView
852 redef type SELFTYPE: FlatBuffer
# Character at `index`, read through the `[]` operator of `target`.
redef fun [](index)
do
	return target[index]
end
856 redef fun []=(index
, item
)
858 assert index
>= 0 and index
<= length
859 if index
== length
then
876 fun enlarge
(cap
: Int)
883 var s_length
= s
.length
884 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
.length
)
885 for i
in s
do target
.add i
# New `FlatBufferCharIterator` on `target`, created with `pos` as its position.
redef fun iterator_from(pos)
do
	return new FlatBufferCharIterator.with_pos(target, pos)
end
# New `FlatBufferCharReverseIterator` on `target`, created with `pos` as its position.
redef fun reverse_iterator_from(pos)
do
	return new FlatBufferCharReverseIterator.with_pos(target, pos)
end
894 private class FlatBufferCharIterator
895 super IndexedIterator[Char]
897 var target
: FlatBuffer
903 init with_pos
(tgt
: FlatBuffer, pos
: Int)
905 init(tgt
, tgt
.length
- 1, pos
)
# Current position of the iterator, i.e. `curr_pos`.
redef fun index
do
	return curr_pos
end
# The iteration is valid as long as `curr_pos` does not exceed `max`.
redef fun is_ok
do
	var pos = curr_pos
	return pos <= max
end
# Character of `target` found at `curr_pos`.
redef fun item
do
	var pos = curr_pos
	return target[pos]
end
# Steps one position forward by incrementing `curr_pos`.
redef fun next
do
	curr_pos = curr_pos + 1
end
918 redef class NativeString
921 return to_s_with_length
(cstring_length
)
924 # Returns `self` as a String of `length`.
925 redef fun to_s_with_length
(length
): FlatString
928 var str
= new FlatString.with_infos
(self, length
, 0, length
- 1)
932 # Returns `self` as a new String.
933 redef fun to_s_with_copy
: FlatString
935 var length
= cstring_length
936 var new_self
= new NativeString(length
+ 1)
937 copy_to
(new_self
, length
, 0, 0)
938 var str
= new FlatString.with_infos
(new_self
, length
, 0, length
- 1)
939 new_self
[length
] = 0u8
940 str
.real_items
= new_self
944 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
946 # Very unsafe, make sure to have room for this char prior to calling this function.
947 private fun set_char_at
(pos
: Int, c
: Char) do
948 var ln
= c
.u8char_len
949 native_set_char
(pos
, c
, ln
)
952 private fun native_set_char
(pos
: Int, c
: Char, ln
: Int) `{
953 char* dst = self + pos;
959 dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
960 dst[1] = 0x80 | (c & 0x3F);
963 dst[0] = 0xE0 | ((c & 0xF000) >> 12);
964 dst[1] = 0x80 | ((c & 0xFC0) >> 6);
965 dst[2] = 0x80 | (c & 0x3F);
968 dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
969 dst[1] = 0x80 | ((c & 0x3F000) >> 12);
970 dst[2] = 0x80 | ((c & 0xFC0) >> 6);
971 dst[3] = 0x80 | (c & 0x3F);
978 redef fun to_base
(base
, signed
)
980 var l
= digit_count
(base
)
981 var s
= new FlatBuffer.from
(" " * l
)
982 fill_buffer
(s
, base
, signed
)
986 # return displayable int in base 10 and signed
988 # assert 1.to_s == "1"
989 # assert (-123).to_s == "-123"
991 # Fast case for common numbers
992 if self == 0 then return "0"
993 if self == 1 then return "1"
995 var nslen
= int_to_s_len
996 var ns
= new NativeString(nslen
+ 1)
998 native_int_to_s
(ns
, nslen
+ 1)
999 return new FlatString.full
(ns
, nslen
, 0, nslen
- 1, nslen
)
1003 redef class Array[E
]
1005 # Fast implementation
1006 redef fun plain_to_s
1009 if l
== 0 then return ""
1010 if l
== 1 then if self[0] == null then return "" else return self[0].to_s
1012 var na
= new NativeArray[String](l
)
1018 if itsi
== null then
1028 var ns
= new NativeString(sl
+ 1)
1034 if tmp
isa FlatString then
1035 var tpl
= tmp
.bytelen
1036 tmp
.items
.copy_to
(ns
, tpl
, tmp
.first_byte
, off
)
1039 for j
in tmp
.substrings
do
1040 var s
= j
.as(FlatString)
1041 var slen
= s
.bytelen
1042 s
.items
.copy_to
(ns
, slen
, s
.first_byte
, off
)
1048 return ns
.to_s_with_length
(sl
)
1052 redef class NativeArray[E
]
1053 redef fun native_to_s
do
1054 assert self isa NativeArray[String]
1065 var ns
= new NativeString(sl
+ 1)
1071 if tmp
isa FlatString then
1072 var tpl
= tmp
.bytelen
1073 tmp
.items
.copy_to
(ns
, tpl
, tmp
.first_byte
, off
)
1076 for j
in tmp
.substrings
do
1077 var s
= j
.as(FlatString)
1078 var slen
= s
.bytelen
1079 s
.items
.copy_to
(ns
, slen
, s
.first_byte
, off
)
1085 return ns
.to_s_with_length
(sl
)
1089 redef class Map[K
,V
]
1090 redef fun join
(sep
, couple_sep
)
1092 if is_empty
then return ""
1094 var s
= new Buffer # Result
1100 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1102 # Concat other items
1108 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")