5bc76fc9ca20a71d17e526c430768ef7c78584c1
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text
#
# It yields `tgt` exactly once: `next` drops the reference, which
# ends the iteration.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once consumed
	var tgt: nullable FlatText

	redef fun is_ok do return tgt != null

	redef fun next do tgt = null

	redef fun item
	do
		var res = tgt
		assert res != null
		return res
	end
end
36
37 redef class FlatText
38
# Offset in `_items` of the first byte of the text
private fun first_byte: Int
do
	return 0
end

# Offset in `_items` of the last byte of the text
private fun last_byte: Int
do
	return _bytelen - 1
end

# Character index reached by the latest lookup (see `char_to_byte_index`)
private var position: Int = 0

# Byte offset in the underlying NativeString that matches `position`
private var bytepos: Int = 0
48
# Byte offset in `_items` of the character at position `index`
#
# The search starts from whichever known anchor is the closest to
# `index`: the first character, the last character, or the position
# remembered from the previous lookup. The result is then stored in
# `_position` / `_bytepos` for the next call.
private fun char_to_byte_index(index: Int): Int
do
	var char_count = length
	assert index >= 0
	assert index < char_count

	var cached_char = _position
	var native = _items

	# Distance, in characters, between `index` and each anchor
	var from_start = index
	var from_end = (char_count - 1) - index
	var from_cache = (cached_char - index).abs

	var closest = from_start
	if from_cache < closest then closest = from_cache
	if from_end < closest then closest = from_end

	var anchor_byte: Int
	var anchor_char: Int
	if closest == from_start then
		anchor_byte = first_byte
		anchor_char = 0
	else if closest == from_cache then
		anchor_byte = _bytepos
		anchor_char = cached_char
	else
		anchor_byte = native.find_beginning_of_char_at(last_byte)
		anchor_char = char_count - 1
	end

	var byte_index = native.char_to_byte_index_cached(index, anchor_char, anchor_byte)

	_position = index
	_bytepos = byte_index
	return byte_index
end
87
# Number of extra bytes needed to write `self` as a C string literal
#
# Tab, newline, `"`, `'` and `\` each cost one more byte; any other
# byte below 32 costs three more. `escape_to_c` relies on a result
# of 0 meaning that no escaping is needed at all.
protected fun chars_to_escape_to_c: Int
do
	var native = _items
	var stop = last_byte
	var extra = 0
	var i = first_byte
	while i <= stop do
		var b = native[i]
		if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			extra += 1
		else if b < 32u8 then
			extra += 3
		end
		i += 1
	end
	return extra
end
117
redef fun escape_to_c
do
	var extra = chars_to_escape_to_c
	if extra == 0 then return self.to_s

	var src = _items
	var stop = last_byte
	var out_len = _bytelen + extra
	var out = new NativeString(out_len)
	var w = 0
	var r = first_byte
	while r <= stop do
		var b = src[r]
		r += 1

		# Second byte of a two-byte escape, or 0 when `b` has none:
		#
		# * 0x09 => \t
		# * 0x0A => \n
		# * 0x22 => \"
		# * 0x27 => \'
		# * 0x5C => \\
		var short_esc = 0u8
		if b == 0x09u8 then
			short_esc = 0x74u8
		else if b == 0x0Au8 then
			short_esc = 0x6Eu8
		else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			short_esc = b
		end

		if short_esc != 0u8 then
			out[w] = 0x5Cu8
			out[w + 1] = short_esc
			w += 2
		else if b < 32u8 then
			# Remaining control bytes are written as `\0` followed by two octal digits
			out[w] = 0x5Cu8
			out[w + 1] = 0x30u8
			out[w + 2] = ((b & 0x38u8) >> 3) + 0x30u8
			out[w + 3] = (b & 0x07u8) + 0x30u8
			w += 4
		else
			out[w] = b
			w += 1
		end
	end
	return out.to_s_with_length(out_len)
end
179
redef fun [](index)
do
	var byte_index = char_to_byte_index(index)
	return _items.char_at(byte_index)
end
181 end
182
183 # Immutable strings of characters.
184 class FlatString
185 super FlatText
186 super String
187
# Offset in `_items` of the first byte of `self` (inclusive)
redef var first_byte is noinit

# Offset in `_items` of the last byte of `self` (inclusive)
redef var last_byte is noinit

redef var chars is lazy do return new FlatStringCharView(self)

redef var bytes is lazy do return new FlatStringByteView(self)
197
redef var length is lazy do
	if _bytelen != 0 then return _items.utf8_length(_first_byte, _last_byte)
	return 0
end

redef var to_cstring is lazy do
	var byte_count = _bytelen
	# One more byte than the content, for the terminating zero
	var copy = new NativeString(byte_count + 1)
	copy[byte_count] = 0u8
	_items.copy_to(copy, byte_count, _first_byte, 0)
	return copy
end
210
redef fun reversed
do
	var char_count = length
	var buf = new FlatBuffer.with_capacity(_bytelen + 1)
	var i = char_count - 1
	while i >= 0 do
		buf.add self[i]
		i -= 1
	end
	var res = buf.to_s.as(FlatString)
	# Same characters, so the character count is already known
	res.length = char_count
	return res
end

redef fun fast_cstring
do
	return _items.fast_cstring(_first_byte)
end
223
redef fun substring(from, count)
do
	assert count >= 0

	# A negative start is moved to 0 and shortens the requested span
	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end

	var char_count = length
	if (count + from) > char_count then count = char_count - from
	if count <= 0 then return ""

	var native = _items
	var first = char_to_byte_index(from)
	var last = char_to_byte_index(from + count - 1)
	# Move `last` from the first to the final byte of the last character
	last += native.length_of_char_at(last) - 1

	# The result shares `native` with `self`, no byte is copied
	return new FlatString.full(native, last - first + 1, first, last, count)
end

redef fun empty
do
	return "".as(FlatString)
end
248
redef fun to_upper
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	var char_count = length
	var view = chars
	for i in [0 .. char_count[ do res.add(view[i].to_upper)
	return res.to_s
end

redef fun to_lower
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	var char_count = length
	var view = chars
	for i in [0 .. char_count[ do res.add(view[i].to_lower)
	return res.to_s
end
278
redef fun output
do
	# Write the characters one by one, in order
	var it = chars.iterator
	while it.is_ok do
		it.item.output
		it.next
	end
	it.finish
end
283
284 ##################################################
285 # String Specific Methods #
286 ##################################################
287
# Low-level constructor for a string whose character count is not known yet
#
# `items` is used directly, without copy: the caller is responsible
# for any aliasing issue. `from` and `to` are the inclusive byte
# bounds of the string inside `items`.
private init with_infos(items: NativeString, bytelen, from, to: Int)
do
	_items = items
	_bytelen = bytelen
	_first_byte = from
	_last_byte = to
	# Start the lookup cache on the first byte of the string
	_bytepos = from
end

# Low-level constructor for a string whose character count is known
#
# Same as `with_infos`, with `length` (in characters) stored right
# away instead of being computed on first use. `items` is used
# directly, without copy: the caller is responsible for any aliasing issue.
private init full(items: NativeString, bytelen, from, to, length: Int)
do
	_items = items
	_bytelen = bytelen
	_first_byte = from
	_last_byte = to
	# Start the lookup cache on the first byte of the string
	_bytepos = from
	self.length = length
end
314
redef fun ==(other)
do
	if not other isa FlatString then return super
	if self.object_id == other.object_id then return true

	# Strings of different byte lengths cannot be equal
	var byte_count = _bytelen
	if other._bytelen != byte_count then return false

	var mine = self._items
	var theirs = other._items
	var my_start = _first_byte
	var their_start = other._first_byte

	for off in [0 .. byte_count[ do
		if mine[my_start + off] != theirs[their_start + off] then return false
	end
	return true
end
341
redef fun <(other)
do
	if not other isa FlatString then return super
	if self.object_id == other.object_id then return false

	var my_len = self._bytelen
	var its_len = other._bytelen

	# Only the bytes present in both strings can be compared
	var common = my_len
	if its_len < my_len then common = its_len

	var my_bytes = self.bytes
	var its_bytes = other.bytes
	var i = 0
	while i < common do
		var a = my_bytes[i]
		var b = its_bytes[i]
		if a != b then return a < b
		i += 1
	end

	# Same prefix: the shorter string comes first
	return my_len < its_len
end
368
# Concatenation of `self` and the string form of `o`, as a new string
#
# The bytes of both operands are copied into a fresh, zero-terminated
# NativeString. A flat operand is copied in one go; any other text is
# copied substring by substring.
redef fun +(o)
do
	var s = o.to_s
	var slen = s.bytelen
	var mlen = _bytelen
	var nlen = mlen + slen
	var ns = new NativeString(nlen + 1)
	# The extra byte is the terminator, as in `*` and `Int::to_s`
	ns[nlen] = 0u8
	_items.copy_to(ns, mlen, _first_byte, 0)
	if s isa FlatText then
		s._items.copy_to(ns, slen, s.first_byte, mlen)
	else
		# Non-flat text: append each flat piece after the previous one
		var off = mlen
		for sub in s.substrings do
			var sublen = sub._bytelen
			sub._items.copy_to(ns, sublen, sub.first_byte, off)
			off += sublen
		end
	end
	return new FlatString.full(ns, nlen, 0, nlen - 1, length + s.length)
end
387
redef fun *(i)
do
	var unit_bytes = _bytelen
	var total_bytes = unit_bytes * i
	var total_chars = length * i
	var src = _items
	var start = _first_byte

	# One more byte than the content, for the terminating zero
	var res = new NativeString(total_bytes + 1)
	res[total_bytes] = 0u8

	# Lay the `i` copies of `self` side by side
	for n in [0 .. i[ do
		src.copy_to(res, unit_bytes, start, n * unit_bytes)
	end
	return new FlatString.full(res, total_bytes, 0, total_bytes - 1, total_chars)
end
405
406
redef fun hash
do
	# Computed once, then served from `hash_cache`
	var cached = hash_cache
	if cached != null then return cached

	# djb2 hash algorithm, applied to the bytes of the string
	var h = 5381
	var native = _items
	for i in [_first_byte .. _last_byte] do
		h = (h << 5) + h + native[i].to_i
	end

	hash_cache = h
	return h
end

redef fun substrings
do
	return new FlatSubstringsIter(self)
end
429 end
430
# Iterator over the characters of a `FlatString`, going down to index 0
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String being iterated
	var target: FlatString

	# Character index of the current item
	var curr_pos: Int

	# Iterate backwards on `tgt`, starting at character `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		init(tgt, pos)
	end

	redef fun index do return curr_pos

	redef fun item do return target[curr_pos]

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
452
# Iterator over the characters of a `FlatString`, in increasing index order
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String being iterated
	var target: FlatString

	# Index of the last character to visit
	var max: Int

	# Character index of the current item
	var curr_pos: Int

	# Iterate on `tgt` from character `pos` up to its last character
	init with_pos(tgt: FlatString, pos: Int)
	do
		init(tgt, tgt.length - 1, pos)
	end

	redef fun index do return curr_pos

	redef fun item do return target[curr_pos]

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun next
	do
		curr_pos += 1
	end
end
476
# View of a `FlatString` as a sequence of characters
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator.with_pos(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator.with_pos(target, start)
	end
end
489
# Iterator over the bytes of a `FlatString`, going down to its first byte
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String being iterated
	var target: FlatString

	# Byte storage of `target`
	var target_items: NativeString

	# Offset of the current byte inside `target_items`
	var curr_pos: Int

	# Iterate backwards on `tgt`, starting at its byte number `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		init(tgt, tgt._items, pos + tgt._first_byte)
	end

	# Position of the current byte relative to the start of `target`
	redef fun index do return curr_pos - target._first_byte

	redef fun item do return target_items[curr_pos]

	redef fun is_ok
	do
		return curr_pos >= target._first_byte
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
513
# Iterator over the bytes of a `FlatString`, up to its last byte
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String being iterated
	var target: FlatString

	# Byte storage of `target`
	var target_items: NativeString

	# Offset of the current byte inside `target_items`
	var curr_pos: Int

	# Iterate on `tgt`, starting at its byte number `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		init(tgt, tgt._items, pos + tgt._first_byte)
	end

	# Position of the current byte relative to the start of `target`
	redef fun index do return curr_pos - target._first_byte

	redef fun item do return target_items[curr_pos]

	redef fun is_ok
	do
		return curr_pos <= target._last_byte
	end

	redef fun next
	do
		curr_pos += 1
	end
end
537
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		var tgt = target
		# `index` is relative to the string, `byte_index` to its storage
		var byte_index = index + tgt._first_byte
		assert index >= 0
		assert byte_index <= tgt._last_byte
		return tgt._items[byte_index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator.with_pos(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator.with_pos(target, start)
	end
end
559
redef class Buffer
	# A plain `new Buffer` builds a `FlatBuffer`
	redef new do
		return new FlatBuffer
	end

	# `new Buffer.with_cap(i)` builds a `FlatBuffer` of capacity `i`
	redef new with_cap(i) do
		return new FlatBuffer.with_capacity(i)
	end
end
565
566 # Mutable strings of characters.
567 class FlatBuffer
568 super FlatText
569 super Buffer
570
redef var chars: Sequence[Char] is lazy do return new FlatBufferCharView(self)

redef var bytes is lazy do return new FlatBufferByteView(self)

redef var length = 0

# NOTE(review): not referenced anywhere in the visible part of this module
private var char_cache: Int = -1

# NOTE(review): not referenced anywhere in the visible part of this module
private var byte_cache: Int = -1

# Number of bytes `_items` is considered able to hold
private var capacity = 0

# Zero-terminated copy of the content, rebuilt by `to_cstring` when `is_dirty` is set
private var real_items: NativeString is noinit
585
redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

redef fun substrings
do
	return new FlatSubstringsIter(self)
end
589
# Gives the buffer its own copy of the bytes
#
# Called before a modification when `written` is set: the content is
# copied into a new NativeString of `capacity` bytes and `written`
# is cleared.
private fun reset
do
	var fresh = new NativeString(capacity)
	var used = _bytelen
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	written = false
end
600
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# The storage is grown through `enlarge` when `len` more bytes do not
# fit, so `_items` may be replaced by this call: callers must read
# `_items` again afterwards.
#
# Internal only, does not modify `_bytelen` or `length`, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int)
do
	var bt = _bytelen
	# No-op when the capacity is sufficient; otherwise reallocates and keeps the `bt` bytes in use
	enlarge(bt + len)
	var it = _items
	# Source and destination overlap inside the same storage
	it.copy_to(it, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify `_bytelen` or `length`, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int)
do
	var it = _items
	it.copy_to(it, _bytelen - from, from, from - len)
end

# Replaces the character at `index` with `item`
#
# `index == length` appends `item`. When the UTF-8 encodings of the
# old and new characters differ in size, the bytes after the old
# character are shifted to make or remove room.
redef fun []=(index, item)
do
	assert index >= 0 and index <= length
	if written then reset
	is_dirty = true
	if index == length then
		add item
		return
	end
	var ip = _items.char_to_byte_index(index)
	var clen = _items.char_at(ip).u8char_len
	var size_diff = item.u8char_len - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_bytelen += size_diff
	# Only a cached character located after `index` has moved
	if _position > index then _bytepos += size_diff
	# `_items` is read again: `rshift_bytes` may have reallocated it
	_items.set_char_at(ip, item)
end
648
redef fun add(c)
do
	if written then reset
	is_dirty = true
	# Make room for the UTF-8 encoding of `c`, then write it after the current content
	var width = c.u8char_len
	var at = _bytelen
	enlarge(at + width)
	_items.set_char_at(at, c)
	_bytelen = at + width
	length += 1
end
660
redef fun clear
do
	is_dirty = true
	if written then reset
	# The storage is kept, only the counters go back to zero
	_bytelen = 0
	length = 0
end

redef fun empty
do
	return new Buffer
end
669
redef fun enlarge(cap)
do
	var new_cap = capacity
	if cap <= new_cap then return
	while new_cap <= cap do new_cap = new_cap * 2 + 2

	# The content is copied below, so the buffer
	# no longer shares its storage: the COW flag can be cleared
	written = false

	var grown = new NativeString(new_cap + 1)
	var used = _bytelen
	if used > 0 then _items.copy_to(grown, used, 0, 0)
	_items = grown
	capacity = new_cap
end
687
redef fun to_s
do
	# The returned string uses `_items` directly: flag the storage as
	# shared so that it is copied before the next write
	written = true
	var used = _bytelen
	if used == 0 then _items = new NativeString(1)
	return new FlatString.full(_items, used, 0, used - 1, length)
end
695
redef fun to_cstring
do
	# Reuse the previous copy as long as the buffer was not modified
	if not is_dirty then return real_items

	var used = _bytelen
	var fresh = new NativeString(used + 1)
	fresh[used] = 0u8
	if length > 0 then _items.copy_to(fresh, used, 0, 0)
	real_items = fresh
	is_dirty = false
	return fresh
end
708
# Create a new empty buffer.
init do end

# Low-level constructor wrapping existing data
#
# `items` is used directly, without copy, as the storage of the
# buffer: the caller is responsible for any aliasing issue.
#
# If `items` is shared, `written` should be set to true after the
# creation so that the first modification copies it.
private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
	_items = items
	_bytelen = bytelen
	self.capacity = capacity
	self.length = length
end
726
# Create a new buffer holding a copy of `s`.
#
# The bytes of `s` are copied substring by substring into a storage
# owned by the new buffer. Each piece is read from its own
# `first_byte`, so a string that is a slice of a larger NativeString
# is copied correctly. A flat text is its own single substring.
init from(s: Text)
do
	var blen = s.bytelen
	var nits = new NativeString(blen + 1)
	var off = 0
	for sub in s.substrings do
		var sublen = sub._bytelen
		sub._items.copy_to(nits, sublen, sub.first_byte, off)
		off += sublen
	end
	_items = nits
	_bytelen = blen
	length = s.length
	_capacity = blen
	# The storage is not shared with `s`: no copy is needed before writing
	written = false
end
741
# Create a new empty buffer able to hold `cap` bytes without reallocation.
init with_capacity(cap: Int)
do
	assert cap >= 0
	_bytelen = 0
	capacity = cap
	# One byte more than the capacity is allocated
	_items = new NativeString(cap + 1)
end
750
redef fun append(s)
do
	if s.is_empty then return
	is_dirty = true
	var added = s.bytelen
	var total = _bytelen + added
	enlarge(total)

	# Non-flat text: append its flat pieces one after the other
	if not s isa FlatText then
		for sub in s.substrings do append sub
		return
	end

	s._items.copy_to(_items, added, s.first_byte, _bytelen)
	_bytelen = total
	length += s.length
end
767
# Copies `len` characters of `self`, starting at `start`, into `dest` at `new_start`
#
# The characters are written one by one through `dest.chars`.
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src = self.chars
	var dst = dest.chars
	var off = 0
	while off < len do
		dst[new_start + off] = src[start + off]
		off += 1
	end
end
777
# Copy of `count` characters of `self`, starting at `from`, as a new buffer
#
# As for strings, a negative `from` is replaced by 0 and shortens
# `count` accordingly; a span running past the end is truncated; an
# empty span gives an empty buffer.
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end
	var ln = length
	if (from + count) > ln then count = ln - from
	# Also covers `from >= length`, which makes `count` negative or null
	if count <= 0 then return new Buffer

	var its = _items
	var bytefrom = its.char_to_byte_index(from)
	var byteto = its.char_to_byte_index(from + count - 1)
	# Move `byteto` from the first to the final byte of the last character
	byteto += its.char_at(byteto).u8char_len - 1
	var byte_length = byteto - bytefrom + 1
	var r_items = new NativeString(byte_length)
	its.copy_to(r_items, byte_length, bytefrom, 0)
	return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
end
796
# Reverses the order of the characters, in place
#
# The reversed content is built in a new storage, which then
# replaces `_items`.
redef fun reverse
do
	var ns = new FlatBuffer.with_capacity(capacity)
	for c in chars.reverse_iterator do ns.add c
	_items = ns._items
	# The new storage belongs to `self` only
	written = false
	# The content changed: `to_cstring` must rebuild its copy
	is_dirty = true
	# Multi-byte characters have moved: restart the lookup cache from the beginning
	_position = 0
	_bytepos = 0
end
804
redef fun times(repeats)
do
	# Snapshot of the current content, appended `repeats - 1` times
	var used = _bytelen
	var snapshot = new FlatString.full(_items, used, 0, used - 1, length)
	var done = 1
	while done < repeats do
		append(snapshot)
		done += 1
	end
end
813
redef fun upper
do
	if written then reset
	for i in [0 .. length[ do
		var c = self[i]
		self[i] = c.to_upper
	end
end

redef fun lower
do
	if written then reset
	for i in [0 .. length[ do
		var c = self[i]
		self[i] = c.to_lower
	end
end
825 end
826
# Iterator over the bytes of a `FlatBuffer`, going down to byte 0
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer being iterated
	var target: FlatBuffer

	# Byte storage of `target`, as it was when the iterator was created
	var target_items: NativeString

	# Offset of the current byte
	var curr_pos: Int

	# Iterate backwards on `tgt`, starting at byte `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, tgt._items, pos)
	end

	redef fun index do return curr_pos

	redef fun item do return target_items[curr_pos]

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
850
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	# Byte at offset `index` of the storage (no bound check is done here)
	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator.with_pos(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator.with_pos(target, pos)
	end
end
863
# Iterator over the bytes of a `FlatBuffer`, up to its last used byte
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer being iterated
	var target: FlatBuffer

	# Byte storage of `target`, as it was when the iterator was created
	var target_items: NativeString

	# Offset of the current byte
	var curr_pos: Int

	# Iterate on `tgt`, starting at byte `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, tgt._items, pos)
	end

	redef fun index do return curr_pos

	redef fun item do return target_items[curr_pos]

	redef fun is_ok
	do
		return curr_pos < target._bytelen
	end

	redef fun next
	do
		curr_pos += 1
	end
end
887
# Iterator over the characters of a `FlatBuffer`, going down to index 0
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer being iterated
	var target: FlatBuffer

	# Character index of the current item
	var curr_pos: Int

	# Iterate backwards on `tgt`, starting at character `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, pos)
	end

	redef fun index do return curr_pos

	redef fun item do return target[curr_pos]

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
909
# View of a `FlatBuffer` as a mutable sequence of characters
#
# Every modification is forwarded to the underlying buffer.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index) do return target[index]

	# Replaces the character at `index`; `index == length` appends `item`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index == length then
			add(item)
			return
		end
		target[index] = item
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Makes sure the underlying buffer can hold at least `cap` bytes
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	redef fun append(s)
	do
		# Pre-size the storage once, counting one byte per incoming
		# character on top of the bytes already in use. Wider
		# characters are handled by `add`, which grows the buffer as needed.
		var needed = target._bytelen + s.length
		if target.capacity < needed then enlarge(needed)
		for c in s do target.add c
	end

	redef fun iterator_from(pos) do return new FlatBufferCharIterator.with_pos(target, pos)

	redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator.with_pos(target, pos)
end
954
# Iterator over the characters of a `FlatBuffer`, in increasing index order
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer being iterated
	var target: FlatBuffer

	# Index of the last character to visit, computed when the iterator is created
	var max: Int

	# Character index of the current item
	var curr_pos: Int

	# Iterate on `tgt` from character `pos` up to its current last character
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, tgt.length - 1, pos)
	end

	redef fun index do return curr_pos

	redef fun item do return target[curr_pos]

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun next
	do
		curr_pos += 1
	end
end
978
979 redef class NativeString
# Returns `self` as a String, up to its terminating zero.
redef fun to_s
do
	var byte_count = cstring_length
	return to_s_with_length(byte_count)
end

# Returns the `length` first bytes of `self` as a String.
#
# The bytes go through `clean_utf8` first.
redef fun to_s_with_length(length): FlatString
do
	assert length >= 0
	var res = clean_utf8(length)
	return res
end
991
# Wraps `self` as a String of `bytelen` bytes and `unilen` characters, without copy or check
redef fun to_s_full(bytelen, unilen)
do
	var last = bytelen - 1
	return new FlatString.full(self, bytelen, 0, last, unilen)
end
995
# Returns the content of `self`, up to its terminating zero, as a String with its own storage.
redef fun to_s_with_copy: FlatString
do
	var byte_count = cstring_length
	var cleaned = clean_utf8(byte_count)
	# When `clean_utf8` had to repair something, its result already uses a new NativeString
	if cleaned.items != self then return cleaned

	var copy = new NativeString(byte_count + 1)
	copy_to(copy, byte_count, 0, 0)
	copy[byte_count] = 0u8
	var str = new FlatString.with_infos(copy, byte_count, 0, byte_count - 1)
	# `copy` is zero-terminated: it can also serve as the C string of `str`
	str.to_cstring = copy
	return str
end
1009
# Returns the `len` first bytes of `self` as a String, repairing invalid UTF-8
#
# A byte is replaced by U+FFFD (bytes `EF BF BD`) when its leading
# bits do not match the sequence length reported by
# `length_of_char_at`, or when the decoded code point is outside of
# the range accepted for that length. When nothing has to be
# replaced, the returned string uses `self` directly, without copy.
fun clean_utf8(len: Int): FlatString
do
	# Offsets of the bytes to replace, allocated on the first invalid byte only
	var bad: nullable Array[Int] = null
	var out_len = len
	var char_count = 0
	var cur = 0
	while cur < len do
		var lead = self[cur]
		var seq_len = length_of_char_at(cur)

		# 1. The leading byte must carry the marker bits of a `seq_len`-byte sequence
		var valid: Bool
		if seq_len == 1 then
			valid = lead & 0x80u8 == 0u8
		else if seq_len == 2 then
			valid = lead & 0xE0u8 == 0xC0u8
		else if seq_len == 3 then
			valid = lead & 0xF0u8 == 0xE0u8
		else
			valid = lead & 0xF8u8 == 0xF0u8
		end

		# 2. The decoded code point must belong to the range of that sequence length
		var step = 1
		if valid then
			var c = char_at(cur)
			var cp = c.code_point
			if seq_len == 1 then
				valid = cp >= 0 and cp <= 0x7F
			else if seq_len == 2 then
				valid = cp >= 0x80 and cp <= 0x7FF
			else if seq_len == 3 then
				valid = cp >= 0x800 and cp <= 0xFFFF
				valid = valid and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
			else
				valid = cp >= 0x10000 and cp <= 0x10FFFF
			end
			if valid then step = c.u8char_len
		end

		if not valid then
			# One invalid byte becomes the three bytes of U+FFFD
			if bad == null then bad = new Array[Int]
			bad.add cur
			out_len += 2
		end
		cur += step
		char_count += 1
	end

	var res = self
	if out_len != len then
		res = new NativeString(out_len)
		var src_off = 0
		var dst_off = 0
		for bad_pos in bad.as(not null) do
			# Valid bytes located before the invalid one
			var chunk = bad_pos - src_off
			copy_to(res, chunk, src_off, dst_off)
			dst_off += chunk
			res[dst_off] = 0xEFu8
			res[dst_off + 1] = 0xBFu8
			res[dst_off + 2] = 0xBDu8
			dst_off += 3
			src_off = bad_pos + 1
		end
		# Valid bytes located after the last invalid one
		copy_to(res, len - src_off, src_off, dst_off)
	end
	return new FlatString.full(res, out_len, 0, out_len - 1, char_count)
end
1084
# Writes the UTF-8 encoding of `c` into the bytes starting at position `pos`
#
# Very unsafe: no bound is checked, the caller must make sure that
# there is enough room for this char before calling this function.
private fun set_char_at(pos: Int, c: Char)
do
	native_set_char(pos, c, c.u8char_len)
end

# Writes `c` at `pos` as a UTF-8 sequence of `ln` bytes (`ln` in 1..4, nothing is written otherwise)
private fun native_set_char(pos: Int, c: Char, ln: Int) `{
	char* dst = self + pos;
	if (ln == 1) {
		/* 0xxxxxxx */
		dst[0] = c;
	} else if (ln == 2) {
		/* 110xxxxx 10xxxxxx */
		dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
		dst[1] = 0x80 | (c & 0x3F);
	} else if (ln == 3) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		dst[0] = 0xE0 | ((c & 0xF000) >> 12);
		dst[1] = 0x80 | ((c & 0xFC0) >> 6);
		dst[2] = 0x80 | (c & 0x3F);
	} else if (ln == 4) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
		dst[1] = 0x80 | ((c & 0x3F000) >> 12);
		dst[2] = 0x80 | ((c & 0xFC0) >> 6);
		dst[3] = 0x80 | (c & 0x3F);
	}
`}
1116 end
1117
redef class Int
	redef fun to_base(base, signed)
	do
		# Start from a buffer of blanks as wide as the result, then let `fill_buffer` write into it
		var width = digit_count(base)
		var buf = new FlatBuffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# Displayable form of `self`, signed and in base 10
	#
	#     assert 1.to_s            == "1"
	#     assert (-123).to_s       == "-123"
	redef fun to_s
	do
		# Shortcuts for the most common values
		if self == 0 then return "0"
		if self == 1 then return "1"

		var byte_count = int_to_s_len
		var native = new NativeString(byte_count + 1)
		native[byte_count] = 0u8
		native_int_to_s(native, byte_count + 1)
		# `byte_count` is also passed as the character count of the new string
		return new FlatString.full(native, byte_count, 0, byte_count - 1, byte_count)
	end
end
1143
redef class Array[E]

	# Fast implementation: the string forms of the non-null elements
	# are measured first, then copied side by side into a single NativeString
	redef fun plain_to_s
	do
		var l = length
		if l == 0 then return ""
		if l == 1 then
			var only = self[0]
			if only == null then return ""
			return only.to_s
		end

		# First pass: collect the strings to join and sum up their byte lengths
		var its = _items
		var parts = new NativeArray[String](l)
		var total = 0
		var kept = 0
		for i in [0 .. l[ do
			var e = its[i]
			if e == null then continue
			var str = e.to_s
			total += str.bytelen
			parts[kept] = str
			kept += 1
		end

		# Second pass: copy the bytes of each string after the previous ones
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. kept[ do
			var str = parts[i]
			if str isa FlatString then
				var blen = str._bytelen
				str._items.copy_to(ns, blen, str._first_byte, off)
				off += blen
			else
				for sub in str.substrings do
					var flat = sub.as(FlatString)
					var blen = flat._bytelen
					flat._items.copy_to(ns, blen, flat._first_byte, off)
					off += blen
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0, total - 1)
	end
end
1192
redef class NativeArray[E]
	# Concatenation of all the strings of `self`, copied into a single NativeString
	redef fun native_to_s
	do
		assert self isa NativeArray[String]
		var count = length
		var parts = self

		# First pass: total size of the result, in bytes
		var total = 0
		for i in [0 .. count[ do total += parts[i].bytelen

		# Second pass: copy the bytes of each string after the previous ones
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. count[ do
			var str = parts[i]
			if str isa FlatString then
				var blen = str._bytelen
				str._items.copy_to(ns, blen, str._first_byte, off)
				off += blen
			else
				for sub in str.substrings do
					var flat = sub.as(FlatString)
					var blen = flat._bytelen
					flat._items.copy_to(ns, blen, flat._first_byte, off)
					off += blen
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0, total - 1)
	end
end
1229
1230 redef class Map[K,V]
redef fun join(sep, couple_sep)
do
	if is_empty then return ""

	var res = new Buffer
	var first = true
	for k, e in self do
		# `sep` goes between two couples, never before the first one
		if first then
			first = false
		else
			res.append(sep)
		end
		res.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
	end
	return res.to_s
end
1254 end