lib/core: Make FlatText public
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text.
#
# It yields `tgt` exactly once: `next` drops the reference, after which
# `is_ok` is false.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# The text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok
	do
		return tgt != null
	end

	redef fun item
	do
		var current = tgt
		# Same contract as before: asking for an item past the end is an error
		assert current != null
		return current
	end

	redef fun next
	do
		tgt = null
	end
end
36
redef class FlatText

	# First byte of the NativeString
	protected fun first_byte: Int
	do
		return 0
	end

	# Last byte of the NativeString
	protected fun last_byte: Int
	do
		return _bytelen - 1
	end

	# Cache of the latest position (char) explored in the string
	var position: Int = 0

	# Cached position (bytes) in the NativeString underlying the String
	var bytepos: Int = 0

	# Index of the character `index` in `_items`
	#
	# Three known (char, byte) anchors are considered: the start of the text,
	# its last character, and the last position looked up (`_position` /
	# `_bytepos`). The anchor closest to `index` is handed to
	# `char_to_byte_index_cached`, and the result is stored back as the new
	# cached position.
	fun char_to_byte_index(index: Int): Int
	do
		var ln = length
		assert index >= 0
		assert index < ln

		var cached_char = _position
		var from_start = index
		var from_end = (ln - 1) - index
		var from_cache = (cached_char - index).abs
		var native = _items

		# Smallest of the three distances
		var nearest = from_start
		if from_cache < nearest then nearest = from_cache
		if from_end < nearest then nearest = from_end

		var anchor_byte: Int
		var anchor_char: Int
		if nearest == from_start then
			anchor_byte = first_byte
			anchor_char = 0
		else if nearest == from_cache then
			anchor_byte = _bytepos
			anchor_char = cached_char
		else
			# `last_byte` may sit in the middle of a multi-byte char
			anchor_byte = native.find_beginning_of_char_at(last_byte)
			anchor_char = ln - 1
		end

		var res = native.char_to_byte_index_cached(index, anchor_char, anchor_byte)

		_position = index
		_bytepos = res

		return res
	end

	# By escaping `self` to HTML, how many more bytes will be needed ?
	fun chars_to_html_escape: Int
	do
		var native = _items
		var stop = last_byte
		var cur = first_byte
		var extra = 0
		while cur <= stop do
			var b = native[cur]
			if b == 0x3Cu8 or b == 0x3Eu8 then
				# `<` and `>` grow from 1 to 4 bytes
				extra += 3
			else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
				# `&`, `"`, `'` and `/` grow from 1 to 5 bytes
				extra += 4
			end
			cur += 1
		end
		return extra
	end

	# Escape the HTML meta-characters of `self`
	#
	# Replacements performed, byte per byte:
	#
	# * 0x3C (<) => &lt;
	# * 0x3E (>) => &gt;
	# * 0x26 (&) => &amp;
	# * 0x22 (") => &#34;
	# * 0x27 (') => &#39;
	# * 0x2F (/) => &#47;
	#
	# When nothing needs escaping, `to_s` is returned directly.
	redef fun html_escape
	do
		var extra = chars_to_html_escape
		if extra == 0 then return to_s
		var src = _items
		var stop = last_byte
		var rd = first_byte
		var total = extra + _bytelen
		var dst = new NativeString(total)
		var wr = 0
		while rd <= stop do
			var b = src[rd]
			rd += 1
			if b == 0x3Cu8 or b == 0x3Eu8 then
				# `&lt;` or `&gt;`: only the second byte differs
				var letter = 0x6Cu8
				if b == 0x3Eu8 then letter = 0x67u8
				dst[wr] = 0x26u8
				dst[wr + 1] = letter
				dst[wr + 2] = 0x74u8
				dst[wr + 3] = 0x3Bu8
				wr += 4
			else if b == 0x26u8 then
				# `&amp;`
				dst[wr] = 0x26u8
				dst[wr + 1] = 0x61u8
				dst[wr + 2] = 0x6Du8
				dst[wr + 3] = 0x70u8
				dst[wr + 4] = 0x3Bu8
				wr += 5
			else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
				# Numeric entity `&#NN;` where NN is 34, 39 or 47
				var tens = 0x33u8
				var units = 0x34u8
				if b == 0x27u8 then
					units = 0x39u8
				else if b == 0x2Fu8 then
					tens = 0x34u8
					units = 0x37u8
				end
				dst[wr] = 0x26u8
				dst[wr + 1] = 0x23u8
				dst[wr + 2] = tens
				dst[wr + 3] = units
				dst[wr + 4] = 0x3Bu8
				wr += 5
			else
				dst[wr] = b
				wr += 1
			end
		end
		return new FlatString.with_infos(dst, total, 0, total - 1)
	end

	# By escaping `self` to C, how many more bytes will be needed ?
	#
	# This enables a double-optimization in `escape_to_c` since if this
	# method returns 0, then `self` does not need escaping and can be
	# returned as-is
	fun chars_to_escape_to_c: Int
	do
		var native = _items
		var stop = last_byte
		var cur = first_byte
		var extra = 0
		while cur <= stop do
			var b = native[cur]
			if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
				# Two-byte escape: one extra byte
				extra += 1
			else if b < 32u8 then
				# Four-byte octal escape: three extra bytes
				extra += 3
			end
			cur += 1
		end
		return extra
	end

	# Escape `self` so that it can be used in a C string literal
	#
	# * 0x09 => \t
	# * 0x0A => \n
	# * 0x22 => \"
	# * 0x27 => \'
	# * 0x5C => \\
	#
	# Any other byte below 32 is written as a backslash followed by a
	# 3-digit octal value.
	redef fun escape_to_c
	do
		var extra = chars_to_escape_to_c
		if extra == 0 then return self.to_s
		var src = _items
		var stop = last_byte
		var total = _bytelen + extra
		var dst = new NativeString(total)
		var rd = first_byte
		var wr = 0
		while rd <= stop do
			var b = src[rd]
			rd += 1

			# Byte following the backslash of a two-byte escape, 0 if none applies
			var short = 0u8
			if b == 0x09u8 then
				short = 0x74u8
			else if b == 0x0Au8 then
				short = 0x6Eu8
			else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
				short = b
			end

			if short != 0u8 then
				dst[wr] = 0x5Cu8
				dst[wr + 1] = short
				wr += 2
			else if b < 32u8 then
				# Values below 32 fit in two octal digits; the first digit is always 0
				dst[wr] = 0x5Cu8
				dst[wr + 1] = 0x30u8
				dst[wr + 2] = ((b & 0x38u8) >> 3) + 0x30u8
				dst[wr + 3] = (b & 0x07u8) + 0x30u8
				wr += 4
			else
				dst[wr] = b
				wr += 1
			end
		end
		return dst.to_s_with_length(total)
	end

	redef fun [](index)
	do
		var native = _items
		var byte_index = char_to_byte_index(index)
		return native.char_at(byte_index)
	end
end
282
# Immutable strings of characters.
class FlatString
	super FlatText
	super String

	# Index at which `self` begins in `_items`, inclusively
	redef var first_byte is noinit

	# Index at which `self` ends in `_items`, inclusively
	redef var last_byte is noinit

	redef var chars = new FlatStringCharView(self) is lazy

	redef var bytes = new FlatStringByteView(self) is lazy

	# Number of characters, computed from the UTF-8 bytes on first use
	redef var length is lazy do
		if _bytelen == 0 then return 0
		return _items.utf8_length(_first_byte, _last_byte)
	end

	# Null-terminated copy of the bytes of `self`, built on first use
	redef var to_cstring is lazy do
		var size = _bytelen
		var dup = new NativeString(size + 1)
		_items.copy_to(dup, size, _first_byte, 0)
		dup[size] = 0u8
		return dup
	end

	redef fun reversed
	do
		var buf = new FlatBuffer.with_capacity(_bytelen + 1)
		var i = length - 1
		while i >= 0 do
			buf.add self[i]
			i -= 1
		end
		var res = buf.to_s.as(FlatString)
		res.length = self.length
		return res
	end

	redef fun fast_cstring
	do
		return _items.fast_cstring(_first_byte)
	end

	redef fun substring(from, count)
	do
		assert count >= 0

		# A negative start eats into the requested count
		if from < 0 then
			count += from
			if count < 0 then count = 0
			from = 0
		end

		var ln = length
		if (count + from) > ln then count = ln - from
		if count <= 0 then return ""

		var native = _items
		var first = char_to_byte_index(from)
		var last = char_to_byte_index(from + count - 1)
		# `last` is the first byte of the final char: extend to its last byte
		last += native.length_of_char_at(last) - 1

		return new FlatString.full(native, last - first + 1, first, last, count)
	end

	redef fun empty
	do
		return "".as(FlatString)
	end

	redef fun to_upper
	do
		var res = new FlatBuffer.with_capacity(self._bytelen + 1)
		for pos in [0 .. length[ do
			res.add(chars[pos].to_upper)
		end
		return res.to_s
	end

	redef fun to_lower
	do
		var res = new FlatBuffer.with_capacity(self._bytelen + 1)
		for pos in [0 .. length[ do
			res.add(chars[pos].to_lower)
		end
		return res.to_s
	end

	redef fun output
	do
		for c in chars do
			c.output
		end
	end

	##################################################
	#              String Specific Methods           #
	##################################################

	# Low-level creation of a new string with minimal data.
	#
	# `_items` will be used as is, without copy, to retrieve the characters of the string.
	# Aliasing issues is the responsibility of the caller.
	private init with_infos(items: NativeString, bytelen, from, to: Int)
	do
		_first_byte = from
		_last_byte = to
		_bytepos = from
		self._bytelen = bytelen
		self._items = items
	end

	# Low-level creation of a new string with all the data.
	#
	# `_items` will be used as is, without copy, to retrieve the characters of the string.
	# Aliasing issues is the responsibility of the caller.
	private init full(items: NativeString, bytelen, from, to, length: Int)
	do
		_first_byte = from
		_last_byte = to
		_bytepos = from
		self._bytelen = bytelen
		self._items = items
		self.length = length
	end

	# Byte-wise equality with any other `FlatText`
	redef fun ==(other)
	do
		if not other isa FlatText then return super
		if self.object_id == other.object_id then return true

		var size = _bytelen
		if other._bytelen != size then return false

		var mine = _items
		var theirs = other._items
		var i = _first_byte
		var j = other.first_byte
		var stop = i + size
		while i < stop do
			if mine[i] != theirs[j] then return false
			i += 1
			j += 1
		end
		return true
	end

	# Byte-wise ordering against any other `FlatText`
	redef fun <(other)
	do
		if not other isa FlatText then return super
		if self.object_id == other.object_id then return false

		var mine = _items
		var theirs = other._items
		var my_len = _bytelen
		var its_len = other.bytelen

		var common = my_len
		if its_len < common then common = its_len

		var i = _first_byte
		var j = other.first_byte
		var stop = i + common
		while i < stop do
			var a = mine[i]
			var b = theirs[j]
			if a > b then return false
			if a < b then return true
			i += 1
			j += 1
		end

		# Common prefix is identical: the shorter one comes first
		return my_len < its_len
	end

	redef fun +(o)
	do
		var s = o.to_s
		var slen = s.bytelen
		var mlen = _bytelen
		var total = mlen + slen
		# Only flat operands are handled here
		if not s isa FlatText then abort
		var joined = new NativeString(total + 1)
		_items.copy_to(joined, mlen, _first_byte, 0)
		s._items.copy_to(joined, slen, s.first_byte, mlen)
		return new FlatString.full(joined, total, 0, total - 1, length + o.length)
	end

	redef fun *(i)
	do
		var unit_bytes = _bytelen
		var total_bytes = unit_bytes * i
		var total_chars = length * i
		var src = _items
		var start = _first_byte
		var dst = new NativeString(total_bytes + 1)
		dst[total_bytes] = 0u8
		var offset = 0
		var left = i
		while left > 0 do
			src.copy_to(dst, unit_bytes, start, offset)
			offset += unit_bytes
			left -= 1
		end
		return new FlatString.full(dst, total_bytes, 0, total_bytes - 1, total_chars)
	end

	# djb2 hash of the bytes of `self`, memoized in `hash_cache`
	redef fun hash
	do
		var cached = hash_cache
		if cached != null then return cached

		var native = _items
		var stop = _last_byte
		var h = 5381
		var i = _first_byte
		while i <= stop do
			h = (h << 5) + h + native[i].to_i
			i += 1
		end

		hash_cache = h
		return h
	end

	redef fun substrings
	do
		return new FlatSubstringsIter(self)
	end
end
533
# Iterator walking the characters of a `FlatString` from `curr_pos` down to 0
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# The string being iterated
	var target: FlatString

	# Char index of the current item
	var curr_pos: Int

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
550
# Iterator walking the characters of a `FlatString` from `curr_pos` upward
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# The string being iterated
	var target: FlatString

	# Index of the last character of `target`, computed once at creation
	var max: Int is noautoinit

	# Char index of the current item
	var curr_pos: Int

	init
	do
		max = target.length - 1
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
571
# View of a `FlatString` as a sequence of characters
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
584
# Iterator walking the bytes of a `FlatString` backward
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# The string being iterated
	var target: FlatString

	# Storage of `target`, fetched once at creation
	var target_items: NativeString is noautoinit

	# Current position; given relative to the string, kept as an index in `target_items`
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		# Translate the string-relative start into a storage index
		curr_pos = curr_pos + str._first_byte
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos >= target._first_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
610
# Iterator walking the bytes of a `FlatString` forward
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# The string being iterated
	var target: FlatString

	# Storage of `target`, fetched once at creation
	var target_items: NativeString is noautoinit

	# Current position; given relative to the string, kept as an index in `target_items`
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		# Translate the string-relative start into a storage index
		curr_pos = curr_pos + str._first_byte
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos <= target._last_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
636
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		var str = self.target
		# `index` is relative to the string; it is valid when it is not
		# negative and, once shifted, does not go past `_last_byte`
		assert index >= 0
		var storage_index = str._first_byte + index
		assert storage_index <= str._last_byte
		return str._items[storage_index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
658
redef class Buffer
	# `new Buffer` builds a `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# `new Buffer.with_cap(i)` builds a `FlatBuffer` with an initial capacity of `i`
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
664
# Mutable strings of characters.
class FlatBuffer
	super FlatText
	super Buffer

	redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

	redef var bytes = new FlatBufferByteView(self) is lazy

	redef var length = 0

	private var char_cache: Int = -1

	private var byte_cache: Int = -1

	# Number of content bytes the current `_items` is known to hold
	private var capacity = 0

	# Real items, used as cache for when to_cstring is called
	private var real_items: NativeString is noinit

	redef fun fast_cstring do return _items.fast_cstring(0)

	redef fun substrings do return new FlatSubstringsIter(self)

	# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
	#
	# This happens when an operation modifies the current `Buffer` and
	# the Copy-On-Write flag `written` is set at true.
	private fun reset do
		var nns = new NativeString(capacity)
		if _bytelen != 0 then _items.copy_to(nns, _bytelen, 0, 0)
		_items = nns
		written = false
	end

	# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
	#
	# When the shifted content does not fit in `capacity`, a larger storage is
	# allocated, the bytes before `from` are copied into it, and it replaces
	# `_items`. Callers must therefore re-read `_items` after this call.
	#
	# Internal only, does not modify _bytelen or length, this is the caller's responsibility
	private fun rshift_bytes(from: Int, len: Int) do
		var oit = _items
		var nit = oit
		var bt = _bytelen
		if bt + len > capacity then
			var ncap = capacity
			while ncap < bt + len do ncap = ncap * 2 + 2
			capacity = ncap
			nit = new NativeString(ncap + 1)
			# Keep the untouched prefix `[0, from[` in the new storage
			oit.copy_to(nit, from, 0, 0)
			_items = nit
		end
		# Move the tail `[from, bt[` to its shifted position
		oit.copy_to(nit, bt - from, from, from + len)
	end

	# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
	#
	# Internal only, does not modify _bytelen or length, this is the caller's responsibility
	private fun lshift_bytes(from: Int, len: Int) do
		var it = _items
		it.copy_to(it, _bytelen - from, from, from - len)
	end

	# Replace the character at `index` with `item`
	#
	# `index == length` is accepted and appends `item`.
	# When the UTF-8 sizes of the old and new characters differ, the bytes
	# after the old character are shifted accordingly.
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if written then reset
		is_dirty = true
		if index == length then
			add item
			return
		end
		var ip = _items.char_to_byte_index(index)
		var clen = _items.char_at(ip).u8char_len
		var size_diff = item.u8char_len - clen
		if size_diff > 0 then
			rshift_bytes(ip + clen, size_diff)
		else if size_diff < 0 then
			lshift_bytes(ip + clen, -size_diff)
		end
		_bytelen += size_diff
		# `rshift_bytes` may have replaced the storage: always write through `_items`
		_items.set_char_at(ip, item)
		# Bytes after `ip` may have moved: re-anchor the position cache on
		# the char just written, whose byte offset is known to be `ip`
		_position = index
		_bytepos = ip
	end

	redef fun add(c)
	do
		if written then reset
		is_dirty = true
		var clen = c.u8char_len
		var bt = _bytelen
		enlarge(bt + clen)
		_items.set_char_at(bt, c)
		_bytelen += clen
		length += 1
	end

	redef fun clear do
		is_dirty = true
		if written then reset
		_bytelen = 0
		length = 0
		# The cached (char, byte) position described the old content
		_position = 0
		_bytepos = 0
	end

	redef fun empty do return new Buffer

	# Make sure at least `cap` bytes of content can be stored
	#
	# Does nothing when `cap` is not greater than the current capacity;
	# otherwise the capacity grows by repeated `c * 2 + 2` and the content
	# is copied into a new storage.
	redef fun enlarge(cap)
	do
		var c = capacity
		if cap <= c then return
		while c <= cap do c = c * 2 + 2
		# The COW flag can be set at false here, since
		# it does a copy of the current `Buffer`
		written = false
		var bln = _bytelen
		var a = new NativeString(c+1)
		if bln > 0 then _items.copy_to(a, bln, 0, 0)
		_items = a
		capacity = c
	end

	# Share the storage with a new `FlatString`
	#
	# `written` is set so that the next in-place modification copies first.
	redef fun to_s
	do
		written = true
		var bln = _bytelen
		if bln == 0 then _items = new NativeString(1)
		return new FlatString.full(_items, bln, 0, bln - 1, length)
	end

	# Null-terminated copy of the content, rebuilt only when `is_dirty`
	redef fun to_cstring
	do
		if is_dirty then
			var bln = _bytelen
			var new_native = new NativeString(bln + 1)
			new_native[bln] = 0u8
			if length > 0 then _items.copy_to(new_native, bln, 0, 0)
			real_items = new_native
			is_dirty = false
		end
		return real_items
	end

	# Create a new empty string.
	init do end

	# Low-level creation a new buffer with given data.
	#
	# `_items` will be used as is, without copy, to store the characters of the buffer.
	# Aliasing issues is the responsibility of the caller.
	#
	# If `_items` is shared, `written` should be set to true after the creation
	# so that a modification will do a copy-on-write.
	private init with_infos(items: NativeString, capacity, bytelen, length: Int)
	do
		self._items = items
		self.capacity = capacity
		self._bytelen = bytelen
		self.length = length
	end

	# Create a new string copied from `s`.
	#
	# The storage of `s` is shared (copy-on-write) only when `s` is an
	# immutable `FlatString` starting at byte 0. In every other case the
	# bytes of each substring of `s` are copied into a fresh storage.
	init from(s: Text)
	do
		var blen = s.bytelen
		if s isa FlatString and s._first_byte == 0 then
			_items = s._items
			written = true
		else
			var nits = new NativeString(blen + 1)
			var off = 0
			for sub in s.substrings do
				var sublen = sub._bytelen
				sub._items.copy_to(nits, sublen, sub.first_byte, off)
				off += sublen
			end
			_items = nits
			written = false
		end
		_bytelen = blen
		length = s.length
		_capacity = blen
	end

	# Create a new empty string with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		_items = new NativeString(cap + 1)
		capacity = cap
		_bytelen = 0
	end

	redef fun append(s)
	do
		if s.is_empty then return
		is_dirty = true
		var sl = s.bytelen
		var nln = _bytelen + sl
		enlarge(nln)
		if s isa FlatText then
			s._items.copy_to(_items, sl, s.first_byte, _bytelen)
		else
			# Non-flat text: append each flat chunk, which updates the lengths itself
			for i in s.substrings do append i
			return
		end
		_bytelen = nln
		length += s.length
	end

	# Copies the content of self in `dest`
	fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
	do
		var self_chars = self.chars
		var dest_chars = dest.chars
		for i in [0..len-1] do
			dest_chars[new_start+i] = self_chars[start+i]
		end
	end

	# Copy of `count` characters starting at char `from`, as a new buffer
	#
	# A negative `from` is handled like in `FlatString::substring`:
	# the characters before index 0 are counted but not returned.
	redef fun substring(from, count)
	do
		assert count >= 0
		if from < 0 then
			count += from
			if count < 0 then count = 0
			from = 0
		end
		if (from + count) > length then count = length - from
		if count <= 0 then return new Buffer
		var its = _items
		var bytefrom = its.char_to_byte_index(from)
		var byteto = its.char_to_byte_index(count + from - 1)
		byteto += its.char_at(byteto).u8char_len - 1
		var byte_length = byteto - bytefrom + 1
		var r_items = new NativeString(byte_length)
		its.copy_to(r_items, byte_length, bytefrom, 0)
		return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
	end

	redef fun reverse
	do
		written = false
		# The content changes: a cached `to_cstring` is no longer valid
		is_dirty = true
		var ns = new FlatBuffer.with_capacity(capacity)
		for i in chars.reverse_iterator do ns.add i
		_items = ns._items
	end

	redef fun times(repeats)
	do
		var bln = _bytelen
		# Snapshot of the current content, appended `repeats - 1` times
		var x = new FlatString.full(_items, bln, 0, bln - 1, length)
		for i in [1 .. repeats[ do
			append(x)
		end
	end

	redef fun upper
	do
		if written then reset
		for i in [0 .. length[ do self[i] = self[i].to_upper
	end

	redef fun lower
	do
		if written then reset
		for i in [0 .. length[ do self[i] = self[i].to_lower
	end
end
922
# Iterator walking the bytes of a `FlatBuffer` from `curr_pos` down to 0
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# The buffer being iterated
	var target: FlatBuffer

	# Storage of `target` as it was when the iterator was created
	var target_items: NativeString is noautoinit

	# Byte index of the current item
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
943
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	# Byte at `index` in the storage of the buffer (no bound check is done here)
	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator(target, pos)
	end
end
956
# Iterator walking the bytes of a `FlatBuffer` from `curr_pos` upward
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# The buffer being iterated
	var target: FlatBuffer

	# Storage of `target` as it was when the iterator was created
	var target_items: NativeString is noautoinit

	# Byte index of the current item
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	# The bound is read from the buffer at each call
	redef fun is_ok
	do
		return curr_pos < target._bytelen
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
977
# Iterator walking the characters of a `FlatBuffer` from `curr_pos` down to 0
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# The buffer being iterated
	var target: FlatBuffer

	# Char index of the current item
	var curr_pos: Int

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
994
# View of a `FlatBuffer` as a mutable sequence of characters
#
# Every operation is forwarded to the underlying buffer.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	# Replace the char at `index`, or append when `index == length`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index < length then
			target[index] = item
		else
			add(item)
		end
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Forward the capacity request to the buffer
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	redef fun append(s)
	do
		var incoming = s.length
		# Grow once up front when the buffer capacity is below the incoming length
		if target.capacity < incoming then enlarge(incoming + target.length)
		for c in s do
			target.add c
		end
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end
end
1039
# Iterator walking the characters of a `FlatBuffer` from `curr_pos` upward
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# The buffer being iterated
	var target: FlatBuffer

	# Index of the last character of `target` when the iterator was created
	var max: Int is noautoinit

	# Char index of the current item
	var curr_pos: Int

	init
	do
		max = target.length - 1
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1060
redef class NativeString
	redef fun to_s
	do
		var len = cstring_length
		return to_s_with_length(len)
	end

	# Returns `self` as a String of `length`.
	redef fun to_s_with_length(length): FlatString
	do
		assert length >= 0
		return clean_utf8(length)
	end

	# Wrap `self`, without copy nor check, in a string of `bytelen` bytes and `unilen` chars
	redef fun to_s_full(bytelen, unilen)
	do
		var last = bytelen - 1
		return new FlatString.full(self, bytelen, 0, last, unilen)
	end

	# Returns `self` as a new String.
	redef fun to_s_with_copy: FlatString
	do
		var length = cstring_length
		var cleaned = clean_utf8(length)
		# Cleaning already produced a separate storage: nothing more to copy
		if cleaned.items != self then return cleaned
		var dup = new NativeString(length + 1)
		copy_to(dup, length, 0, 0)
		dup[length] = 0u8
		var str = new FlatString.with_infos(dup, length, 0, length - 1)
		# `dup` is null-terminated, so it also serves as the C string of `str`
		str.to_cstring = dup
		return str
	end

	# Cleans a NativeString if necessary
	#
	# The first `len` bytes are scanned. A position is rejected when the lead
	# byte does not match the sequence length given by `length_of_char_at`,
	# or when the decoded code point is outside the range allowed for that
	# length (3-byte sequences also reject surrogates, 0xFFFE and 0xFFFF).
	# Each rejected byte is replaced by the 3 bytes `EF BF BD` (U+FFFD) in a
	# new storage; when nothing is rejected, `self` is used as is.
	fun clean_utf8(len: Int): FlatString
	do
		var rejected: nullable Array[Int] = null
		var out_len = len
		var char_count = 0
		var pos = 0
		while pos < len do
			var lead = self[pos]
			var expected = length_of_char_at(pos)

			# Does the lead byte agree with the announced sequence length ?
			var valid: Bool
			if expected == 1 then
				valid = lead & 0x80u8 == 0u8
			else if expected == 2 then
				valid = lead & 0xE0u8 == 0xC0u8
			else if expected == 3 then
				valid = lead & 0xF0u8 == 0xE0u8
			else
				valid = lead & 0xF8u8 == 0xF0u8
			end

			# A rejected position is skipped one byte at a time
			var step = 1
			if valid then
				var c = char_at(pos)
				var cp = c.code_point
				if expected == 1 then
					valid = cp >= 0 and cp <= 0x7F
				else if expected == 2 then
					valid = cp >= 0x80 and cp <= 0x7FF
				else if expected == 3 then
					valid = cp >= 0x800 and cp <= 0xFFFF
					valid = valid and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
				else
					valid = cp >= 0x10000 and cp <= 0x10FFFF
				end
				if valid then step = c.u8char_len
			end

			if not valid then
				if rejected == null then rejected = new Array[Int]
				rejected.add pos
				# 1 byte becomes 3
				out_len += 2
			end
			pos += step
			char_count += 1
		end

		var ret = self
		if out_len != len then
			ret = new NativeString(out_len)
			var src = 0
			var dst = 0
			for repl_pos in rejected.as(not null) do
				# Copy the clean run preceding the rejected byte
				var run = repl_pos - src
				copy_to(ret, run, src, dst)
				dst += run
				ret[dst] = 0xEFu8
				ret[dst + 1] = 0xBFu8
				ret[dst + 2] = 0xBDu8
				dst += 3
				src = repl_pos + 1
			end
			# Remaining clean bytes after the last replacement
			copy_to(ret, len - src, src, dst)
		end
		return new FlatString.full(ret, out_len, 0, out_len - 1, char_count)
	end

	# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
	#
	# Very unsafe, make sure to have room for this char prior to calling this function.
	private fun set_char_at(pos: Int, c: Char)
	do
		native_set_char(pos, c, c.u8char_len)
	end

	# Write `c` at byte `pos` as a UTF-8 sequence of `ln` bytes (1 to 4)
	#
	# Nothing is written for any other value of `ln`.
	private fun native_set_char(pos: Int, c: Char, ln: Int) `{
		char* dst = self + pos;
		switch(ln){
			case 1:
				dst[0] = c;
				break;
			case 2:
				dst[0] = 0xC0 | ((c >> 6) & 0x1F);
				dst[1] = 0x80 | (c & 0x3F);
				break;
			case 3:
				dst[0] = 0xE0 | ((c >> 12) & 0x0F);
				dst[1] = 0x80 | ((c >> 6) & 0x3F);
				dst[2] = 0x80 | (c & 0x3F);
				break;
			case 4:
				dst[0] = 0xF0 | ((c >> 18) & 0x07);
				dst[1] = 0x80 | ((c >> 12) & 0x3F);
				dst[2] = 0x80 | ((c >> 6) & 0x3F);
				dst[3] = 0x80 | (c & 0x3F);
				break;
		}
	`}
end
1199
redef class Int
	# Textual representation of `self` in `base`
	#
	# A buffer of `digit_count(base)` spaces is allocated, then filled by
	# `fill_buffer`.
	redef fun to_base(base, signed)
	do
		var width = digit_count(base)
		var buf = new FlatBuffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# return displayable int in base 10 and signed
	#
	#     assert 1.to_s == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s
	do
		# Fast case for common numbers
		if self == 0 then return "0"
		if self == 1 then return "1"

		var size = int_to_s_len
		var native = new NativeString(size + 1)
		native[size] = 0u8
		native_int_to_s(native, size + 1)
		# Every char is a single byte here: char length and byte length are both `size`
		return new FlatString.full(native, size, 0, size - 1, size)
	end
end
1225
redef class Array[E]

	# Fast implementation
	#
	# The `to_s` of every non-null element is collected first, so that the
	# result can be allocated once and filled by raw byte copies.
	redef fun plain_to_s
	do
		var l = length
		if l == 0 then return ""
		var its = _items.as(not null)
		var first = its[0]
		if l == 1 then
			if first == null then return ""
			return first.to_s
		end

		# Pass 1: stringify the elements and sum their byte lengths
		var parts = new NativeArray[String](l)
		var kept = 0
		var total = 0
		for i in [0 .. l[ do
			var e = its[i]
			if e == null then continue
			var str = e.to_s
			total += str.bytelen
			parts[kept] = str
			kept += 1
		end

		# Pass 2: copy the bytes of each part, one after the other
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. kept[ do
			var part = parts[i]
			if part isa FlatString then
				var plen = part._bytelen
				part._items.copy_to(ns, plen, part._first_byte, off)
				off += plen
			else
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flen = flat._bytelen
					flat._items.copy_to(ns, flen, flat._first_byte, off)
					off += flen
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0, total - 1)
	end
end
1275
redef class NativeArray[E]
	# Concatenation of all the strings of `self`
	#
	# `self` must be a `NativeArray[String]`. The byte lengths are summed
	# first so that the result is allocated once.
	redef fun native_to_s
	do
		assert self isa NativeArray[String]
		var count = length
		var strs = self

		var total = 0
		for i in [0 .. count[ do
			total += strs[i].bytelen
		end

		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. count[ do
			var part = strs[i]
			if part isa FlatString then
				var plen = part._bytelen
				part._items.copy_to(ns, plen, part._first_byte, off)
				off += plen
			else
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flen = flat._bytelen
					flat._items.copy_to(ns, flen, flat._first_byte, off)
					off += flen
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0, total - 1)
	end
end
1312
redef class Map[K,V]
	# Concatenate the couples of `self`
	#
	# Each couple is written as key, `couple_sep`, value; couples are
	# separated by `sep`. A null key or value is written as `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var res = new Buffer
		var first = true
		var i = iterator
		while i.is_ok do
			# No separator before the first couple
			if first then
				first = false
			else
				res.append(sep)
			end
			var k = i.key
			var e = i.item
			res.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			i.next
		end
		return res.to_s
	end
end