lib/core: Removed `last_byte` attribute in `FlatString` as it is useless
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text
#
# It yields `tgt` exactly once: after the first `next`, the iteration is over.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok
	do
		return tgt != null
	end

	redef fun item
	do
		assert is_ok
		var res = tgt
		return res.as(not null)
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# Index, in `_items`, of the first byte of the text
#
# This default implementation always answers 0.
protected fun first_byte: Int
do
	return 0
end

# Index, in `_items`, of the last byte of the text
#
# Derived from `first_byte` and `_bytelen`.
protected fun last_byte: Int
do
	var start = first_byte
	return start + _bytelen - 1
end

# Char index most recently resolved by `char_to_byte_index`
var position: Int = 0

# Byte index in `_items` that matches the cached `position`
var bytepos: Int = 0
50
# Byte index, in `_items`, of the char at position `index`
#
# The lookup starts from whichever known anchor is nearest to `index`:
# the beginning of the text, the cached `position`, or the last char.
# The resolved pair is stored in `position`/`bytepos` before returning.
#
# REQUIRE: `index >= 0 and index < length`
fun char_to_byte_index(index: Int): Int
do
	var len = length
	assert index >= 0
	assert index < len

	var cached_pos = _position
	var native = _items

	# Distance (in chars) between `index` and each anchor
	var from_start = index
	var from_end = (len - 1) - index
	var from_cache = (cached_pos - index).abs

	var nearest = from_start
	if from_cache < nearest then nearest = from_cache
	if from_end < nearest then nearest = from_end

	# Chosen anchor, as a byte index and as a char index.
	# On ties the beginning wins, then the cache.
	var byte_anchor: Int
	var char_anchor: Int
	if nearest == from_start then
		byte_anchor = first_byte
		char_anchor = 0
	else if nearest == from_cache then
		byte_anchor = _bytepos
		char_anchor = cached_pos
	else
		byte_anchor = native.find_beginning_of_char_at(last_byte)
		char_anchor = len - 1
	end

	var res = native.char_to_byte_index_cached(index, char_anchor, byte_anchor)

	_position = index
	_bytepos = res

	return res
end
89
# Number of additional bytes needed to HTML-escape `self`
#
# `<` and `>` each need 3 more bytes;
# `&`, `"`, `'` and `/` each need 4 more bytes.
fun chars_to_html_escape: Int
do
	var native = _items
	var extra = 0
	for i in [first_byte .. last_byte] do
		var b = native[i]
		if b == 0x3Cu8 or b == 0x3Eu8 then
			extra += 3
		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			extra += 4
		end
	end
	return extra
end
115
# Copy of `self` where the HTML meta-characters are escaped
#
# When `chars_to_html_escape` reports nothing to escape, `to_s` is returned.
redef fun html_escape
do
	var added = chars_to_html_escape
	if added == 0 then return to_s

	var src = _items
	var total = added + _bytelen
	var dst = new NativeString(total)
	var w = 0
	for r in [first_byte .. last_byte] do
		var b = src[r]
		# Replacements performed:
		#
		# * 0x3C (<) => &lt;
		# * 0x3E (>) => &gt;
		# * 0x26 (&) => &amp;
		# * 0x22 (") => &#34;
		# * 0x27 (') => &#39;
		# * 0x2F (/) => &#47;
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# "&lt;" and "&gt;" only differ by their second byte
			var second = if b == 0x3Cu8 then 0x6Cu8 else 0x67u8
			dst[w] = 0x26u8
			dst[w + 1] = second
			dst[w + 2] = 0x74u8
			dst[w + 3] = 0x3Bu8
			w += 4
		else if b == 0x26u8 then
			dst[w] = 0x26u8
			dst[w + 1] = 0x61u8
			dst[w + 2] = 0x6Du8
			dst[w + 3] = 0x70u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x22u8 then
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			dst[w + 2] = 0x33u8
			dst[w + 3] = 0x34u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x27u8 then
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			dst[w + 2] = 0x33u8
			dst[w + 3] = 0x39u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x2Fu8 then
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			dst[w + 2] = 0x34u8
			dst[w + 3] = 0x37u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else
			# Regular byte, copied as is
			dst[w] = b
			w += 1
		end
	end
	return new FlatString.with_infos(dst, total, 0)
end
187
# Number of additional bytes needed to escape `self` to C
#
# A result of 0 means that `self` needs no escaping, which lets
# `escape_to_c` return the text as is.
#
# Newline, tab, `"`, `'` and `\` need 1 more byte each;
# any other byte below 32 needs 3 more bytes.
fun chars_to_escape_to_c: Int
do
	var native = _items
	var extra = 0
	for i in [first_byte .. last_byte] do
		var b = native[i]
		if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			extra += 1
		else if b < 32u8 then
			extra += 3
		end
	end
	return extra
end
217
# Copy of `self` escaped for use in a C string literal
#
# When `chars_to_escape_to_c` reports nothing to escape, `to_s` is returned.
redef fun escape_to_c
do
	var added = chars_to_escape_to_c
	if added == 0 then return self.to_s

	var src = _items
	var total = _bytelen + added
	var dst = new NativeString(total)
	var w = 0
	for r in [first_byte .. last_byte] do
		var b = src[r]
		# Escaping rules:
		#
		# * 0x09 => \t
		# * 0x0A => \n
		# * 0x22 => \"
		# * 0x27 => \'
		# * 0x5C => \\
		# * any other byte < 32 => backslash, `0`, then two octal digits
		if b == 0x09u8 then
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x74u8
			w += 2
		else if b == 0x0Au8 then
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x6Eu8
			w += 2
		else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Backslash followed by the byte itself
			dst[w] = 0x5Cu8
			dst[w + 1] = b
			w += 2
		else if b < 32u8 then
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x30u8
			dst[w + 2] = ((b & 0x38u8) >> 3) + 0x30u8
			dst[w + 3] = (b & 0x07u8) + 0x30u8
			w += 4
		else
			dst[w] = b
			w += 1
		end
	end
	return dst.to_s_with_length(total)
end
279
# Char at position `index`, located through `char_to_byte_index`
redef fun [](index)
do
	var byte_index = char_to_byte_index(index)
	return _items.char_at(byte_index)
end
281
# Integer value of `self` read as hexadecimal digits (digits and letters up to 'f')
#
# Reading starts at char `pos` (0 when `null`) and covers `ln` bytes
# (`length - pos` when `null`).
#
#     assert "ff".to_hex == 255
redef fun to_hex(pos, ln)
do
	if pos == null then pos = 0
	if ln == null then ln = length - pos
	var start = char_to_byte_index(pos)
	var native = _items
	var value = 0
	for i in [start .. start + ln[ do
		# Each digit contributes 4 bits
		value = (value << 4) + native[i].ascii.from_hex
	end
	return value
end
298 end
299
300 # Immutable strings of characters.
301 class FlatString
302 super FlatText
303 super String
304
# Index at which `self` begins in `_items`, inclusively
redef var first_byte is noinit

# Char view on `self`, created on first use
redef var chars = new FlatStringCharView(self) is lazy

# Byte view on `self`, created on first use
redef var bytes = new FlatStringByteView(self) is lazy

# Number of chars of `self`
#
# Computed on first use, unless it was set beforehand (see `full`).
redef var length is lazy do
	if _bytelen == 0 then return 0
	# There is no `_last_byte` attribute in this class: the index of the
	# last byte comes from `last_byte` (`first_byte + _bytelen - 1`).
	return _items.utf8_length(_first_byte, last_byte)
end
316
# Copy of the bytes of `self` followed by a NUL byte, built on first use
redef var to_cstring is lazy do
	var nbytes = _bytelen
	var copy = new NativeString(nbytes + 1)
	# The terminator and the payload are written to distinct bytes
	copy[nbytes] = 0u8
	_items.copy_to(copy, nbytes, _first_byte, 0)
	return copy
end
324
# New string with the chars of `self` in reverse order
redef fun reversed
do
	var buf = new FlatBuffer.with_capacity(_bytelen + 1)
	var i = length - 1
	while i >= 0 do
		buf.add self[i]
		i -= 1
	end
	var res = buf.to_s.as(FlatString)
	# Same number of chars as `self`: store it in the result
	res.length = self.length
	return res
end
335
# Delegates to `_items.fast_cstring`, starting at `_first_byte`
redef fun fast_cstring
do
	var start = _first_byte
	return _items.fast_cstring(start)
end
337
# Substring of `count` chars starting at char `from`
#
# A negative `from` is replaced by 0 and `count` is reduced accordingly.
# `count` is also trimmed so that the result does not go past the end of `self`.
# An empty result is returned as `""`.
# Otherwise the result reuses `_items`: no byte is copied.
#
# REQUIRE: `count >= 0`
redef fun substring(from, count)
do
	assert count >= 0

	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end

	if (count + from) > length then count = length - from
	if count <= 0 then return ""
	var end_index = from + count - 1

	var bytefrom = char_to_byte_index(from)
	var byteto = char_to_byte_index(end_index)
	var its = _items
	# `byteto` is the first byte of the last char: move it to its last byte
	byteto += its.length_of_char_at(byteto) - 1

	# `full` takes (items, bytelen, from, length); the last byte is not
	# stored, it is derived from `from` and `bytelen`.
	return new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
end
360
# The empty string, as a `FlatString`
redef fun empty
do
	return "".as(FlatString)
end
362
# New string made of each char of `self` passed through `Char::to_upper`
redef fun to_upper
do
	var buf = new FlatBuffer.with_capacity(self._bytelen + 1)
	var view = chars
	for i in [0 .. length[ do
		buf.add(view[i].to_upper)
	end
	return buf.to_s
end
377
# New string made of each char of `self` passed through `Char::to_lower`
redef fun to_lower
do
	var buf = new FlatBuffer.with_capacity(self._bytelen + 1)
	var view = chars
	for i in [0 .. length[ do
		buf.add(view[i].to_lower)
	end
	return buf.to_s
end
392
# Calls `output` on each char of `self`, in order
redef fun output
do
	for c in chars do
		c.output
	end
end
397
398 ##################################################
399 # String Specific Methods #
400 ##################################################
401
# Low-level creation of a string from its bytes only.
#
# `items` is used as is, without copy: aliasing issues are the responsibility of the caller.
# `bytelen` is the number of bytes of the string and `from` the index of its first byte.
# The number of chars is not given here; `length` computes it on first use.
private init with_infos(items: NativeString, bytelen, from: Int)
do
	_items = items
	_bytelen = bytelen
	# The position cache starts at char 0, hence at the first byte
	_bytepos = from
	_first_byte = from
end
413
# Low-level creation of a string from all its data.
#
# `items` is used as is, without copy: aliasing issues are the responsibility of the caller.
# `bytelen` is the number of bytes, `from` the index of the first byte
# and `length` the number of chars.
private init full(items: NativeString, bytelen, from, length: Int)
do
	_items = items
	self.length = length
	_bytelen = bytelen
	# The position cache starts at char 0, hence at the first byte
	_bytepos = from
	_first_byte = from
end
426
# Byte-wise equality with another flat text
#
# Falls back on `super` when `other` is not a `FlatText`.
redef fun ==(other)
do
	if not other isa FlatText then return super

	if self.object_id == other.object_id then return true

	# Different byte lengths cannot be equal
	var nbytes = _bytelen
	if other._bytelen != nbytes then return false

	var mine = self._items
	var theirs = other._items
	var my_start = _first_byte
	var their_start = other.first_byte

	for k in [0 .. nbytes[ do
		if mine[my_start + k] != theirs[their_start + k] then return false
	end

	return true
end
453
# Byte-wise ordering with another flat text
#
# The first differing byte decides; when one text is a prefix of the
# other, the shorter one is the lesser.
# Falls back on `super` when `other` is not a `FlatText`.
redef fun <(other)
do
	if not other isa FlatText then return super

	if self.object_id == other.object_id then return false

	var mine = _items
	var theirs = other._items

	var my_len = _bytelen
	var their_len = other.bytelen

	# Only the common prefix can be compared byte per byte
	var common = my_len
	if their_len < common then common = their_len

	var i = _first_byte
	var j = other.first_byte
	var stop = i + common

	while i < stop do
		var a = mine[i]
		var b = theirs[j]
		if a != b then return a < b
		i += 1
		j += 1
	end

	return my_len < their_len
end
483
# Concatenation of `self` and `o.to_s` in a newly allocated string
#
# Aborts when `o.to_s` is not a `FlatText`.
redef fun +(o) do
	var s = o.to_s
	var slen = s.bytelen
	var mlen = _bytelen
	var nlen = mlen + slen
	var mits = _items
	var mifrom = _first_byte
	if s isa FlatText then
		var sits = s._items
		var sifrom = s.first_byte
		var ns = new NativeString(nlen + 1)
		mits.copy_to(ns, mlen, mifrom, 0)
		sits.copy_to(ns, slen, sifrom, mlen)
		# The extra byte reserved above is the terminator, as in `*`
		ns[nlen] = 0u8
		# `length` is lazy: it must be read through its accessor, since
		# `_length` may not have been computed yet.
		return new FlatString.full(ns, nlen, 0, length + o.length)
	else
		abort
	end
end
502
# New string made of `i` copies of `self`, followed by a NUL byte
redef fun *(i)
do
	var unit = _bytelen
	var total = unit * i
	var nchars = length * i
	var src = _items
	var start = _first_byte
	var dst = new NativeString(total + 1)
	dst[total] = 0u8
	# Copy number `k` lands `k * unit` bytes after the beginning
	for k in [0 .. i[ do
		src.copy_to(dst, unit, start, k * unit)
	end
	return new FlatString.full(dst, total, 0, nchars)
end
520
521
# Hash of the bytes of `self`, memoized in `hash_cache`
redef fun hash
do
	var cached = hash_cache
	if cached != null then return cached

	# djb2 hash algorithm
	var h = 5381
	var native = _items
	for i in [_first_byte .. last_byte] do
		h = (h << 5) + h + native[i].to_i
	end

	hash_cache = h
	return h
end
542
# Iterator yielding `self` as its only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
544 end
545
# Iterator over the chars of a `FlatString`, from `curr_pos` down to index 0
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String whose chars are visited
	var target: FlatString

	# Index of the current char
	var curr_pos: Int

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end

end
562
# Iterator over the chars of a `FlatString`, from `curr_pos` up to the last char
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String whose chars are visited
	var target: FlatString

	# Index of the last char of `target`, computed at construction
	var max: Int is noautoinit

	# Index of the current char
	var curr_pos: Int

	init
	do
		max = target.length - 1
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end

end
583
# View on the chars of a `FlatString`
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	# Char at `index`, asked to `target`
	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end

end
596
# Iterator over the bytes of a `FlatString`, going backward down to its first byte
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String whose bytes are visited
	var target: FlatString

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Position of the current byte.
	#
	# Given relative to the beginning of `target`, then turned into
	# an index in `target_items` at construction.
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		curr_pos += str._first_byte
	end

	# Position of the current byte, relative to the beginning of `target`
	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos >= target._first_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end

end
622
# Iterator over the bytes of a `FlatString`, going forward up to its last byte
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String whose bytes are visited
	var target: FlatString

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Position of the current byte.
	#
	# Given relative to the beginning of `target`, then turned into
	# an index in `target_items` at construction.
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		curr_pos += str._first_byte
	end

	# Position of the current byte, relative to the beginning of `target`
	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos <= target.last_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end

end
648
# View on the bytes of a `FlatString`
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	# Byte at position `index`, relative to the beginning of the string
	#
	# REQUIRE: `index >= 0` and `index` does not go past the last byte
	redef fun [](index)
	do
		assert index >= 0
		var str = self.target
		# Translate to an index in `_items`, then check the upper bound
		var real_index = index + str._first_byte
		assert real_index <= str.last_byte
		return str._items[real_index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end

end
670
redef class Buffer
	# New buffers are `FlatBuffer` instances
	redef new
	do
		return new FlatBuffer
	end

	# New buffers with a capacity of `i` are `FlatBuffer` instances
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
676
677 # Mutable strings of characters.
678 class FlatBuffer
679 super FlatText
680 super Buffer
681
# Char view on `self`, created on first use
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

# Byte view on `self`, created on first use
redef var bytes = new FlatBufferByteView(self) is lazy

# Number of chars currently in the buffer
redef var length = 0

# NOTE(review): not read or written in the visible part of this file; purpose to be confirmed
private var char_cache: Int = -1

# NOTE(review): not read or written in the visible part of this file; purpose to be confirmed
private var byte_cache: Int = -1

# Number of bytes `_items` is expected to hold without reallocation
private var capacity = 0

# Real items, used as cache for when to_cstring is called
private var real_items: NativeString is noinit

# Delegates to `_items.fast_cstring`, from byte 0
redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

# Iterator yielding `self` as its only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
700
# Moves the content of the buffer into a newly allocated `NativeString`
#
# Called before a modification when `written` is set; `written` is cleared afterwards.
private fun reset
do
	var fresh = new NativeString(capacity)
	var used = _bytelen
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	written = false
end
711
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# When the shifted content does not fit in `capacity`, a larger `NativeString`
# is allocated, the bytes located before `from` are copied into it, and it
# becomes the new `_items`. Callers holding a reference on the former
# `_items` must fetch it again after the call.
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int) do
	var oit = _items
	var nit = oit
	var bt = _bytelen
	if bt + len > capacity then
		var cap = capacity
		while cap < bt + len do cap = cap * 2 + 2
		capacity = cap
		nit = new NativeString(cap)
		# Keep the untouched prefix `[0, from[`
		# (`copy_to` arguments: destination, length, source index, destination index)
		oit.copy_to(nit, from, 0, 0)
		_items = nit
	end
	# Move the suffix `len` bytes further
	oit.copy_to(nit, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int) do
	var it = _items
	it.copy_to(it, _bytelen - from, from, from - len)
end

# Replaces the char at `index` by `item`
#
# With `index == length`, `item` is appended instead.
# When `item` and the replaced char do not use the same number of bytes,
# the bytes that follow are shifted accordingly.
#
# REQUIRE: `index >= 0 and index <= length`
redef fun []=(index, item)
do
	assert index >= 0 and index <= length
	if written then reset
	is_dirty = true
	if index == length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
		# The shift may have moved the content into a new `NativeString`
		it = _items
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_bytelen += size_diff
	bytepos += size_diff
	it.set_char_at(ip, item)
end
759
# Appends the char `c` at the end of the buffer
redef fun add(c)
do
	if written then reset
	is_dirty = true
	var width = c.u8char_len
	var at = _bytelen
	# Make room for the bytes of `c`, then write them after the content
	enlarge(at + width)
	_items.set_char_at(at, c)
	_bytelen = at + width
	length += 1
end
771
# Empties the buffer: byte and char counts go back to 0
redef fun clear
do
	is_dirty = true
	if written then reset
	length = 0
	_bytelen = 0
end
778
# A new empty buffer
redef fun empty
do
	return new Buffer
end
780
# Makes sure the buffer can hold at least `cap` bytes
#
# Nothing is done when `cap` already fits in `capacity`. Otherwise the
# capacity grows by repeated `* 2 + 2` steps until it exceeds `cap`, and
# the content is copied into a newly allocated `NativeString`.
redef fun enlarge(cap)
do
	var new_cap = capacity
	if cap <= new_cap then return
	while new_cap <= cap do new_cap = new_cap * 2 + 2

	# The content is about to be copied: the COW flag can be cleared
	written = false

	var used = _bytelen
	var fresh = new NativeString(new_cap + 1)
	if used > 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	capacity = new_cap
end
798
# String view on the current content of the buffer
#
# The bytes are not copied: `written` is set so that a later
# modification of the buffer starts by a `reset`.
redef fun to_s
do
	written = true
	var used = _bytelen
	# An empty buffer still hands a 1-byte `NativeString` to the string
	if used == 0 then _items = new NativeString(1)
	var native = _items
	return new FlatString.full(native, used, 0, _length)
end
806
# NUL-terminated copy of the content, cached in `real_items`
#
# The copy is rebuilt only when `is_dirty` is set.
redef fun to_cstring
do
	if not is_dirty then return real_items

	var used = _bytelen
	var copy = new NativeString(used + 1)
	copy[used] = 0u8
	if length > 0 then _items.copy_to(copy, used, 0, 0)
	real_items = copy
	is_dirty = false
	return copy
end
819
# Create a new empty string.
init do end

# Low-level creation of a new buffer with given data.
#
# `items` is used as is, without copy, to store the characters of the buffer.
# Aliasing issues are the responsibility of the caller.
#
# If `items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
	_items = items
	_bytelen = bytelen
	self.capacity = capacity
	self.length = length
end
837
# Create a new string copied from `s`.
#
# When `s` is a flat text that begins at byte 0 of its `_items`, the bytes
# are shared and `written` is set, so that the first modification copies them.
# In every other case the bytes of the substrings of `s` are copied, one
# after the other, into a newly allocated `NativeString`.
init from(s: Text)
do
	var bln = s.bytelen
	var shared = false
	if s isa FlatText then
		# Sharing is only valid when the text starts at byte 0,
		# since a buffer always reads `_items` from index 0.
		if s.first_byte == 0 then
			_items = s._items
			shared = true
		end
	end
	if not shared then
		var its = new NativeString(bln + 1)
		var off = 0
		for i in s.substrings do
			var ilen = i._bytelen
			i._items.copy_to(its, ilen, i.first_byte, off)
			off += ilen
		end
		_items = its
	end
	_bytelen = bln
	length = s.length
	_capacity = bln
	written = shared
end
852
# Create a new empty string with a given capacity.
#
# REQUIRE: `cap >= 0`
init with_capacity(cap: Int)
do
	assert cap >= 0
	_bytelen = 0
	capacity = cap
	# One byte more than `cap` is allocated
	_items = new NativeString(cap + 1)
end
861
# Appends the content of `s` at the end of the buffer
#
# A flat text is copied in one go; any other text is appended
# substring by substring.
redef fun append(s)
do
	if s.is_empty then return
	is_dirty = true
	var added = s.bytelen
	var new_bytelen = _bytelen + added
	enlarge(new_bytelen)
	if not s isa FlatText then
		# Each substring goes through `append` and updates the counters itself
		for sub in s.substrings do append sub
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _bytelen)
	_bytelen = new_bytelen
	length += s.length
end
878
# Copies `len` chars of `self`, starting at `start`, into `dest` starting at `new_start`
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src_view = self.chars
	var dst_view = dest.chars
	for offset in [0 .. len[ do
		dst_view[new_start + offset] = src_view[start + offset]
	end
end
888
# New buffer holding a copy of `count` chars of `self`, starting at char `from`
#
# A negative `from` is replaced by 0, and `count` is trimmed so that the
# result does not go past the end of `self`.
#
# REQUIRE: `count >= 0`
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then from = 0
	if (from + count) > length then count = length - from
	if count <= 0 then return new Buffer

	var native = _items
	var first = native.char_to_byte_index(from)
	# First byte of the last char, then its last byte
	var last_char = native.char_to_byte_index(from + count - 1)
	var last = last_char + native.char_at(last_char).u8char_len - 1

	var nbytes = last - first + 1
	var copy = new NativeString(nbytes)
	native.copy_to(copy, nbytes, first, 0)
	return new FlatBuffer.with_infos(copy, nbytes, nbytes, count)
end
904
# Reverses the order of the chars of the buffer, in place
#
# The reversed content is built in a temporary buffer whose `_items`
# then replaces the current one.
redef fun reverse
do
	written = false
	# The content changes: the copy cached by `to_cstring` must be rebuilt
	is_dirty = true
	var ns = new FlatBuffer.with_capacity(capacity)
	for i in chars.reverse_iterator do ns.add i
	_items = ns._items
	# The cached (char, byte) position refers to the former layout;
	# char 0 at byte 0 is always a valid anchor for a buffer.
	_position = 0
	_bytepos = 0
end
912
# Appends the current content to itself `repeats - 1` times
redef fun times(repeats)
do
	# String view on the current content, used as the pattern to append
	var pattern = new FlatString.full(_items, _bytelen, 0, _length)
	var remaining = repeats - 1
	while remaining > 0 do
		append(pattern)
		remaining -= 1
	end
end
921
# Replaces each char of the buffer by its `to_upper` value
redef fun upper
do
	if written then reset
	var n = length
	var i = 0
	while i < n do
		self[i] = self[i].to_upper
		i += 1
	end
end
927
# Replaces each char of the buffer by its `to_lower` value
redef fun lower
do
	if written then reset
	var n = length
	var i = 0
	while i < n do
		self[i] = self[i].to_lower
		i += 1
	end
end
933 end
934
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` down to byte 0
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer whose bytes are visited
	var target: FlatBuffer

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Index of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end

end
955
# View on the bytes of a `FlatBuffer`
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	# Byte at `index` in the `_items` of the buffer
	redef fun [](index)
	do
		var native = target._items
		return native[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator(target, pos)
	end

end
968
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` up to its last byte
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer whose bytes are visited
	var target: FlatBuffer

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Index of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	# The bound is read from `target` at each call
	redef fun is_ok
	do
		return curr_pos < target._bytelen
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end

end
989
# Iterator over the chars of a `FlatBuffer`, from `curr_pos` down to index 0
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer whose chars are visited
	var target: FlatBuffer

	# Index of the current char
	var curr_pos: Int

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end

end
1006
# View on the chars of a `FlatBuffer`
#
# Every operation is forwarded to the buffer `target`.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	# Replaces the char at `index`, or appends `item` when `index == length`
	#
	# REQUIRE: `index >= 0 and index <= length`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index != length then
			target[index] = item
			return
		end
		add(item)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	redef fun push(c)
	do
		target.add(c)
	end

	# Forwards to `FlatBuffer::enlarge`
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	# Adds each char of `s` at the end of the buffer
	redef fun append(s)
	do
		var incoming = s.length
		if incoming > target.capacity then enlarge(incoming + target.length)
		for c in s do target.add c
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end

end
1051
# Iterator over the chars of a `FlatBuffer`, from `curr_pos` up to the last char
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer whose chars are visited
	var target: FlatBuffer

	# Index of the last char of `target`, computed at construction
	var max: Int is noautoinit

	# Index of the current char
	var curr_pos: Int

	init
	do
		max = target.length - 1
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end

end
1072
1073 redef class NativeString
# Returns `self` as a String whose byte length is `cstring_length`
redef fun to_s
do
	var len = cstring_length
	return to_s_with_length(len)
end
1078
# Returns `self` as a String of `length` bytes, passed through `clean_utf8`
#
# REQUIRE: `length >= 0`
redef fun to_s_with_length(length): FlatString
do
	assert length >= 0
	var res = clean_utf8(length)
	return res
end
1085
# Returns `self` as a String of `bytelen` bytes and `unilen` chars
#
# The bytes are used as is: no copy and no check is done here.
redef fun to_s_full(bytelen, unilen)
do
	var res = new FlatString.full(self, bytelen, 0, unilen)
	return res
end
1089
# Returns `self` as a new String that does not share its bytes with `self`.
redef fun to_s_with_copy: FlatString
do
	var len = cstring_length
	var cleaned = clean_utf8(len)
	# Different `items`: `clean_utf8` already produced a new `NativeString`
	if cleaned.items != self then return cleaned

	var copy = new NativeString(len + 1)
	copy_to(copy, len, 0, 0)
	copy[len] = 0u8
	var res = new FlatString.with_infos(copy, len, 0)
	# The copy is NUL-terminated: it also serves as the cached `to_cstring`
	res.to_cstring = copy
	return res
end
1103
# Builds a String from the first `len` bytes of `self`, cleaning them if necessary
#
# Each byte that does not start an acceptable sequence is replaced by the
# three bytes `0xEF 0xBF 0xBD`. When at least one replacement is needed,
# the result uses a newly allocated `NativeString`; otherwise it uses `self`.
fun clean_utf8(len: Int): FlatString
do
	# Positions of the bytes to replace, allocated on first need
	var bad: nullable Array[Int] = null
	var out_len = len
	var nchars = 0
	var pos = 0
	while pos < len do
		var lead = self[pos]
		var width = length_of_char_at(pos)

		# Does the leading byte carry the marker expected for `width` bytes?
		var valid: Bool
		if width == 1 then
			valid = (lead & 0x80u8) == 0u8
		else if width == 2 then
			valid = (lead & 0xE0u8) == 0xC0u8
		else if width == 3 then
			valid = (lead & 0xF0u8) == 0xE0u8
		else
			valid = (lead & 0xF8u8) == 0xF0u8
		end

		# A rejected position only consumes one byte
		var step = 1
		if valid then
			# Is the code point in the range allowed for `width` bytes?
			var c = char_at(pos)
			var cp = c.code_point
			if width == 1 then
				valid = cp >= 0 and cp <= 0x7F
			else if width == 2 then
				valid = cp >= 0x80 and cp <= 0x7FF
			else if width == 3 then
				valid = cp >= 0x800 and cp <= 0xFFFF
				valid = valid and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
			else
				valid = cp >= 0x10000 and cp <= 0x10FFFF
			end
			if valid then step = c.u8char_len
		end

		if not valid then
			if bad == null then bad = new Array[Int]
			bad.add pos
			# One byte becomes three
			out_len += 2
		end
		pos += step
		nchars += 1
	end

	var res = self
	if out_len != len then
		res = new NativeString(out_len)
		var positions = bad.as(not null)
		var raw = positions.items.as(not null)
		var src = 0
		var dst = 0
		for i in [0 .. positions.length[ do
			var at = raw[i]
			# Bytes kept as is since the previous replacement
			var chunk = at - src
			copy_to(res, chunk, src, dst)
			dst += chunk
			res[dst] = 0xEFu8
			res[dst + 1] = 0xBFu8
			res[dst + 2] = 0xBDu8
			dst += 3
			src = at + 1
		end
		# Bytes that follow the last replacement
		copy_to(res, len - src, src, dst)
	end
	return new FlatString.full(res, out_len, 0, nchars)
end
1178
# Writes `c`, encoded in UTF-8, in the bytes starting at position `pos`
#
# Very unsafe: the caller must make sure there is room for `c.u8char_len` bytes at `pos`.
private fun set_char_at(pos: Int, c: Char)
do
	native_set_char(pos, c, c.u8char_len)
end
1186
# Writes the `ln` bytes (1 to 4) that encode `c` at position `pos`
#
# Nothing is written for any other value of `ln`.
private fun native_set_char(pos: Int, c: Char, ln: Int) `{
	char* dst = self + pos;
	if (ln == 1) {
		dst[0] = c;
	} else if (ln == 2) {
		dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
		dst[1] = 0x80 | (c & 0x3F);
	} else if (ln == 3) {
		dst[0] = 0xE0 | ((c & 0xF000) >> 12);
		dst[1] = 0x80 | ((c & 0xFC0) >> 6);
		dst[2] = 0x80 | (c & 0x3F);
	} else if (ln == 4) {
		dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
		dst[1] = 0x80 | ((c & 0x3F000) >> 12);
		dst[2] = 0x80 | ((c & 0xFC0) >> 6);
		dst[3] = 0x80 | (c & 0x3F);
	}
`}
1210 end
1211
redef class Int
	# Representation of `self` in base `base`
	#
	# A buffer of `digit_count(base)` spaces is filled by `fill_buffer`.
	redef fun to_base(base, signed)
	do
		var ndigits = digit_count(base)
		var blank = " " * ndigits
		var buf = new FlatBuffer.from(blank)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# return displayable int in base 10 and signed
	#
	#     assert 1.to_s == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s
	do
		# Fast case for common numbers
		if self == 0 then return "0"
		if self == 1 then return "1"

		var nbytes = int_to_s_len
		var native = new NativeString(nbytes + 1)
		native[nbytes] = 0u8
		native_int_to_s(native, nbytes + 1)
		# One byte per char is declared for the result
		return new FlatString.full(native, nbytes, 0, nbytes)
	end
end
1237
redef class Array[E]

	# Fast implementation
	#
	# Concatenates the `to_s` of each non-null element in a single
	# `NativeString`; null elements are skipped.
	redef fun plain_to_s
	do
		var l = length
		if l == 0 then return ""
		var its = _items.as(not null)
		var first = its[0]
		if l == 1 then
			if first == null then return ""
			return first.to_s
		end

		# First pass: collect the strings and the total number of bytes
		var parts = new NativeArray[String](l)
		var nparts = 0
		var total = 0
		for i in [0 .. l[ do
			var e = its[i]
			if e == null then continue
			var str = e.to_s
			total += str.bytelen
			parts[nparts] = str
			nparts += 1
		end

		# Second pass: copy the bytes of each string one after the other
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. nparts[ do
			var str = parts[i]
			if str isa FlatString then
				var nbytes = str._bytelen
				str._items.copy_to(ns, nbytes, str._first_byte, off)
				off += nbytes
			else
				for sub in str.substrings do
					var fs = sub.as(FlatString)
					var nbytes = fs._bytelen
					fs._items.copy_to(ns, nbytes, fs._first_byte, off)
					off += nbytes
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0)
	end
end
1287
redef class NativeArray[E]
	# Concatenation of all the strings of `self` in a single `NativeString`
	#
	# REQUIRE: `self isa NativeArray[String]`
	redef fun native_to_s
	do
		assert self isa NativeArray[String]
		var l = length
		var na = self

		# First pass: total number of bytes
		var total = 0
		for i in [0 .. l[ do
			total += na[i].bytelen
		end

		# Second pass: copy the bytes of each string one after the other
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. l[ do
			var str = na[i]
			if str isa FlatString then
				var nbytes = str._bytelen
				str._items.copy_to(ns, nbytes, str._first_byte, off)
				off += nbytes
			else
				for sub in str.substrings do
					var fs = sub.as(FlatString)
					var nbytes = fs._bytelen
					fs._items.copy_to(ns, nbytes, fs._first_byte, off)
					off += nbytes
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0)
	end
end
1324
redef class Map[K,V]
	# Concatenation of the couples of `self`
	#
	# Key and value are separated by `couple_sep`, couples are separated by `sep`.
	# A null key or value is shown as `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var res = new Buffer
		var it = iterator

		# First couple: no separator before it
		var key = it.key
		var value = it.item
		res.append("{key or else "<null>"}{couple_sep}{value or else "<null>"}")
		it.next

		# Following couples: each one is preceded by `sep`
		while it.is_ok do
			res.append(sep)
			key = it.key
			value = it.item
			res.append("{key or else "<null>"}{couple_sep}{value or else "<null>"}")
			it.next
		end
		return res.to_s
	end
end