lib/core/flat: Optimized use of escape_to_c for FlatText
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text.
#
# It yields `tgt` once, then becomes exhausted.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok do return tgt != null

	redef fun item
	do
		var current = tgt
		# Accessing the item of an exhausted iterator is an error
		assert current != null
		return current
	end

	redef fun next
	do
		tgt = null
	end
end
36
redef class FlatText

	# Index in `items` of the first byte of `self`
	private fun first_byte: Int do return 0

	# Index in `items` of the last byte of `self`
	private fun last_byte: Int do return bytelen - 1

	# Cache of the latest position (char) explored in the string
	var position: Int = 0

	# Cached position (bytes) in the NativeString underlying the String
	var bytepos: Int = first_byte is lateinit

	# Index of the character `index` in `items`
	#
	# The lookup starts from whichever known anchor is the closest to
	# `index`: the beginning, the end, or the cached `position`/`bytepos` pair.
	# The cache is updated with the result before returning.
	private fun char_to_byte_index(index: Int): Int do
		var char_count = length
		assert index >= 0
		assert index < char_count

		# Distance (in chars) between `index` and each anchor
		var from_start = index
		var from_end = (char_count - 1) - index
		var from_cache = (position - index).abs

		var shortest = from_start
		if from_cache < shortest then shortest = from_cache
		if from_end < shortest then shortest = from_end

		var ns = items
		var anchor_byte: Int
		var anchor_char: Int
		if shortest == from_start then
			anchor_byte = first_byte
			anchor_char = 0
		else if shortest == from_cache then
			anchor_byte = bytepos
			anchor_char = position
		else
			anchor_byte = ns.find_beginning_of_char_at(last_byte)
			anchor_char = char_count - 1
		end

		var res = ns.char_to_byte_index_cached(index, anchor_char, anchor_byte)

		# Remember where we ended up for the next lookup
		position = index
		bytepos = res

		return res
	end

	# By escaping `self` to C, how many more bytes will be needed ?
	#
	# This enables a double-optimization in `escape_to_c` since if this
	# method returns 0, then `self` does not need escaping and can be
	# returned as-is
	protected fun chars_to_escape_to_c: Int do
		var src = _items
		var stop = last_byte
		var cur = first_byte
		var extra = 0
		while cur <= stop do
			var b = src[cur]
			if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
				# Two-byte escape sequence: one extra byte
				extra += 1
			else if b < 32u8 then
				# Backslash followed by three octal digits: three extra bytes
				extra += 3
			end
			cur += 1
		end
		return extra
	end

	redef fun escape_to_c do
		var extra = chars_to_escape_to_c
		if extra == 0 then return self.to_s
		var src = _items
		var stop = last_byte
		var out_len = _bytelen + extra
		var dst = new NativeString(out_len)
		var ipos = first_byte
		var opos = 0
		while ipos <= stop do
			var b = src[ipos]
			# Special codes:
			#
			# Any byte with value < 32 is a control character
			# All their uses will be replaced by their octal
			# value in C.
			#
			# There are two exceptions however:
			#
			# * 0x09 => \t
			# * 0x0A => \n
			#
			# Aside from the code points above, the following are:
			#
			# * 0x22 => \"
			# * 0x27 => \'
			# * 0x5C => \\

			# Byte written after the backslash of a two-byte escape,
			# or 0 when `b` has no such escape
			var tail = 0u8
			if b == 0x09u8 then
				tail = 0x74u8
			else if b == 0x0Au8 then
				tail = 0x6Eu8
			else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
				tail = b
			end

			if tail != 0u8 then
				dst[opos] = 0x5Cu8
				dst[opos + 1] = tail
				opos += 2
			else if b < 32u8 then
				dst[opos] = 0x5Cu8
				dst[opos + 1] = 0x30u8
				dst[opos + 2] = ((b & 0x38u8) >> 3) + 0x30u8
				dst[opos + 3] = (b & 0x07u8) + 0x30u8
				opos += 4
			else
				dst[opos] = b
				opos += 1
			end
			ipos += 1
		end
		return dst.to_s_with_length(out_len)
	end

	# Index of the character starting at byte `index` of `items`
	#
	# Works like `char_to_byte_index`, with distances measured in bytes.
	# The cache is updated with the result before returning.
	private fun byte_to_char_index(index: Int): Int do
		var byte_count = bytelen
		assert index >= 0
		assert index < byte_count

		# Distance (in bytes) between `index` and each anchor
		var from_start = index
		var from_end = (byte_count - 1) - index
		var from_cache = (bytepos - index).abs

		var shortest = from_start
		if from_cache < shortest then shortest = from_cache
		if from_end < shortest then shortest = from_end

		var ns = items
		var anchor_byte: Int
		var anchor_char: Int
		if shortest == from_start then
			anchor_byte = first_byte
			anchor_char = 0
		else if shortest == from_cache then
			anchor_byte = bytepos
			anchor_char = position
		else
			anchor_byte = ns.find_beginning_of_char_at(last_byte)
			anchor_char = length - 1
		end

		var res = ns.byte_to_char_index_cached(index, anchor_char, anchor_byte)

		# Remember where we ended up for the next lookup
		position = res
		bytepos = index

		return res
	end

	redef fun [](index)
	do
		var byte_index = char_to_byte_index(index)
		return items.char_at(byte_index)
	end
end
218
# Immutable strings of characters.
class FlatString
	super FlatText
	super String

	# Index at which `self` begins in `items`, inclusively
	redef var first_byte is noinit

	# Index at which `self` ends in `items`, inclusively
	redef var last_byte is noinit

	redef var chars = new FlatStringCharView(self) is lazy

	redef var bytes = new FlatStringByteView(self) is lazy

	# Number of characters, computed on first use by walking the bytes
	# between `first_byte` and `last_byte` one character at a time.
	redef var length is lazy do
		if bytelen == 0 then return 0
		var st = first_byte
		var its = items
		var ln = 0
		var lst = last_byte
		while st <= lst do
			st += its.length_of_char_at(st)
			ln += 1
		end
		return ln
	end

	redef fun reversed
	do
		var b = new FlatBuffer.with_capacity(bytelen + 1)
		for i in [length - 1 .. 0].step(-1) do
			b.add self[i]
		end
		var s = b.to_s.as(FlatString)
		# The char count is already known, avoid recomputing it lazily
		s.length = self.length
		return s
	end

	redef fun fast_cstring do return items.fast_cstring(first_byte)

	redef fun substring(from, count)
	do
		assert count >= 0

		# A negative start is clamped to 0 and shortens the request accordingly
		if from < 0 then
			count += from
			if count < 0 then count = 0
			from = 0
		end

		if (count + from) > length then count = length - from
		if count <= 0 then return ""
		var end_index = from + count - 1

		var bytefrom = char_to_byte_index(from)
		var byteto = char_to_byte_index(end_index)
		# `byteto` must designate the last byte of the last char
		byteto += items.length_of_char_at(byteto) - 1

		# The result shares `items` with `self`
		var s = new FlatString.full(items, byteto - bytefrom + 1, bytefrom, byteto, count)
		return s
	end

	redef fun empty do return "".as(FlatString)

	redef fun to_upper
	do
		var outstr = new FlatBuffer.with_capacity(self.bytelen + 1)

		var mylen = length
		var pos = 0

		while pos < mylen do
			outstr.add(chars[pos].to_upper)
			pos += 1
		end

		return outstr.to_s
	end

	redef fun to_lower
	do
		var outstr = new FlatBuffer.with_capacity(self.bytelen + 1)

		var mylen = length
		var pos = 0

		while pos < mylen do
			outstr.add(chars[pos].to_lower)
			pos += 1
		end

		return outstr.to_s
	end

	redef fun output
	do
		for i in chars do i.output
	end

	##################################################
	#              String Specific Methods           #
	##################################################

	# Low-level creation of a new string with minimal data.
	#
	# `items` will be used as is, without copy, to retrieve the characters of the string.
	# Aliasing issues is the responsibility of the caller.
	private init with_infos(items: NativeString, bytelen, from, to: Int)
	do
		self.items = items
		self.bytelen = bytelen
		first_byte = from
		last_byte = to
	end

	# Low-level creation of a new string with all the data.
	#
	# `items` will be used as is, without copy, to retrieve the characters of the string.
	# Aliasing issues is the responsibility of the caller.
	private init full(items: NativeString, bytelen, from, to, length: Int)
	do
		self.items = items
		self.length = length
		self.bytelen = bytelen
		first_byte = from
		last_byte = to
	end

	redef fun to_cstring do
		# The null-terminated copy is built once and kept in `real_items`
		if real_items != null then return real_items.as(not null)
		var new_items = new NativeString(bytelen + 1)
		self.items.copy_to(new_items, bytelen, first_byte, 0)
		new_items[bytelen] = 0u8
		real_items = new_items
		return new_items
	end

	redef fun ==(other)
	do
		if not other isa FlatString then return super

		if self.object_id == other.object_id then return true

		var my_length = bytelen

		if other.bytelen != my_length then return false

		var my_index = first_byte
		var its_index = other.first_byte

		var last_iteration = my_index + my_length

		var itsitems = other.items
		var myitems = self.items

		# Byte-wise comparison of the two ranges
		while my_index < last_iteration do
			if myitems[my_index] != itsitems[its_index] then return false
			my_index += 1
			its_index += 1
		end

		return true
	end

	redef fun <(other)
	do
		if not other isa FlatString then return super

		if self.object_id == other.object_id then return false

		var my_length = self.bytelen
		var its_length = other.bytelen

		var max = if my_length < its_length then my_length else its_length

		var myits = self.bytes
		var itsits = other.bytes

		# The first differing byte decides
		for i in [0 .. max[ do
			var my_curr_char = myits[i]
			var its_curr_char = itsits[i]

			if my_curr_char != its_curr_char then
				if my_curr_char < its_curr_char then return true
				return false
			end
		end

		# One is a prefix of the other: the shorter one is the smaller
		return my_length < its_length
	end

	redef fun +(o) do
		var s = o.to_s
		var slen = s.bytelen
		var mlen = bytelen
		var nlen = mlen + slen
		var mits = items
		var mifrom = first_byte
		if s isa FlatText then
			var sits = s.items
			var sifrom = s.first_byte
			var ns = new NativeString(nlen + 1)
			mits.copy_to(ns, mlen, mifrom, 0)
			sits.copy_to(ns, slen, sifrom, mlen)
			return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length)
		else
			abort
		end
	end

	redef fun *(i) do
		# A non-positive count yields the empty string.
		# Without this guard, a negative `i` gives a negative byte length
		# that is then used to allocate and to index the new `NativeString`.
		if i <= 0 then return empty
		var mybtlen = bytelen
		var new_bytelen = mybtlen * i
		var mylen = length
		var newlen = mylen * i
		var ns = new NativeString(new_bytelen + 1)
		ns[new_bytelen] = 0u8
		var offset = 0
		while i > 0 do
			items.copy_to(ns, bytelen, first_byte, offset)
			offset += mybtlen
			i -= 1
		end
		return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
	end


	redef fun hash
	do
		if hash_cache == null then
			# djb2 hash algorithm
			var h = 5381
			var i = first_byte

			var myitems = items

			while i <= last_byte do
				h = (h << 5) + h + myitems[i].to_i
				i += 1
			end

			hash_cache = h
		end

		return hash_cache.as(not null)
	end

	redef fun substrings do return new FlatSubstringsIter(self)
end
469
# Iterator over the chars of a `FlatString`, from `curr_pos` down to index 0.
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String being traversed
	var target: FlatString

	# Char index of the current item
	var curr_pos: Int

	# Starts a traversal of `tgt` at char index `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		init(tgt, pos)
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
491
# Iterator over the chars of a `FlatString`, from `curr_pos` up to `max`.
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String being traversed
	var target: FlatString

	# Last char index to visit, inclusively
	var max: Int

	# Char index of the current item
	var curr_pos: Int

	# Starts a traversal of `tgt` at char index `pos`, up to its last char
	init with_pos(tgt: FlatString, pos: Int)
	do
		var last = tgt.length - 1
		init(tgt, last, pos)
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
515
# View of a `FlatString` as a sequence of chars.
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator.with_pos(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator.with_pos(target, start)
	end
end
528
# Iterator over the bytes of a `FlatString`, going backwards to its first byte.
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String being traversed
	var target: FlatString

	# Native bytes of `target`
	var target_items: NativeString

	# Position of the current item in `target_items`
	var curr_pos: Int

	# Starts a traversal of `tgt` at byte `pos`, relative to the start of `tgt`
	init with_pos(tgt: FlatString, pos: Int)
	do
		var absolute = pos + tgt.first_byte
		init(tgt, tgt.items, absolute)
	end

	# Index of the current byte, relative to the start of `target`
	redef fun index do return curr_pos - target.first_byte

	redef fun is_ok do return curr_pos >= target.first_byte

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
552
# Iterator over the bytes of a `FlatString`, going forward to its last byte.
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String being traversed
	var target: FlatString

	# Native bytes of `target`
	var target_items: NativeString

	# Position of the current item in `target_items`
	var curr_pos: Int

	# Starts a traversal of `tgt` at byte `pos`, relative to the start of `tgt`
	init with_pos(tgt: FlatString, pos: Int)
	do
		var absolute = pos + tgt.first_byte
		init(tgt, tgt.items, absolute)
	end

	# Index of the current byte, relative to the start of `target`
	redef fun index do return curr_pos - target.first_byte

	redef fun is_ok do return curr_pos <= target.last_byte

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
576
# View of a `FlatString` as a sequence of bytes.
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		assert index >= 0
		var str = self.target
		# The index is valid only if, once shifted by `first_byte`,
		# it does not go past `last_byte`
		var absolute = index + str.first_byte
		assert absolute <= str.last_byte
		return str.items[absolute]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator.with_pos(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator.with_pos(target, start)
	end
end
597
redef class Buffer
	# Buffers are instantiated as `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# Same as `new`, with room reserved for `i` bytes
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
603
# Mutable strings of characters.
class FlatBuffer
	super FlatText
	super Buffer

	redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

	redef var bytes = new FlatBufferByteView(self) is lazy

	redef var bytelen = 0

	redef var length = 0

	private var char_cache: Int = -1

	private var byte_cache: Int = -1

	# Number of bytes that can be stored in `items` without reallocation
	private var capacity = 0

	redef fun fast_cstring do return items.fast_cstring(0)

	redef fun substrings do return new FlatSubstringsIter(self)

	# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
	#
	# This happens when an operation modifies the current `Buffer` and
	# the Copy-On-Write flag `written` is set at true.
	private fun reset do
		var nns = new NativeString(capacity)
		items.copy_to(nns, bytelen, 0, 0)
		items = nns
		written = false
	end

	# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
	#
	# Internal only, does not modify bytelen or length, this is the caller's responsability
	#
	# When the shifted content does not fit in `capacity`, a larger
	# `NativeString` is allocated and becomes the new `items`.
	private fun rshift_bytes(from: Int, len: Int) do
		var oit = items
		var nit = oit
		var needed = bytelen + len
		if needed > capacity then
			var ncap = capacity
			while ncap < needed do ncap = ncap * 2 + 2
			nit = new NativeString(ncap + 1)
			# Keep the `from` bytes located before the shifted area
			oit.copy_to(nit, from, 0, 0)
			capacity = ncap
			# Without this assignment the enlarged storage would be lost
			items = nit
		end
		oit.copy_to(nit, bytelen - from, from, from + len)
	end

	# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
	#
	# Internal only, does not modify bytelen or length, this is the caller's responsability
	private fun lshift_bytes(from: Int, len: Int) do
		items.copy_to(items, bytelen - from, from, from - len)
	end

	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if written then reset
		is_dirty = true
		# Writing right after the last char is an append
		if index == length then
			add item
			return
		end
		var ip = items.char_to_byte_index(index)
		var c = items.char_at(ip)
		var clen = c.u8char_len
		var itemlen = item.u8char_len
		# The new char may need more or fewer bytes than the replaced one
		var size_diff = itemlen - clen
		if size_diff > 0 then
			rshift_bytes(ip + clen, size_diff)
		else if size_diff < 0 then
			lshift_bytes(ip + clen, -size_diff)
		end
		bytelen += size_diff
		# Only a cached char located after the modified one has moved
		if position > index then bytepos += size_diff
		# `items` is read again here since `rshift_bytes` may have replaced it
		items.set_char_at(ip, item)
	end

	redef fun add(c)
	do
		if written then reset
		is_dirty = true
		var clen = c.u8char_len
		enlarge(bytelen + clen)
		items.set_char_at(bytelen, c)
		bytelen += clen
		length += 1
	end

	redef fun clear do
		is_dirty = true
		if written then reset
		bytelen = 0
		length = 0
		# The cached char/byte pair described the previous content
		position = 0
		bytepos = 0
	end

	redef fun empty do return new Buffer

	redef fun enlarge(cap)
	do
		var c = capacity
		if cap <= c then return
		while c <= cap do c = c * 2 + 2
		# The COW flag can be set at false here, since
		# it does a copy of the current `Buffer`
		written = false
		var a = new NativeString(c+1)
		if bytelen > 0 then items.copy_to(a, bytelen, 0, 0)
		items = a
		capacity = c
	end

	redef fun to_s
	do
		# The returned string shares `items`: the next write must copy first
		written = true
		if bytelen == 0 then items = new NativeString(1)
		return new FlatString.full(items, bytelen, 0, bytelen - 1, length)
	end

	redef fun to_cstring
	do
		# The null-terminated copy is rebuilt only after a modification
		if is_dirty then
			var new_native = new NativeString(bytelen + 1)
			new_native[bytelen] = 0u8
			if length > 0 then items.copy_to(new_native, bytelen, 0, 0)
			real_items = new_native
			is_dirty = false
		end
		return real_items.as(not null)
	end

	# Create a new empty string.
	init do end

	# Low-level creation a new buffer with given data.
	#
	# `items` will be used as is, without copy, to store the characters of the buffer.
	# Aliasing issues is the responsibility of the caller.
	#
	# If `items` is shared, `written` should be set to true after the creation
	# so that a modification will do a copy-on-write.
	private init with_infos(items: NativeString, capacity, bytelen, length: Int)
	do
		self.items = items
		self.capacity = capacity
		self.bytelen = bytelen
		self.length = length
	end

	# Create a new string copied from `s`.
	#
	# The storage of `s` is shared only when `s` is flat and starts at
	# byte 0 of its `items`, since a buffer always starts at byte 0.
	# Otherwise the bytes of `s` are copied into a new `NativeString`.
	init from(s: Text)
	do
		var blen = s.bytelen
		if s isa FlatText then
			if s.first_byte == 0 then
				items = s.items
			else
				var nits = new NativeString(blen)
				s.items.copy_to(nits, blen, s.first_byte, 0)
				items = nits
			end
		else
			var nits = new NativeString(blen)
			var off = 0
			# Chunks of `s` are laid out one after the other
			for i in s.substrings do
				var sub = i.as(FlatString)
				var sublen = sub.bytelen
				sub.items.copy_to(nits, sublen, sub.first_byte, off)
				off += sublen
			end
			items = nits
		end
		bytelen = blen
		length = s.length
		capacity = blen
		written = true
	end

	# Create a new empty string with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		items = new NativeString(cap + 1)
		capacity = cap
		bytelen = 0
	end

	redef fun append(s)
	do
		if s.is_empty then return
		is_dirty = true
		var sl = s.bytelen
		enlarge(bytelen + sl)
		if s isa FlatText then
			s.items.copy_to(items, sl, s.first_byte, bytelen)
		else
			# Each substring updates `bytelen` and `length` by itself
			for i in s.substrings do append i
			return
		end
		bytelen += sl
		length += s.length
	end

	# Copies the content of self in `dest`
	fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
	do
		var self_chars = self.chars
		var dest_chars = dest.chars
		for i in [0..len-1] do
			dest_chars[new_start+i] = self_chars[start+i]
		end
	end

	redef fun substring(from, count)
	do
		assert count >= 0
		if from < 0 then from = 0
		if (from + count) > length then count = length - from
		# `count` is negative when `from` is past the end
		if count <= 0 then return new Buffer
		var bytefrom = items.char_to_byte_index(from)
		var byteto = items.char_to_byte_index(count + from - 1)
		# `byteto` must designate the last byte of the last char
		byteto += items.char_at(byteto).u8char_len - 1
		var byte_length = byteto - bytefrom + 1
		var r_items = new NativeString(byte_length)
		items.copy_to(r_items, byte_length, bytefrom, 0)
		return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
	end

	redef fun reverse
	do
		written = false
		var ns = new FlatBuffer.with_capacity(capacity)
		for i in chars.reverse_iterator do ns.add i
		items = ns.items
		# The content changed: `to_cstring` must rebuild its copy and
		# the cached char/byte pair no longer matches
		is_dirty = true
		position = 0
		bytepos = 0
	end

	redef fun times(repeats)
	do
		# Snapshot of the current content, appended `repeats - 1` times
		var x = new FlatString.full(items, bytelen, 0, bytelen - 1, length)
		for i in [1 .. repeats[ do
			append(x)
		end
	end

	redef fun upper
	do
		if written then reset
		for i in [0 .. length[ do self[i] = self[i].to_upper
	end

	redef fun lower
	do
		if written then reset
		for i in [0 .. length[ do self[i] = self[i].to_lower
	end
end
850
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` down to byte 0.
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer being traversed
	var target: FlatBuffer

	# Native bytes of `target`, as they were when the iterator was created
	var target_items: NativeString

	# Position of the current item in `target_items`
	var curr_pos: Int

	# Starts a traversal of `tgt` at byte `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		var bytes = tgt.items
		init(tgt, bytes, pos)
	end

	redef fun is_ok do return curr_pos >= 0

	redef fun index do return curr_pos

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
874
# View of a `FlatBuffer` as a sequence of bytes.
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	# Byte at `index` in the items of the buffer (no bound check is done here)
	redef fun [](index)
	do
		return target.items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator.with_pos(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator.with_pos(target, pos)
	end
end
887
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` up to its last byte.
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer being traversed
	var target: FlatBuffer

	# Native bytes of `target`, as they were when the iterator was created
	var target_items: NativeString

	# Position of the current item in `target_items`
	var curr_pos: Int

	# Starts a traversal of `tgt` at byte `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		var bytes = tgt.items
		init(tgt, bytes, pos)
	end

	# The bound is read from the buffer at each call
	redef fun is_ok do return curr_pos < target.bytelen

	redef fun index do return curr_pos

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
911
# Iterator over the chars of a `FlatBuffer`, from `curr_pos` down to index 0.
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer being traversed
	var target: FlatBuffer

	# Char index of the current item
	var curr_pos: Int

	# Starts a traversal of `tgt` at char index `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, pos)
	end

	redef fun is_ok do return curr_pos >= 0

	redef fun index do return curr_pos

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
933
# View of a `FlatBuffer` as a mutable sequence of chars.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		# Writing right after the last char is an append
		if index == length then
			add(item)
		else
			target[index] = item
		end
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Delegates to `enlarge` of the underlying buffer
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	redef fun append(s)
	do
		var incoming = s.length
		if target.capacity < incoming then
			enlarge(incoming + target.length)
		end
		for c in s do target.add c
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator.with_pos(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator.with_pos(target, pos)
	end
end
978
# Iterator over the chars of a `FlatBuffer`, from `curr_pos` up to `max`.
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer being traversed
	var target: FlatBuffer

	# Last char index to visit, inclusively
	var max: Int

	# Char index of the current item
	var curr_pos: Int

	# Starts a traversal of `tgt` at char index `pos`.
	#
	# The upper bound is the index of the last char of `tgt` at creation time.
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		var last = tgt.length - 1
		init(tgt, last, pos)
	end

	redef fun is_ok do return curr_pos <= max

	redef fun index do return curr_pos

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1002
redef class NativeString
	# Returns `self` as a String whose byte length is given by `cstring_length`
	redef fun to_s
	do
		var len = cstring_length
		return to_s_with_length(len)
	end

	# Returns `self` as a String of `length`.
	#
	# `self` is used as is, without copy, by the new string.
	redef fun to_s_with_length(length): FlatString
	do
		assert length >= 0
		return new FlatString.with_infos(self, length, 0, length - 1)
	end

	# Returns `self` as a String of `bytelen` bytes and `unilen` chars,
	# without copy.
	redef fun to_s_full(bytelen, unilen) do
		var last = bytelen - 1
		return new FlatString.full(self, bytelen, 0, last, unilen)
	end

	# Returns `self` as a new String.
	redef fun to_s_with_copy: FlatString
	do
		var len = cstring_length
		var copy = new NativeString(len + 1)
		copy_to(copy, len, 0, 0)
		copy[len] = 0u8
		var res = new FlatString.with_infos(copy, len, 0, len - 1)
		# The copy is null-terminated, so it can also serve as the C string
		res.real_items = copy
		return res
	end

	# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
	#
	# Very unsafe, make sure to have room for this char prior to calling this function.
	private fun set_char_at(pos: Int, c: Char) do
		native_set_char(pos, c, c.u8char_len)
	end

	# Writes the `ln` bytes of the UTF-8 encoding of `c` at position `pos`
	private fun native_set_char(pos: Int, c: Char, ln: Int) `{
		char* dst = self + pos;
		switch(ln){
			case 1:
				dst[0] = c;
				break;
			case 2:
				dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
				dst[1] = 0x80 | (c & 0x3F);
				break;
			case 3:
				dst[0] = 0xE0 | ((c & 0xF000) >> 12);
				dst[1] = 0x80 | ((c & 0xFC0) >> 6);
				dst[2] = 0x80 | (c & 0x3F);
				break;
			case 4:
				dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
				dst[1] = 0x80 | ((c & 0x3F000) >> 12);
				dst[2] = 0x80 | ((c & 0xFC0) >> 6);
				dst[3] = 0x80 | (c & 0x3F);
				break;
		}
	`}
end
1065
redef class Int
	# The result is built in a buffer of `digit_count(base)` spaces
	# that `fill_buffer` then overwrites.
	redef fun to_base(base, signed)
	do
		var width = digit_count(base)
		var padding = " " * width
		var buf = new FlatBuffer.from(padding)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# return displayable int in base 10 and signed
	#
	#     assert 1.to_s            == "1"
	#     assert (-123).to_s       == "-123"
	redef fun to_s do
		# Fast case for common numbers
		if self == 0 then return "0"
		if self == 1 then return "1"

		var len = int_to_s_len
		# One more byte for the terminating null
		var digits = new NativeString(len + 1)
		digits[len] = 0u8
		native_int_to_s(digits, len + 1)
		return new FlatString.full(digits, len, 0, len - 1, len)
	end
end
1091
redef class Array[E]

	# Fast implementation
	#
	# The non-null elements are converted with `to_s`, then their bytes
	# are copied one after the other into a single `NativeString`.
	redef fun plain_to_s
	do
		var count = length
		if count == 0 then return ""
		if count == 1 then
			if self[0] == null then return ""
			return self[0].to_s
		end

		# First pass: stringify the elements and sum their byte lengths
		var raw = _items
		var parts = new NativeArray[String](count)
		var total = 0
		var kept = 0
		var i = 0
		while i < count do
			var elem = raw[i]
			i += 1
			if elem == null then continue
			var str = elem.to_s
			total += str.bytelen
			parts[kept] = str
			kept += 1
		end

		# Second pass: copy the bytes of each part at its offset
		var res = new NativeString(total + 1)
		res[total] = 0u8
		var off = 0
		i = 0
		while i < kept do
			var part = parts[i]
			if part isa FlatString then
				var plen = part.bytelen
				part.items.copy_to(res, plen, part.first_byte, off)
				off += plen
			else
				for chunk in part.substrings do
					var flat = chunk.as(FlatString)
					var flen = flat.bytelen
					flat.items.copy_to(res, flen, flat.first_byte, off)
					off += flen
				end
			end
			i += 1
		end
		return res.to_s_with_length(total)
	end
end
1140
redef class NativeArray[E]
	# Concatenation of all the strings of `self`.
	#
	# The total byte length is computed first, then the bytes of each
	# string are copied at their offset into a single `NativeString`.
	redef fun native_to_s do
		assert self isa NativeArray[String]
		var count = length
		var parts = self

		# First pass: total byte length
		var total = 0
		var i = 0
		while i < count do
			total += parts[i].bytelen
			i += 1
		end

		# Second pass: copy
		var res = new NativeString(total + 1)
		res[total] = 0u8
		var off = 0
		i = 0
		while i < count do
			var part = parts[i]
			if part isa FlatString then
				var plen = part.bytelen
				part.items.copy_to(res, plen, part.first_byte, off)
				off += plen
			else
				for chunk in part.substrings do
					var flat = chunk.as(FlatString)
					var flen = flat.bytelen
					flat.items.copy_to(res, flen, flat.first_byte, off)
					off += flen
				end
			end
			i += 1
		end
		return res.to_s_with_length(total)
	end
end
1177
redef class Map[K,V]
	# Each entry is rendered as key, `couple_sep`, value; entries are
	# separated by `sep`. A null key or value is rendered as `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var out = new Buffer # Result
		var it = iterator
		var first = true
		while it.is_ok do
			# The separator goes between entries, not before the first one
			if first then
				first = false
			else
				out.append(sep)
			end
			var k = it.key
			var e = it.item
			out.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			it.next
		end
		return out.to_s
	end
end