lib/core: Optimized `html_escape` for FlatText variants
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator on the substrings of a flat text
#
# It yields `tgt` exactly once: `next` drops the reference,
# which ends the iteration.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once consumed
	var tgt: nullable FlatText

	redef fun is_ok
	do
		return tgt != null
	end

	redef fun item
	do
		var current = tgt
		assert current != null
		return current
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# Index in `_items` of the first byte of `self`, 0 unless redefined
fun first_byte: Int
do
	return 0
end

# Index in `_items` of the last byte of `self`
fun last_byte: Int
do
	return _bytelen - 1
end

# Cache of the latest position (char) explored in the string
var position: Int = 0

# Cached position (bytes) in the NativeString underlying the String
var bytepos: Int = 0
48
# Index of the character `index` in `_items`
#
# The lookup starts from whichever known anchor is the closest (in chars)
# to `index`: the beginning of the text, the cached position, or the end.
# The result is then stored in the `position`/`bytepos` cache.
fun char_to_byte_index(index: Int): Int
do
	var char_count = length
	assert index >= 0
	assert index < char_count

	var cached_pos = _position
	var items = _items

	# Distance between `index` and each candidate starting point
	var from_start = index
	var from_cache = (cached_pos - index).abs
	var from_end = (char_count - 1) - index

	var closest = from_start
	if from_cache < closest then closest = from_cache
	if from_end < closest then closest = from_end

	var start_char: Int
	var start_byte: Int
	if closest == from_start then
		start_char = 0
		start_byte = first_byte
	else if closest == from_cache then
		start_char = cached_pos
		start_byte = _bytepos
	else
		start_char = char_count - 1
		start_byte = items.find_beginning_of_char_at(last_byte)
	end

	var byte_index = items.char_to_byte_index_cached(index, start_char, start_byte)

	# Remember where we ended for the next lookup
	_position = index
	_bytepos = byte_index

	return byte_index
end
87
# By escaping `self` to HTML, how many more bytes will be needed ?
#
# Returns 0 when no byte of `self` needs to be escaped.
fun chars_to_html_escape: Int
do
	var items = _items
	var stop = last_byte
	var cur = first_byte
	var extra = 0
	while cur <= stop do
		var b = items[cur]
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `<` and `>` are replaced by 4-byte entities: 3 more bytes
			extra += 3
		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# `&`, `"`, `'` and `/` are replaced by 5-byte entities: 4 more bytes
			extra += 4
		end
		cur += 1
	end
	return extra
end
113
# Escape the HTML meta-characters of `self`
#
# The following bytes are replaced by an HTML entity:
#
# * 0x3C (<) => &lt;
# * 0x3E (>) => &gt;
# * 0x26 (&) => &amp;
# * 0x22 (") => &#34;
# * 0x27 (') => &#39;
# * 0x2F (/) => &#47;
#
# When nothing has to be escaped, `to_s` is returned.
redef fun html_escape
do
	var extra = chars_to_html_escape
	if extra == 0 then return to_s
	var src = _items
	var stop = last_byte
	var rd = first_byte
	var out_len = extra + _bytelen
	var dst = new NativeString(out_len)
	var wr = 0
	while rd <= stop do
		var b = src[rd]
		rd += 1
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `&lt;` or `&gt;`: only the second byte differs
			dst[wr] = 0x26u8
			dst[wr + 1] = if b == 0x3Cu8 then 0x6Cu8 else 0x67u8
			dst[wr + 2] = 0x74u8
			dst[wr + 3] = 0x3Bu8
			wr += 4
		else if b == 0x26u8 then
			# `&amp;`
			dst[wr] = 0x26u8
			dst[wr + 1] = 0x61u8
			dst[wr + 2] = 0x6Du8
			dst[wr + 3] = 0x70u8
			dst[wr + 4] = 0x3Bu8
			wr += 5
		else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# Numeric entities `&#34;`, `&#39;` and `&#47;`
			dst[wr] = 0x26u8
			dst[wr + 1] = 0x23u8
			if b == 0x2Fu8 then
				dst[wr + 2] = 0x34u8
				dst[wr + 3] = 0x37u8
			else
				dst[wr + 2] = 0x33u8
				dst[wr + 3] = if b == 0x22u8 then 0x34u8 else 0x39u8
			end
			dst[wr + 4] = 0x3Bu8
			wr += 5
		else
			# Regular byte, copied as is
			dst[wr] = b
			wr += 1
		end
	end
	return new FlatString.with_infos(dst, out_len, 0, out_len - 1)
end
185
# By escaping `self` to C, how many more bytes will be needed ?
#
# This enables a double-optimization in `escape_to_c` since if this
# method returns 0, then `self` does not need escaping and can be
# returned as-is
fun chars_to_escape_to_c: Int
do
	var items = _items
	var stop = last_byte
	var cur = first_byte
	var needed = 0
	while cur <= stop do
		var b = items[cur]
		if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Two-byte escape sequence: 1 more byte
			needed += 1
		else if b < 32u8 then
			# Other control bytes use a four-byte octal escape: 3 more bytes
			needed += 3
		end
		cur += 1
	end
	return needed
end
215
# Escape `self` so that it can be used in a C string literal
#
# Any byte with a value < 32 is a control character and is replaced
# by its octal value in C, with two exceptions:
#
# * 0x09 => \t
# * 0x0A => \n
#
# The following bytes are escaped with a backslash:
#
# * 0x22 => \"
# * 0x27 => \'
# * 0x5C => \\
#
# When nothing has to be escaped, `to_s` is returned.
redef fun escape_to_c
do
	var extra = chars_to_escape_to_c
	if extra == 0 then return self.to_s
	var src = _items
	var stop = last_byte
	var out_len = _bytelen + extra
	var dst = new NativeString(out_len)
	var rd = first_byte
	var wr = 0
	while rd <= stop do
		var b = src[rd]
		rd += 1
		# Byte following the backslash in a two-byte escape, 0 if `b` has none
		var short_esc = 0u8
		if b == 0x09u8 then
			short_esc = 0x74u8
		else if b == 0x0Au8 then
			short_esc = 0x6Eu8
		else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			short_esc = b
		end
		if short_esc != 0u8 then
			dst[wr] = 0x5Cu8
			dst[wr + 1] = short_esc
			wr += 2
		else if b < 32u8 then
			# Octal escape: backslash, `0`, then two octal digits
			dst[wr] = 0x5Cu8
			dst[wr + 1] = 0x30u8
			dst[wr + 2] = ((b & 0x38u8) >> 3) + 0x30u8
			dst[wr + 3] = (b & 0x07u8) + 0x30u8
			wr += 4
		else
			dst[wr] = b
			wr += 1
		end
	end
	return dst.to_s_with_length(out_len)
end
277
# Character at position `index` (in chars) of `self`
redef fun [](index)
do
	var byte_index = char_to_byte_index(index)
	return _items.char_at(byte_index)
end
279 end
280
281 # Immutable strings of characters.
282 class FlatString
283 super FlatText
284 super String
285
# Index at which `self` begins in `_items`, inclusively
redef var first_byte is noinit

# Index at which `self` ends in `_items`, inclusively
redef var last_byte is noinit

# View of `self` as a sequence of characters, created on first access
redef var chars = new FlatStringCharView(self) is lazy

# View of `self` as a sequence of bytes, created on first access
redef var bytes = new FlatStringByteView(self) is lazy

# Number of characters of `self`, computed on first access
redef var length is lazy do
	var char_count = 0
	if _bytelen != 0 then char_count = _items.utf8_length(_first_byte, _last_byte)
	return char_count
end

# NUL-terminated copy of the bytes of `self`, built on first access
redef var to_cstring is lazy do
	var size = _bytelen
	var copy = new NativeString(size + 1)
	_items.copy_to(copy, size, _first_byte, 0)
	copy[size] = 0u8
	return copy
end
308
# A new string with the characters of `self` in reverse order
redef fun reversed
do
	var char_count = length
	var buf = new FlatBuffer.with_capacity(_bytelen + 1)
	var i = char_count - 1
	while i >= 0 do
		buf.add self[i]
		i -= 1
	end
	var res = buf.to_s.as(FlatString)
	# The char count is already known, no need to compute it again
	res.length = char_count
	return res
end

redef fun fast_cstring
do
	return _items.fast_cstring(_first_byte)
end
321
# Substring of `count` characters starting at char index `from`
#
# The result shares `_items` with `self`, no byte is copied.
redef fun substring(from, count)
do
	assert count >= 0

	# A negative start is moved to 0 and shortens the requested span
	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end

	var char_count = length
	if from + count > char_count then count = char_count - from
	if count <= 0 then return ""

	var items = _items
	var first = char_to_byte_index(from)
	var last = char_to_byte_index(from + count - 1)
	# `last` must point to the final byte of the last character
	last += items.length_of_char_at(last) - 1

	return new FlatString.full(items, last - first + 1, first, last, count)
end

redef fun empty
do
	return "".as(FlatString)
end
346
# A copy of `self` where each character is replaced by its `to_upper`
redef fun to_upper
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	for i in [0 .. length[ do res.add(chars[i].to_upper)
	return res.to_s
end

# A copy of `self` where each character is replaced by its `to_lower`
redef fun to_lower
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	for i in [0 .. length[ do res.add(chars[i].to_lower)
	return res.to_s
end
376
# Call `output` on each character of `self`, in order
redef fun output
do
	var it = chars.iterator
	while it.is_ok do
		it.item.output
		it.next
	end
end
381
382 ##################################################
383 # String Specific Methods #
384 ##################################################
385
# Low-level creation of a new string with minimal data.
#
# `items` is used as is, without copy, to retrieve the characters of the string.
# Aliasing issues are the responsibility of the caller.
#
# `from` and `to` are the indexes of the first and last bytes in `items`.
private init with_infos(items: NativeString, bytelen, from, to: Int)
do
	_first_byte = from
	_last_byte = to
	# The cached byte position starts on the first byte of the string
	_bytepos = from
	self._bytelen = bytelen
	self._items = items
end

# Low-level creation of a new string with all the data.
#
# `items` is used as is, without copy, to retrieve the characters of the string.
# Aliasing issues are the responsibility of the caller.
#
# Same as `with_infos`, with the number of characters `length` given upfront.
private init full(items: NativeString, bytelen, from, to, length: Int)
do
	_first_byte = from
	_last_byte = to
	# The cached byte position starts on the first byte of the string
	_bytepos = from
	self._bytelen = bytelen
	self._items = items
	self.length = length
end
412
# Byte-wise equality with another `FlatString`
#
# Other kinds of objects are handled by `super`.
redef fun ==(other)
do
	if not other isa FlatString then return super
	if self.object_id == other.object_id then return true

	var size = _bytelen
	if other._bytelen != size then return false

	var mine = self._items
	var theirs = other._items
	var my_start = _first_byte
	var its_start = other._first_byte
	for off in [0 .. size[ do
		if mine[my_start + off] != theirs[its_start + off] then return false
	end
	return true
end
439
# Byte-wise lexicographic comparison with another `FlatString`
#
# When one string is a prefix of the other, the shortest is the lowest.
# Other kinds of objects are handled by `super`.
redef fun <(other)
do
	if not other isa FlatString then return super
	if self.object_id == other.object_id then return false

	var my_size = self._bytelen
	var its_size = other._bytelen

	# Only the bytes present in both strings can be compared
	var common = my_size
	if its_size < common then common = its_size

	var mine = self.bytes
	var theirs = other.bytes
	var i = 0
	while i < common do
		var a = mine[i]
		var b = theirs[i]
		if a != b then return a < b
		i += 1
	end

	return my_size < its_size
end
466
# Concatenation of `self` and `o.to_s` in a newly allocated string
#
# Aborts if `o.to_s` is not a `FlatText`.
redef fun +(o)
do
	var other = o.to_s
	var its_size = other.bytelen
	if not other isa FlatText then abort
	var my_size = _bytelen
	var total = my_size + its_size
	var res = new NativeString(total + 1)
	_items.copy_to(res, my_size, _first_byte, 0)
	other._items.copy_to(res, its_size, other.first_byte, my_size)
	return new FlatString.full(res, total, 0, total - 1, length + o.length)
end
485
# Concatenation of `i` copies of `self` in a newly allocated string
#
# `i` must not be negative: a negative count would otherwise be used
# to compute the size of the allocation.
redef fun *(i)
do
	assert i >= 0
	var mybtlen = _bytelen
	var new_bytelen = mybtlen * i
	var newlen = length * i
	var its = _items
	var fb = _first_byte
	var ns = new NativeString(new_bytelen + 1)
	# Keep the result NUL-terminated
	ns[new_bytelen] = 0u8
	var offset = 0
	while i > 0 do
		its.copy_to(ns, mybtlen, fb, offset)
		offset += mybtlen
		i -= 1
	end
	return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
end
503
504
# Hash of the bytes of `self` (djb2 algorithm)
#
# The value is computed once, then kept in `hash_cache`.
redef fun hash
do
	var cached = hash_cache
	if cached != null then return cached

	var h = 5381
	var items = _items
	var stop = _last_byte
	var i = _first_byte
	while i <= stop do
		h = (h << 5) + h + items[i].to_i
		i += 1
	end

	hash_cache = h
	return h
end

redef fun substrings
do
	return new FlatSubstringsIter(self)
end
527 end
528
# Backward iterator on the characters of a `FlatString`
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String whose characters are iterated
	var target: FlatString

	# Index (in chars) of the current item
	var curr_pos: Int

	# Iterate on `tgt`, starting at char index `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		init(tgt, pos)
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
550
# Forward iterator on the characters of a `FlatString`
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String whose characters are iterated
	var target: FlatString

	# Last valid char index
	var max: Int

	# Index (in chars) of the current item
	var curr_pos: Int

	# Iterate on `tgt`, from char index `pos` up to its last character
	init with_pos(tgt: FlatString, pos: Int)
	do
		var last = tgt.length - 1
		init(tgt, last, pos)
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun next
	do
		curr_pos += 1
	end
end
574
# View of a `FlatString` as a sequence of characters
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator.with_pos(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator.with_pos(target, start)
	end
end
587
# Backward iterator on the bytes of a `FlatString`
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String whose bytes are iterated
	var target: FlatString

	# Underlying bytes of `target`
	var target_items: NativeString

	# Index of the current byte in `target_items`
	var curr_pos: Int

	# Iterate on `tgt`, starting at its byte index `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		# `pos` is relative to the string, `curr_pos` to its `_items`
		var start = pos + tgt._first_byte
		init(tgt, tgt._items, start)
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos >= target._first_byte
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
611
# Forward iterator on the bytes of a `FlatString`
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String whose bytes are iterated
	var target: FlatString

	# Underlying bytes of `target`
	var target_items: NativeString

	# Index of the current byte in `target_items`
	var curr_pos: Int

	# Iterate on `tgt`, starting at its byte index `pos`
	init with_pos(tgt: FlatString, pos: Int)
	do
		# `pos` is relative to the string, `curr_pos` to its `_items`
		var start = pos + tgt._first_byte
		init(tgt, tgt._items, start)
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos <= target._last_byte
	end

	redef fun next
	do
		curr_pos += 1
	end
end
635
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		assert index >= 0
		var str = self.target
		# `index` is relative to the string: translate it to a position
		# in `_items` and make sure it does not go past the last byte
		var at = str._first_byte + index
		assert at <= str._last_byte
		return str._items[at]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator.with_pos(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator.with_pos(target, start)
	end
end
657
redef class Buffer
	# A new empty buffer, implemented by a `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# A new empty buffer of capacity `i`, implemented by a `FlatBuffer`
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
663
664 # Mutable strings of characters.
665 class FlatBuffer
666 super FlatText
667 super Buffer
668
# View of `self` as a sequence of characters, created on first access
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

# View of `self` as a sequence of bytes, created on first access
redef var bytes = new FlatBufferByteView(self) is lazy

# Number of characters currently stored
redef var length = 0

private var char_cache: Int = -1

private var byte_cache: Int = -1

# Capacity recorded for `_items`
private var capacity = 0

# Real items, used as cache for when to_cstring is called
private var real_items: NativeString is noinit

redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

redef fun substrings
do
	return new FlatSubstringsIter(self)
end
687
# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
#
# This happens when an operation modifies the current `Buffer` and
# the Copy-On-Write flag `written` is set at true.
private fun reset
do
	var used = _bytelen
	var fresh = new NativeString(capacity)
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	# `self` is now the only user of its storage
	written = false
end
698
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# When the shifted content does not fit in `capacity`, the storage is
# first enlarged: `_items` may thus reference a new `NativeString`
# once this method returns.
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int)
do
	var bt = _bytelen
	# `enlarge` does nothing when the capacity is sufficient, else it
	# reallocates `_items` and copies the `_bytelen` current bytes
	enlarge(bt + len)
	var it = _items
	it.copy_to(it, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int)
do
	var it = _items
	it.copy_to(it, _bytelen - from, from, from - len)
end

# Replace the character at `index` by `item`
#
# `index == length` is accepted and appends `item`.
# The following bytes are shifted when the UTF-8 encoding of `item`
# does not have the same size as the one of the replaced character.
redef fun []=(index, item)
do
	assert index >= 0 and index <= length
	if written then reset
	is_dirty = true
	if index == length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
		# The shift may have reallocated the storage
		it = _items
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_bytelen += size_diff
	# Only the characters located after `index` have moved: the cached
	# byte position is still valid when it is at or before `index`
	if position > index then bytepos += size_diff
	it.set_char_at(ip, item)
end
746
# Append the character `c` at the end of `self`
redef fun add(c)
do
	if written then reset
	is_dirty = true
	var at = _bytelen
	var width = c.u8char_len
	# Make sure there is room for the UTF-8 encoding of `c`
	enlarge(at + width)
	_items.set_char_at(at, c)
	_bytelen += width
	length += 1
end

# Remove all the characters of `self`
redef fun clear
do
	is_dirty = true
	if written then reset
	length = 0
	_bytelen = 0
end

redef fun empty
do
	return new Buffer
end
767
# Make sure that `capacity` is at least `cap`
#
# Nothing is done when the capacity is already sufficient.
# Otherwise the content is copied to a new, larger, `NativeString`.
redef fun enlarge(cap)
do
	var new_cap = capacity
	if cap <= new_cap then return
	while new_cap <= cap do new_cap = new_cap * 2 + 2
	# The COW flag can be set at false here, since
	# it does a copy of the current `Buffer`
	written = false
	var used = _bytelen
	var fresh = new NativeString(new_cap + 1)
	if used > 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	capacity = new_cap
end
785
# A `FlatString` with the current content of `self`
#
# The string shares `_items` with `self`: `written` is set so that
# the next modification of the buffer starts by a copy.
redef fun to_s
do
	written = true
	var size = _bytelen
	if size == 0 then _items = new NativeString(1)
	var items = _items
	return new FlatString.full(items, size, 0, size - 1, length)
end
793
# NUL-terminated copy of the content of `self`
#
# The copy is kept in `real_items` and reused until `is_dirty` is set again.
redef fun to_cstring
do
	if not is_dirty then return real_items
	var size = _bytelen
	var fresh = new NativeString(size + 1)
	fresh[size] = 0u8
	if length > 0 then _items.copy_to(fresh, size, 0, 0)
	real_items = fresh
	is_dirty = false
	return fresh
end
806
# Create a new empty string.
init
do
end

# Low-level creation a new buffer with given data.
#
# `items` will be used as is, without copy, to store the characters of the buffer.
# Aliasing issues are the responsibility of the caller.
#
# If `items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
	self.length = length
	self._bytelen = bytelen
	self.capacity = capacity
	self._items = items
end
824
# Create a new string copied from `s`.
#
# When `s` is a `FlatString` starting on the first byte of its storage,
# the storage is shared and `written` is set so that the first
# modification does a copy-on-write.
# In the other cases the bytes of `s` are copied: a buffer always
# starts at byte 0 of its `_items`, and sharing the storage of
# another mutable text would alias their modifications.
init from(s: Text)
do
	var size = s.bytelen
	var shared = false
	if s isa FlatString then
		if s._first_byte == 0 then
			_items = s._items
			shared = true
		end
	end
	if not shared then
		var its = new NativeString(size)
		# Copy the substrings of `s` one after the other
		var off = 0
		for sub in s.substrings do
			var sublen = sub._bytelen
			sub._items.copy_to(its, sublen, sub.first_byte, off)
			off += sublen
		end
		_items = its
	end
	_bytelen = size
	length = s.length
	_capacity = size
	written = shared
end
839
# Create a new empty string with a given capacity.
#
# `cap` must not be negative.
init with_capacity(cap: Int)
do
	assert cap >= 0
	_bytelen = 0
	capacity = cap
	# One more byte than `cap` is allocated
	_items = new NativeString(cap + 1)
end
848
# Append the content of `s` at the end of `self`
#
# A text that is not a `FlatText` is appended one substring at a time.
redef fun append(s)
do
	if s.is_empty then return
	is_dirty = true
	var added = s.bytelen
	var total = _bytelen + added
	enlarge(total)
	if not s isa FlatText then
		for chunk in s.substrings do append chunk
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _bytelen)
	_bytelen = total
	length += s.length
end
865
# Copies the content of self in `dest`
#
# `len` characters are copied, from index `start` of `self`
# to index `new_start` of `dest`.
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src = self.chars
	var dst = dest.chars
	var i = 0
	while i < len do
		dst[new_start + i] = src[start + i]
		i += 1
	end
end
875
# A new buffer with a copy of `count` characters of `self`, starting at char index `from`
#
# The bounds are handled like in `FlatString::substring`: a negative
# `from` is replaced by 0 and shortens `count` accordingly, and the span
# is cut at the end of `self`. An empty buffer is returned when no
# character remains, including when `from` is past the end.
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end
	if (from + count) > length then count = length - from
	# `count` is negative when `from` is past the end
	if count <= 0 then return new Buffer
	var its = _items
	var bytefrom = its.char_to_byte_index(from)
	var byteto = its.char_to_byte_index(count + from - 1)
	# `byteto` must point to the final byte of the last character
	byteto += its.char_at(byteto).u8char_len - 1
	var byte_length = byteto - bytefrom + 1
	var r_items = new NativeString(byte_length)
	its.copy_to(r_items, byte_length, bytefrom, 0)
	return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
end
894
# Reverse the order of the characters of `self`, in place
#
# The reversed content is built in a temporary buffer whose storage
# is then taken over by `self`.
redef fun reverse
do
	written = false
	var ns = new FlatBuffer.with_capacity(capacity)
	for i in chars.reverse_iterator do ns.add i
	_items = ns._items
	# Keep the recorded capacity in sync with the storage taken from `ns`
	capacity = ns.capacity
	# The content has changed: the copy cached by `to_cstring` is outdated,
	# and so is the cached char/byte position
	is_dirty = true
	position = 0
	bytepos = 0
end
902
# Repeat the current content so that `self` holds `repeats` copies of it
#
# Nothing is appended when `repeats` is lower than 2.
redef fun times(repeats)
do
	var size = _bytelen
	# Snapshot of the content before the first append
	var snapshot = new FlatString.full(_items, size, 0, size - 1, length)
	var done = 1
	while done < repeats do
		append(snapshot)
		done += 1
	end
end
911
# Replace each character of `self` by its `to_upper`, in place
redef fun upper
do
	if written then reset
	var count = length
	var i = 0
	while i < count do
		self[i] = self[i].to_upper
		i += 1
	end
end

# Replace each character of `self` by its `to_lower`, in place
redef fun lower
do
	if written then reset
	var count = length
	var i = 0
	while i < count do
		self[i] = self[i].to_lower
		i += 1
	end
end
923 end
924
# Backward iterator on the bytes of a `FlatBuffer`
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer whose bytes are iterated
	var target: FlatBuffer

	# Underlying bytes of `target`, as they were at the creation of the iterator
	var target_items: NativeString

	# Index of the current byte
	var curr_pos: Int

	# Iterate on `tgt`, starting at byte index `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, tgt._items, pos)
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
948
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator.with_pos(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator.with_pos(target, pos)
	end
end
961
# Forward iterator on the bytes of a `FlatBuffer`
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer whose bytes are iterated
	var target: FlatBuffer

	# Underlying bytes of `target`, as they were at the creation of the iterator
	var target_items: NativeString

	# Index of the current byte
	var curr_pos: Int

	# Iterate on `tgt`, starting at byte index `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, tgt._items, pos)
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos < target._bytelen
	end

	redef fun next
	do
		curr_pos += 1
	end
end
985
# Backward iterator on the characters of a `FlatBuffer`
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer whose characters are iterated
	var target: FlatBuffer

	# Index (in chars) of the current item
	var curr_pos: Int

	# Iterate on `tgt`, starting at char index `pos`
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		init(tgt, pos)
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1007
# View of a `FlatBuffer` as a mutable sequence of characters
#
# All the operations are forwarded to the `target` buffer.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	# Replace the character at `index`, or append `item` when `index == length`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index != length then
			target[index] = item
			return
		end
		add(item)
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Enlarge the capacity of the `target` buffer
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	# Append the characters of `s`, one by one, to the `target` buffer
	redef fun append(s)
	do
		var s_length = s.length
		var too_small = target.capacity < s.length
		if too_small then enlarge(s_length + target.length)
		for i in s do target.add i
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator.with_pos(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator.with_pos(target, pos)
	end
end
1052
# Forward iterator on the characters of a `FlatBuffer`
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer whose characters are iterated
	var target: FlatBuffer

	# Last valid char index, computed when the iterator is created
	var max: Int

	# Index (in chars) of the current item
	var curr_pos: Int

	# Iterate on `tgt`, from char index `pos` up to its last character
	init with_pos(tgt: FlatBuffer, pos: Int)
	do
		var last = tgt.length - 1
		init(tgt, last, pos)
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1076
1077 redef class NativeString
# Returns `self` as a String, its length being given by `cstring_length`
redef fun to_s
do
	var len = cstring_length
	return to_s_with_length(len)
end

# Returns `self` as a String of `length`.
#
# The content goes through `clean_utf8`.
redef fun to_s_with_length(length): FlatString
do
	assert length >= 0
	return clean_utf8(length)
end

# Returns `self` as a String of `bytelen` bytes and `unilen` characters
#
# `self` is used as is, without going through `clean_utf8`.
redef fun to_s_full(bytelen, unilen)
do
	return new FlatString.full(self, bytelen, 0, bytelen - 1, unilen)
end
1093
# Returns `self` as a new String.
#
# The result never uses `self` as storage: when `clean_utf8` did not
# have to allocate a new `NativeString`, the bytes are copied here.
redef fun to_s_with_copy: FlatString
do
	var length = cstring_length
	var cleaned = clean_utf8(length)
	# `clean_utf8` already made a copy if something had to be replaced
	if cleaned.items != self then return cleaned
	var copy = new NativeString(length + 1)
	copy_to(copy, length, 0, 0)
	var str = new FlatString.with_infos(copy, length, 0, length - 1)
	copy[length] = 0u8
	# The copy is NUL-terminated, it can also serve as the C string of `str`
	str.to_cstring = copy
	return str
end
1107
# Cleans a NativeString if necessary
#
# The first `len` bytes of `self` are checked. Each byte that does not
# start a valid UTF-8 sequence is replaced by the 3 bytes
# 0xEF 0xBF 0xBD (the UTF-8 encoding of U+FFFD). A sequence is invalid when:
#
# * its first byte does not match the length given by `length_of_char_at`,
# * it is truncated, i.e. it would extend past `len`,
# * its code point is out of the range of its length, is in
#   0xD800..0xDFFF, or is 0xFFFE or 0xFFFF.
#
# When nothing has to be replaced, the returned string uses `self`
# as storage, without copy.
fun clean_utf8(len: Int): FlatString
do
	var replacements: nullable Array[Int] = null
	var end_length = len
	var pos = 0
	var chr_ln = 0
	while pos < len do
		var b = self[pos]
		var nxst = length_of_char_at(pos)

		# Does the first byte match the announced length ?
		var ok_st: Bool
		if nxst == 1 then
			ok_st = b & 0x80u8 == 0u8
		else if nxst == 2 then
			ok_st = b & 0xE0u8 == 0xC0u8
		else if nxst == 3 then
			ok_st = b & 0xF0u8 == 0xE0u8
		else
			ok_st = b & 0xF8u8 == 0xF0u8
		end
		# A sequence cut by the end of the data must not be decoded:
		# it would read bytes located after `len`
		if pos + nxst > len then ok_st = false

		# Is the decoded code point valid for this length ?
		var ok_c = false
		var clen = 1
		if ok_st then
			var c = char_at(pos)
			var cp = c.code_point
			if nxst == 1 then
				ok_c = cp >= 0 and cp <= 0x7F
			else if nxst == 2 then
				ok_c = cp >= 0x80 and cp <= 0x7FF
			else if nxst == 3 then
				ok_c = cp >= 0x800 and cp <= 0xFFFF
				ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
			else
				ok_c = cp >= 0x10000 and cp <= 0x10FFFF
			end
			clen = c.u8char_len
		end

		if ok_c then
			pos += clen
		else
			# Only the faulty byte is replaced: 1 byte becomes 3
			if replacements == null then replacements = new Array[Int]
			replacements.add pos
			end_length += 2
			pos += 1
		end
		chr_ln += 1
	end
	var ret = self
	if end_length != len then
		# Rebuild the content, chunk by chunk, around the replaced bytes
		ret = new NativeString(end_length)
		var old_repl = 0
		var off = 0
		var repls = replacements.as(not null)
		var r = repls.items.as(not null)
		var imax = repls.length
		for i in [0 .. imax[ do
			var repl_pos = r[i]
			var chkln = repl_pos - old_repl
			copy_to(ret, chkln, old_repl, off)
			off += chkln
			ret[off] = 0xEFu8
			ret[off + 1] = 0xBFu8
			ret[off + 2] = 0xBDu8
			old_repl = repl_pos + 1
			off += 3
		end
		# Bytes located after the last replacement
		copy_to(ret, len - old_repl, old_repl, off)
	end
	return new FlatString.full(ret, end_length, 0, end_length - 1, chr_ln)
end
1182
# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
#
# Very unsafe, make sure to have room for this char prior to calling this function.
private fun set_char_at(pos: Int, c: Char)
do
	native_set_char(pos, c, c.u8char_len)
end

# Writes `c` at `pos` as a UTF-8 sequence of `ln` bytes
#
# Nothing is written when `ln` is not between 1 and 4.
private fun native_set_char(pos: Int, c: Char, ln: Int) `{
	char* dst = self + pos;
	if (ln == 1) {
		dst[0] = c;
	} else if (ln == 2) {
		dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
		dst[1] = 0x80 | (c & 0x3F);
	} else if (ln == 3) {
		dst[0] = 0xE0 | ((c & 0xF000) >> 12);
		dst[1] = 0x80 | ((c & 0xFC0) >> 6);
		dst[2] = 0x80 | (c & 0x3F);
	} else if (ln == 4) {
		dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
		dst[1] = 0x80 | ((c & 0x3F000) >> 12);
		dst[2] = 0x80 | ((c & 0xFC0) >> 6);
		dst[3] = 0x80 | (c & 0x3F);
	}
`}
1214 end
1215
redef class Int
	# Representation of `self` in the given `base`
	#
	# A buffer of `digit_count(base)` spaces is allocated,
	# then filled by `fill_buffer`.
	redef fun to_base(base, signed)
	do
		var width = digit_count(base)
		var buf = new FlatBuffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# return displayable int in base 10 and signed
	#
	#     assert 1.to_s            == "1"
	#     assert (-123).to_s       == "-123"
	redef fun to_s
	do
		# Fast case for common numbers
		if self == 0 then return "0"
		if self == 1 then return "1"

		var digits = int_to_s_len
		var size = digits + 1
		var ns = new NativeString(size)
		ns[digits] = 0u8
		native_int_to_s(ns, size)
		return new FlatString.full(ns, digits, 0, digits - 1, digits)
	end
end
1241
redef class Array[E]

	# Fast implementation
	#
	# The `to_s` of the non-null elements are first collected to get the
	# total size in bytes, then copied one after the other into a single
	# `NativeString`.
	redef fun plain_to_s
	do
		var l = length
		if l == 0 then return ""
		var its = _items.as(not null)
		var first = its[0]
		if l == 1 then
			if first == null then return ""
			return first.to_s
		end

		# First pass: collect the strings and the total size
		var strings = new NativeArray[String](l)
		var count = 0
		var total = 0
		for idx in [0 .. l[ do
			var e = its[idx]
			if e == null then continue
			var str = e.to_s
			total += str.bytelen
			strings[count] = str
			count += 1
		end

		# Second pass: copy the bytes
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for idx in [0 .. count[ do
			var str = strings[idx]
			if str isa FlatString then
				var size = str._bytelen
				str._items.copy_to(ns, size, str._first_byte, off)
				off += size
			else
				for j in str.substrings do
					var sub = j.as(FlatString)
					var size = sub._bytelen
					sub._items.copy_to(ns, size, sub._first_byte, off)
					off += size
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0, total - 1)
	end
end
1291
redef class NativeArray[E]
	# Concatenation of all the strings of `self` in a single `FlatString`
	#
	# `self` must be a `NativeArray[String]`.
	redef fun native_to_s
	do
		assert self isa NativeArray[String]
		var count = length
		var strings = self

		# First pass: total size in bytes
		var total = 0
		for idx in [0 .. count[ do total += strings[idx].bytelen

		# Second pass: copy the bytes
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for idx in [0 .. count[ do
			var str = strings[idx]
			if str isa FlatString then
				var size = str._bytelen
				str._items.copy_to(ns, size, str._first_byte, off)
				off += size
			else
				for j in str.substrings do
					var sub = j.as(FlatString)
					var size = sub._bytelen
					sub._items.copy_to(ns, size, sub._first_byte, off)
					off += size
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0, total - 1)
	end
end
1328
redef class Map[K,V]
	# Concatenation of the couples of `self`
	#
	# The couples are separated by `sep`, and the key and the value of
	# each couple by `couple_sep`. `null` keys and values are shown as `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var res = new Buffer
		var it = iterator
		var first = true
		while it.is_ok do
			# No separator before the first couple
			if first then
				first = false
			else
				res.append(sep)
			end
			var k = it.key
			var e = it.item
			res.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			it.next
		end
		return res.to_s
	end
end