text/flat: faster NativeString::set_char_at
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text
#
# The iterator yields `tgt` exactly once, then becomes exhausted.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok do return tgt != null

	redef fun item
	do
		assert is_ok
		return tgt.as(not null)
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# First byte of the NativeString
protected fun first_byte: Int
do
	return 0
end

# Last byte of the NativeString
#
# Equal to `first_byte - 1` when the text holds no byte.
protected fun last_byte: Int
do
	return first_byte + _bytelen - 1
end

# Cache of the latest position (char) explored in the string
var position: Int = 0

# Cached position (bytes) in the NativeString underlying the String
#
# Byte counterpart of `position`, both are updated by `char_to_byte_index`.
var bytepos: Int = 0
50
# Index of the character `index` in `_items`
#
# The lookup starts from the closest known anchor (the cached position,
# the beginning or the end of the text) and the cache is then moved to `index`.
fun char_to_byte_index(index: Int): Int do
	var its = _items
	var cached_char = _position
	var cached_byte = _bytepos
	var shift = index - cached_char

	# The cache already points to the requested char
	if shift == 0 then return cached_byte

	var res: Int
	if shift == 1 then
		# Next char: skip the cached one
		res = cached_byte + its.length_of_char_at(cached_byte)
	else if shift == -1 then
		# Previous char: look for the char that ends just before the cached one
		res = its.find_beginning_of_char_at(cached_byte - 1)
	else
		# General case: find the best starting point
		var from_begin = index
		var from_end = (_length - 1) - index
		var from_cache = shift.abs

		var best = from_begin
		if from_cache < best then best = from_cache
		if from_end < best then best = from_end

		var start_char: Int
		var start_byte: Int
		if best == from_cache then
			start_char = cached_char
			start_byte = cached_byte
		else if best == from_begin then
			start_char = 0
			start_byte = first_byte
		else
			start_char = _length - 1
			start_byte = its.find_beginning_of_char_at(last_byte)
		end

		res = its.char_to_byte_index_cached(index, start_char, start_byte)
	end

	# Remember the explored position for the next lookup
	_position = index
	_bytepos = res
	return res
end
103
# By escaping `self` to HTML, how many more bytes will be needed ?
#
# Returns 0 when no byte of `self` requires an escape.
fun chars_to_html_escape: Int do
	var ns = _items
	var extra = 0
	var i = first_byte
	var stop = last_byte
	while i <= stop do
		var b = ns[i]
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `<` and `>` are written as 4 bytes (`&lt;`, `&gt;`)
			extra += 3
		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# `&`, `"`, `'` and `/` are written as 5 bytes
			extra += 4
		end
		i += 1
	end
	return extra
end
129
# Some HTML characters are used as meta-data, they are replaced by
# an HTML-escaped equivalent:
#
# * 0x3C (<) => &lt;
# * 0x3E (>) => &gt;
# * 0x26 (&) => &amp;
# * 0x22 (") => &#34;
# * 0x27 (') => &#39;
# * 0x2F (/) => &#47;
redef fun html_escape
do
	var added = chars_to_html_escape
	# Nothing to escape, no new buffer is needed
	if added == 0 then return to_s

	var src = _items
	var total = _bytelen + added
	var dst = new NativeString(total)
	var w = 0
	for r in [first_byte .. last_byte] do
		var b = src[r]
		if b == 0x3Cu8 then
			# &lt;
			dst[w] = 0x26u8
			dst[w + 1] = 0x6Cu8
			dst[w + 2] = 0x74u8
			dst[w + 3] = 0x3Bu8
			w += 4
		else if b == 0x3Eu8 then
			# &gt;
			dst[w] = 0x26u8
			dst[w + 1] = 0x67u8
			dst[w + 2] = 0x74u8
			dst[w + 3] = 0x3Bu8
			w += 4
		else if b == 0x26u8 then
			# &amp;
			dst[w] = 0x26u8
			dst[w + 1] = 0x61u8
			dst[w + 2] = 0x6Du8
			dst[w + 3] = 0x70u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x22u8 then
			# &#34;
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			dst[w + 2] = 0x33u8
			dst[w + 3] = 0x34u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x27u8 then
			# &#39;
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			dst[w + 2] = 0x33u8
			dst[w + 3] = 0x39u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x2Fu8 then
			# &#47;
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			dst[w + 2] = 0x34u8
			dst[w + 3] = 0x37u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else
			# Any other byte is copied as is
			dst[w] = b
			w += 1
		end
	end
	return new FlatString.with_infos(dst, total, 0)
end
201
# By escaping `self` to C, how many more bytes will be needed ?
#
# This enables a double-optimization in `escape_to_c` since if this
# method returns 0, then `self` does not need escaping and can be
# returned as-is
fun chars_to_escape_to_c: Int do
	var ns = _items
	var extra = 0
	var i = first_byte
	var stop = last_byte
	while i <= stop do
		var b = ns[i]
		if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Escaped with a single backslash: one more byte
			extra += 1
		else if b < 32u8 then
			# Other control bytes take 4 bytes once escaped
			extra += 3
		end
		i += 1
	end
	return extra
end
231
# Special codes:
#
# Any byte with value < 32 is a control character,
# it is replaced by a backslash followed by its value in octal.
#
# There are two exceptions however:
#
# * 0x09 => \t
# * 0x0A => \n
#
# Aside from the bytes above, the following are also escaped:
#
# * 0x22 => \"
# * 0x27 => \'
# * 0x5C => \\
redef fun escape_to_c do
	var extra = chars_to_escape_to_c
	# Nothing to escape, `self` can be returned as a String
	if extra == 0 then return self.to_s

	var src = _items
	var total = _bytelen + extra
	var dst = new NativeString(total)
	var w = 0
	for r in [first_byte .. last_byte] do
		var b = src[r]
		if b == 0x09u8 then
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x74u8
			w += 2
		else if b == 0x0Au8 then
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x6Eu8
			w += 2
		else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# The byte itself, prefixed by a backslash
			dst[w] = 0x5Cu8
			dst[w + 1] = b
			w += 2
		else if b < 32u8 then
			# Backslash, `0`, then the two octal digits of the byte
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x30u8
			dst[w + 2] = ((b & 0x38u8) >> 3) + 0x30u8
			dst[w + 3] = (b & 0x07u8) + 0x30u8
			w += 4
		else
			dst[w] = b
			w += 1
		end
	end
	return dst.to_s_unsafe(total)
end
293
# Bound-checked access to the char at `index`
redef fun [](index)
do
	assert index >= 0 and index < _length
	return fetch_char_at(index)
end

# Gets a `Char` at `index` in `self`
#
# WARNING: Use at your own risks as no bound-checking is done
fun fetch_char_at(index: Int): Char do
	var its = _items
	var bpos = char_to_byte_index(index)
	var lead = its[bpos]
	# Only bytes with the high bit set need the full decoding
	if (lead & 0x80u8) != 0x00u8 then return its.char_at(bpos)
	return lead.ascii
end
309
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
#
#     assert "ff".to_hex == 255
#
# `pos` defaults to 0 and `ln` to the number of chars that follow `pos`.
# Each digit is read as a single byte of `_items`.
redef fun to_hex(pos, ln) do
	if pos == null then pos = 0
	if ln == null then ln = length - pos
	var its = _items
	var i = char_to_byte_index(pos)
	var stop = i + ln
	var res = 0
	while i < stop do
		res = (res << 4) + its[i].ascii.from_hex
		i += 1
	end
	return res
end
326 end
327
328 # Immutable strings of characters.
329 class FlatString
330 super FlatText
331 super String
332
# Index at which `self` begins in `_items`, inclusively
redef var first_byte is noinit

# View of `self` as a sequence of chars, built on first use
redef var chars = new FlatStringCharView(self) is lazy

# View of `self` as a sequence of bytes, built on first use
redef var bytes = new FlatStringByteView(self) is lazy

# Null-terminated copy of the bytes of `self`, built on first use
redef var to_cstring is lazy do
	var n = _bytelen
	var cstr = new NativeString(n + 1)
	cstr[n] = 0u8
	_items.copy_to(cstr, n, _first_byte, 0)
	return cstr
end
347
# Chars are added to a buffer from the last one to the first one
redef fun reversed do
	var buf = new FlatBuffer.with_capacity(_bytelen + 1)
	var i = _length
	while i > 0 do
		i -= 1
		buf.add fetch_char_at(i)
	end
	var res = buf.to_s.as(FlatString)
	res._length = _length
	return res
end
359
# Delegates to `_items`, starting at the first byte of `self`
redef fun fast_cstring
do
	return _items.fast_cstring(_first_byte)
end
361
# The result shares `_items` with `self`, no byte is copied
redef fun substring_from(from)
do
	var ln = _length
	if from >= ln then return empty
	if from <= 0 then return self
	var start = char_to_byte_index(from)
	# Bytes of `self` located before the new beginning
	var skipped = start - _first_byte
	return new FlatString.full(_items, _bytelen - skipped, start, ln - from)
end
370
# Normalizes `from` and `count`, then delegates to `substring_impl`
redef fun substring(from, count)
do
	if count <= 0 then return ""

	# A negative start shortens the request by as many chars
	if from < 0 then
		count += from
		if count < 0 then return ""
		from = 0
	end

	# Do not go past the end of `self`
	var avail = _length - from
	if count > avail then count = avail
	if count <= 0 then return ""

	return substring_impl(from, count, from + count - 1)
end
387
# Builds the substring of `count` chars going from char `from` to char `end_index`, inclusively
#
# Arguments are expected to be already normalized by `substring`.
# The result shares `_items` with `self`.
private fun substring_impl(from, count, end_index: Int): String do
	# Distances between the cached position and both boundaries.
	# The closest boundary is resolved first, so that the second lookup
	# starts from a cache that was just moved next to it.
	var cache = _position
	var dfrom = (cache - from).abs
	var dend = (cache - end_index).abs

	var bytefrom: Int
	var byteto: Int
	if dfrom < dend then
		bytefrom = char_to_byte_index(from)
		byteto = char_to_byte_index(end_index)
	else
		byteto = char_to_byte_index(end_index)
		bytefrom = char_to_byte_index(from)
	end

	var its = _items
	# `byteto` is the first byte of the last char, move it to its last byte
	byteto += its.length_of_char_at(byteto) - 1

	var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
	return s
end
409
# The empty literal, as a `FlatString`
redef fun empty
do
	return "".as(FlatString)
end
411
# Each char of `self` is upper-cased into a new buffer
redef fun to_upper
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	for i in [0 .. _length[ do
		res.add(chars[i].to_upper)
	end
	return res.to_s
end
426
# Each char of `self` is lower-cased into a new buffer
redef fun to_lower
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	for i in [0 .. _length[ do
		res.add(chars[i].to_lower)
	end
	return res.to_s
end
441
# Outputs the chars of `self` one by one
redef fun output
do
	for c in chars do
		c.output
	end
end
446
447 ##################################################
448 # String Specific Methods #
449 ##################################################
450
# Low-level creation of a new string with minimal data.
#
# `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
#
# The length in chars is computed from the `bytelen` bytes found at `from`.
private init with_infos(items: NativeString, bytelen, from: Int)
do
	_items = items
	_bytelen = bytelen
	_first_byte = from
	_bytepos = from
	_length = items.utf8_length(from, bytelen)
end
463
# Low-level creation of a new string with all the data.
#
# `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
#
# `length` is stored as is, it is not checked against the bytes.
private init full(items: NativeString, bytelen, from, length: Int)
do
	_items = items
	_bytelen = bytelen
	_length = length
	_first_byte = from
	_bytepos = from
end
476
# Two flat texts are equal when they hold the same bytes
redef fun ==(other)
do
	if not other isa FlatText then return super
	if self.object_id == other.object_id then return true

	# Different byte lengths cannot hold the same content
	var n = _bytelen
	if other._bytelen != n then return false

	var mine = _items
	var theirs = other._items
	var i = _first_byte
	var j = other.first_byte
	for k in [0 .. n[ do
		if mine[i + k] != theirs[j + k] then return false
	end
	return true
end
503
# Byte-wise comparison, on equal prefixes the shortest text is the lowest
redef fun <(other)
do
	if not other isa FlatText then return super
	if self.object_id == other.object_id then return false

	var mine = _items
	var theirs = other._items
	var mylen = _bytelen
	var olen = other.bytelen
	var common = if mylen < olen then mylen else olen

	var i = _first_byte
	var j = other.first_byte
	var stop = i + common
	while i < stop do
		var a = mine[i]
		var b = theirs[j]
		# The first differing byte decides
		if a != b then return a < b
		i += 1
		j += 1
	end

	return mylen < olen
end
533
# Concatenates `self` and the String representation of `o` into a new `FlatString`
#
# Aborts if `o.to_s` is not a `FlatText`.
redef fun +(o) do
	var s = o.to_s
	var slen = s.bytelen
	var mlen = _bytelen
	var nlen = mlen + slen
	var mits = _items
	var mifrom = _first_byte
	if s isa FlatText then
		var sits = s._items
		var sifrom = s.first_byte
		var ns = new NativeString(nlen + 1)
		mits.copy_to(ns, mlen, mifrom, 0)
		sits.copy_to(ns, slen, sifrom, mlen)
		# The extra byte is the terminator, set it like the other
		# builders of this module (`*`, `Int::to_s`, `plain_to_s`) do
		ns[nlen] = 0u8
		return new FlatString.full(ns, nlen, 0, _length + s.length)
	else
		abort
	end
end
552
# Concatenates `self` `i` times into a new `FlatString`
#
# An `i` of 0 or less yields the empty string.
redef fun *(i) do
	# A negative `i` would give a negative allocation size
	# and a write before the beginning of the buffer
	if i <= 0 then return empty

	var mybtlen = _bytelen
	var new_bytelen = mybtlen * i
	var newlen = _length * i
	var its = _items
	var fb = _first_byte
	var ns = new NativeString(new_bytelen + 1)
	ns[new_bytelen] = 0u8
	var offset = 0
	while i > 0 do
		its.copy_to(ns, mybtlen, fb, offset)
		offset += mybtlen
		i -= 1
	end
	return new FlatString.full(ns, new_bytelen, 0, newlen)
end
570
571
# Hash of the bytes of `self`, computed once then kept in `hash_cache`
redef fun hash
do
	var cached = hash_cache
	if cached != null then return cached

	# djb2 hash algorithm
	var h = 5381
	var its = _items
	for i in [_first_byte .. last_byte] do
		h = (h << 5) + h + its[i].to_i
	end

	hash_cache = h
	return h
end
592
# A flat string is its own and only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
594 end
595
# Iterator over the chars of a `FlatString`, from `curr_pos` down to the first char
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String being iterated
	var target: FlatString

	# Index (in chars) of the current item
	var curr_pos: Int

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
612
# Iterator over the chars of a `FlatString`, from `curr_pos` up to the last char
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String being iterated
	var target: FlatString

	# Index of the last char of `target`, computed at creation
	var max: Int is noautoinit

	# Index (in chars) of the current item
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
633
# View of a `FlatString` as a sequence of chars
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index) do return target[index]

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
646
# Iterator over the bytes of a `FlatString`, from a given byte down to the first one
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String being iterated
	var target: FlatString

	# Bytes of `target`, fetched at creation
	var target_items: NativeString is noautoinit

	# Current position
	#
	# Given relative to the string, then stored as an index in `target_items`.
	var curr_pos: Int

	init
	do
		var tgt = target
		target_items = tgt._items
		# Translate the position into an index of `target_items`
		curr_pos += tgt._first_byte
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos >= target._first_byte

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
672
# Iterator over the bytes of a `FlatString`, from a given byte up to the last one
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String being iterated
	var target: FlatString

	# Bytes of `target`, fetched at creation
	var target_items: NativeString is noautoinit

	# Current position
	#
	# Given relative to the string, then stored as an index in `target_items`.
	var curr_pos: Int

	init
	do
		var tgt = target
		target_items = tgt._items
		# Translate the position into an index of `target_items`
		curr_pos += tgt._first_byte
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos <= target.last_byte

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
698
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		var tgt = _target
		# The index is valid only if it designates one of the bytes of the string,
		# that is if `index + _first_byte` is not larger than `last_byte`
		assert index >= 0 and index < tgt._bytelen
		return tgt._items[index + tgt._first_byte]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
719
redef class Buffer
	# New buffers are `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# New buffers with an initial capacity of `i` are `FlatBuffer`
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
725
726 # Mutable strings of characters.
727 class FlatBuffer
728 super FlatText
729 super Buffer
730
# View of `self` as a sequence of chars, built on first use
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

# View of `self` as a sequence of bytes, built on first use
redef var bytes = new FlatBufferByteView(self) is lazy

private var char_cache: Int = -1

private var byte_cache: Int = -1

# Size of the content that `_items` is considered able to hold
private var capacity = 0

# Real items, used as cache for when to_cstring is called
private var real_items: NativeString is noinit

# Delegates to `_items`, a buffer always starts at byte 0
redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

# A flat buffer is its own and only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
747
# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
#
# This happens when an operation modifies the current `Buffer` and
# the Copy-On-Write flag `written` is set at true.
private fun reset do
	var fresh = new NativeString(capacity)
	var n = _bytelen
	if n != 0 then _items.copy_to(fresh, n, 0, 0)
	_items = fresh
	written = false
end
758
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility.
#
# When the shifted content does not fit in `capacity`, a larger
# `NativeString` is allocated and becomes the new `_items`:
# callers must not keep using a reference to `_items` taken before the call.
private fun rshift_bytes(from: Int, len: Int) do
	var oit = _items
	var nit = oit
	var bt = _bytelen
	if bt + len > capacity then
		var ncap = capacity
		while ncap < bt + len do ncap = ncap * 2 + 2
		nit = new NativeString(ncap + 1)
		# Bytes located before the shifted area are kept at the same place
		oit.copy_to(nit, from, 0, 0)
		capacity = ncap
		_items = nit
	end
	oit.copy_to(nit, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int) do
	var it = _items
	it.copy_to(it, _bytelen - from, from, from - len)
end

# Replaces the char at `index` by `item`
#
# Setting the char at `index == length` adds `item` at the end of the buffer.
# The bytes that follow are shifted when `item` and the replaced char
# are not encoded on the same number of bytes.
redef fun []=(index, item)
do
	assert index >= 0 and index <= _length
	if written then reset
	is_dirty = true
	if index == _length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_bytelen += size_diff
	# Chars located after `index` have moved: keep the cache used by
	# `char_to_byte_index` on the first byte of the char it designates
	if size_diff != 0 and _position > index then _bytepos += size_diff
	# `rshift_bytes` may have replaced `_items`, so `it` cannot be used here
	_items.set_char_at(ip, item)
end
805
# Adds `c` at the end of the buffer, the storage grows if needed
redef fun add(c)
do
	if written then reset
	is_dirty = true
	var at = _bytelen
	var width = c.u8char_len
	enlarge(at + width)
	_items.set_char_at(at, c)
	_bytelen = at + width
	_length += 1
end
817
# Removes the whole content of the buffer
redef fun clear do
	is_dirty = true
	# Lengths are reset first, so that `reset` has no content to copy
	_bytelen = 0
	_length = 0
	if written then reset
	# The cache used by `char_to_byte_index` refers to the erased content
	_position = 0
	_bytepos = 0
end
824
# A new buffer without content
redef fun empty
do
	return new Buffer
end
826
# Grows the storage so that `capacity` becomes strictly greater than `cap`
#
# Nothing is done when `cap` already fits in `capacity`.
redef fun enlarge(cap)
do
	var ncap = capacity
	if cap <= ncap then return
	while ncap <= cap do ncap = ncap * 2 + 2

	# The COW flag can be set at false here, since
	# it does a copy of the current `Buffer`
	written = false

	var fresh = new NativeString(ncap + 1)
	var n = _bytelen
	if n > 0 then _items.copy_to(fresh, n, 0, 0)
	_items = fresh
	capacity = ncap
end
844
# The returned string shares `_items` with the buffer
#
# `written` is set, so that the next modification of the buffer
# works on its own copy.
redef fun to_s
do
	written = true
	var n = _bytelen
	if n == 0 then _items = new NativeString(1)
	return new FlatString.full(_items, n, 0, _length)
end
852
# Null-terminated copy of the content
#
# The copy is kept in `real_items` and reused until the buffer is dirty again.
redef fun to_cstring
do
	if not is_dirty then return real_items
	var n = _bytelen
	var cstr = new NativeString(n + 1)
	cstr[n] = 0u8
	if _length > 0 then _items.copy_to(cstr, n, 0, 0)
	real_items = cstr
	is_dirty = false
	return cstr
end
865
# Create a new empty string.
init
do
end
868
# Low-level creation a new buffer with given data.
#
# `_items` will be used as is, without copy, to store the characters of the buffer.
# Aliasing issues is the responsibility of the caller.
#
# If `_items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
	_items = items
	self.capacity = capacity
	_bytelen = bytelen
	_length = length
end
883
# Create a new string copied from `s`.
#
# The bytes of `s` are copied into a storage owned by the new buffer,
# so `s` and the buffer never alias each other.
init from(s: Text)
do
	var blen = s.bytelen
	var nits = new NativeString(blen + 1)
	# Copy the flat chunks of `s` one after the other.
	# Each chunk starts at its own `first_byte`, that may not be 0.
	var off = 0
	for sub in s.substrings do
		var sublen = sub._bytelen
		sub._items.copy_to(nits, sublen, sub.first_byte, off)
		off += sublen
	end
	_items = nits
	_bytelen = blen
	_length = s.length
	_capacity = blen
	# The storage is private: no copy-on-write is needed
	written = false
end
898
# Create a new empty string with a given capacity.
#
# `cap` must not be negative.
init with_capacity(cap: Int)
do
	assert cap >= 0
	capacity = cap
	_bytelen = 0
	_items = new NativeString(cap + 1)
end
907
# Appends the content of `s` at the end of the buffer
redef fun append(s)
do
	if s.is_empty then return
	is_dirty = true
	var added = s.bytelen
	var total = _bytelen + added
	enlarge(total)
	if not s isa FlatText then
		# Not a flat text: its flat substrings are appended in turn,
		# each of them updates the lengths of the buffer
		for chunk in s.substrings do append chunk
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _bytelen)
	_bytelen = total
	_length += s.length
end
924
# Copies the content of self in `dest`
#
# `len` chars are read from `start` in `self` and are set in `dest` from `new_start`.
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src = self.chars
	var dst = dest.chars
	for off in [0 .. len[ do
		dst[new_start + off] = src[start + off]
	end
end
934
# The result is a new buffer that owns a copy of the selected bytes
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then from = 0
	# Do not go past the end of the buffer
	var avail = _length - from
	if count > avail then count = avail
	if count <= 0 then return new Buffer

	var its = _items
	var bfrom = its.char_to_byte_index(from)
	# First byte of the last selected char, then the byte that follows this char
	var blast = its.char_to_byte_index(from + count - 1)
	var bafter = blast + its.char_at(blast).u8char_len
	var blen = bafter - bfrom

	var copy = new NativeString(blen)
	its.copy_to(copy, blen, bfrom, 0)
	return new FlatBuffer.with_infos(copy, blen, blen, count)
end
950
# Reverses the order of the chars of the buffer, in place
redef fun reverse
do
	written = false
	var ns = new FlatBuffer.with_capacity(capacity)
	for i in chars.reverse_iterator do ns.add i
	_items = ns._items
	# The content has changed: the copy kept for `to_cstring` is outdated
	is_dirty = true
	# The cache used by `char_to_byte_index` refers to the previous layout
	_position = 0
	_bytepos = 0
end
958
# Appends the current content `repeats - 1` times to the buffer
redef fun times(repeats)
do
	# Fixed view of the current content, the buffer grows while it is appended
	var snapshot = new FlatString.full(_items, _bytelen, 0, _length)
	var i = 1
	while i < repeats do
		append(snapshot)
		i += 1
	end
end
967
# Replaces each char of the buffer by its upper-case version
redef fun upper
do
	if written then reset
	var n = _length
	var i = 0
	while i < n do
		self[i] = self[i].to_upper
		i += 1
	end
end
973
# Replaces each char of the buffer by its lower-case version
redef fun lower
do
	if written then reset
	var n = _length
	var i = 0
	while i < n do
		self[i] = self[i].to_lower
		i += 1
	end
end
979 end
980
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` down to the first byte
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer being iterated
	var target: FlatBuffer

	# Bytes of `target`, fetched at creation
	var target_items: NativeString is noautoinit

	# Index (in bytes) of the current item
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
1001
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		var tgt = target
		# Only the bytes of the content can be read, not the spare room of `_items`
		# (same check as in `FlatStringByteView`)
		assert index >= 0 and index < tgt._bytelen
		return tgt._items[index]
	end

	redef fun iterator_from(pos) do return new FlatBufferByteIterator(target, pos)

	redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator(target, pos)

end
1014
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` up to the last byte
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer being iterated
	var target: FlatBuffer

	# Bytes of `target`, fetched at creation
	var target_items: NativeString is noautoinit

	# Index (in bytes) of the current item
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos < target._bytelen

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
1035
# Iterator over the chars of a `FlatBuffer`, from `curr_pos` down to the first char
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer being iterated
	var target: FlatBuffer

	# Index (in chars) of the current item
	var curr_pos: Int

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
1052
# View of a `FlatBuffer` as a sequence of chars
#
# Modifications are forwarded to the buffer.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index) do return target[index]

	# Setting the char at `index == length` adds `item` at the end
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index == length then
			add(item)
			return
		end
		target[index] = item
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Asks the buffer to enlarge its storage, see `FlatBuffer::enlarge`
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	redef fun append(s)
	do
		# Pre-size the storage: the capacity is counted in bytes and each
		# added char needs at least one byte after the current content.
		# `enlarge` does nothing when the capacity is already sufficient.
		enlarge(target._bytelen + s.length)
		for i in s do target.add i
	end

	redef fun iterator_from(pos) do return new FlatBufferCharIterator(target, pos)

	redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator(target, pos)

end
1097
# Iterator over the chars of a `FlatBuffer`, from `curr_pos` up to the last char
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer being iterated
	var target: FlatBuffer

	# Index of the last char of `target`, computed at creation
	var max: Int is noautoinit

	# Index (in chars) of the current item
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
1118
1119 redef class NativeString
# Returns `self` as a String, its length is given by `cstring_length`
redef fun to_s
do
	var len = cstring_length
	return to_s_with_length(len)
end

# Returns `self` as a String of `length`.
#
# The content is checked by `clean_utf8`.
redef fun to_s_with_length(length): FlatString
do
	assert length >= 0
	return clean_utf8(length)
end

# Returns `self` as a String of `bytelen` bytes and `unilen` chars
#
# `self` is used as is, without copy nor check.
redef fun to_s_full(bytelen, unilen)
do
	return new FlatString.full(self, bytelen, 0, unilen)
end

# Returns `self` as a String of `len` bytes, `cstring_length` if `len` is null
#
# `self` is used as is, without copy nor check.
redef fun to_s_unsafe(len)
do
	if len == null then len = cstring_length
	return new FlatString.with_infos(self, len, 0)
end
1140
# Returns `self` as a new String.
#
# The returned string never uses `self` as its storage.
redef fun to_s_with_copy: FlatString
do
	var length = cstring_length
	var cleaned = clean_utf8(length)
	# The cleaning already produced a new storage
	if cleaned.items != self then return cleaned

	# Otherwise, work on a null-terminated copy of `self`
	var dup = new NativeString(length + 1)
	copy_to(dup, length, 0, 0)
	var str = new FlatString.with_infos(dup, length, 0)
	dup[length] = 0u8
	# The copy is terminated and starts at 0: it is also the C string
	str.to_cstring = dup
	return str
end
1154
# Cleans a NativeString if necessary
#
# The `len` first bytes of `self` are scanned. Each byte that does not start
# an accepted char is replaced by the 3 bytes `EF BF BD` in a new `NativeString`.
# `self` is used as is when nothing has to be replaced.
fun clean_utf8(len: Int): FlatString do
	# Positions of the bytes to replace, allocated on the first replacement
	var replacements: nullable Array[Int] = null
	# Byte length of the result
	var end_length = len
	# Length in chars of the result
	var chr_ln = 0
	var pos = 0
	var rem = len
	while rem > 0 do
		# Skip groups of 4 bytes while none of them has its high bit set
		while rem >= 4 do
			var quad = fetch_4_chars(pos)
			if (quad & 0x80808080) != 0 then break
			pos += 4
			chr_ln += 4
			rem -= 4
		end
		if rem == 0 then break

		var b = self[pos]
		if (b & 0x80u8) == 0x00u8 then
			# Single byte char
			pos += 1
			chr_ln += 1
			rem -= 1
			continue
		end

		# The first byte must match the announced length of the char
		var nxst = length_of_char_at(pos)
		var ok_st: Bool
		if nxst == 1 then
			ok_st = (b & 0x80u8) == 0u8
		else if nxst == 2 then
			ok_st = (b & 0xE0u8) == 0xC0u8
		else if nxst == 3 then
			ok_st = (b & 0xF0u8) == 0xE0u8
		else
			ok_st = (b & 0xF8u8) == 0xF0u8
		end

		if ok_st then
			# The code point must be in the range allowed for this length
			var c = char_at(pos)
			var cp = c.code_point
			var ok_c: Bool
			if nxst == 1 then
				ok_c = cp >= 0 and cp <= 0x7F
			else if nxst == 2 then
				ok_c = cp >= 0x80 and cp <= 0x7FF
			else if nxst == 3 then
				ok_c = cp >= 0x800 and cp <= 0xFFFF
				ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
			else
				ok_c = cp >= 0x10000 and cp <= 0x10FFFF
			end
			if ok_c then
				var clen = c.u8char_len
				pos += clen
				rem -= clen
				chr_ln += 1
				continue
			end
		end

		# Rejected byte: it will be replaced by 3 bytes, hence 2 more bytes
		if replacements == null then replacements = new Array[Int]
		replacements.add pos
		end_length += 2
		pos += 1
		rem -= 1
		chr_ln += 1
	end

	var ret = self
	if end_length != len then
		ret = new NativeString(end_length)
		# First byte of `self` that is not copied yet
		var old_repl = 0
		# Write position in `ret`
		var off = 0
		for repl_pos in replacements.as(not null) do
			# Copy what precedes the rejected byte, then write the replacement
			var chkln = repl_pos - old_repl
			copy_to(ret, chkln, old_repl, off)
			off += chkln
			ret[off] = 0xEFu8
			ret[off + 1] = 0xBFu8
			ret[off + 2] = 0xBDu8
			old_repl = repl_pos + 1
			off += 3
		end
		# Copy what follows the last rejected byte
		copy_to(ret, len - old_repl, old_repl, off)
	end
	return new FlatString.full(ret, end_length, 0, chr_ln)
end
1248
# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
#
# Very unsafe, make sure to have room for this char prior to calling this function.
private fun set_char_at(pos: Int, c: Char) do
	var cp = c.code_point
	if cp >= 128 then
		# Multi-byte char: encoded by the native implementation
		native_set_char(pos, c, c.u8char_len)
		return
	end
	# Single byte char: written directly
	self[pos] = cp.to_b
end
1260
# Writes the `ln` bytes of the UTF-8 encoding of `c` at position `pos`
#
# `ln` must be the length in bytes of `c` once encoded, nothing is written
# if it is not between 1 and 4.
private fun native_set_char(pos: Int, c: Char, ln: Int) `{
	char* dst = self + pos;
	switch(ln){
		case 1:
			dst[0] = c;
			break;
		case 2:
			dst[0] = 0xC0 | ((c >> 6) & 0x1F);
			dst[1] = 0x80 | (c & 0x3F);
			break;
		case 3:
			dst[0] = 0xE0 | ((c >> 12) & 0x0F);
			dst[1] = 0x80 | ((c >> 6) & 0x3F);
			dst[2] = 0x80 | (c & 0x3F);
			break;
		case 4:
			dst[0] = 0xF0 | ((c >> 18) & 0x07);
			dst[1] = 0x80 | ((c >> 12) & 0x3F);
			dst[2] = 0x80 | ((c >> 6) & 0x3F);
			dst[3] = 0x80 | (c & 0x3F);
			break;
	}
`}
1284 end
1285
redef class Int
	# The digits are written by `fill_buffer` into a buffer of `digit_count(base)` spaces
	redef fun to_base(base, signed)
	do
		var width = digit_count(base)
		var buf = new FlatBuffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# return displayable int in base 10 and signed
	#
	#     assert 1.to_s == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s
	do
		# Fast case for common numbers
		if self == 0 then return "0"
		if self == 1 then return "1"

		# The native implementation writes into a null-terminated storage
		var nslen = int_to_s_len
		var size = nslen + 1
		var ns = new NativeString(size)
		ns[nslen] = 0u8
		native_int_to_s(ns, size)
		return new FlatString.full(ns, nslen, 0, nslen)
	end
end
1311
redef class Array[E]

	# Fast implementation
	#
	# The String of each non-null element is collected first, then
	# their bytes are copied into a single storage of the exact size.
	redef fun plain_to_s
	do
		var l = _length
		if l == 0 then return ""
		var its = _items.as(not null)
		var first = its[0]
		if l == 1 then
			if first == null then return ""
			return first.to_s
		end

		# First pass: collect the strings and compute the total byte length
		var strs = new NativeArray[String](l)
		var count = 0
		var total = 0
		for i in [0 .. l[ do
			var e = its[i]
			if e == null then continue
			var str = e.to_s
			total += str.bytelen
			strs[count] = str
			count += 1
		end

		# Second pass: copy the bytes of each string
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. count[ do
			var str = strs[i]
			if str isa FlatString then
				var blen = str._bytelen
				str._items.copy_to(ns, blen, str._first_byte, off)
				off += blen
			else
				# Not a flat string: copy each of its flat substrings
				for sub in str.substrings do
					var fs = sub.as(FlatString)
					var blen = fs._bytelen
					fs._items.copy_to(ns, blen, fs._first_byte, off)
					off += blen
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0)
	end
end
1361
redef class NativeArray[E]
	# Concatenation of the strings of `self`
	#
	# `self` must be a `NativeArray[String]`. The bytes of the elements
	# are copied into a single storage of the exact size.
	redef fun native_to_s do
		assert self isa NativeArray[String]
		var strs = self
		var count = length

		# First pass: compute the total byte length
		var total = 0
		for i in [0 .. count[ do
			total += strs[i].bytelen
		end

		# Second pass: copy the bytes of each string
		var ns = new NativeString(total + 1)
		ns[total] = 0u8
		var off = 0
		for i in [0 .. count[ do
			var str = strs[i]
			if str isa FlatString then
				var blen = str._bytelen
				str._items.copy_to(ns, blen, str._first_byte, off)
				off += blen
			else
				# Not a flat string: copy each of its flat substrings
				for sub in str.substrings do
					var fs = sub.as(FlatString)
					var blen = fs._bytelen
					fs._items.copy_to(ns, blen, fs._first_byte, off)
					off += blen
				end
			end
		end
		return new FlatString.with_infos(ns, total, 0)
	end
end
1398
redef class Map[K,V]
	# Couples are written as key, `couple_sep`, then value, and are separated by `sep`
	#
	# A null key or a null value is written `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var res = new Buffer # Result
		var it = iterator
		# The map is not empty: the iterator is on the first couple
		loop
			var k = it.key
			var e = it.item
			res.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			it.next
			if not it.is_ok then break
			# Another couple follows
			res.append(sep)
		end
		return res.to_s
	end
end