Merge: subset: Add grammar and kind
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text
#
# It yields `tgt` exactly once, then becomes exhausted.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# The text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok
	do
		return tgt != null
	end

	redef fun item
	do
		assert is_ok
		return tgt.as(not null)
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# Index in `_items` of the first byte of `self`
#
# Always 0 here; `FlatString` redefines it as an attribute.
protected fun first_byte: Int
do
	return 0
end

# Index in `_items` of the last byte of `self`, inclusive
#
# Computed as `first_byte + _byte_length - 1`, hence `first_byte - 1`
# when `self` is empty.
protected fun last_byte: Int
do
	return first_byte + _byte_length - 1
end

# Char index most recently resolved by `char_to_byte_index` (cache)
var position: Int = 0

# Byte index in `_items` of the char at `position` (cache)
var bytepos: Int = 0
50
# Index of the character `index` in `_items`
#
# The answer is remembered in `_position`/`_bytepos`, so asking for the
# same, the next or the previous character is resolved without a scan.
# No bound check is done on `index`.
fun char_to_byte_index(index: Int): Int do
	var raw = _items
	var cached_byte = _bytepos
	var shift = index - _position

	# Same character as the cached one: nothing to compute
	if shift == 0 then return cached_byte

	if shift == 1 then
		# Next character: skip over the cached one
		var nxt = cached_byte
		if raw[nxt] & 0x80u8 == 0x00u8 then
			nxt += 1
		else
			nxt += raw.length_of_char_at(nxt)
		end
		_bytepos = nxt
		_position = index
		return nxt
	end

	if shift == -1 then
		# Previous character: walk back to its first byte
		var prv = raw.find_beginning_of_char_at(cached_byte - 1)
		_bytepos = prv
		_position = index
		return prv
	end

	# General case: start the scan from the closest known anchor among
	# the cached position, the beginning and the end of the text.
	var cached_char = _position
	var to_start = index
	var to_end = (_length - 1) - index
	var to_cache = (cached_char - index).abs

	var anchor_char: Int
	var anchor_byte: Int
	if to_cache <= to_start and to_cache <= to_end then
		anchor_char = cached_char
		anchor_byte = _bytepos
	else if to_start <= to_end then
		anchor_char = 0
		anchor_byte = first_byte
	else
		anchor_char = _length - 1
		anchor_byte = raw.find_beginning_of_char_at(last_byte)
	end

	var found = raw.char_to_byte_index_cached(index, anchor_char, anchor_byte)

	_position = index
	_bytepos = found

	return found
end
107
# By escaping `self` to HTML, how many more bytes will be needed ?
#
# `<` and `>` cost 3 extra bytes each; `&`, `"`, `'` and `/` cost 4.
fun chars_to_html_escape: Int do
	var raw = _items
	var stop = last_byte
	var extra = 0
	var i = first_byte
	while i <= stop do
		var b = raw[i]
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `<` and `>`
			extra += 3
		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# `&`, `"`, `'` and `/`
			extra += 4
		end
		i += 1
	end
	return extra
end
133
redef fun html_escape
do
	var growth = chars_to_html_escape
	# Nothing to escape: no new array is needed
	if growth == 0 then return to_s

	var src = _items
	var stop = last_byte
	var out_len = growth + _byte_length
	var out = new CString(out_len)
	var o = 0
	var i = first_byte
	while i <= stop do
		var b = src[i]
		# Some HTML characters are used as meta-data, they are
		# replaced by an HTML-escaped equivalent:
		#
		# * 0x3C (<) => &lt;
		# * 0x3E (>) => &gt;
		# * 0x26 (&) => &amp;
		# * 0x22 (") => &#34;
		# * 0x27 (') => &#39;
		# * 0x2F (/) => &#47;
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `&`, then `l` or `g`, then `t;`
			out[o] = 0x26u8
			if b == 0x3Cu8 then
				out[o + 1] = 0x6Cu8
			else
				out[o + 1] = 0x67u8
			end
			out[o + 2] = 0x74u8
			out[o + 3] = 0x3Bu8
			o += 4
		else if b == 0x26u8 then
			# `&amp;`
			out[o] = 0x26u8
			out[o + 1] = 0x61u8
			out[o + 2] = 0x6Du8
			out[o + 3] = 0x70u8
			out[o + 4] = 0x3Bu8
			o += 5
		else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# Numeric entity: `&#`, two decimal digits, `;`
			out[o] = 0x26u8
			out[o + 1] = 0x23u8
			if b == 0x22u8 then
				out[o + 2] = 0x33u8
				out[o + 3] = 0x34u8
			else if b == 0x27u8 then
				out[o + 2] = 0x33u8
				out[o + 3] = 0x39u8
			else
				out[o + 2] = 0x34u8
				out[o + 3] = 0x37u8
			end
			out[o + 4] = 0x3Bu8
			o += 5
		else
			# Regular byte, copied verbatim
			out[o] = b
			o += 1
		end
		i += 1
	end
	return new FlatString.with_infos(out, out_len, 0)
end
205
# By escaping `self` to C, how many more bytes will be needed ?
#
# This enables a double-optimization in `escape_to_c` since if this
# method returns 0, then `self` does not need escaping and can be
# returned as-is
#
# The value returned here is used by `escape_to_c` to size its output
# array, so both methods must apply exactly the same rules.
fun chars_to_escape_to_c: Int do
	var its = _items
	var max = last_byte
	var pos = first_byte
	var req_esc = 0
	while pos <= max do
		var c = its[pos]
		if c == 0x0Au8 then
			req_esc += 1
		else if c == 0x09u8 then
			req_esc += 1
		else if c == 0x22u8 then
			req_esc += 1
		else if c == 0x27u8 then
			req_esc += 1
		else if c == 0x5Cu8 then
			req_esc += 1
		else if c == 0x3Fu8 then
			var j = pos + 1
			# `j` is a byte index in `_items`, so it is bounded by the
			# last byte of `self`, not by the number of characters.
			if j <= max then
				var next = its[j]
				# We ignore `??'` because it will be escaped as `??\'`.
				if
					next == 0x21u8 or
					next == 0x28u8 or
					next == 0x29u8 or
					next == 0x2Du8 or
					next == 0x2Fu8 or
					next == 0x3Cu8 or
					next == 0x3Du8 or
					next == 0x3Eu8
				then req_esc += 1
			end
		else if c < 32u8 then
			# Octal escape: `\0` followed by two octal digits
			req_esc += 3
		end
		pos += 1
	end
	return req_esc
end

redef fun escape_to_c do
	var ln_extra = chars_to_escape_to_c
	if ln_extra == 0 then return self.to_s
	var its = _items
	var max = last_byte
	var nlen = _byte_length + ln_extra
	var nns = new CString(nlen)
	var pos = first_byte
	var opos = 0
	while pos <= max do
		var c = its[pos]
		# Special codes:
		#
		# Any byte with value < 32 is a control character
		# All their uses will be replaced by their octal
		# value in C.
		#
		# There are two exceptions however:
		#
		# * 0x09 => \t
		# * 0x0A => \n
		#
		# Aside from the code points above, the following are:
		#
		# * 0x22 => \"
		# * 0x27 => \'
		# * 0x5C => \\
		#
		# Finally, a `?` followed by one of `!()-/<=>` is written `\?`.
		if c == 0x09u8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x74u8
			opos += 2
		else if c == 0x0Au8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x6Eu8
			opos += 2
		else if c == 0x22u8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x22u8
			opos += 2
		else if c == 0x27u8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x27u8
			opos += 2
		else if c == 0x5Cu8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x5Cu8
			opos += 2
		else if c == 0x3Fu8 then
			var j = pos + 1
			# Same byte-based bound as in `chars_to_escape_to_c`
			if j <= max then
				var next = its[j]
				# We ignore `??'` because it will be escaped as `??\'`.
				if
					next == 0x21u8 or
					next == 0x28u8 or
					next == 0x29u8 or
					next == 0x2Du8 or
					next == 0x2Fu8 or
					next == 0x3Cu8 or
					next == 0x3Du8 or
					next == 0x3Eu8
				then
					nns[opos] = 0x5Cu8
					opos += 1
				end
			end
			nns[opos] = 0x3Fu8
			opos += 1
		else if c < 32u8 then
			# `\0` then the two octal digits of `c` (which is < 32)
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x30u8
			nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8
			nns[opos + 3] = (c & 0x07u8) + 0x30u8
			opos += 4
		else
			nns[opos] = c
			opos += 1
		end
		pos += 1
	end
	return nns.to_s_unsafe(nlen, copy=false, clean=false)
end
334
redef fun [](index) do
	var char_count = _length

	# Statistically:
	# * ~70% want the next char
	# * ~23% want the previous
	# * ~7% want the same char
	#
	# So the cached position is tried first.
	var byte_at = _bytepos
	var step = index - _position

	if step == 1 and index < char_count - 1 then
		if _items[byte_at] & 0x80u8 == 0x00u8 then
			# The cached char is a single byte: the wanted one
			# starts right after it.
			byte_at += 1
			_position = index
			_bytepos = byte_at
			# The char itself is read by the `step == 0` case below.
			step = 0
		end
	else if step == -1 and index > 1 then
		var prev = _items[byte_at - 1]
		if prev & 0x80u8 == 0x00u8 then
			# The previous byte is a whole single-byte char
			_position = index
			_bytepos = byte_at - 1
			return prev.ascii
		end
	end

	if step == 0 then
		# `byte_at` is the first byte of the wanted char
		var cur = _items[byte_at]
		if cur & 0x80u8 == 0x00u8 then return cur.ascii
		return items.char_at(byte_at)
	end

	# Slow path: only there is `index` validated
	assert index >= 0 and index < char_count
	return fetch_char_at(index)
end
381
# Gets a `Char` at `index` in `self`
#
# WARNING: Use at your own risks as no bound-checking is done
fun fetch_char_at(index: Int): Char do
	var byte_index = char_to_byte_index(index)
	var raw = _items
	var lead = raw[byte_index]
	# High bit set: the char spans several bytes and must be decoded
	if lead & 0x80u8 != 0x00u8 then return raw.char_at(byte_index)
	return lead.ascii
end
392
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
#
#     assert "ff".to_hex == 255
#
# `pos` defaults to 0 and `ln` to `length - pos`.
# `ln` bytes are read, starting at the first byte of the char `pos`.
redef fun to_hex(pos, ln) do
	if pos == null then pos = 0
	if ln == null then ln = length - pos
	var cursor = char_to_byte_index(pos)
	var stop = cursor + ln
	var raw = _items
	var value = 0
	while cursor < stop do
		# Each digit contributes 4 bits
		value = (value << 4) + raw[cursor].ascii.from_hex
		cursor += 1
	end
	return value
end
409
redef fun copy_to_native(dst, n, src_off, dst_off)
do
	# `src_off` is relative to the beginning of `self`, not of `_items`
	var src_byte = first_byte + src_off
	_items.copy_to(dst, n, src_byte, dst_off)
end
413 end
414
415 # Immutable strings of characters.
416 abstract class FlatString
417 super FlatText
418 super String
419
# Index at which `self` begins in `_items`, inclusively
redef var first_byte is noinit

# A new char view on `self` is created at each call
redef fun chars
do
	return new FlatStringCharView(self)
end

# A new byte view on `self` is created at each call
redef fun bytes
do
	return new FlatStringByteView(self)
end
426
redef fun to_cstring
do
	var size = _byte_length
	# One more byte for the trailing NUL
	var res = new CString(size + 1)
	res[size] = 0u8
	_items.copy_to(res, size, _first_byte, 0)
	return res
end
434
redef fun reversed
do
	var buf = new FlatBuffer.with_capacity(_byte_length + 1)
	# Walk the characters from the last one down to the first one
	var remaining = _length
	while remaining > 0 do
		remaining -= 1
		buf.add self.fetch_char_at(remaining)
	end
	var res = buf.to_s.as(FlatString)
	res._length = self._length
	return res
end
446
redef fun fast_cstring
do
	# Delegates to `CString::fast_cstring` from the first byte of `self`
	return _items.fast_cstring(_first_byte)
end
448
redef fun substring(from, count)
do
	if count <= 0 then return ""

	# A negative start eats into the requested count
	if from < 0 then
		count += from
		if count <= 0 then return ""
		from = 0
	end

	# Clamp the range to the end of `self`
	var char_count = _length
	if (count + from) > char_count then count = char_count - from
	if count <= 0 then return ""

	return substring_impl(from, count, from + count - 1)
end
465
# Builds the substring made of the `count` chars from `from` to `end_index`, inclusive
#
# The bounds are not checked here; `substring` clamps them before calling.
# The result reuses `_items`: no byte is copied.
private fun substring_impl(from, count, end_index: Int): String do
	# Resolve first the bound that is the closest to the cached position,
	# so that both lookups benefit from the cache of `char_to_byte_index`.
	var cache = _position
	var dfrom = (cache - from).abs
	var dend = (cache - end_index).abs

	var bytefrom: Int
	var byteto: Int
	if dfrom < dend then
		bytefrom = char_to_byte_index(from)
		byteto = char_to_byte_index(end_index)
	else
		byteto = char_to_byte_index(end_index)
		bytefrom = char_to_byte_index(from)
	end

	var its = _items
	# `byteto` is the first byte of the last char: move it to its last byte
	byteto += its.length_of_char_at(byteto) - 1

	var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
	return s
end
487
redef fun empty
do
	return "".as(FlatString)
end
489
redef fun to_upper
do
	var outstr = new FlatBuffer.with_capacity(self._byte_length + 1)

	var mylen = _length
	var pos = 0

	while pos < mylen do
		# Index `self` directly: `chars` would allocate a new view
		# at each iteration only to forward the access to `self`.
		outstr.add(self[pos].to_upper)
		pos += 1
	end

	return outstr.to_s
end
504
redef fun to_lower
do
	var outstr = new FlatBuffer.with_capacity(self._byte_length + 1)

	var mylen = _length
	var pos = 0

	while pos < mylen do
		# Index `self` directly: `chars` would allocate a new view
		# at each iteration only to forward the access to `self`.
		outstr.add(self[pos].to_lower)
		pos += 1
	end

	return outstr.to_s
end
519
# Calls `output` on each character of `self`, in order
redef fun output
do
	for c in chars do
		c.output
	end
end
524
525 ##################################################
526 # String Specific Methods #
527 ##################################################
528
# Low-level creation of a new string with minimal data.
#
# `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
#
# The number of characters is computed from the bytes. When it equals
# `byte_length`, an `ASCIIFlatString` is built, else a `UnicodeFlatString`.
private new with_infos(items: CString, byte_length, from: Int)
do
	var char_count = items.utf8_length(from, byte_length)
	if byte_length == char_count then
		return new ASCIIFlatString.full_data(items, byte_length, from, char_count)
	else
		return new UnicodeFlatString.full_data(items, byte_length, from, char_count)
	end
end
539
# Low-level creation of a new string with all the data.
#
# `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
#
# `length` is trusted. When it equals `byte_length`, an `ASCIIFlatString`
# is built, else a `UnicodeFlatString`.
private new full(items: CString, byte_length, from, length: Int)
do
	if byte_length == length then
		return new ASCIIFlatString.full_data(items, byte_length, from, length)
	else
		return new UnicodeFlatString.full_data(items, byte_length, from, length)
	end
end
549
redef fun ==(other)
do
	if not other isa FlatText then return super

	# Same object: no need to look at the bytes
	if self.object_id == other.object_id then return true

	# Different byte lengths cannot hold the same content
	var size = _byte_length
	if other._byte_length != size then return false

	var mine = self._items
	var theirs = other._items
	var my_start = _first_byte
	var its_start = other.first_byte

	# Byte-wise comparison of both contents
	var off = 0
	while off < size do
		if mine[my_start + off] != theirs[its_start + off] then return false
		off += 1
	end

	return true
end
576
redef fun <(other)
do
	if not other isa FlatText then return super

	# An object is never strictly lower than itself
	if self.object_id == other.object_id then return false

	var mine = _items
	var theirs = other._items

	var my_size = _byte_length
	var its_size = other.byte_length

	# Only the common prefix can be compared byte by byte
	var common = my_size
	if its_size < common then common = its_size

	var my_at = _first_byte
	var its_at = other.first_byte
	var stop = my_at + common

	while my_at < stop do
		var a = mine[my_at]
		var b = theirs[its_at]
		# The first differing byte decides
		if a != b then return a < b
		my_at += 1
		its_at += 1
	end

	# Equal prefixes: the shorter one is the lower one
	return my_size < its_size
end
606
# Concatenation of `self` and of `o.to_s`, in a newly allocated array
#
# Aborts if `o.to_s` is not a `FlatText`.
redef fun +(o) do
	var s = o.to_s
	var slen = s.byte_length
	var mlen = _byte_length
	var nlen = mlen + slen
	var mits = _items
	var mifrom = _first_byte
	if s isa FlatText then
		var sits = s._items
		var sifrom = s.first_byte
		var ns = new CString(nlen + 1)
		mits.copy_to(ns, mlen, mifrom, 0)
		sits.copy_to(ns, slen, sifrom, mlen)
		# The extra byte was allocated for the terminator: write it,
		# as `*` and `to_cstring` do.
		ns[nlen] = 0u8
		return new FlatString.full(ns, nlen, 0, _length + s.length)
	else
		abort
	end
end
625
# `self` repeated `i` times, in a newly allocated array
redef fun *(i) do
	var unit_bytes = _byte_length
	var total_bytes = unit_bytes * i
	var total_chars = _length * i
	var src = _items
	var start = _first_byte
	# One more byte for the trailing NUL
	var res = new CString(total_bytes + 1)
	res[total_bytes] = 0u8
	var at = 0
	var left = i
	while left > 0 do
		src.copy_to(res, unit_bytes, start, at)
		at += unit_bytes
		left -= 1
	end
	return new FlatString.full(res, total_bytes, 0, total_chars)
end
643
redef fun hash
do
	# Computed once, then served from `hash_cache`
	var cached = hash_cache
	if cached != null then return cached

	# djb2 hash algorithm, applied on the bytes of `self`
	var h = 5381
	var raw = _items
	var stop = last_byte
	var at = _first_byte
	while at <= stop do
		h = (h << 5) + h + raw[at].to_i
		at += 1
	end

	hash_cache = h
	return h
end
664
# An iterator that yields `self` as its only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
666 end
667
# Regular Nit UTF-8 strings
private class UnicodeFlatString
	super FlatString

	# Low-level constructor: the fields are set from the arguments as is
	#
	# The char-to-byte cache starts on the byte `from`.
	init full_data(items: CString, byte_length, from, length: Int)
	do
		_items = items
		_first_byte = from
		_byte_length = byte_length
		_length = length
		_bytepos = from
	end

	redef fun substring_from(from)
	do
		var char_count = _length
		if from >= char_count then return empty
		if from <= 0 then return self
		var start_byte = char_to_byte_index(from)
		# Bytes located before `start_byte` are left out of the result
		var skipped = start_byte - _first_byte
		return new FlatString.full(items, byte_length - skipped, start_byte, char_count - from)
	end
end
689
# Special cases of String where all the characters are ASCII-based
#
# Optimizes access operations to O(1) complexity.
private class ASCIIFlatString
	super FlatString

	# Low-level constructor: the fields are set from the arguments as is
	#
	# The char-to-byte cache starts on the byte `from`.
	init full_data(items: CString, byte_length, from, length: Int)
	do
		_items = items
		_first_byte = from
		_byte_length = byte_length
		_length = length
		_bytepos = from
	end

	redef fun [](idx)
	do
		assert idx < _byte_length and idx >= 0
		# One byte per char: the char index is a byte offset
		var at = _first_byte + idx
		return _items[at].ascii
	end

	redef fun substring(from, count)
	do
		if count <= 0 then return ""
		var char_count = _length
		# Clamp the range to the end of `self`
		if (count + from) > char_count then count = char_count - from
		if count <= 0 then return ""
		# A negative start eats into the requested count
		if from < 0 then
			count += from
			if count <= 0 then return ""
			from = 0
		end
		var start = from + _first_byte
		return new ASCIIFlatString.full_data(_items, count, start, count)
	end

	redef fun reversed
	do
		var buf = new FlatBuffer.with_capacity(_byte_length + 1)
		# Walk the characters from the last one down to the first one
		var remaining = _length
		while remaining > 0 do
			remaining -= 1
			buf.add self[remaining]
		end
		return buf.to_s.as(FlatString)
	end

	redef fun char_to_byte_index(index)
	do
		return index + _first_byte
	end

	redef fun substring_impl(from, count, end_index)
	do
		var start = from + _first_byte
		return new ASCIIFlatString.full_data(_items, count, start, count)
	end

	redef fun fetch_char_at(i)
	do
		return _items[i + _first_byte].ascii
	end
end
741
# Iterator over the chars of a `FlatString`, by decreasing index
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# The string that is iterated
	var target: FlatString

	# Index in `target` of the current char
	var curr_pos: Int

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
758
# Iterator over the chars of a `FlatString`, by increasing index
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# The string that is iterated
	var target: FlatString

	# Index of the last char of `target`, computed at creation
	var max: Int is noautoinit

	# Index in `target` of the current char
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
779
# View on the chars of a `FlatString`
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
792
# Iterator over the bytes of a `FlatString`, by decreasing index
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# The string that is iterated
	var target: FlatString

	# The native array of `target`, fetched once at creation
	var target_items: CString is noautoinit

	# Given relative to the beginning of `target`, then kept as an
	# absolute index in `target_items`
	var curr_pos: Int

	init
	do
		target_items = target._items
		curr_pos = curr_pos + target._first_byte
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos >= target._first_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
818
# Iterator over the bytes of a `FlatString`, by increasing index
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# The string that is iterated
	var target: FlatString

	# The native array of `target`, fetched once at creation
	var target_items: CString is noautoinit

	# Given relative to the beginning of `target`, then kept as an
	# absolute index in `target_items`
	var curr_pos: Int

	init
	do
		target_items = target._items
		curr_pos = curr_pos + target._first_byte
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos <= target.last_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
844
# View on the bytes of a `FlatString`
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		var str = _target
		# `index` is relative to the beginning of the string and
		# must designate one of its bytes
		assert index >= 0 and index < str._byte_length
		return str._items[str._first_byte + index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
865
redef class Buffer
	# A new `Buffer` is a `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# A new `Buffer` with a capacity is a `FlatBuffer` built by `with_capacity`
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
871
872 # Mutable strings of characters.
873 class FlatBuffer
874 super FlatText
875 super Buffer
876
# A new char view on `self` is created at each call
redef fun chars
do
	return new FlatBufferCharView(self)
end

# A new byte view on `self` is created at each call
redef fun bytes
do
	return new FlatBufferByteView(self)
end

# Number of bytes allocated for `_items`, as tracked by this buffer
private var capacity = 0

redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

# An iterator that yields `self` as its only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
886
# Re-copies the `CString` into a new one and sets it as the new `Buffer`
#
# This happens when an operation modifies the current `Buffer` and
# the Copy-On-Write flag `written` is set at true.
# The flag is cleared once the copy is installed.
private fun reset
do
	var fresh = new CString(capacity)
	var used = _byte_length
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	written = false
end
897
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# The bytes `[from, _byte_length[` are moved to `[from + len, _byte_length + len[`.
# When `capacity` is too small, a larger array is allocated, the bytes
# `[0, from[` are copied into it, and it replaces `_items`.
#
# Internal only, does not modify _byte_length or length, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int) do
	var oit = _items
	var nit = oit
	var bt = _byte_length
	var needed = bt + len
	if needed > capacity then
		# Grow geometrically, but never below what is needed
		var ncap = capacity * 2 + 2
		if ncap < needed then ncap = needed
		nit = new CString(ncap)
		# Keep the bytes located before the shifted area
		# (`copy_to` takes: destination, length, source offset, destination offset)
		oit.copy_to(nit, from, 0, 0)
		capacity = ncap
		_items = nit
		# The new array is not shared with any string
		written = false
	end
	oit.copy_to(nit, bt - from, from, from + len)
end
912
# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# The bytes `[from, _byte_length[` are moved to start at `from - len`.
#
# Internal only, does not modify _byte_length or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int)
do
	var raw = _items
	var moved = _byte_length - from
	raw.copy_to(raw, moved, from, from - len)
end
920
# Replaces the char at `index` by `item`
#
# `index == length` is accepted and appends `item`.
# The replaced and the new char may have different byte lengths, in
# which case the bytes that follow are shifted.
redef fun []=(index, item)
do
	assert index >= 0 and index <= _length
	if written then reset
	if index == _length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		# Make room first, so that the shift never has to reallocate
		enlarge(_byte_length + size_diff)
		rshift_bytes(ip + clen, size_diff)
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_byte_length += size_diff
	# `_items` may have been replaced while making room
	_items.set_char_at(ip, item)
	# Chars located after `index` moved by `size_diff` bytes: keep the
	# char-to-byte cache consistent.
	if size_diff != 0 and _position > index then _bytepos += size_diff
end
943
# Inserts `s` so that its first char gets the index `pos`
#
# `pos == length` is accepted and appends `s`.
redef fun insert(s, pos) do
	assert pos >= 0 and pos <= length
	if pos == length then
		append s
		return
	end
	var slen = s.byte_length
	enlarge(byte_length + slen)
	# Copy-on-write: if `_items` is still shared with a string (no
	# reallocation was done by `enlarge`), take a private copy first.
	if written then reset
	var it = _items
	var shpos = it.char_to_byte_index(pos)
	rshift_bytes(shpos, slen)
	s.copy_to_native(it, slen, 0, shpos)
	# The cached char, if located at or after `pos`, moved with the shift
	if _position >= pos then
		_position += s.length
		_bytepos += slen
	end
	length += s.length
	byte_length += slen
end
959
# Inserts `c` so that it gets the index `pos`
#
# `pos == length` is accepted and appends `c`.
redef fun insert_char(c, pos) do
	assert pos >= 0 and pos <= length
	if pos == length then
		add c
		return
	end
	var clen = c.u8char_len
	enlarge(byte_length + clen)
	# Copy-on-write: if `_items` is still shared with a string (no
	# reallocation was done by `enlarge`), take a private copy first.
	if written then reset
	var it = _items
	var shpos = it.char_to_byte_index(pos)
	rshift_bytes(shpos, clen)
	it.set_char_at(shpos, c)
	# The cached char, if located at or after `pos`, moved with the shift
	if _position >= pos then
		_position += 1
		_bytepos += clen
	end
	length += 1
	byte_length += clen
end
975
# Appends the char `c` at the end of `self`
redef fun add(c)
do
	if written then reset
	var width = c.u8char_len
	var tail = _byte_length
	enlarge(tail + width)
	_items.set_char_at(tail, c)
	_byte_length = _byte_length + width
	_length = _length + 1
end
986
# Empties `self`
#
# When the native array is shared (`written`), a new one of 16 bytes
# is allocated instead of touching the shared one.
redef fun clear do
	_byte_length = 0
	_length = 0
	# The char-to-byte cache described the removed content
	_position = 0
	_bytepos = 0
	if written then
		_capacity = 16
		reset
	end
end
995
redef fun empty
do
	return new Buffer
end
997
redef fun enlarge(cap)
do
	var new_cap = capacity
	if cap <= new_cap then return
	# Start from 16 at least, then double until `cap` is exceeded
	if new_cap <= 16 then new_cap = 16
	while new_cap <= cap do new_cap = new_cap * 2
	# The COW flag can be set at false here, since
	# it does a copy of the current `Buffer`
	written = false
	var grown = new CString(new_cap)
	var used = _byte_length
	if used > 0 then _items.copy_to(grown, used, 0, 0)
	_items = grown
	capacity = new_cap
end
1016
# A string over the current bytes of `self`, without copy
#
# `written` is set so that the next modification done through a
# method that checks it copies the bytes first.
redef fun to_s
do
	written = true
	var size = _byte_length
	if size == 0 then _items = new CString(1)
	return new FlatString.full(_items, size, 0, _length)
end
1024
redef fun to_cstring
do
	var size = _byte_length
	# One more byte for the trailing NUL
	var res = new CString(size + 1)
	if _length > 0 then _items.copy_to(res, size, 0, 0)
	res[size] = 0u8
	return res
end
1033
# Create a new empty string.
init
do
end

# Low-level creation a new buffer with given data.
#
# `_items` will be used as is, without copy, to store the characters of the buffer.
# Aliasing issues is the responsibility of the caller.
#
# If `_items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
private init with_infos(items: CString, capacity, byte_length, length: Int)
do
	self._items = items
	self._byte_length = byte_length
	self._length = length
	self.capacity = capacity
end
1051
# Create a new string copied from `s`.
#
# The bytes of each substring of `s` are copied one after the other,
# so the new buffer shares nothing with `s`.
init from(s: Text)
do
	var total = s.byte_length
	_items = new CString(total)
	# Where the next substring goes in `_items`
	var offset = 0
	for i in s.substrings do
		var ilen = i._byte_length
		# Read from the first byte of the substring, which is not
		# necessarily the first byte of its native array.
		i._items.copy_to(_items, ilen, i.first_byte, offset)
		offset += ilen
	end
	_byte_length = total
	_length = s.length
	_capacity = _byte_length
end
1061
# Create a new empty string with a given capacity.
#
# `cap` is a number of bytes and must not be negative.
init with_capacity(cap: Int)
do
	assert cap >= 0
	capacity = cap
	_byte_length = 0
	_items = new CString(cap)
end
1070
# Appends the content of `s` at the end of `self`
redef fun append(s)
do
	if s.is_empty then return
	var added = s.byte_length
	var new_size = _byte_length + added
	enlarge(new_size)
	if not s isa FlatText then
		# Not a flat text: append its flat parts one by one, each
		# of them updating the lengths on its own
		for part in s.substrings do append part
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _byte_length)
	_byte_length = new_size
	_length += s.length
end
1086
# Copies the content of self in `dest`
#
# `len` chars are copied one by one, from the index `start` of `self`
# to the index `new_start` of `dest`.
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src_chars = self.chars
	var dst_chars = dest.chars
	var off = 0
	while off < len do
		dst_chars[new_start + off] = src_chars[start + off]
		off += 1
	end
end
1096
# A new buffer holding a copy of `count` chars of `self`, starting at `from`
#
# A negative `from` is replaced by 0 and the range is clamped to the
# end of `self`.
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then from = 0
	if (from + count) > _length then count = _length - from
	if count <= 0 then return new Buffer
	var raw = _items
	var first = raw.char_to_byte_index(from)
	var last_char = raw.char_to_byte_index(count + from - 1)
	# First byte after the last selected char
	var stop = last_char + raw.char_at(last_char).u8char_len
	var size = stop - first
	var copied = new CString(size)
	raw.copy_to(copied, size, first, 0)
	return new FlatBuffer.with_infos(copied, size, size, count)
end
1112
redef fun append_substring_impl(s, from, length)
do
	if length <= 0 then return
	if not s isa FlatText then
		super
		return
	end
	var src = s._items
	var first = s.char_to_byte_index(from)
	var last_char = s.char_to_byte_index(from + length - 1)
	# Number of bytes from `first` to the last byte of the last char
	var size = last_char - first + src.char_at(last_char).u8char_len
	enlarge(size + _byte_length)
	src.copy_to(_items, size, first, _byte_length)
	_byte_length += size
	_length += length
end
1128
# Removes `len` chars starting at the char index `p`
#
# `len` defaults to 1; nothing is done when it is 0.
redef fun remove_at(p, len) do
	if len == null then len = 1
	if len == 0 then return
	# Copy-on-write: do not alter bytes shared with a string
	if written then reset
	var bst = char_to_byte_index(p)
	var bend = char_to_byte_index(p + len - 1)
	# Move `bend` after the last byte of the last removed char
	bend += _items.char_at(bend).u8char_len
	var blen = bend - bst
	lshift_bytes(bend, blen)
	byte_length -= blen
	length -= len
	# The lookups above left the char-to-byte cache on a removed char
	_position = 0
	_bytepos = 0
end
1141
# Reverses the order of the chars of `self`, in a newly allocated array
redef fun reverse
do
	written = false
	var ns = new FlatBuffer.with_capacity(capacity)
	for i in chars.reverse_iterator do ns.add i
	_items = ns._items
	# Keep the tracked capacity in sync with the array that was adopted
	capacity = ns.capacity
	# The byte offsets of the chars changed: drop the char-to-byte cache
	_position = 0
	_bytepos = 0
end
1149
# Appends the current content of `self` to itself `repeats - 1` times
redef fun times(repeats)
do
	# Snapshot of the current content
	var unit = new FlatString.full(_items, _byte_length, 0, _length)
	var done = 1
	while done < repeats do
		append(unit)
		done += 1
	end
end
1158
# Replaces each char of `self` by its `to_upper`
redef fun upper
do
	if written then reset
	var at = 0
	while at < _length do
		self[at] = self[at].to_upper
		at += 1
	end
end
1164
# Replaces each char of `self` by its `to_lower`
redef fun lower
do
	if written then reset
	var at = 0
	while at < _length do
		self[at] = self[at].to_lower
		at += 1
	end
end
1170 end
1171
# Iterator over the bytes of a `FlatBuffer`, by decreasing index
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# The buffer that is iterated
	var target: FlatBuffer

	# The native array of `target`, fetched once at creation
	var target_items: CString is noautoinit

	# Index in `target_items` of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1192
# View on the bytes of a `FlatBuffer`
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		var buf = target
		# Same check as `FlatStringByteView`: bytes located after
		# `_byte_length` are not part of the buffer content.
		assert index >= 0 and index < buf._byte_length
		return buf._items[index]
	end

	redef fun iterator_from(pos) do return new FlatBufferByteIterator(target, pos)

	redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator(target, pos)

end
1205
# Iterator over the bytes of a `FlatBuffer`, by increasing index
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# The buffer that is iterated
	var target: FlatBuffer

	# The native array of `target`, fetched once at creation
	var target_items: CString is noautoinit

	# Index in `target_items` of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok
	do
		return curr_pos < target._byte_length
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1226
# Iterator over the chars of a `FlatBuffer`, by decreasing index
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# The buffer that is iterated
	var target: FlatBuffer

	# Index in `target` of the current char
	var curr_pos: Int

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1243
# View on the chars of a `FlatBuffer`
#
# Every modification is forwarded to the viewed buffer.
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		# Writing right after the last char appends
		if index != length then
			target[index] = item
			return
		end
		add(item)
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Forwards to `FlatBuffer::enlarge` on the viewed buffer
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	redef fun append(s)
	do
		var incoming = s.length
		# Reserve room ahead when the buffer looks too small
		if target.capacity < s.length then enlarge(incoming + target._length)
		for c in s do
			target.add c
		end
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end
end
1288
# Forward iterator over the chars of a `FlatBuffer`
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer whose chars are iterated
	var target: FlatBuffer

	# Last valid index, computed from `target._length` when the iterator is built
	var max: Int is noautoinit

	# Index of the char currently pointed at
	var curr_pos: Int

	init
	do
		_max = _target._length - 1
	end

	redef fun index
	do
		return _curr_pos
	end

	# True while `curr_pos` has not gone past `max`
	redef fun is_ok
	do
		return _curr_pos <= _max
	end

	redef fun item
	do
		return _target[_curr_pos]
	end

	redef fun next
	do
		_curr_pos += 1
	end

end
1309
redef class CString

	# Get a `String` from the data at `self` copied into Nit memory
	#
	# Require: `self` is a null-terminated string.
	redef fun to_s
	do
		return to_s_unsafe
	end

	# Get a `String` from `byte_length` bytes at `self` copied into Nit memory
	#
	# The string is cleaned.
	fun to_s_with_length(byte_length: Int): String
	do
		return to_s_unsafe(byte_length)
	end

	# Build a `String` from the bytes at `self`
	#
	# Missing arguments default to: `byte_length` = `cstring_length`,
	# `clean` = true and `copy` = true. When `clean` is false and
	# `char_length` is not given, it is computed with `utf8_length`.
	redef fun to_s_unsafe(byte_length, char_length, copy, clean)
	do
		byte_length = byte_length or else cstring_length
		clean = clean or else true
		copy = copy or else true

		var cleaned: nullable FlatString = null
		if clean then
			cleaned = clean_utf8(byte_length)
			char_length = cleaned.length
		else
			char_length = char_length or else utf8_length(0, byte_length)
		end

		# The cleaned string can be returned directly when no copy is wanted,
		# or when `clean_utf8` already moved the data to a buffer of its own.
		if cleaned != null and (not copy or cleaned.items != self) then return cleaned

		# No cleaning and no copy: wrap the bytes of `self` in place
		if not copy then return new FlatString.full(self, byte_length, 0, char_length)

		# Copy the bytes to a fresh buffer, with one extra byte for the final 0
		var owned = new CString(byte_length + 1)
		copy_to(owned, byte_length, 0, 0)
		owned[byte_length] = 0u8
		return new FlatString.full(owned, byte_length, 0, char_length)
	end

	# Cleans a CString if necessary
	#
	# Scans the first `len` bytes of `self`. Each byte that cannot start an
	# accepted sequence is counted as one char and later replaced by the three
	# bytes `0xEF 0xBF 0xBD` in a newly allocated `CString`.
	# When nothing has to be replaced, the returned string wraps `self` itself.
	fun clean_utf8(len: Int): FlatString do
		var bad_offsets: nullable Array[Int] = null
		var out_len = len
		var cursor = 0
		var char_count = 0
		var left = len
		while left > 0 do
			# Skip groups of 4 bytes as long as none has its high bit set
			while left >= 4 do
				var word = fetch_4_chars(cursor)
				if word & 0x80808080u32 != 0u32 then break
				cursor += 4
				char_count += 4
				left -= 4
			end
			if left == 0 then break

			var lead = self[cursor]
			# High bit clear: a single-byte char, nothing to validate
			if lead & 0x80u8 == 0x00u8 then
				cursor += 1
				char_count += 1
				left -= 1
				continue
			end

			# Check the marker bits of the lead byte against the announced length
			var seq_len = length_of_char_at(cursor)
			var lead_ok: Bool
			if seq_len == 1 then
				lead_ok = lead & 0x80u8 == 0u8
			else if seq_len == 2 then
				lead_ok = lead & 0xE0u8 == 0xC0u8
			else if seq_len == 3 then
				lead_ok = lead & 0xF0u8 == 0xE0u8
			else
				lead_ok = lead & 0xF8u8 == 0xF0u8
			end

			# A rejected byte advances by 1; an accepted char by its own length.
			# The char is decoded only when the lead byte passed the check above.
			var valid = false
			var step = 1
			if lead_ok then
				var c = char_at(cursor)
				var cp = c.code_point
				var in_range: Bool
				if seq_len == 1 then
					in_range = cp >= 0 and cp <= 0x7F
				else if seq_len == 2 then
					in_range = cp >= 0x80 and cp <= 0x7FF
				else if seq_len == 3 then
					in_range = cp >= 0x800 and cp <= 0xFFFF
					in_range = in_range and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
				else
					in_range = cp >= 0x10000 and cp <= 0x10FFFF
				end
				if in_range then
					valid = true
					step = c.u8char_len
				end
			end

			if not valid then
				if bad_offsets == null then bad_offsets = new Array[Int]
				bad_offsets.add cursor
				# One rejected byte becomes three bytes in the output
				out_len += 2
			end
			cursor += step
			left -= step
			char_count += 1
		end

		var ret = self
		if out_len != len then
			ret = new CString(out_len)
			var src_from = 0
			var dst_at = 0
			for bad in bad_offsets.as(not null) do
				# Copy the untouched bytes located before the rejected one
				var chunk = bad - src_from
				copy_to(ret, chunk, src_from, dst_at)
				dst_at += chunk
				ret[dst_at] = 0xEFu8
				ret[dst_at + 1] = 0xBFu8
				ret[dst_at + 2] = 0xBDu8
				src_from = bad + 1
				dst_at += 3
			end
			# Copy what remains after the last rejected byte
			copy_to(ret, len - src_from, src_from, dst_at)
		end
		return new FlatString.full(ret, out_len, 0, char_count)
	end

	# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
	#
	# Very unsafe, make sure to have room for this char prior to calling this function.
	private fun set_char_at(pos: Int, c: Char) do
		var cp = c.code_point
		if cp < 128 then
			self[pos] = cp.to_b
			return
		end

		# Lowest 6 bits, stored in the last byte of every multi-byte form
		var tail = (0x80 | (cp & 0x3F)).to_b
		var ln = c.u8char_len
		if ln == 2 then
			self[pos] = (0xC0 | ((cp >> 6) & 0x1F)).to_b
			self[pos + 1] = tail
		else if ln == 3 then
			self[pos] = (0xE0 | ((cp >> 12) & 0x0F)).to_b
			self[pos + 1] = (0x80 | ((cp >> 6) & 0x3F)).to_b
			self[pos + 2] = tail
		else if ln == 4 then
			self[pos] = (0xF0 | ((cp >> 18) & 0x07)).to_b
			self[pos + 1] = (0x80 | ((cp >> 12) & 0x3F)).to_b
			self[pos + 2] = (0x80 | ((cp >> 6) & 0x3F)).to_b
			self[pos + 3] = tail
		end
	end
end
1471
redef class Int
	# Displayable representation of `self`, signed and in base 10
	#
	#     assert 1.to_s == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s
	do
		# Shortcut for the two most common values
		if self == 0 then return "0"
		if self == 1 then return "1"

		# One extra byte is reserved after the `digits` bytes for the final 0
		var digits = int_to_s_len
		var buf = new CString(digits + 1)
		buf[digits] = 0u8
		native_int_to_s(buf, digits + 1)
		return new FlatString.full(buf, digits, 0, digits)
	end
end
1489
redef class Array[E]

	# Fast implementation
	#
	# The `to_s` of every non-null element is collected first, then all the
	# bytes are copied into a single `CString` of the exact total size.
	redef fun plain_to_s
	do
		var count = _length
		if count == 0 then return ""
		var slots = _items.as(not null)
		var head = slots[0]
		if count == 1 then
			if head == null then return ""
			return head.to_s
		end

		# First pass: stringify the elements and sum up their byte lengths
		var parts = new NativeArray[String](count)
		var total = 0
		var kept = 0
		for idx in [0 .. count[ do
			var elem = slots[idx]
			if elem == null then continue
			var text = elem.to_s
			total += text.byte_length
			parts[kept] = text
			kept += 1
		end

		# Second pass: concatenate the bytes, followed by a final 0
		var dest = new CString(total + 1)
		dest[total] = 0u8
		var dst_at = 0
		for idx in [0 .. kept[ do
			var text = parts[idx]
			if text isa FlatString then
				var nbytes = text._byte_length
				text._items.copy_to(dest, nbytes, text._first_byte, dst_at)
				dst_at += nbytes
			else
				# Other strings are copied one flat substring at a time
				for sub in text.substrings do
					var flat = sub.as(FlatString)
					var nbytes = flat._byte_length
					flat._items.copy_to(dest, nbytes, flat._first_byte, dst_at)
					dst_at += nbytes
				end
			end
		end
		return new FlatString.with_infos(dest, total, 0)
	end
end
1539
redef class NativeArray[E]
	# Concatenate all the strings of `self` into a single `FlatString`
	#
	# `self` must be a `NativeArray[String]`.
	redef fun native_to_s
	do
		assert self isa NativeArray[String]
		var count = length
		var strs = self

		# First pass: total number of bytes to allocate
		var total = 0
		for idx in [0 .. count[ do total += strs[idx].byte_length

		# Second pass: concatenate the bytes, followed by a final 0
		var dest = new CString(total + 1)
		dest[total] = 0u8
		var dst_at = 0
		for idx in [0 .. count[ do
			var text = strs[idx]
			if text isa FlatString then
				var nbytes = text._byte_length
				text._items.copy_to(dest, nbytes, text._first_byte, dst_at)
				dst_at += nbytes
			else
				# Other strings are copied one flat substring at a time
				for sub in text.substrings do
					var flat = sub.as(FlatString)
					var nbytes = flat._byte_length
					flat._items.copy_to(dest, nbytes, flat._first_byte, dst_at)
					dst_at += nbytes
				end
			end
		end
		return new FlatString.with_infos(dest, total, 0)
	end
end
1576
redef class Map[K,V]
	# Concatenate the couples of `self` into a single string
	#
	# Each couple is written as key, `couple_sep`, value; consecutive couples
	# are separated by `sep`. A null key or value is written as `<null>`.
	# An empty map gives an empty string.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var out = new Buffer
		var it = iterator
		var is_first = true
		while it.is_ok do
			# The separator goes between couples only, never before the first one
			if is_first then
				is_first = false
			else
				out.append(sep)
			end
			var key = it.key
			var value = it.item
			out.append("{key or else "<null>"}{couple_sep}{value or else "<null>"}")
			it.next
		end
		return out.to_s
	end
end