stdlib: Strings, modified concatenation operations (avoids creating a useless buffer)
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # Basic manipulations of strings of characters
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
# Common subclass for String and Buffer
abstract class AbstractString
	super AbstractArrayRead[Char]

	# Native character storage backing this string
	readable private var _items: NativeString

	# The character at `index', read directly from the native storage
	redef fun [](index) do return _items[index]

	# Create a substring of at most `count' characters starting at `from'.
	# A negative `from' is clamped to 0 (and shortens the result accordingly);
	# a range running past the end is truncated.
	#
	#     "abcd".substring(1, 2)   # --> "bc"
	#     "abcd".substring(-1, 2)  # --> "a"
	#     "abcd".substring(1, 0)   # --> ""
	#     "abcd".substring(2, 5)   # --> "cd"
	fun substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From here on `count' is the (exclusive) end position
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from `self' beginning at the `from' position.
	# A `from' at or past the end yields the empty string.
	#
	#     "abcd".substring_from(1)   # --> "bcd"
	#     "abcd".substring_from(-1)  # --> "abcd"
	#     "abcd".substring_from(2)   # --> "cd"
	#     "abcd".substring_from(4)   # --> ""
	fun substring_from(from: Int): String
	do
		# Nothing left to take: return "" instead of aborting
		if from >= length then return ""
		return substring(from, length - from)
	end

	# Does `self' contain the string `str' starting at position `pos'?
	#
	#     "abcd".has_substring("bc", 1)  # --> true
	#     "abcd".has_substring("bc", 2)  # --> false
	fun has_substring(str: String, pos: Int): Bool
	do
		# Both strings are scanned backward, from the last character of `str'
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `myindex' is the last position of `self' to compare: it must be a
		# valid index (< length). `itsindex > myindex' rejects a negative `pos'.
		if myindex >= length or itsindex > myindex then return false
		# `str' may be a slice of a larger storage: shift into its window
		var its_index_from = str._indexFrom
		itsindex += its_index_from
		while itsindex >= its_index_from do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by `prefix'?
	#
	#     "abcd".has_prefix("abc")  # --> true
	#     "abcd".has_prefix("bc")   # --> false
	fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by `suffix'?
	#
	#     "abcd".has_suffix("abc")  # --> false
	#     "abcd".has_suffix("bcd")  # --> true
	fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer
	fun to_i: Int
	do
		# Shortcut: delegate to the native `atoi'
		return to_s.to_cstring.atoi
	end

	# If `self' contains a float, return the corresponding float
	fun to_f: Float
	do
		# Shortcut: delegate to the native `atof'
		return to_s.to_cstring.atof
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	fun to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base.
	#
	# Parsing stops at the first character whose value is not a valid digit
	# of `base' (value >= `base'); what was read so far is returned.
	# A character with a negative value marks the result as negative
	# (NOTE(review): presumably `Char::to_i' yields a negative value for '-' -- confirm in kernel).
	fun a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			# Valid digits of a base are 0 .. base-1
			if v >= base then
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# A upper case version of `self'
	fun to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# A lower case version of `self'
	fun to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end

	# Output each character of the string in order
	redef fun output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
171
# Immutable strings of characters.
#
# A String is a window [`indexFrom', `indexTo'] over a native storage that
# may be shared with other strings (substrings do not copy characters).
class String
	super Comparable
	super AbstractString
	super StringCapable

	redef type OTHER: String

	# Position in `items' of the first character of this string
	readable var _indexFrom: Int

	# Position in `items' of the last character of this string (inclusive)
	readable var _indexTo: Int

	################################################
	#       AbstractString specific methods        #
	################################################

	# Access a character at index in String
	#
	redef fun [](index) do
		assert index >= 0
		assert (index + _indexFrom) < (_indexFrom + _length)
		return items[index + _indexFrom]
	end

	# Create a substring of at most `count' characters starting at `from'.
	# The result shares the native storage of `self' (no character is copied).
	# A negative `from' is clamped to 0 (and shortens the result accordingly);
	# a range running past the end is truncated; a `from' at or past the end
	# yields the empty string.
	#
	#     "abcd".substring(1, 2)   # --> "bc"
	#     "abcd".substring(-1, 2)  # --> "a"
	#     "abcd".substring(1, 0)   # --> ""
	#     "abcd".substring(2, 5)   # --> "cd"
	#     "abcd".substring(10, 2)  # --> ""
	redef fun substring(from: Int, count: Int): String
	do
		assert count >= 0

		if from < 0 then
			count += from
			if count < 0 then count = 0
			from = 0
		end

		# Nothing to take: avoid building a window with a negative length
		if count == 0 or from >= _length then return ""

		var realFrom = _indexFrom + from
		var realTo = realFrom + count - 1
		if realTo > _indexTo then realTo = _indexTo

		return new String.from_substring(realFrom, realTo, _items)
	end

	# Create a substring from `self' beginning at the `from' position
	#
	#     "abcd".substring_from(1)   # --> "bcd"
	#     "abcd".substring_from(-1)  # --> "abcd"
	#     "abcd".substring_from(2)   # --> "cd"
	redef fun substring_from(from: Int): String
	do
		if from > _length then return ""
		if from < 0 then from = 0
		# `substring' truncates the count to what is available
		return substring(from, _length)
	end

	# Does `self' contain the string `str' starting at position `pos'?
	#
	#     "abcd".has_substring("bc", 1)  # --> true
	#     "abcd".has_substring("bc", 2)  # --> false
	redef fun has_substring(str: String, pos: Int): Bool
	do
		# Both strings are scanned backward, from the last character of `str'
		var itsindex = str._length - 1

		var myindex = pos + itsindex
		var myitems = _items

		var itsitems = str._items

		# `myindex' must be a valid position of `self' (< _length): the storage
		# may hold characters of a larger string after our window.
		# `itsindex > myindex' rejects a negative `pos'.
		if myindex >= _length or itsindex > myindex then return false

		# Shift both indexes into their respective windows
		var itsindexfrom = str.indexFrom
		itsindex += itsindexfrom
		myindex += indexFrom

		while itsindex >= itsindexfrom do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end

		return true
	end

	# A upper case version of `self'
	redef fun to_upper: String
	do
		var outstr = calloc_string(self._length + 1)
		var index = 0

		var myitems = self._items
		var index_from = self._indexFrom
		var max = self._indexTo

		while index_from <= max do
			outstr[index] = myitems[index_from].to_upper
			index += 1
			index_from += 1
		end

		outstr[self.length] = '\0'

		return new String.with_native(outstr, self._length)
	end

	# A lower case version of `self'
	redef fun to_lower : String
	do
		var outstr = calloc_string(self._length + 1)
		var index = 0

		var myitems = self._items
		var index_from = self._indexFrom
		var max = self._indexTo

		while index_from <= max do
			outstr[index] = myitems[index_from].to_lower
			index += 1
			index_from += 1
		end

		outstr[self.length] = '\0'

		return new String.with_native(outstr, self._length)
	end

	# Output each character of the window [`indexFrom', `indexTo'] in order
	redef fun output
	do
		var i = self._indexFrom
		var last = self._indexTo
		# Bound on the last position of the window, not on the length:
		# the window may start anywhere in the storage
		while i <= last do
			_items[i].output
			i += 1
		end
	end

	##################################################
	#            String Specific Methods             #
	##################################################

	# Creates a String object as a substring of another String.
	# `from' and `to' are inclusive positions in `internalString', which is shared.
	private init from_substring(from: Int, to: Int, internalString: NativeString)
	do
		_items = internalString
		_indexFrom = from
		_indexTo = to
		_length = to - from + 1
	end

	# Create a new string from a given char *.
	# The first `size' characters of `nat' are used; `nat' is not copied.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_items = nat
		_length = size
		_indexFrom = 0
		_indexTo = size - 1
	end

	# Create a new string from a null terminated char *.
	init from_cstring(str: NativeString)
	do
		with_native(str,str.cstring_length)
	end

	# Return a null terminated char *
	#
	# When this string is only a part of its storage, a fresh nul-terminated
	# copy is returned; otherwise the storage itself is returned.
	fun to_cstring: NativeString
	do
		if _indexFrom > 0 or _indexTo != items.cstring_length-1 then
			var newItems = calloc_string(length+1)
			self.items.copy_to(newItems, _length, _indexFrom, 0)
			newItems[length] = '\0'
			return newItems
		end
		return _items
	end

	# Two strings are equal if they hold the same characters in the same order
	redef fun ==(o)
	do
		if not o isa String or o is null then return false

		if self.object_id == o.object_id then return true

		var l = _length

		if o._length != l then return false

		var i = _indexFrom
		var j = o._indexFrom
		var max = l + _indexFrom
		var itsitems = o._items
		var myitems = self._items

		while i < max do
			if myitems[i] != itsitems[j] then return false
			i += 1
			j += 1
		end

		return true
	end

	# Lexicographic comparison on the character codes.
	# When one string is a prefix of the other, the shorter one is the smaller.
	redef fun <(s)
	do
		if self.object_id == s.object_id then return false

		var my_items = self._items
		var its_items = s._items
		var currIdSelf = self._indexFrom
		var currIdOther = s._indexFrom
		var self_upper_bound = self._length + currIdSelf
		var other_upper_bound = s._length + currIdOther

		while currIdSelf < self_upper_bound and currIdOther < other_upper_bound do
			var c1 = my_items[currIdSelf].ascii
			var c2 = its_items[currIdOther].ascii

			if c1 < c2 then
				return true
			else if c2 < c1 then
				return false
			end

			currIdSelf += 1
			currIdOther += 1
		end

		# All the common positions are equal: only the lengths can decide
		return self._length < s._length
	end

	# The concatenation of `self' with `s'
	fun +(s: String): String
	do
		var newString = calloc_string(_length + s._length + 1)

		self._items.copy_to(newString, _length, _indexFrom, 0)
		s._items.copy_to(newString, s._length, s._indexFrom, _length)

		newString[self._length + s._length] = '\0'

		return new String.with_native(newString, _length + s._length)
	end

	# i repetitions of self
	fun *(i: Int): String
	do
		assert i >= 0
		var r = calloc_string((_length * i) + 1)

		r[_length * i] = '\0'

		var lastStr = new String.with_native(r, (_length * i))

		# Fill the slots from the last one to the first one
		while i > 0 do
			self._items.copy_to(r, _length, _indexFrom, _length*(i-1))
			i -= 1
		end

		return lastStr
	end

	redef fun to_s do return self

	redef fun hash
	do
		# djb2 hash algorithm, applied from the last character to the first
		var h = 5381
		var myitems = self.items
		var index_from = self._indexFrom
		var i = index_from + _length - 1

		while i >= index_from do
			h = (h * 32) + h + myitems[i].ascii
			i -= 1
		end

		return h
	end
end
467
# Mutable strings of characters.
class Buffer
	super AbstractString
	super Comparable
	super StringCapable
	super AbstractArray[Char]

	redef type OTHER: String

	# Number of characters the current native storage can hold
	readable private var _capacity: Int

	# Build an empty buffer with a small default capacity.
	init
	do
		with_capacity(5)
	end

	# Build a buffer holding a private copy of the characters of `s'.
	init from(s: String)
	do
		var size = s.length
		_capacity = size + 1
		_length = size
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, s._indexFrom, 0)
	end

	# Build an empty buffer able to hold `cap' characters without growing.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		# One extra slot is allocated on top of the requested capacity
		_items = calloc_string(cap+1)
		_capacity = cap
		_length = 0
	end

	# Replace the character at `index'; writing at `length' appends instead.
	redef fun []=(index, item)
	do
		if index != length then
			assert index >= 0 and index < length
			_items[index] = item
		else
			add(item)
		end
	end

	# Append the character `c', growing the storage when it is full.
	redef fun add(c)
	do
		var used = length
		if used >= _capacity then enlarge(used + 5)
		_items[used] = c
		_length = used + 1
	end

	# Make sure the storage can hold at least `cap' characters.
	redef fun enlarge(cap)
	do
		var target = _capacity
		if target >= cap then return
		# Grow geometrically until strictly above the request
		while target <= cap do
			target = target * 2 + 2
		end
		var bigger = calloc_string(target+1)
		_items.copy_to(bigger, length, 0, 0)
		_items = bigger
		_capacity = target
	end

	# Append all the characters of `s'; a String is copied as one native chunk.
	redef fun append(s)
	do
		if not s isa String then
			super
			return
		end
		var extra = s.length
		var needed = _length + extra
		if _capacity < needed then enlarge(needed)
		s.items.copy_to(_items, extra, s._indexFrom, _length)
		_length += extra
	end

	# An immutable snapshot of the current content (characters are copied).
	redef fun to_s: String
	do
		var size = length
		var copy = calloc_string(size+1)
		_items.copy_to(copy, size, 0, 0)

		# The slot after the last character holds '\0' so the result is nul-terminated
		copy[length] = '\0'

		return new String.with_native(copy, length)
	end

	# Lexicographic comparison on character codes; a strict prefix is smaller.
	redef fun <(s)
	do
		var mine = length
		var theirs = s.length
		var pos = 0
		while pos < mine and pos < theirs do
			var a = self[pos].ascii
			var b = s[pos].ascii
			if a < b then return true
			if b < a then return false
			pos += 1
		end
		return mine < theirs
	end

	# Two buffers are equal when they hold the same characters in the same order.
	redef fun ==(o)
	do
		if not o isa Buffer or o is null then return false
		var size = length
		if o.length != size then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < size do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end
end
592
593 ###############################################################################
594 # Refinement #
595 ###############################################################################
596
redef class Object
	# Textual form of `self' meant for users; defaults to `inspect'.
	fun to_s: String
	do
		return inspect
	end

	# Name of the class of `self', as a NativeString.
	private fun native_class_name: NativeString is intern

	# Name of the class of `self'.
	# FIXME: real type information is not available at runtime.
	# Therefore, for instance, an instance of List[Bool] has just
	# "List" for class_name
	fun class_name: String
	do
		var native = native_class_name
		return new String.from_cstring(native)
	end

	# Textual form of `self' meant for developers.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	fun inspect: String do return "<{inspect_head}>"

	# The "CLASSNAME:#OBJECTID" part of `inspect'.
	# Mainly useful when redefining the inspect method
	protected fun inspect_head: String do return "{class_name}:#{object_id.to_hex}"

	# The program arguments, as provided by `sys'
	protected fun args: Sequence[String] do return sys.args
end
629
redef class Bool
	# "true" or "false"; each literal is built only once
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
640
redef class Int
	# Write into `s' the digits of `self' in base `base', with a leading '-' when `self' is negative.
	# `s' must already hold `digit_count(base)' characters: digits are written in place, last one first.
	# NOTE: `signed' is currently not consulted.
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single digit
		if self == 0 then
			s[0] = '0'
			return
		end

		# Work on the absolute value, the sign goes in the first slot
		var rest = self
		if self < 0 then
			rest = - self
			s[0] = '-'
		end

		# Least significant digit goes in the last slot
		var slot = digit_count(base) - 1
		while slot >= 0 and rest > 0 do
			s[slot] = (rest % base).to_c
			rest = rest / base
			slot -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s
	do
		return to_base(10,true)
	end

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String
	do
		return to_base(16,false)
	end

	# return displayable int in base base and signed
	fun to_base(base: Int, signed: Bool): String
	do
		# Pre-size the buffer with blanks, then overwrite them with the digits
		var blanks = " " * digit_count(base)
		var buf = new Buffer.from(blanks)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
681
redef class Float
	# Representation of `self' with 6 digits after the '.'
	redef fun to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	# The value is truncated (not rounded) to `nb' decimals.
	#
	#     3.25.to_precision(2)    # --> "3.25"
	#     1.0625.to_precision(4)  # --> "1.0625"
	#     2.5.to_precision(0)     # --> "2"
	fun to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		# dec = 10 ^ nb
		var i = to_i
		var dec = 1.0
		var n = nb
		while n > 0 do
			dec = dec * 10.0
			n -= 1
		end

		# Fractional part scaled to an integer; it carries the sign of `self'
		var d = ((self-i.to_f)*dec).to_i

		var s = new Buffer
		if d < 0 then
			d = -d
			# `i.to_s' shows no sign when the integral part is 0 (e.g. -0.5)
			if i == 0 then s.add('-')
		end
		s.append(i.to_s)
		s.add('.')

		# Left-pad the decimals with zeros so that 0.05 is not printed as 0.5
		var ds = d.to_s
		var pad = nb - ds.length
		while pad > 0 do
			s.add('0')
			pad -= 1
		end
		s.append(ds)
		return s.to_s
	end
end
700
redef class Char
	# The one-character string made of `self'
	redef fun to_s
	do
		var single = new Buffer.with_capacity(1)
		single.add(self)
		return single.to_s
	end
end
709
redef class Collection[E]
	# Concatenation of the `to_s' of every non-null element.
	redef fun to_s
	do
		var out = new Buffer
		for item in self do
			if item != null then out.append(item.to_s)
		end
		return out.to_s
	end

	# Concatenation of the `to_s' of the elements, with `sep' between consecutive elements.
	# A null element contributes nothing, but its separator is still emitted.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var out = new Buffer
		var it = iterator

		# The first element is not preceded by a separator
		var head = it.item
		if head != null then out.append(head.to_s)
		it.next

		# Every following element is preceded by `sep'
		while it.is_ok do
			out.append(sep)
			var cur = it.item
			if cur != null then out.append(cur.to_s)
			it.next
		end
		return out.to_s
	end
end
742
redef class Array[E]
	# Index-based concatenation of the `to_s' of every non-null element
	# (fast implementation)
	redef fun to_s
	do
		var out = new Buffer
		var count = length
		var pos = 0
		while pos < count do
			var item = self[pos]
			if item != null then
				out.append(item.to_s)
			end
			pos += 1
		end
		return out.to_s
	end
end
758
redef class Map[K,V]
	# Concatenation of the 'key value' couples of the map.
	# Inside a couple, key and value are separated by `couple_sep'.
	# Consecutive couples are separated by `sep'.
	# A couple whose value is null contributes nothing, but its separator is still emitted.
	fun join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var out = new Buffer
		var it = iterator

		# The first couple is not preceded by a separator
		var k = it.key
		var e = it.item
		if e != null then
			out.append("{k}{couple_sep}{e}")
		end
		it.next

		# Every following couple is preceded by `sep'
		while it.is_ok do
			out.append(sep)
			k = it.key
			e = it.item
			if e != null then
				out.append("{k}{couple_sep}{e}")
			end
			it.next
		end
		return out.to_s
	end
end
787
788 ###############################################################################
789 # Native classes #
790 ###############################################################################
791
# Native strings are simple C char *
class NativeString
	# The character at position `index'
	fun [](index: Int): Char is intern

	# Store `item' at position `index'
	fun []=(index: Int, item: Char) is intern

	# Copy `length' characters of `self', starting at `from', into `dest' starting at `to'
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	fun cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do
			pos += 1
		end
		return pos
	end

	# Integer parsed from the native string
	fun atoi: Int is intern

	# Float parsed from the native string by the C function `atof'
	fun atof: Float is extern "atof"
end
808
809 # StringCapable objects can create native strings (String and Buffer both inherit from it)
810 interface StringCapable
811 protected fun calloc_string(size: Int): NativeString is intern # Allocate a native string of `size' chars; NOTE(review): presumably zero-filled like C calloc -- confirm in the runtime
812 end
813
redef class Sys
	# Command-line arguments, built on first request (null until then)
	var _args_cache: nullable Sequence[String]

	# The command-line arguments of the program (the program name is not included)
	redef fun args: Sequence[String]
	do
		if _args_cache != null then return _args_cache.as(not null)
		init_args
		return _args_cache.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String
	do
		var native_name = native_argv(0)
		return new String.from_cstring(native_name)
	end

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	private fun init_args
	do
		var count = native_argc
		var collected = new Array[String].with_capacity(0)
		# Native slot 0 is the program name: argument `pos' lives in slot `pos + 1'
		var pos = 0
		while pos + 1 < count do
			collected[pos] = new String.from_cstring(native_argv(pos + 1))
			pos += 1
		end
		_args_cache = collected
	end

	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.

	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.
end
846