stdlib : Cleaned the code for the Strings, renamed attributes to be compliant with...
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # Basic manipulations of strings of characters
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
# Common subclass for String and Buffer
#
# The characters are kept in the native array `_items'.
# The methods of this class read that array from position 0; subclasses
# that use another layout (see `String') refine them.
abstract class AbstractString
	super AbstractArrayRead[Char]

	# Native array holding the characters
	readable private var _items: NativeString

	# Character stored at `index' in the native array (no bound check is done here)
	redef fun [](index) do return _items[index]

	# Create a substring.
	#
	#     "abcd".substring(1, 2)   # --> "bc"
	#     "abcd".substring(-1, 2)  # --> "a"
	#     "abcd".substring(1, 0)   # --> ""
	#     "abcd".substring(2, 5)   # --> "cd"
	#
	# `count' must not be negative.
	# The requested range is clipped to the bounds of `self'.
	fun substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From here, `count' is the (exclusive) end position of the range
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from `self' beginning at the 'from' position
	#
	#     "abcd".substring_from(1)   # --> "bcd"
	#     "abcd".substring_from(-1)  # --> "abcd"
	#     "abcd".substring_from(2)   # --> "cd"
	#     "abcd".substring_from(4)   # --> ""
	fun substring_from(from: Int): String
	do
		# Nothing is left at or after the end (this also covers the empty string)
		if from >= length then return ""
		return substring(from, length - from)
	end

	# Does self have a substring 'str' starting from position 'pos'
	#
	#     "abcd".has_substring("bc",1)  # --> true
	#     "abcd".has_substring("bc",2)  # --> false
	fun has_substring(str: String, pos: Int): Bool
	do
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `myindex' is the position of the last character to compare:
		# it must be a valid position of `self'.
		# `itsindex > myindex' rejects a negative `pos'.
		if myindex >= length or itsindex > myindex then return false
		var its_index_from = str._index_from
		itsindex += its_index_from
		# Compare backward, from the last character of `str' to its first one
		while itsindex >= its_index_from do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by 'prefix'
	#
	#     "abcd".has_prefix("ab")  # --> true
	#     "abcd".has_prefix("bc")  # --> false
	fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by 'suffix'
	#
	#     "abcd".has_suffix("abc")  # --> false
	#     "abcd".has_suffix("bcd")  # --> true
	fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer
	fun to_i: Int
	do
		# Shortcut
		return to_s.to_cstring.atoi
	end

	# If `self' contains a float, return the corresponding float
	fun to_f: Float
	do
		# Shortcut
		return to_s.to_cstring.atof
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	fun to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base
	#
	# The conversion stops at the first character whose value is not a
	# digit of `base' (ie. a value greater than or equal to `base').
	# A character with a negative value makes the result negative.
	fun a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			if v >= base then
				# Valid digits go from 0 to base - 1
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# Returns true if the string contains only Numeric values (and one "," or one "." character)
	fun is_numeric: Bool
	do
		var has_point_or_comma = false
		for i in self
		do
			if not i.is_numeric
			then
				# A single decimal separator is tolerated
				if (i == '.' or i == ',') and not has_point_or_comma
				then
					has_point_or_comma = true
				else
					return false
				end
			end
		end
		return true
	end

	# A upper case version of `self'
	fun to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# A lower case version of `self'
	fun to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end

	# Output each character of `self', in order
	redef fun output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
190
# Immutable strings of characters.
#
# A string is a view on the range `_index_from' .. `_index_to' of the
# native array `_items'. Substrings share the native array of the string
# they come from, so no method may assume that the view starts at
# position 0 or that it is followed by a '\0'.
class String
	super Comparable
	super AbstractString
	super StringCapable

	redef type OTHER: String

	# Index in _items of the start of the string
	readable var _index_from: Int

	# Index in _items of the last item of the string
	readable var _index_to: Int

	################################################
	#       AbstractString specific methods        #
	################################################

	# Access a character at index in String
	#
	# `index' is relative to the start of the string and must be valid.
	redef fun [](index) do
		assert index >= 0
		# Check that the index (+ index_from) is not larger than indexTo
		# In other terms, if the index is valid
		assert (index + _index_from) <= _index_to
		return _items[index + _index_from]
	end

	# Create a substring.
	#
	#     "abcd".substring(1, 2)   # --> "bc"
	#     "abcd".substring(-1, 2)  # --> "a"
	#     "abcd".substring(1, 0)   # --> ""
	#     "abcd".substring(2, 5)   # --> "cd"
	#     "abcd".substring(10, 2)  # --> ""
	#
	# A "from" index < 0 will be replaced by 0 and `count' is reduced
	# accordingly (but never below 0).
	#
	# The result shares the native array of `self'.
	redef fun substring(from: Int, count: Int): String
	do
		assert count >= 0

		if from < 0 then
			count += from
			if count < 0 then count = 0
			from = 0
		end

		# Empty request, or start beyond the last character: nothing to extract.
		# Without this guard a start after the end would build a string
		# with a negative length.
		if count == 0 or from >= _length then return ""

		var realFrom = _index_from + from

		# Clip the range to the end of `self'
		if (realFrom + count) > _index_to then return new String.from_substring(realFrom, _index_to, _items)

		return new String.from_substring(realFrom, realFrom + count - 1, _items)
	end

	# Create a substring from `self' beginning at the 'from' position
	#
	#     "abcd".substring_from(1)   # --> "bcd"
	#     "abcd".substring_from(-1)  # --> "abcd"
	#     "abcd".substring_from(2)   # --> "cd"
	#
	# As with substring, a "from" index < 0 will be replaced by 0
	redef fun substring_from(from: Int): String
	do
		if from > _length then return ""
		if from < 0 then from = 0
		return substring(from, _length)
	end

	# Does self have a substring 'str' starting from position 'pos'
	#
	#     "abcd".has_substring("bc",1)  # --> true
	#     "abcd".has_substring("bc",2)  # --> false
	redef fun has_substring(str: String, pos: Int): Bool
	do
		var itsindex = str._length - 1

		var myindex = pos + itsindex
		var myitems = _items

		var itsitems = str._items

		# `myindex' is the position (relative to the start of `self') of the
		# last character to compare: it must be inside `self', otherwise the
		# characters that follow the view in the shared native array would
		# be compared. `itsindex > myindex' rejects a negative `pos'.
		if myindex >= _length or itsindex > myindex then return false

		var itsindexfrom = str.index_from
		itsindex += itsindexfrom
		myindex += index_from

		# Compare backward, from the last character of `str' to its first one
		while itsindex >= itsindexfrom do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end

		return true
	end

	# A upper case version of `self'
	redef fun to_upper: String
	do
		var outstr = calloc_string(self._length + 1)
		var out_index = 0

		var myitems = self._items
		var index_from = self._index_from
		var max = self._index_to

		while index_from <= max do
			outstr[out_index] = myitems[index_from].to_upper
			out_index += 1
			index_from += 1
		end

		outstr[self.length] = '\0'

		return new String.with_native(outstr, self._length)
	end

	# A lower case version of `self'
	redef fun to_lower : String
	do
		var outstr = calloc_string(self._length + 1)
		var out_index = 0

		var myitems = self._items
		var index_from = self._index_from
		var max = self._index_to

		while index_from <= max do
			outstr[out_index] = myitems[index_from].to_lower
			out_index += 1
			index_from += 1
		end

		outstr[self.length] = '\0'

		return new String.with_native(outstr, self._length)
	end

	# Output each character of the view, in order
	redef fun output
	do
		var i = self._index_from
		var imax = self._index_to
		while i <= imax do
			_items[i].output
			i += 1
		end
	end

	##################################################
	#              String Specific Methods           #
	##################################################

	# Creates a String object as a substring of another String
	#
	# From : index to start at
	#
	# To : Index to stop at (from + count -1)
	#
	# Both indexes are positions in `internalString', which is shared, not copied.
	private init from_substring(from: Int, to: Int, internalString: NativeString)
	do
		_items = internalString
		_index_from = from
		_index_to = to
		_length = to - from + 1
	end

	# Create a new string from a given char *.
	#
	# The first `size' characters of `nat' are the content of the string.
	# `nat' is used directly, it is not copied.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_items = nat
		_length = size
		_index_from = 0
		_index_to = _length - 1
	end

	# Create a new string from a null terminated char *.
	init from_cstring(str: NativeString)
	do
		with_native(str,str.cstring_length)
	end

	# Return a null terminated char *
	#
	# The native array of `self' is returned when the view covers it exactly,
	# otherwise the view is copied into a fresh null terminated array.
	fun to_cstring: NativeString
	do
		#return items
		if _index_from > 0 or _index_to != items.cstring_length - 1 then
			var newItems = calloc_string(_length + 1)
			self.items.copy_to(newItems, _length, _index_from, 0)
			newItems[length] = '\0'
			return newItems
		end
		return _items
	end

	# Two strings are equal if they have the same characters in the same order
	redef fun ==(other)
	do
		if not other isa String or other is null then return false

		if self.object_id == other.object_id then return true

		var my_length = _length

		if other._length != my_length then return false

		var my_index = _index_from
		var its_index = other._index_from

		var last_iteration = my_index + my_length

		var itsitems = other._items
		var myitems = self._items

		while my_index < last_iteration do
			if myitems[my_index] != itsitems[its_index] then return false
			my_index += 1
			its_index += 1
		end

		return true
	end

	# The comparison between two strings is done on a lexicographical basis
	# Eg : "aa" < "b" => true
	#
	# When one string is a prefix of the other one, the shortest is the smallest.
	redef fun <(other)
	do
		if self.object_id == other.object_id then return false

		var my_items = self._items
		var its_items = other._items

		var my_length = self._length
		var its_length = other._length

		# Only the common prefix may be compared character by character:
		# going further would read characters that do not belong to the
		# shortest string (its native array may be shared with a longer one).
		var common = my_length
		if its_length < common then common = its_length

		var curr_id_self = self._index_from
		var curr_id_other = other._index_from

		var max_iterations = curr_id_self + common

		while curr_id_self < max_iterations do
			var my_curr_char = my_items[curr_id_self]
			var its_curr_char = its_items[curr_id_other]

			# The first difference decides
			if my_curr_char != its_curr_char then return my_curr_char < its_curr_char

			curr_id_self += 1
			curr_id_other += 1
		end

		# Same common prefix: the shortest string comes first
		return my_length < its_length
	end

	# The concatenation of `self' with `s'
	fun +(s: String): String
	do
		var my_length = self._length
		var its_length = s._length

		var target_string = calloc_string(my_length + its_length + 1)

		self._items.copy_to(target_string, my_length, _index_from, 0)
		s._items.copy_to(target_string, its_length, s._index_from, my_length)

		target_string[my_length + its_length] = '\0'

		return new String.with_native(target_string, my_length + its_length)
	end

	# i repetitions of self
	#
	# `i' must not be negative.
	fun *(i: Int): String
	do
		assert i >= 0

		var my_length = self._length

		var final_length = my_length * i

		var my_items = self._items

		# The view may start anywhere in the native array
		var my_start = self._index_from

		var target_string = calloc_string((final_length) + 1)

		target_string[final_length] = '\0'

		var current_last = 0

		for iteration in [1 .. i] do
			my_items.copy_to(target_string, my_length, my_start, current_last)
			current_last += my_length
		end

		return new String.with_native(target_string, final_length)
	end

	redef fun to_s do return self

	redef fun hash
	do
		# djb2 hash algorithm, applied from the last character to the first one
		var h = 5381
		var i = _length - 1

		var myitems = _items
		var strStart = _index_from

		i += strStart

		while i >= strStart do
			h = (h * 32) + h + myitems[i].ascii
			i -= 1
		end

		return h
	end
end
521
# Mutable strings of characters.
#
# The characters are stored from position 0 of the native array `_items'.
class Buffer
	super AbstractString
	super Comparable
	super StringCapable
	super AbstractArray[Char]

	redef type OTHER: String

	# Number of characters the native array is considered able to hold
	readable private var _capacity: Int

	# Create a new empty string.
	init
	do
		with_capacity(5)
	end

	# Create a new buffer with a copy of the characters of `s'.
	init from(s: String)
	do
		_capacity = s.length + 1
		_length = s.length
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, s._index_from, 0)
	end

	# Create a new empty string with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		# One more character than the capacity is allocated
		_items = calloc_string(cap+1)
		_capacity = cap
		_length = 0
	end

	# Replace the character at `index'.
	# Writing at position `length' appends the character.
	redef fun []=(index, item)
	do
		if index != length then
			assert index >= 0 and index < length
			_items[index] = item
		else
			add(item)
		end
	end

	# Append the character `c', growing the storage when it is full
	redef fun add(c)
	do
		var len = length
		if _capacity <= len then enlarge(len + 5)
		_items[len] = c
		_length += 1
	end

	# Make the capacity strictly greater than `cap'.
	# Nothing is done when the current capacity is already at least `cap'.
	redef fun enlarge(cap)
	do
		var new_cap = _capacity
		if cap <= new_cap then return
		while new_cap <= cap do new_cap = new_cap * 2 + 2
		var bigger = calloc_string(new_cap+1)
		_items.copy_to(bigger, length, 0, 0)
		_items = bigger
		_capacity = new_cap
	end

	# Append the characters of `s'.
	# A String is copied in a single native operation.
	redef fun append(s)
	do
		if not s isa String then
			super
		else
			var sl = s.length
			var needed = _length + sl
			if _capacity < needed then enlarge(needed)
			s.items.copy_to(_items, sl, s._index_from, _length)
			_length += sl
		end
	end

	# An immutable copy of the current content
	redef fun to_s: String
	do
		var len = length
		var copy = calloc_string(len+1)
		_items.copy_to(copy, len, 0, 0)

		# The character after the last one is '\0' so the result is a nul-terminated char *
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Lexicographic comparison with the string `s', based on the ascii value of the characters
	redef fun <(s)
	do
		var l1 = length
		var l2 = s.length
		var i = 0
		while i < l1 and i < l2 do
			var c1 = self[i].ascii
			var c2 = s[i].ascii
			# The first difference decides
			if c1 < c2 then return true
			if c2 < c1 then return false
			i += 1
		end
		# One is a prefix of the other one: the shortest comes first
		return l1 < l2
	end

	# Two buffers are equal if they hold the same characters in the same order
	redef fun ==(o)
	do
		if not o isa Buffer then return false
		var len = length
		if o.length != len then return false
		var mine = _items
		var theirs = o._items
		var i = 0
		while i < len do
			if mine[i] != theirs[i] then return false
			i += 1
		end
		return true
	end
end
646
647 ###############################################################################
648 # Refinement #
649 ###############################################################################
650
redef class Object
	# Representation of `self' meant for the user.
	# By default, it is the same as `inspect'.
	fun to_s: String do return inspect

	# Name of the class of `self', as a NativeString.
	private fun native_class_name: NativeString is intern

	# Name of the class of `self'.
	# FIXME: real type information is not available at runtime.
	# Therefore, for instance, an instance of List[Bool] has just
	# "List" for class_name
	fun class_name: String
	do
		return new String.from_cstring(native_class_name)
	end

	# Representation of `self' meant for the developer.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	fun inspect: String do return "<{inspect_head}>"

	# Return "CLASSNAME:#OBJECTID".
	# This function is mainly used with the redefinition of the inspect method
	protected fun inspect_head: String do return "{class_name}:#{object_id.to_hex}"

	# The arguments of the program, as given by `sys'
	protected fun args: Sequence[String] do return sys.args
end
683
redef class Bool
	# "true" or "false", each string being built only once
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
694
redef class Int
	# Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if negative).
	# assume < to_c max const of char
	#
	# `s' must already be long enough: characters are written at existing
	# positions, from the last digit back to the first one.
	# NOTE: `signed' is not read here; a '-' is written for any negative value.
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero has a single digit
		if self == 0 then
			s[0] = '0'
			return
		end

		# Work on the absolute value, the sign takes the first position
		var rest = self
		if self < 0 then
			rest = - self
			s[0] = '-'
		end

		# Write the digits from the right to the left
		var pos = digit_count(base) - 1
		while pos >= 0 and rest > 0 do
			s[pos] = (rest % base).to_c
			rest = rest / base
			pos -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s do return to_base(10,true)

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String do return to_base(16,false)

	# return displayable int in base base and signed
	fun to_base(base: Int, signed: Bool): String
	do
		# Start with a buffer of blanks of the right size, then overwrite it
		var width = digit_count(base)
		var buf = new Buffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
735
redef class Float
	# Pretty print self, print needed decimals up to a max of 6.
	#
	# Trailing zeros are dropped, but one digit is always kept after the '.'.
	redef fun to_s do
		var str = to_precision( 6 )
		# Look for the last character that is not a '0'
		var j = str.length - 1
		while j >= 0 do
			var c = str[j]
			if c != '0' then
				# Just after the '.': keep one of the zeros
				if c == '.' then return str.substring( 0, j+2 )
				return str.substring( 0, j+1 )
			end
			j -= 1
		end
		return str
	end

	# `self' representation with `nb' digits after the '.'.
	fun to_precision(nb: Int): String import String::from_cstring `{
		int size;
		char *str;

		size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
		str = malloc(size + 1);
		sprintf(str, "%.*f", (int)nb, recv );

		return new_String_from_cstring( str );
	`}
end
767
redef class Char
	# A string made of this single character
	redef fun to_s
	do
		var buf = new Buffer.with_capacity(1)
		buf.add(self)
		return buf.to_s
	end

	# Returns true if the char is a numerical digit ('0' to '9')
	fun is_numeric: Bool do return self >= '0' and self <= '9'

	# Returns true if the char is a letter ('a' to 'z' or 'A' to 'Z')
	fun is_alpha: Bool
	do
		var lower = self >= 'a' and self <= 'z'
		var upper = self >= 'A' and self <= 'Z'
		return lower or upper
	end

	# Returns true if the char is a letter or a numerical digit
	fun is_alphanumeric: Bool do return self.is_numeric or self.is_alpha
end
800
redef class Collection[E]
	# Concatenate elements.
	# Null elements are skipped.
	redef fun to_s
	do
		var res = new Buffer
		for item in self do
			if item != null then res.append(item.to_s)
		end
		return res.to_s
	end

	# Concatenate and separate each elements with `sep'.
	#
	# A null element adds nothing, but the separators around it are kept.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer

		# `sep' goes before every element except the first one
		var need_sep = false
		for item in self do
			if need_sep then res.append(sep)
			need_sep = true
			if item != null then res.append(item.to_s)
		end
		return res.to_s
	end
end
833
redef class Array[E]
	# Concatenate elements (null elements are skipped).
	# Fast implementation: direct indexed access, no iterator
	redef fun to_s
	do
		var res = new Buffer
		var count = length
		var pos = 0
		while pos < count do
			var item = self[pos]
			pos += 1
			if item != null then res.append(item.to_s)
		end
		return res.to_s
	end
end
849
redef class Map[K,V]
	# Concatenate couples of 'key value'.
	# key and value are separated by 'couple_sep'.
	# couples are separated from each other by `sep'.
	#
	# A couple whose value is null adds nothing, but the separators around it are kept.
	fun join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer

		var it = iterator
		# `sep' goes before every couple except the first one
		var is_first = true
		while it.is_ok do
			if is_first then
				is_first = false
			else
				res.append(sep)
			end
			var k = it.key
			var e = it.item
			if e != null then res.append("{k}{couple_sep}{e}")
			it.next
		end
		return res.to_s
	end
end
878
879 ###############################################################################
880 # Native classes #
881 ###############################################################################
882
# Native strings are simple C char *
class NativeString
	# Character at position `index'
	fun [](index: Int): Char is intern

	# Store `item' at position `index'
	fun []=(index: Int, item: Char) is intern

	# Copy `length' characters, from position `from' of `self' to position `to' of `dest'
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	fun cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do
			pos += 1
		end
		return pos
	end

	# Integer value of the native string
	fun atoi: Int is intern

	# Float value of the native string, computed by the C function atof
	fun atof: Float is extern "atof"
end
899
# StringCapable objects can create native strings
interface StringCapable
	# A new native string, allocated for `size' characters
	protected fun calloc_string(size: Int): NativeString is intern
end
904
redef class Sys
	# The arguments of the program, once computed by `init_args'
	var _args_cache: nullable Sequence[String]

	# The arguments of the program (the program name is not included).
	# They are computed on the first call, then kept.
	redef fun args: Sequence[String]
	do
		var cache = _args_cache
		if cache == null then
			init_args
			cache = _args_cache
		end
		return cache.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String
	do
		var name = native_argv(0)
		return new String.from_cstring(name)
	end

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	private fun init_args
	do
		var count = native_argc
		var res = new Array[String].with_capacity(0)
		# Position 0 is the program name: start at 1
		var pos = 1
		while pos < count do
			res.add(new String.from_cstring(native_argv(pos)))
			pos += 1
		end
		_args_cache = res
	end

	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.

	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.
end
937