stdlib: Strings, added function to avoid passing refs with a NativeString (can cause...
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # Basic manipulations of strings of characters
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
24 # Common subclass for String and Buffer
25 abstract class AbstractString
26 super AbstractArrayRead[Char]
27
# Native character storage read by `[]' and by the other services of this class.
readable private var _items: NativeString
29
# The character stored at position `index' of the native storage.
redef fun [](index)
do
	return _items[index]
end
31
# Create a substring of at most `count' characters starting at position `from'.
#
#     "abcd".substring(1, 2)	# --> "bc"
#     "abcd".substring(-1, 2)	# --> "a"
#     "abcd".substring(1, 0)	# --> ""
#     "abcd".substring(2, 5)	# --> "cd"
#
# `count' must not be negative.
# Positions outside of `self' are silently clipped.
fun substring(from: Int, count: Int): String
do
	assert count >= 0

	# Work with an exclusive end position, then clip both bounds to `self'
	var stop = from + count
	var start = from
	if start < 0 then start = 0
	if stop > length then stop = length

	if start >= stop then return ""

	var res = new Buffer.with_capacity(stop - start)
	for i in [start .. stop[ do res.push(_items[i])
	return res.to_s
end
55
# Create a substring of `self' that starts at position `from' and runs to the end.
#
#     "abcd".substring_from(1)	# --> "bcd"
#     "abcd".substring_from(-1)	# --> "abcd"
#     "abcd".substring_from(2)	# --> "cd"
#
# `from' must be lower than `length'.
fun substring_from(from: Int): String
do
	assert from < length
	var remaining = length - from
	return substring(from, remaining)
end
66
# Does `self' have the substring `str' starting at position `pos'?
#
#     "abcd".has_substring("bc",1)	# --> true
#     "abcd".has_substring("bc",2)	# --> false
#
# Return false when `pos' is negative or when `str' would run past the end of `self'.
fun has_substring(str: String, pos: Int): Bool
do
	var itsindex = str.length - 1
	var myindex = pos + itsindex

	# `myindex' is the last position of `self' that is read below, so it must be
	# a valid index (strictly lower than `length').
	# `itsindex > myindex' is true exactly when `pos' is negative.
	if myindex >= length or itsindex > myindex then return false

	var myitems = _items
	var itsitems = str._items

	# `str' may start anywhere inside its native storage
	var its_index_from = str._index_from
	itsindex += its_index_from

	# Compare backward, from the last character of `str' to its first one
	while itsindex >= its_index_from do
		if myitems[myindex] != itsitems[itsindex] then return false
		myindex -= 1
		itsindex -= 1
	end
	return true
end
87
# Does `self' start with `prefix'?
#
#     "abcd".has_prefix("abc")	# --> true
#     "abcd".has_prefix("bc")	# --> false
fun has_prefix(prefix: String): Bool
do
	return has_substring(prefix, 0)
end
93
# Does `self' end with `suffix'?
#
#     "abcd".has_suffix("abc")	# --> false
#     "abcd".has_suffix("bcd")	# --> true
fun has_suffix(suffix: String): Bool
do
	# Position where `suffix' has to start to end exactly with `self'
	var start = length - suffix.length
	return has_substring(suffix, start)
end
99
# If `self' contains only digits, return the corresponding integer
fun to_i: Int
do
	# Shortcut: let the native `atoi' parse the C string
	var cstr = to_s.to_cstring
	return cstr.atoi
end
106
# If `self' contains a float, return the corresponding float
fun to_f: Float
do
	# Shortcut: let the native `atof' parse the C string
	var cstr = to_s.to_cstring
	return cstr.atof
end
113
# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
#
# Shortcut for `a_to(16)'.
fun to_hex: Int
do
	return a_to(16)
end
116
# If `self' contains only digits and letters, return the corresponding integer in a given base
#
# Characters are read from left to right and converted with `Char::to_i'.
# A negative value marks the result as negative.
# Parsing stops at the first character whose value is not a digit of `base'
# (valid digits are 0 .. base-1).
fun a_to(base: Int) : Int
do
	var i = 0
	var neg = false

	for c in self
	do
		var v = c.to_i
		if v >= base then
			# Not a digit of this base: keep what was parsed so far
			break
		else if v < 0 then
			neg = true
		else
			i = i * base + v
		end
	end

	if neg then return -i
	return i
end
144
# Returns true if the string contains only Numeric values (and one "," or one "." character)
fun is_numeric: Bool
do
	var separator_seen = false
	for c in self do
		if c.is_numeric then continue

		# A non-digit is accepted only if it is the first '.' or ',' encountered
		if separator_seen then return false
		if c != '.' and c != ',' then return false
		separator_seen = true
	end
	return true
end
163
# An upper case version of `self'
fun to_upper: String
do
	var res = new Buffer.with_capacity(length)
	for c in self do
		res.add(c.to_upper)
	end
	return res.to_s
end
171
# A lower case version of `self'
fun to_lower : String
do
	var res = new Buffer.with_capacity(length)
	for c in self do
		res.add(c.to_lower)
	end
	return res.to_s
end
179
180
# Output each of the `length' first characters of the native storage, in order.
redef fun output
do
	for i in [0 .. length[ do _items[i].output
end
189 end
190
191 # Immutable strings of characters.
192 class String
193 super Comparable
194 super AbstractString
195 super StringCapable
196
197 redef type OTHER: String
198
# Index in _items of the start of the string
readable var _index_from: Int

# Index in _items of the last item of the string (inclusive)
readable var _index_to: Int
204
205 ################################################
206 # AbstractString specific methods #
207 ################################################
208
# Access the character at position `index' of the string.
#
# `index' is relative to the start of the string, not to the native storage.
# It must be non-negative and must not go past the last character.
redef fun [](index) do
	assert index >= 0
	# Translate to a position in the native storage and check it against the last valid one
	var real_index = index + _index_from
	assert real_index <= _index_to
	return _items[real_index]
end
218
# Create a substring.
#
#     "abcd".substring(1, 2)	# --> "bc"
#     "abcd".substring(-1, 2)	# --> "a"
#     "abcd".substring(1, 0)	# --> ""
#     "abcd".substring(2, 5)	# --> "cd"
#     "abcd".substring(9, 2)	# --> ""
#
# A negative `from' is replaced by 0 and `count' is reduced by the same amount
# (but never below 0).
# A `from' past the last character yields the empty string.
#
# The result shares the native storage of `self': no character is copied.
redef fun substring(from: Int, count: Int): String
do
	assert count >= 0

	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end

	var real_from = _index_from + from

	# Nothing to take, or start past the end: do not build a String with a
	# zero or negative length
	if count == 0 or real_from > _index_to then return ""

	# Clip the last position to the last character of `self'
	var real_to = real_from + count - 1
	if real_to > _index_to then real_to = _index_to

	return new String.from_substring(real_from, real_to, _items)
end
248
# Create a substring of `self' that starts at position `from' and runs to the end.
#
#     "abcd".substring_from(1)	# --> "bcd"
#     "abcd".substring_from(-1)	# --> "abcd"
#     "abcd".substring_from(2)	# --> "cd"
#
# As with substring, a negative `from' is replaced by 0.
# A `from' greater than the length yields the empty string.
redef fun substring_from(from: Int): String
do
	if from > _length then return ""
	var start = from
	if start < 0 then start = 0
	# `substring' clips the count to the end of the string
	return substring(start, _length)
end
263
# Does `self' have the substring `str' starting at position `pos'?
#
#     "abcd".has_substring("bc",1)	# --> true
#     "abcd".has_substring("bc",2)	# --> false
#
# Return false when `pos' is negative or when `str' would run past the end of `self'.
redef fun has_substring(str: String, pos: Int): Bool
do
	var itsindex = str._length - 1
	var myindex = pos + itsindex

	# `myindex' is the last position of `self' that is read below, so it must be
	# strictly lower than the length. Otherwise characters located after
	# `_index_to' in a shared native storage could be matched.
	# `itsindex > myindex' is true exactly when `pos' is negative.
	if myindex >= _length or itsindex > myindex then return false

	var myitems = _items
	var itsitems = str._items

	# Both strings may start anywhere inside their native storage
	var itsindexfrom = str.index_from
	itsindex += itsindexfrom
	myindex += index_from

	# Compare backward, from the last character of `str' to its first one
	while itsindex >= itsindexfrom do
		if myitems[myindex] != itsitems[itsindex] then return false
		myindex -= 1
		itsindex -= 1
	end

	return true
end
291
# An upper case version of `self'
#
# The result is stored in a freshly allocated, nul-terminated native string.
redef fun to_upper: String
do
	var len = self._length
	var res = calloc_string(len + 1)

	var src = self._items
	var pos = self._index_from
	var last = self._index_to

	# Convert each character between `_index_from' and `_index_to'
	var i = 0
	while pos <= last do
		res[i] = src[pos].to_upper
		pos += 1
		i += 1
	end

	res[self.length] = '\0'

	return new String.with_native(res, len)
end
312
# A lower case version of `self'
#
# The result is stored in a freshly allocated, nul-terminated native string.
redef fun to_lower : String
do
	var len = self._length
	var res = calloc_string(len + 1)

	var src = self._items
	var pos = self._index_from
	var last = self._index_to

	# Convert each character between `_index_from' and `_index_to'
	var i = 0
	while pos <= last do
		res[i] = src[pos].to_lower
		pos += 1
		i += 1
	end

	res[self.length] = '\0'

	return new String.with_native(res, len)
end
333
# Output each character located between `_index_from' and `_index_to', in order.
redef fun output
do
	for i in [self._index_from .. self._index_to] do _items[i].output
end
343
344 ##################################################
345 # String Specific Methods #
346 ##################################################
347
# Creates a String object as a view on a part of an existing native string
#
# `from': index in `internalString' of the first character
#
# `to': index in `internalString' of the last character (from + count - 1)
#
# `internalString' is stored as is, without copy.
private init from_substring(from: Int, to: Int, internalString: NativeString)
do
	_index_from = from
	_index_to = to
	# Both bounds are inclusive
	_length = to - from + 1
	_items = internalString
end
361
# Create a new string of `size' characters from a given char *.
#
# `nat' is stored as is, without copy. `size' must not be negative.
init with_native(nat: NativeString, size: Int)
do
	assert size >= 0
	_items = nat
	# The string covers the `size' first characters of `nat'
	_index_from = 0
	_index_to = size - 1
	_length = size
end
371
# Create a new string from a null terminated char *.
#
# The length is the position of the first nul character; `str' is not copied.
init from_cstring(str: NativeString)
do
	var len = str.cstring_length
	with_native(str, len)
end
377
# Creates a new Nit String from an existing CString
# Pretty much equals to from_cstring but copies instead
# of passing a reference
# Avoids manual/automatic dealloc problems when dealing with native C code
init copy_from_native(str: NativeString)
do
	var len = str.cstring_length

	# One more character for the terminating nul
	var copy = calloc_string(len + 1)
	str.copy_to(copy, len, 0, 0)
	copy[len] = '\0'

	with_native(copy, len)
end
390
# Return a null terminated char *
#
# When the string covers its whole native storage, the storage itself is returned.
# Otherwise a fresh nul-terminated copy of the characters of the string is returned.
fun to_cstring: NativeString
do
	if _index_from <= 0 and _index_to == items.cstring_length - 1 then return _items

	# The string is only a part of its storage: copy that part
	var copy = calloc_string(_length + 1)
	self.items.copy_to(copy, _length, _index_from, 0)
	copy[length] = '\0'
	return copy
end
403
# Is `other' a String with the same characters, in the same order, as `self'?
redef fun ==(other)
do
	if not other isa String or other is null then return false

	# Same object: no need to look at the characters
	if self.object_id == other.object_id then return true

	var len = _length
	if other._length != len then return false

	var mine = self._items
	var theirs = other._items
	var my_from = _index_from
	var its_from = other._index_from

	# Compare character by character, each string starting at its own offset
	var offset = 0
	while offset < len do
		if mine[my_from + offset] != theirs[its_from + offset] then return false
		offset += 1
	end

	return true
end
430
# The comparison between two strings is done on a lexicographical basis
# Eg : "aa" < "b" => true
#
# Only the characters that belong to both strings are compared.
# When one string is a prefix of the other, the shorter one is the lower one.
redef fun <(other)
do
	if self.object_id == other.object_id then return false

	var my_items = self._items
	var its_items = other._items

	var my_from = self._index_from
	var its_from = other._index_from

	var my_length = self._length
	var its_length = other._length

	# Never read past the end of the shorter string: the native storage may
	# be shared and hold unrelated characters after `_index_to'
	var common = my_length
	if its_length < common then common = its_length

	var i = 0
	while i < common do
		var my_curr_char = my_items[my_from + i]
		var its_curr_char = its_items[its_from + i]

		# The first difference decides
		if my_curr_char != its_curr_char then return my_curr_char < its_curr_char

		i += 1
	end

	# One string is a prefix of the other (or both are equal)
	return my_length < its_length
end
471
# The concatenation of `self' with `s'
#
# The result is stored in a freshly allocated, nul-terminated native string.
fun +(s: String): String
do
	var my_length = self._length
	var total = my_length + s._length

	var res = calloc_string(total + 1)

	# Characters of `self' first, then those of `s' right after them
	self._items.copy_to(res, my_length, _index_from, 0)
	s._items.copy_to(res, s._length, s._index_from, my_length)

	res[total] = '\0'

	return new String.with_native(res, total)
end
487
# `i' repetitions of `self'
#
#     "ab" * 3	# --> "ababab"
#     "ab" * 0	# --> ""
#
# `i' must not be negative.
# The result is stored in a freshly allocated, nul-terminated native string.
fun *(i: Int): String
do
	assert i >= 0

	var my_length = self._length
	var final_length = my_length * i

	var my_items = self._items
	# The string may start anywhere inside its native storage
	var my_from = self._index_from

	var target_string = calloc_string(final_length + 1)
	target_string[final_length] = '\0'

	# Position in `target_string' where the next repetition is written
	var current_last = 0
	for iteration in [1 .. i] do
		my_items.copy_to(target_string, my_length, my_from, current_last)
		current_last += my_length
	end

	return new String.with_native(target_string, final_length)
end
512
# A String is its own string representation: `self' is returned, not a copy.
redef fun to_s
do
	return self
end
514
# Hash value of the string, computed from its characters only.
redef fun hash
do
	# djb2 hash algorithm, fed with the characters from the last one to the first one
	var h = 5381
	var chars = _items
	var first = _index_from
	var i = first + _length - 1

	while i >= first do
		h = (h * 32) + h + chars[i].ascii
		i -= 1
	end

	return h
end
533 end
534
535 # Mutable strings of characters.
536 class Buffer
537 super AbstractString
538 super Comparable
539 super StringCapable
540 super AbstractArray[Char]
541
542 redef type OTHER: String
543
# Replace the character at position `index' with `item'.
#
# Writing at position `length' appends `item' at the end.
# Any other `index' must designate an existing character.
redef fun []=(index, item)
do
	if index != length then
		assert index >= 0 and index < length
		_items[index] = item
	else
		add(item)
	end
end
553
# Append the character `c' at the end, enlarging the storage when it is full.
redef fun add(c)
do
	var len = length
	if _capacity <= len then enlarge(len + 5)
	_items[len] = c
	_length += 1
end
560
# Make sure the capacity is greater than `cap'.
#
# Nothing is done when the capacity is already at least `cap'.
# Otherwise the capacity grows by repeated `c * 2 + 2' steps and the
# characters are copied into a new native string.
redef fun enlarge(cap)
do
	var new_capacity = _capacity
	if cap <= new_capacity then return

	while new_capacity <= cap do new_capacity = new_capacity * 2 + 2

	# One more character than the capacity is allocated
	var bigger = calloc_string(new_capacity + 1)
	_items.copy_to(bigger, length, 0, 0)

	_items = bigger
	_capacity = new_capacity
end
571
# Append the characters of `s' at the end.
#
# A String is copied in one go from its native storage;
# anything else goes through the inherited implementation.
redef fun append(s)
do
	if not s isa String then
		super
		return
	end

	var sl = s.length
	var needed = _length + sl
	if _capacity < needed then enlarge(needed)
	s.items.copy_to(_items, sl, s._index_from, _length)
	_length += sl
end
583
# A new String holding a copy of the current characters of the buffer.
redef fun to_s: String
do
	var len = length
	var copy = calloc_string(len + 1)
	_items.copy_to(copy, len, 0, 0)

	# Ensure the afterlast byte is '\0' to nul-terminated char *
	copy[length] = '\0'

	return new String.with_native(copy, length)
end
595
# Is `self' lower than the string `s' on a lexicographical basis?
#
# Characters are compared by their `ascii' value.
# When one is a prefix of the other, the shorter one is the lower one.
redef fun <(s)
do
	var l1 = length
	var l2 = s.length

	var i = 0
	while i < l1 and i < l2 do
		var c1 = self[i].ascii
		var c2 = s[i].ascii
		# The first difference decides
		if c1 != c2 then return c1 < c2
		i += 1
	end

	return l1 < l2
end
617
# Create a new empty string.
#
# The initial capacity is 5.
init
do
	var default_capacity = 5
	with_capacity(default_capacity)
end
623
# Create a new buffer holding a copy of the characters of `s'.
#
# The capacity is the length of `s' plus one.
init from(s: String)
do
	var len = s.length
	_length = len
	_capacity = len + 1
	_items = calloc_string(_capacity)
	s.items.copy_to(_items, len, s._index_from, 0)
end
631
# Create a new empty string with a given capacity.
#
# `cap' must not be negative.
init with_capacity(cap: Int)
do
	assert cap >= 0
	_length = 0
	_capacity = cap
	# One more character than the capacity is allocated
	_items = calloc_string(cap+1)
end
641
# Is `o' a Buffer with the same characters, in the same order, as `self'?
redef fun ==(o)
do
	if not o isa Buffer or o is null then return false

	var len = length
	if o.length != len then return false

	var mine = _items
	var theirs = o._items
	for i in [0 .. len[ do
		if mine[i] != theirs[i] then return false
	end
	return true
end
656
# Number of characters the native storage is declared to hold; `add' and `append' enlarge the storage based on it.
readable private var _capacity: Int
658 end
659
660 ###############################################################################
661 # Refinement #
662 ###############################################################################
663
redef class Object
	# User readable representation of `self'.
	#
	# By default, it is the same as `inspect'.
	fun to_s: String
	do
		return inspect
	end

	# The class name of the object in NativeString format.
	private fun native_class_name: NativeString is intern

	# The class name of the object.
	# FIXME: real type information is not available at runtime.
	# Therefore, for instance, an instance of List[Bool] has just
	# "List" for class_name
	fun class_name: String
	do
		var native_name = native_class_name
		return new String.from_cstring(native_name)
	end

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	fun inspect: String
	do
		return "<{inspect_head}>"
	end

	# Return "CLASSNAME:#OBJECTID".
	# This function is mainly used with the redefinition of the inspect method
	protected fun inspect_head: String
	do
		return "{class_name}:#{object_id.to_hex}"
	end

	# The arguments of the program, as given by `sys'.
	protected fun args: Sequence[String]
	do
		return sys.args
	end
end
696
redef class Bool
	# "true" or "false"
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
707
redef class Int
	# Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if negative).
	# assume < to_c max const of char
	#
	# Digits are written from the least significant one, starting at
	# position `digit_count(base) - 1' of `s'.
	#
	# NOTE(review): `signed' is not read by this implementation.
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single '0'
		if self == 0 then
			s[0] = '0'
			return
		end

		# Sign
		var n = self
		if self < 0 then
			n = - self
			s[0] = '-'
		end

		# Fill digits
		var pos = digit_count(base) - 1
		while pos >= 0 and n > 0 do
			s[pos] = (n % base).to_c
			n = n / base
			pos -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s
	do
		return to_base(10, true)
	end

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String
	do
		return to_base(16, false)
	end

	# return displayable int in base base and signed
	fun to_base(base: Int, signed: Bool): String
	do
		# Start from a buffer of `digit_count(base)' spaces, then overwrite them
		var blank = " " * digit_count(base)
		var s = new Buffer.from(blank)
		fill_buffer(s, base, signed)
		return s.to_s
	end
end
748
redef class Float
	# Pretty print self, print needed decimals up to a max of 6.
	#
	# Trailing zeros of the 6-decimal representation are dropped,
	# but one decimal is always kept after the '.'.
	redef fun to_s do
		var str = to_precision( 6 )

		# Look backward for the last character that is not a '0'
		var j = str.length - 1
		while j >= 0 do
			var c = str[j]
			if c != '0' then
				# Only zeros after the '.': keep the first of them
				if c == '.' then return str.substring( 0, j+2 )
				return str.substring( 0, j+1 )
			end
			j -= 1
		end
		return str
	end

	# `self' representation with `nb' digits after the '.'.
	#
	# NOTE(review): `str' is malloc'ed here and handed to `String::from_cstring',
	# which keeps the pointer without copying it; nothing visible in this
	# module frees it -- confirm how this memory is reclaimed.
	fun to_precision(nb: Int): String import String::from_cstring `{
		int size;
		char *str;

		size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
		str = malloc(size + 1);
		sprintf(str, "%.*f", (int)nb, recv );

		return new_String_from_cstring( str );
	`}
end
780
redef class Char
	# A new string made of this single character
	redef fun to_s
	do
		var s = new Buffer.with_capacity(1)
		s.add(self)
		return s.to_s
	end

	# Returns true if the char is a numerical digit
	fun is_numeric: Bool
	do
		return self >= '0' and self <= '9'
	end

	# Returns true if the char is an alpha digit
	fun is_alpha: Bool
	do
		if self >= 'a' and self <= 'z' then return true
		return self >= 'A' and self <= 'Z'
	end

	# Returns true if the char is an alpha or a numeric digit
	fun is_alphanumeric: Bool
	do
		return self.is_numeric or self.is_alpha
	end
end
813
redef class Collection[E]
	# Concatenate elements.
	#
	# Null elements are skipped.
	redef fun to_s
	do
		var s = new Buffer
		for e in self do
			if e != null then s.append(e.to_s)
		end
		return s.to_s
	end

	# Concatenate and separate each elements with `sep'.
	#
	# Null elements are skipped, but their separator is still added.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var s = new Buffer # Result
		var it = iterator

		# The first item is not preceded by a separator
		var head = it.item
		if head != null then s.append(head.to_s)
		it.next

		# Each other item is preceded by a separator
		while it.is_ok do
			s.append(sep)
			var e = it.item
			if e != null then s.append(e.to_s)
			it.next
		end
		return s.to_s
	end
end
846
redef class Array[E]
	# Fast implementation
	#
	# Concatenate the elements by index; null elements are skipped.
	redef fun to_s
	do
		var s = new Buffer
		var l = length
		var i = 0
		while i < l do
			var e = self[i]
			i += 1
			if e == null then continue
			s.append(e.to_s)
		end
		return s.to_s
	end
end
862
redef class Map[K,V]
	# Concatenate couples of 'key value'.
	# key and value are separated by 'couple_sep'.
	# each couple is separated from the next one with `sep'.
	#
	# Couples with a null value are skipped, but their separator is still added.
	fun join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var s = new Buffer # Result
		var i = iterator

		# The first couple is not preceded by a separator
		var k = i.key
		var e = i.item
		if e != null then s.append("{k}{couple_sep}{e}")
		i.next

		# Each other couple is preceded by a separator
		while i.is_ok do
			s.append(sep)
			k = i.key
			e = i.item
			if e != null then s.append("{k}{couple_sep}{e}")
			i.next
		end
		return s.to_s
	end
end
891
892 ###############################################################################
893 # Native classes #
894 ###############################################################################
895
# Native strings are simple C char *
class NativeString
	# The character at position `index'.
	fun [](index: Int): Char is intern

	# Set the character at position `index' to `item'.
	fun []=(index: Int, item: Char) is intern

	# Copy `length' characters, starting at position `from' of `self',
	# into `dest', starting at position `to' of `dest'
	# (argument roles as used by the callers in this module).
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	fun cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do
			pos += 1
		end
		return pos
	end

	# Native integer parsing.
	fun atoi: Int is intern

	# Native float parsing, bound to the C function "atof".
	fun atof: Float is extern "atof"
end
912
# StringCapable objects can create native strings
interface StringCapable
	# Allocate a new native string for `size' characters.
	# NOTE(review): the name suggests zero-filled (calloc-like) memory -- confirm against the runtime.
	protected fun calloc_string(size: Int): NativeString is intern
end
917
redef class Sys
	# Arguments of the program, once built by `init_args'.
	var _args_cache: nullable Sequence[String]

	# The arguments of the program, without the program name.
	#
	# They are built on the first call, then cached.
	redef fun args: Sequence[String]
	do
		if _args_cache == null then
			init_args
		end
		var cached = _args_cache
		return cached.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String
	do
		var native_name = native_argv(0)
		return new String.from_cstring(native_name)
	end

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	private fun init_args
	do
		var argc = native_argc
		var args = new Array[String].with_capacity(0)

		# Native argument 0 is the program name: it is skipped
		for i in [1 .. argc[ do
			args[i-1] = new String.from_cstring(native_argv(i))
		end
		_args_cache = args
	end

	# First argument of the main C function.
	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0"

	# Second argument of the main C function.
	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1"
end
950