cfe5673dfa48e1c6cdd29d161519972632f5d2d0
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
14 # Basic manipulations of strings of characters
17 intrude import collection
# FIXME should be collection::array
24 ###############################################################################
26 ###############################################################################
28 # Common subclass for String and Buffer
29 abstract class AbstractString
30 super AbstractArrayRead[Char]
32 readable private var _items
: NativeString
34 # Access a character at `index` in the string.
36 # assert "abcd"[2] == 'c'
# Returns `_items[index]`: the character is read straight from the native
# string held in `_items`, with no offset applied to `index`.
37 redef fun [](index
) do return _items
[index
]
41 # assert "abcd".substring(1, 2) == "bc"
42 # assert "abcd".substring(-1, 2) == "a"
43 # assert "abcd".substring(1, 0) == ""
44 # assert "abcd".substring(2, 5) == "cd"
46 # A `from` index < 0 will be replaced by 0.
47 # Unless a `count` value is > 0 at the same time.
48 # In this case, `from += count` and `count -= from`.
49 fun substring
(from
: Int, count
: Int): String
53 if from
< 0 then from
= 0
54 if count
> length
then count
= length
56 var r
= new Buffer.with_capacity
(count
- from
)
67 # Create a substring from `self` beginning at the `from` position
69 # assert "abcd".substring_from(1) == "bcd"
70 # assert "abcd".substring_from(-1) == "abcd"
71 # assert "abcd".substring_from(2) == "cd"
73 # As with substring, a `from` index < 0 will be replaced by 0
74 fun substring_from
(from
: Int): String
77 return substring
(from
, length
- from
)
80 # Does self have a substring `str` starting from position `pos`?
82 # assert "abcd".has_substring("bc",1) == true
83 # assert "abcd".has_substring("bc",2) == false
84 fun has_substring
(str
: String, pos
: Int): Bool
86 var itsindex
= str
.length
- 1
87 var myindex
= pos
+ itsindex
89 var itsitems
= str
._items
90 if myindex
> length
or itsindex
> myindex
then return false
91 var its_index_from
= str
._index_from
92 itsindex
+= its_index_from
93 while itsindex
>= its_index_from
do
94 if myitems
[myindex
] != itsitems
[itsindex
] then return false
101 # Is this string prefixed by `prefix`?
103 # assert "abcd".has_prefix("ab") == true
104 # assert "abcbc".has_prefix("bc") == false
# Implemented as `has_substring(prefix, 0)`.
105 fun has_prefix
(prefix
: String): Bool do return has_substring
(prefix
,0)
107 # Is this string suffixed by `suffix`?
109 # assert "abcd".has_suffix("abc") == false
110 # assert "abcd".has_suffix("bcd") == true
# Implemented as `has_substring(suffix, length - suffix.length)`.
# NOTE(review): when `suffix` is longer than `self` the position passed is
# negative; the `itsindex > myindex` guard visible in `has_substring` appears
# to reject that case and return false - confirm.
111 fun has_suffix
(suffix
: String): Bool do return has_substring
(suffix
, length
- suffix
.length
)
113 # If `self` contains only digits, return the corresponding integer
115 # assert "123".to_i == 123
116 # assert "-1".to_i == -1
120 return to_s
.to_cstring
.atoi
123 # If `self` contains a float, return the corresponding float
125 # assert "123".to_f == 123.0
126 # assert "-1".to_f == -1.0
127 # assert "-1.2e-3".to_f == -0.0012
131 return to_s
.to_cstring
.atof
134 # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
# Implemented as `a_to(16)`.
135 fun to_hex
: Int do return a_to
(16)
137 # If `self` contains only digits and letters, return the corresponding integer in a given base
139 # assert "120".a_to(3) == 15
140 fun a_to
(base
: Int) : Int
167 # Returns `true` if the string contains only Numeric values (and one "," or one "." character)
169 # assert "123".is_numeric == true
170 # assert "1.2".is_numeric == true
171 # assert "1,2".is_numeric == true
172 # assert "1..2".is_numeric == false
175 var has_point_or_comma
= false
180 if (i
== '.' or i
== ',') and not has_point_or_comma
182 has_point_or_comma
= true
191 # An upper case version of `self`
193 # assert "Hello World!".to_upper == "HELLO WORLD!"
196 var s
= new Buffer.with_capacity
(length
)
197 for i
in self do s
.add
(i
.to_upper
)
201 # A lower case version of `self`
203 # assert "Hello World!".to_lower == "hello world!"
204 fun to_lower
: String
206 var s
= new Buffer.with_capacity
(length
)
207 for i
in self do s
.add
(i
.to_lower
)
211 # Trims trailing and preceding white spaces
212 # A whitespace is defined as any character whose ASCII value is less than or equal to 32
214 # assert " Hello World ! ".trim == "Hello World !"
215 # assert "\na\nb\tc\t".trim == "a\nb\tc"
218 if self._length
== 0 then return self.to_s
219 # find position of the first non white space char (ascii > 32) from the start of the string
221 while self[start_pos
].ascii
<= 32 do
223 if start_pos
== _length
then return ""
225 # find position of the first non white space char from the end of the string
226 var end_pos
= length
- 1
227 while self[end_pos
].ascii
<= 32 do
229 if end_pos
== start_pos
then return self[start_pos
].to_s
231 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
243 # Mangle a string to be a unique string only made of alphanumeric characters
244 fun to_cmangle
: String
247 var underscore
= false
249 if (c
>= 'a' and c
<= 'z') or (c
>='A' and c
<= 'Z') then
255 res
.append
('_'.ascii
.to_s
)
258 if c
>= '0' and c
<= '9' then
261 else if c
== '_' then
266 res
.append
(c
.ascii
.to_s
)
274 # Escape " \ ' and non printable characters using the rules of literal C strings and characters
276 # assert "abAB12<>&".escape_to_c == "abAB12<>&"
277 # assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\"
278 fun escape_to_c
: String
284 else if c
== '\0' then
286 else if c
== '"' then
288 else if c == '\'' then
290 else if c == '\\
' then
292 else if c.ascii < 32 then
293 b.append("\\{c.ascii.to_base(8, false)}")
302 # Immutable strings of characters.
308 redef type OTHER: String
310 # Index in _items of the start of the string
311 readable var _index_from: Int
313 # Index in _items of the last item of the string
314 readable var _index_to: Int
316 ################################################
317 # AbstractString specific methods #
318 ################################################
320 redef fun [](index) do
322 # Check that the index (+ index_from) is not larger than indexTo
323 # In other terms, if the index is valid
324 assert (index + _index_from) <= _index_to
325 return _items[index + _index_from]
328 redef fun substring(from: Int, count: Int): String
334 if count < 0 then count = 0
338 var realFrom = _index_from + from
340 if (realFrom + count) > _index_to then return new String.from_substring(realFrom, _index_to, _items)
342 if count == 0 then return ""
344 return new String.from_substring(realFrom, realFrom + count - 1, _items)
347 redef fun substring_from(from: Int): String
349 if from > _length then return ""
350 if from < 0 then from = 0
351 return substring(from, _length)
354 redef fun has_substring(str: String, pos: Int): Bool
356 var itsindex = str._length - 1
358 var myindex = pos + itsindex
361 var itsitems = str._items
363 if myindex > _length or itsindex > myindex then return false
365 var itsindexfrom = str.index_from
366 itsindex += itsindexfrom
367 myindex += index_from
369 while itsindex >= itsindexfrom do
370 if myitems[myindex] != itsitems[itsindex] then return false
378 redef fun to_upper: String
380 var outstr = calloc_string(self._length + 1)
383 var myitems = self._items
384 var index_from = self._index_from
385 var max = self._index_to
387 while index_from <= max do
388 outstr[out_index] = myitems[index_from].to_upper
393 outstr[self.length] = '\
0'
395 return outstr.to_s_with_length(self._length)
398 redef fun to_lower : String
400 var outstr = calloc_string(self._length + 1)
403 var myitems = self._items
404 var index_from = self._index_from
405 var max = self._index_to
407 while index_from <= max do
408 outstr[out_index] = myitems[index_from].to_lower
413 outstr[self.length] = '\
0'
415 return outstr.to_s_with_length(self._length)
418 redef fun trim: String
420 if self._length == 0 then return self
421 # find position of the first non white space char (ascii > 32) from the start of the string
422 var start_pos = self._index_from
423 while _items[start_pos].ascii <= 32 do
425 if start_pos == _index_to + 1 then return ""
427 # find position of the first non white space char from the end of the string
428 var end_pos = _index_to
429 while _items[end_pos].ascii <= 32 do
431 if end_pos == start_pos then return _items[start_pos].to_s
433 start_pos -= index_from
434 end_pos -= index_from
435 return self.substring(start_pos, end_pos - start_pos + 1)
440 var i = self._index_from
441 var imax = self._index_to
448 ##################################################
449 # String Specific Methods #
450 ##################################################
452 # Creates a String object as a substring of another String
454 # From : index to start at
456 # To : Index to stop at (from + count -1)
458 private init from_substring(from: Int, to: Int, internalString: NativeString)
460 _items = internalString
463 _length = to - from + 1
466 private init with_infos(items: NativeString, len: Int, from: Int, to: Int)
474 # Return a null terminated char *
475 fun to_cstring: NativeString
477 if _index_from > 0 or _index_to != items.cstring_length - 1 then
478 var newItems = calloc_string(_length + 1)
479 self.items.copy_to(newItems, _length, _index_from, 0)
480 newItems[length] = '\
0'
488 if not other isa String or other is null then return false
490 if self.object_id == other.object_id then return true
492 var my_length = _length
494 if other._length != my_length then return false
496 var my_index = _index_from
497 var its_index = other._index_from
499 var last_iteration = my_index + my_length
501 var itsitems = other._items
502 var myitems = self._items
504 while my_index < last_iteration do
505 if myitems[my_index] != itsitems[its_index] then return false
513 # The comparison between two strings is done on a lexicographical basis
515 # assert ("aa" < "b") == true
518 if self.object_id == other.object_id then return false
520 var my_curr_char : Char
521 var its_curr_char : Char
523 var curr_id_self = self._index_from
524 var curr_id_other = other._index_from
526 var my_items = self._items
527 var its_items = other._items
529 var my_length = self._length
530 var its_length = other._length
532 var max_iterations = curr_id_self + my_length
534 while curr_id_self < max_iterations do
535 my_curr_char = my_items[curr_id_self]
536 its_curr_char = its_items[curr_id_other]
538 if my_curr_char != its_curr_char then
539 if my_curr_char < its_curr_char then return true
547 return my_length < its_length
550 # The concatenation of `self` with `s`
552 # assert "hello " + "world!" == "hello world!"
553 fun +(s: String): String
555 var my_length = self._length
556 var its_length = s._length
558 var total_length = my_length + its_length
560 var target_string = calloc_string(my_length + its_length + 1)
562 self._items.copy_to(target_string, my_length, _index_from, 0)
563 s._items.copy_to(target_string, its_length, s._index_from, my_length)
565 target_string[total_length] = '\
0'
567 return target_string.to_s_with_length(total_length)
570 # `i` repetitions of `self`
572 # assert "abc"*3 == "abcabcabc"
573 # assert "abc"*1 == "abc"
574 # assert "abc"*0 == ""
575 fun *(i: Int): String
579 var my_length = self._length
581 var final_length = my_length * i
583 var my_items = self._items
585 var target_string = calloc_string((final_length) + 1)
587 target_string[final_length] = '\
0'
591 for iteration in [1 .. i] do
592 my_items.copy_to(target_string, my_length, 0, current_last)
593 current_last += my_length
596 return target_string.to_s_with_length(final_length)
# A String is its own string representation: returns `self` (no copy is made).
599 redef fun to_s do return self
603 # djb2 hash algorithm
608 var strStart = _index_from
612 while i >= strStart do
613 h = (h * 32) + h + self._items[i].ascii
621 # Mutable strings of characters.
626 super AbstractArray[Char]
628 redef type OTHER: String
630 redef fun []=(index, item)
632 if index == length then
636 assert index >= 0 and index < length
642 if _capacity <= length then enlarge(length + 5)
647 redef fun enlarge(cap)
650 if cap <= c then return
651 while c <= cap do c = c * 2 + 2
652 var a = calloc_string(c+1)
653 _items.copy_to(a, length, 0, 0)
662 if _capacity < _length + sl then enlarge(_length + sl)
663 s.items.copy_to(_items, sl, s._index_from, _length)
670 redef fun to_s: String
673 var a = calloc_string(l+1)
674 _items.copy_to(a, l, 0, 0)
676 # Ensure the byte after the last one is '\0' so the result is a nul-terminated char *
679 return a.to_s_with_length(length)
687 while i < l1 and i < l2 do
688 var c1 = self[i].ascii
704 # Create a new empty string.
712 _capacity = s.length + 1
714 _items = calloc_string(_capacity)
715 s.items.copy_to(_items, _length, s._index_from, 0)
718 # Create a new empty string with a given capacity.
719 init with_capacity(cap: Int)
722 # _items = new NativeString.calloc(cap)
723 _items = calloc_string(cap+1)
730 if not o isa Buffer or o is null then return false
732 if o.length != l then return false
737 if it[i] != oit[i] then return false
743 readable private var _capacity: Int
746 ###############################################################################
748 ###############################################################################
751 # User readable representation of `self`.
# This default implementation simply returns the result of `inspect`.
752 fun to_s: String do return inspect
754 # The class name of the object in NativeString format.
# Declared `is intern`: there is no Nit body here.
# NOTE(review): presumably supplied by the compiler/runtime - confirm.
755 private fun native_class_name: NativeString is intern
757 # The class name of the object.
759 # assert 5.class_name == "Int"
# Obtained by calling `to_s` on the result of `native_class_name`.
760 fun class_name: String do return native_class_name.to_s
762 # Developer readable representation of `self`.
763 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
766 return "<{inspect_head}>"
769 # Return "CLASSNAME:#OBJECTID".
770 # This function is mainly used with the redefinition of the inspect method
771 protected fun inspect_head: String
773 return "{class_name}:#{object_id.to_hex}"
776 protected fun args: Sequence[String]
783 # assert true.to_s == "true"
784 # assert false.to_s == "false"
796 # Fill `s` with the digits in base `base` of `self` (and with the '-' sign if `signed` and negative).
797 # assume < to_c max const of char
798 fun fill_buffer(s: Buffer, base: Int, signed: Bool)
805 else if self == 0 then
812 var pos = digit_count(base) - 1
813 while pos >= 0 and n > 0 do
814 s[pos] = (n % base).to_c
820 # C function to convert a Nit Int to a NativeString (char*)
# Bound to the external symbol "native_int_to_s".
# The only caller visible in this file (`to_s`) passes `digit_count(10)` as `len`
# and wraps the result with `to_s_with_length(len)`.
821 private fun native_int_to_s(len: Int): NativeString is extern "native_int_to_s"
823 # return displayable int in base 10 and signed
825 # assert 1.to_s == "1"
826 # assert (-123).to_s == "-123"
828 var len = digit_count(10)
829 return native_int_to_s(len).to_s_with_length(len)
832 # return displayable int in hexadecimal (unsigned (not now))
# Equivalent to `to_base(16, false)`: base 16 with the `signed` argument set to false.
833 fun to_hex: String do return to_base(16,false)
835 # return displayable int in base base and signed
836 fun to_base(base: Int, signed: Bool): String
838 var l = digit_count(base)
839 var s = new Buffer.from(" " * l)
840 fill_buffer(s, base, signed)
846 # Pretty print self, print needed decimals up to a max of 3.
848 var str = to_precision( 3 )
850 for i in [0..len-1] do
855 else if c == '.' then
856 return str.substring( 0, j+2 )
858 return str.substring( 0, j+1 )
864 # `self` representation with `nb` digits after the '.'.
865 fun to_precision(nb: Int): String
867 if nb == 0 then return self.to_i.to_s
869 for i in [0..nb[ do f = f * 10.0
876 if i == 0 then return "0.0"
880 var p1 = s.substring(0, s.length-nb)
881 var p2 = s.substring(s.length-nb, nb)
884 return "0." + ("0"*(nb-sl)) + s
888 fun to_precision_native(nb: Int): String import NativeString::to_s `{
892 size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
893 str = malloc(size + 1);
894 sprintf(str, "%.*f", (int)nb, recv );
896 return NativeString_to_s( str );
901 # assert 'x'.to_s == "x"
904 var s = new Buffer.with_capacity(1)
909 # Returns true if the char is a numerical digit
912 if self >= '0' and self <= '9'
919 # Returns true if the char is an alphabetic letter (a-z or A-Z)
922 if (self >= 'a
' and self <= 'z
') or (self >= 'A
' and self <= 'Z
') then return true
926 # Returns true if the char is an alpha or a numeric digit
927 fun is_alphanumeric: Bool
929 if self.is_numeric or self.is_alpha then return true
934 redef class Collection[E]
935 # Concatenate elements.
939 for e in self do if e != null then s.append(e.to_s)
943 # Concatenate and separate each elements with `sep`.
945 # assert [1, 2, 3].join(":") == "1:2:3"
946 # assert [1..3].join(":") == "1:2:3"
947 fun join(sep: String): String
949 if is_empty then return ""
951 var s = new Buffer # Result
956 if e != null then s.append(e.to_s)
963 if e != null then s.append(e.to_s)
971 # Fast implementation
979 if e != null then s.append(e.to_s)
987 # Concatenate couples of 'key value'.
988 # key and value are separated by `couple_sep`.
989 # each couple is separated from the next one with `sep`.
991 # var m = new ArrayMap[Int, String]
994 # assert m.join("; ", "=") == "1=one; 10=ten"
995 fun join(sep: String, couple_sep: String): String
997 if is_empty then return ""
999 var s = new Buffer # Result
1005 if e != null then s.append("{k}{couple_sep}{e}")
1007 # Concat other items
1013 if e != null then s.append("{k}{couple_sep}{e}")
1020 ###############################################################################
1022 ###############################################################################
1024 # Native strings are simple C char *
# Read the character at position `index`.
# Declared `is intern`, so no Nit body (and no bounds check) is visible here.
1028 fun [](index: Int): Char is intern
# Store `item` at position `index`.
# Declared `is intern`, so no Nit body (and no bounds check) is visible here.
1029 fun []=(index: Int, item: Char) is intern
# Declared `is intern`, so no Nit body is visible here.
# NOTE(review): judging from call sites in this file (e.g.
# `_items.copy_to(a, length, 0, 0)`), this presumably copies `length` characters
# of `self`, starting at `from`, into `dest` starting at `to` - confirm.
1030 fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern
1032 # Position of the first nul character.
1033 fun cstring_length: Int
1036 while self[l] != '\
0' do l += 1
# Integer value of this native string, as used by `to_i` in this file.
# Declared `is intern`, so the parsing rules are not visible here.
1039 fun atoi: Int is intern
# Float value of this native string, as used by `to_f` in this file.
# Bound to the external symbol "atof" (presumably the C library function - confirm).
1040 fun atof: Float is extern "atof"
1044 return to_s_with_length(cstring_length)
1047 fun to_s_with_length(length: Int): String
1050 return new String.with_infos(self, length, 0, length - 1)
1053 fun to_s_with_copy: String
1055 var length = cstring_length
1056 var new_self = calloc_string(length + 1)
1057 copy_to(new_self, length, 0, 0)
1058 return new String.with_infos(new_self, length, 0, length - 1)
1063 # StringCapable objects can create native strings
1064 interface StringCapable
# Declared `is intern`, so no Nit body is visible here.
# NOTE(review): presumably allocates a native string of `size` bytes; callers in
# this file request one more than the string length, leaving room for the
# terminating '\0' - confirm.
1065 protected fun calloc_string(size: Int): NativeString is intern
1069 var _args_cache: nullable Sequence[String]
1071 redef fun args: Sequence[String]
1073 if _args_cache == null then init_args
1074 return _args_cache.as(not null)
1077 # The name of the program as given by the OS
1078 fun program_name: String
1080 return native_argv(0).to_s
1083 # Initialize `args` with the contents of `native_argc` and `native_argv`.
1084 private fun init_args
1086 var argc = native_argc
1087 var args = new Array[String].with_capacity(0)
1090 args[i-1] = native_argv(i).to_s
1096 # First argument of the main C function.
# Declared `is intern`; read by `init_args` in this file.
1097 private fun native_argc: Int is intern
1099 # Second argument of the main C function.
# Declared `is intern`. In this file index 0 is used for the program name
# (`program_name`) and higher indices are read by `init_args`.
1100 private fun native_argv(i: Int): NativeString is intern