1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
14 # Basic manipulations of strings of characters
17 intrude import collection
# FIXME should be collection::array
20 ###############################################################################
22 ###############################################################################
24 # Common subclass for String and Buffer
25 abstract class AbstractString
26 super AbstractArrayRead[Char]
28 readable private var _items
: NativeString
# Character found at position `index' of the underlying native string.
redef fun [](index)
do
	return _items[index]
end
34 # "abcd".substring(1, 2) # --> "bc"
35 # "abcd".substring(-1, 2) # --> "a"
36 # "abcd".substring(1, 0) # --> ""
37 # "abcd".substring(2, 5) # --> "cd"
38 fun substring
(from
: Int, count
: Int): String
42 if from
< 0 then from
= 0
43 if count
> length
then count
= length
45 var r
= new Buffer.with_capacity
(count
- from
)
56 # Create a substring from `self' beginning at the 'from' position
58 # "abcd".substring_from(1) # --> "bcd"
59 # "abcd".substring_from(-1) # --> "abcd"
60 # "abcd".substring_from(2) # --> "cd"
61 fun substring_from
(from
: Int): String
64 return substring
(from
, length
- from
)
67 # Does self have a substring 'str' starting from position 'pos'
69 # "abcd".has_substring("bc",1) # --> true
70 # "abcd".has_substring("bc",2) # --> false
71 fun has_substring
(str
: String, pos
: Int): Bool
73 var itsindex
= str
.length
- 1
74 var myindex
= pos
+ itsindex
76 var itsitems
= str
._items
77 if myindex
> length
or itsindex
> myindex
then return false
78 var its_index_from
= str
._index_from
79 itsindex
+= its_index_from
80 while itsindex
>= its_index_from
do
81 if myitems
[myindex
] != itsitems
[itsindex
] then return false
# Does `self' start with the characters of 'prefix'?
#
#     "abcd".has_prefix("ab")	# --> true
#     "abcd".has_prefix("bc")	# --> false
fun has_prefix(prefix: String): Bool
do
	# A prefix is a substring anchored at position 0.
	return has_substring(prefix, 0)
end
# Does `self' end with the characters of 'suffix'?
#
#     "abcd".has_suffix("abc")	# --> false
#     "abcd".has_suffix("bcd")	# --> true
fun has_suffix(suffix: String): Bool
do
	# A suffix is a substring anchored `suffix.length' characters before the end.
	var start = length - suffix.length
	return has_substring(suffix, start)
end
100 # If `self' contains only digits, return the corresponding integer
104 return to_s
.to_cstring
.atoi
107 # If `self' contains a float, return the corresponding float
111 return to_s
.to_cstring
.atof
# Integer value of `self' read as a hexadecimal (base 16) number.
# The conversion itself is delegated to `a_to'.
fun to_hex: Int
do
	return a_to(16)
end
117 # If `self' contains only digits and letters, return the corresponding integer in a given base
118 fun a_to
(base
: Int) : Int
145 # Returns true if the string contains only Numeric values (and one "," or one "." character)
148 var has_point_or_comma
= false
153 if (i
== '.' or i
== ',') and not has_point_or_comma
155 has_point_or_comma
= true
164 # A upper case version of `self'
167 var s
= new Buffer.with_capacity
(length
)
168 for i
in self do s
.add
(i
.to_upper
)
172 # A lower case version of `self'
173 fun to_lower
: String
175 var s
= new Buffer.with_capacity
(length
)
176 for i
in self do s
.add
(i
.to_lower
)
191 # Immutable strings of characters.
197 redef type OTHER: String
199 # Index in _items of the start of the string
200 readable var _index_from
: Int
202 # Index in _items of the last item of the string
203 readable var _index_to
: Int
205 ################################################
206 # AbstractString specific methods #
207 ################################################
209 # Access a character at index in String
211 redef fun [](index
) do
213 # Check that the index (+ index_from) is not larger than indexTo
214 # In other terms, if the index is valid
215 assert (index
+ _index_from
) <= _index_to
216 return _items
[index
+ _index_from
]
219 # Create a substring.
221 # "abcd".substring(1, 2) # --> "bc"
222 # "abcd".substring(-1, 2) # --> "a"
223 # "abcd".substring(1, 0) # --> ""
224 # "abcd".substring(2, 5) # --> "cd"
226 # A "from" index < 0 is replaced by 0.
227 # In that case, `count' is first reduced by the number of skipped positions
228 # (count += from), which is why "abcd".substring(-1, 2) gives "a".
230 redef fun substring
(from
: Int, count
: Int): String
236 if count
< 0 then count
= 0
240 var realFrom
= _index_from
+ from
242 if (realFrom
+ count
) > _index_to
then return new String.from_substring
(realFrom
, _index_to
, _items
)
244 if count
== 0 then return ""
246 return new String.from_substring
(realFrom
, realFrom
+ count
- 1, _items
)
# Create a substring from `self' beginning at the 'from' position
#
#     "abcd".substring_from(1)	# --> "bcd"
#     "abcd".substring_from(-1)	# --> "abcd"
#     "abcd".substring_from(2)	# --> "cd"
#
# A "from" index < 0 is replaced by 0.
# A "from" index greater than the length gives the empty string.
redef fun substring_from(from: Int): String
do
	if from < 0 then
		from = 0
	else if from > _length then
		return ""
	end
	# `substring' truncates the count to what is actually available.
	return substring(from, _length)
end
# Does self have a substring 'str' starting from position 'pos'
#
#     "abcd".has_substring("bc",1)	# --> true
#     "abcd".has_substring("bc",2)	# --> false
#
# Return false when 'str' would run past the end of `self'
# or when 'pos' is negative.
redef fun has_substring(str: String, pos: Int): Bool
do
	# Both strings are compared backwards, starting from their last character.
	var itsindex = str._length - 1
	var myindex = pos + itsindex

	var myitems = _items
	var itsitems = str._items

	# `myindex' is the position in `self' of the last character to compare,
	# so it must be a valid index, i.e. strictly smaller than `_length'.
	# `_items' may be shared with a longer string (see `from_substring'),
	# thus reading at `_length' could match a character that is not part of `self'.
	# `itsindex > myindex' rejects the negative values of 'pos'.
	if myindex >= _length or itsindex > myindex then return false

	# Switch from positions in the strings to positions in the native arrays.
	var itsindexfrom = str.index_from
	itsindex += itsindexfrom
	myindex += index_from

	while itsindex >= itsindexfrom do
		if myitems[myindex] != itsitems[itsindex] then return false
		myindex -= 1
		itsindex -= 1
	end

	return true
end
292 # A upper case version of `self'
293 redef fun to_upper
: String
295 var outstr
= calloc_string
(self._length
+ 1)
298 var myitems
= self._items
299 var index_from
= self._index_from
300 var max
= self._index_to
302 while index_from
<= max
do
303 outstr
[out_index
] = myitems
[index_from
].to_upper
308 outstr
[self.length
] = '\0'
310 return new String.with_native
(outstr
, self._length
)
313 # A lower case version of `self'
314 redef fun to_lower
: String
316 var outstr
= calloc_string
(self._length
+ 1)
319 var myitems
= self._items
320 var index_from
= self._index_from
321 var max
= self._index_to
323 while index_from
<= max
do
324 outstr
[out_index
] = myitems
[index_from
].to_lower
329 outstr
[self.length
] = '\0'
331 return new String.with_native
(outstr
, self._length
)
336 var i
= self._index_from
337 var imax
= self._index_to
344 ##################################################
345 # String Specific Methods #
346 ##################################################
348 # Creates a String object as a substring of another String
350 # From : index to start at
352 # To : Index to stop at (from + count -1)
354 private init from_substring
(from
: Int, to
: Int, internalString
: NativeString)
356 _items
= internalString
359 _length
= to
- from
+ 1
362 # Create a new string from a given char *.
363 init with_native
(nat
: NativeString, size
: Int)
369 _index_to
= _length
- 1
372 # Create a new string from a null terminated char *.
373 init from_cstring
(str
: NativeString)
375 with_native
(str
,str
.cstring_length
)
378 # Creates a new Nit String from an existing CString
379 # Pretty much equals to from_cstring but copies instead
380 # of passing a reference
381 # Avoids manual/automatic dealloc problems when dealing with native C code
382 init copy_from_native
(str
: NativeString)
384 var temp_length
= str
.cstring_length
385 var new_str
= calloc_string
(temp_length
+ 1)
386 str
.copy_to
(new_str
, temp_length
, 0, 0)
387 new_str
[temp_length
] = '\0'
388 with_native
(new_str
, temp_length
)
391 # Return a null terminated char *
392 fun to_cstring
: NativeString
395 if _index_from
> 0 or _index_to
!= items
.cstring_length
- 1 then
396 var newItems
= calloc_string
(_length
+ 1)
397 self.items
.copy_to
(newItems
, _length
, _index_from
, 0)
398 newItems
[length
] = '\0'
406 if not other
isa String or other
is null then return false
408 if self.object_id
== other
.object_id
then return true
410 var my_length
= _length
412 if other
._length
!= my_length
then return false
414 var my_index
= _index_from
415 var its_index
= other
._index_from
417 var last_iteration
= my_index
+ my_length
419 var itsitems
= other
._items
420 var myitems
= self._items
422 while my_index
< last_iteration
do
423 if myitems
[my_index
] != itsitems
[its_index
] then return false
431 # The comparison between two strings is done on a lexicographical basis
432 # Eg : "aa" < "b" => true
435 if self.object_id
== other
.object_id
then return false
437 var my_curr_char
: Char
438 var its_curr_char
: Char
440 var curr_id_self
= self._index_from
441 var curr_id_other
= other
._index_from
443 var my_items
= self._items
444 var its_items
= other
._items
446 var my_length
= self._length
447 var its_length
= other
._length
449 var max_iterations
= curr_id_self
+ my_length
451 while curr_id_self
< max_iterations
do
452 my_curr_char
= my_items
[curr_id_self
]
453 its_curr_char
= its_items
[curr_id_other
]
455 if my_curr_char
!= its_curr_char
then
456 if my_curr_char
< its_curr_char
then return true
464 if my_length
!= its_length
then
465 if my_length
< its_length
then return true
472 # The concatenation of `self' with `s'
473 fun +(s
: String): String
475 var my_length
= self._length
476 var its_length
= s
._length
478 var target_string
= calloc_string
(my_length
+ its_length
+ 1)
480 self._items
.copy_to
(target_string
, my_length
, _index_from
, 0)
481 s
._items
.copy_to
(target_string
, its_length
, s
._index_from
, my_length
)
483 target_string
[my_length
+ its_length
] = '\0'
485 return new String.with_native
(target_string
, my_length
+ its_length
)
# i repetitions of self
#
#     "ab" * 3	# --> "ababab"
#     "ab" * 0	# --> ""
fun *(i: Int): String
do
	# A negative number of repetitions has no meaning.
	assert i >= 0

	var my_length = self._length
	var final_length = my_length * i

	var my_items = self._items
	# The characters of `self' start at `_index_from' in `_items', which is
	# not always 0: a substring shares the native array of its origin.
	var my_index_from = self._index_from

	# One extra char for the terminating nul.
	var target_string = calloc_string(final_length + 1)
	target_string[final_length] = '\0'

	# Position in `target_string' where the next copy is written.
	var current_last = 0

	for iteration in [1 .. i] do
		my_items.copy_to(target_string, my_length, my_index_from, current_last)
		current_last += my_length
	end

	return new String.with_native(target_string, final_length)
end
# A String is its own textual representation: `self' is returned as is.
redef fun to_s
do
	return self
end
517 # djb2 hash algorithm
522 var strStart
= _index_from
526 while i
>= strStart
do
527 h
= (h
* 32) + h
+ self._items
[i
].ascii
535 # Mutable strings of characters.
540 super AbstractArray[Char]
542 redef type OTHER: String
544 redef fun []=(index
, item
)
546 if index
== length
then
550 assert index
>= 0 and index
< length
556 if _capacity
<= length
then enlarge
(length
+ 5)
561 redef fun enlarge
(cap
)
564 if cap
<= c
then return
565 while c
<= cap
do c
= c
* 2 + 2
566 var a
= calloc_string
(c
+1)
567 _items
.copy_to
(a
, length
, 0, 0)
576 if _capacity
< _length
+ sl
then enlarge
(_length
+ sl
)
577 s
.items
.copy_to
(_items
, sl
, s
._index_from
, _length
)
584 redef fun to_s
: String
587 var a
= calloc_string
(l
+1)
588 _items
.copy_to
(a
, l
, 0, 0)
590 # Ensure the afterlast byte is '\0' to nul-terminated char *
593 return new String.with_native
(a
, length
)
601 while i
< l1
and i
< l2
do
602 var c1
= self[i
].ascii
618 # Create a new empty string.
626 _capacity
= s
.length
+ 1
628 _items
= calloc_string
(_capacity
)
629 s
.items
.copy_to
(_items
, _length
, s
._index_from
, 0)
632 # Create a new empty string with a given capacity.
633 init with_capacity
(cap
: Int)
636 # _items = new NativeString.calloc(cap)
637 _items
= calloc_string
(cap
+1)
644 if not o
isa Buffer or o
is null then return false
646 if o
.length
!= l
then return false
651 if it
[i
] != oit
[i
] then return false
657 readable private var _capacity
: Int
660 ###############################################################################
662 ###############################################################################
# User readable representation of `self'.
# Unless redefined, it is the result of `inspect'.
fun to_s: String
do
	return inspect
end
668 # The class name of the object in NativeString format.
669 private fun native_class_name
: NativeString is intern
# The class name of the object.
# FIXME: real type information is not available at runtime.
# Therefore, for instance, an instance of List[Bool] has just
# "List" for class_name
fun class_name: String
do
	# Wrap the native name in a String.
	var native_name = native_class_name
	return new String.from_cstring(native_name)
end
677 # Developer readable representation of `self'.
678 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
681 return "<{inspect_head}>"
684 # Return "CLASSNAME:#OBJECTID".
685 # This function is mainly used with the redefinition of the inspect method
686 protected fun inspect_head
: String
688 return "{class_name}:#{object_id.to_hex}"
691 protected fun args
: Sequence[String]
709 fun fill_buffer
(s
: Buffer, base
: Int, signed
: Bool)
710 # Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if 'signed' and negative).
711 # assume < to_c max const of char
718 else if self == 0 then
725 var pos
= digit_count
(base
) - 1
726 while pos
>= 0 and n
> 0 do
727 s
[pos
] = (n
% base
).to_c
# Displayable form of `self' in base 10, signed.
redef fun to_s
do
	return to_base(10, true)
end
# Displayable form of `self' in base 16.
# `to_base' is called with the 'signed' flag set to false.
fun to_hex: String
do
	return to_base(16, false)
end
739 # return displayable int in base base and signed
740 fun to_base
(base
: Int, signed
: Bool): String
742 var l
= digit_count
(base
)
743 var s
= new Buffer.from
(" " * l
)
744 fill_buffer
(s
, base
, signed
)
750 # Pretty print self, print needed decimals up to a max of 6.
752 var str
= to_precision
( 6 )
754 for i
in [0..len-1
] do
759 else if c
== '.' then
760 return str
.substring
( 0, j
+2 )
762 return str
.substring
( 0, j
+1 )
768 # `self' representation with `nb' digits after the '.'.
769 fun to_precision
(nb
: Int): String import String::from_cstring
`{
773 size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
774 str = malloc(size + 1);
775 sprintf(str, "%.*f", (int)nb, recv );
777 return new_String_from_cstring( str );
784 var s
= new Buffer.with_capacity
(1)
789 # Returns true if the char is a numerical digit
792 if self >= '0' and self <= '9'
799 # Returns true if the char is an alpha digit
802 if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
# Is the char a letter (see `is_alpha') or a numerical digit (see `is_numeric')?
fun is_alphanumeric: Bool
do
	return self.is_numeric or self.is_alpha
end
814 redef class Collection[E
]
815 # Concatenate elements.
819 for e
in self do if e
!= null then s
.append
(e
.to_s
)
823 # Concatenate and separate each element with `sep'.
824 fun join
(sep
: String): String
826 if is_empty
then return ""
828 var s
= new Buffer # Result
833 if e
!= null then s
.append
(e
.to_s
)
840 if e
!= null then s
.append
(e
.to_s
)
848 # Fast implementation
856 if e
!= null then s
.append
(e
.to_s
)
864 # Concatenate couple of 'key value'.
865 # key and value are separated by 'couple_sep'.
866 # Each couple is separated from the next one with `sep'.
867 fun join
(sep
: String, couple_sep
: String): String
869 if is_empty
then return ""
871 var s
= new Buffer # Result
877 if e
!= null then s
.append
("{k}{couple_sep}{e}")
885 if e
!= null then s
.append
("{k}{couple_sep}{e}")
892 ###############################################################################
894 ###############################################################################
896 # Native strings are simple C char *
898 fun [](index
: Int): Char is intern
899 fun []=(index
: Int, item
: Char) is intern
900 fun copy_to
(dest
: NativeString, length
: Int, from
: Int, to
: Int) is intern
# Position of the first nul character.
# The native string is scanned from index 0 until a '\0' is found.
fun cstring_length: Int
do
	var pos = 0
	while self[pos] != '\0' do
		pos += 1
	end
	return pos
end
909 fun atoi
: Int is intern
910 fun atof
: Float is extern "atof"
913 # StringCapable objects can create native strings
914 interface StringCapable
915 protected fun calloc_string
(size
: Int): NativeString is intern
919 var _args_cache
: nullable Sequence[String]
# Return `_args_cache', calling `init_args' first when it is still null.
redef fun args: Sequence[String]
do
	if _args_cache == null then
		init_args
	end
	return _args_cache.as(not null)
end
# The name of the program as given by the OS
# It is built from the native argument at index 0.
fun program_name: String
do
	var native_name = native_argv(0)
	return new String.from_cstring(native_name)
end
933 # Initialize `args' with the contents of `native_argc' and `native_argv'.
934 private fun init_args
936 var argc
= native_argc
937 var args
= new Array[String].with_capacity
(0)
940 args
[i-1
] = new String.from_cstring
(native_argv
(i
))
946 private fun native_argc
: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.
948 private fun native_argv
(i
: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.