1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
14 # Basic manipulations of strings of characters
17 intrude import collection
# FIXME should be collection::array
24 ###############################################################################
26 ###############################################################################
28 # Common subclass for String and Buffer
29 abstract class AbstractString
30 super AbstractArrayRead[Char]
32 readable private var _items
: NativeString
# Character stored at position `index' of the underlying native string.
redef fun [](index)
do
	var c = _items[index]
	return c
end
38 # "abcd".substring(1, 2) # --> "bc"
39 # "abcd".substring(-1, 2) # --> "a"
40 # "abcd".substring(1, 0) # --> ""
41 # "abcd".substring(2, 5) # --> "cd"
42 fun substring
(from
: Int, count
: Int): String
46 if from
< 0 then from
= 0
47 if count
> length
then count
= length
49 var r
= new Buffer.with_capacity
(count
- from
)
60 # Create a substring from `self' beginning at the 'from' position
62 # "abcd".substring_from(1) # --> "bcd"
63 # "abcd".substring_from(-1) # --> "abcd"
64 # "abcd".substring_from(2) # --> "cd"
65 fun substring_from
(from
: Int): String
68 return substring
(from
, length
- from
)
71 # Does self have a substring 'str' starting from position 'pos'?
73 # "abcd".has_substring("bc",1) # --> true
74 # "abcd".has_substring("bc",2) # --> false
75 fun has_substring
(str
: String, pos
: Int): Bool
77 var itsindex
= str
.length
- 1
78 var myindex
= pos
+ itsindex
80 var itsitems
= str
._items
81 if myindex
> length
or itsindex
> myindex
then return false
82 var its_index_from
= str
._index_from
83 itsindex
+= its_index_from
84 while itsindex
>= its_index_from
do
85 if myitems
[myindex
] != itsitems
[itsindex
] then return false
# Does `self' start with `prefix'?
#
# "abcd".has_prefix("abc") # --> true
# "abcd".has_prefix("bc") # --> false
fun has_prefix(prefix: String): Bool
do
	# A prefix is a substring anchored at position 0.
	return has_substring(prefix, 0)
end
# Does `self' end with `suffix'?
#
# "abcd".has_suffix("abc") # --> false
# "abcd".has_suffix("bcd") # --> true
fun has_suffix(suffix: String): Bool
do
	# A suffix is a substring anchored `suffix.length' characters before the end.
	var start = length - suffix.length
	return has_substring(suffix, start)
end
104 # If `self' contains only digits, return the corresponding integer
108 return to_s
.to_cstring
.atoi
111 # If `self' contains a float, return the corresponding float
115 return to_s
.to_cstring
.atof
# Integer value of `self' read as a base-16 number.
# `self' is expected to contain only digits and letters up to 'f'.
fun to_hex: Int
do
	return a_to(16)
end
121 # If `self' contains only digits and letters, return the corresponding integer in a given base
122 fun a_to
(base
: Int) : Int
149 # Returns true if the string contains only Numeric values (and one "," or one "." character)
152 var has_point_or_comma
= false
157 if (i
== '.' or i
== ',') and not has_point_or_comma
159 has_point_or_comma
= true
168 # An upper case version of `self'
171 var s
= new Buffer.with_capacity
(length
)
172 for i
in self do s
.add
(i
.to_upper
)
176 # A lower case version of `self'
177 fun to_lower
: String
179 var s
= new Buffer.with_capacity
(length
)
180 for i
in self do s
.add
(i
.to_lower
)
184 # Trims trailing and preceding white spaces
185 # A whitespace is defined as any character whose ascii value is less than or equal to 32
188 if self._length
== 0 then return self.to_s
189 # find position of the first non white space char (ascii > 32) from the start of the string
191 while self[start_pos
].ascii
<= 32 do
193 if start_pos
== _length
then return ""
195 # find position of the first non white space char from the end of the string
196 var end_pos
= length
- 1
197 while self[end_pos
].ascii
<= 32 do
199 if end_pos
== start_pos
then return self[start_pos
].to_s
201 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
214 # Immutable strings of characters.
220 redef type OTHER: String
222 # Index in _items of the start of the string
223 readable var _index_from
: Int
225 # Index in _items of the last item of the string
226 readable var _index_to
: Int
228 ################################################
229 # AbstractString specific methods #
230 ################################################
232 # Access a character at index in String
234 redef fun [](index
) do
236 # Check that the index (+ index_from) is not larger than indexTo
237 # In other terms, if the index is valid
238 assert (index
+ _index_from
) <= _index_to
239 return _items
[index
+ _index_from
]
242 # Create a substring.
244 # "abcd".substring(1, 2) # --> "bc"
245 # "abcd".substring(-1, 2) # --> "a"
246 # "abcd".substring(1, 0) # --> ""
247 # "abcd".substring(2, 5) # --> "cd"
249 # A "from" index < 0 will be replaced by 0
250 # Unless a count value is > 0 at the same time
251 # In this case, from += count and count -= from
253 redef fun substring
(from
: Int, count
: Int): String
259 if count
< 0 then count
= 0
263 var realFrom
= _index_from
+ from
265 if (realFrom
+ count
) > _index_to
then return new String.from_substring
(realFrom
, _index_to
, _items
)
267 if count
== 0 then return ""
269 return new String.from_substring
(realFrom
, realFrom
+ count
- 1, _items
)
272 # Create a substring from `self' beginning at the 'from' position
274 # "abcd".substring_from(1) # --> "bcd"
275 # "abcd".substring_from(-1) # --> "abcd"
276 # "abcd".substring_from(2) # --> "cd"
278 # As with substring, a "from" index < 0 will be replaced by 0
280 redef fun substring_from
(from
: Int): String
282 if from
> _length
then return ""
283 if from
< 0 then from
= 0
284 return substring
(from
, _length
)
287 # Does self have a substring 'str' starting from position 'pos'?
289 # "abcd".has_substring("bc",1) # --> true
290 # "abcd".has_substring("bc",2) # --> false
291 redef fun has_substring
(str
: String, pos
: Int): Bool
293 var itsindex
= str
._length
- 1
295 var myindex
= pos
+ itsindex
298 var itsitems
= str
._items
300 if myindex
> _length
or itsindex
> myindex
then return false
302 var itsindexfrom
= str
.index_from
303 itsindex
+= itsindexfrom
304 myindex
+= index_from
306 while itsindex
>= itsindexfrom
do
307 if myitems
[myindex
] != itsitems
[itsindex
] then return false
315 # An upper case version of `self'
316 redef fun to_upper
: String
318 var outstr
= calloc_string
(self._length
+ 1)
321 var myitems
= self._items
322 var index_from
= self._index_from
323 var max
= self._index_to
325 while index_from
<= max
do
326 outstr
[out_index
] = myitems
[index_from
].to_upper
331 outstr
[self.length
] = '\0'
333 return new String.with_native
(outstr
, self._length
)
336 # A lower case version of `self'
337 redef fun to_lower
: String
339 var outstr
= calloc_string
(self._length
+ 1)
342 var myitems
= self._items
343 var index_from
= self._index_from
344 var max
= self._index_to
346 while index_from
<= max
do
347 outstr
[out_index
] = myitems
[index_from
].to_lower
352 outstr
[self.length
] = '\0'
354 return new String.with_native
(outstr
, self._length
)
357 redef fun trim
: String
359 if self._length
== 0 then return self
360 # find position of the first non white space char (ascii > 32) from the start of the string
361 var start_pos
= self._index_from
362 while _items
[start_pos
].ascii
<= 32 do
364 if start_pos
== _index_to
+ 1 then return ""
366 # find position of the first non white space char from the end of the string
367 var end_pos
= _index_to
368 while _items
[end_pos
].ascii
<= 32 do
370 if end_pos
== start_pos
then return _items
[start_pos
].to_s
372 start_pos
-= index_from
373 end_pos
-= index_from
374 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
379 var i
= self._index_from
380 var imax
= self._index_to
387 ##################################################
388 # String Specific Methods #
389 ##################################################
391 # Creates a String object as a substring of another String
393 # From : index to start at
395 # To : Index to stop at (from + count -1)
397 private init from_substring
(from
: Int, to
: Int, internalString
: NativeString)
399 _items
= internalString
402 _length
= to
- from
+ 1
405 # Create a new string from a given char *.
406 init with_native
(nat
: NativeString, size
: Int)
412 _index_to
= _length
- 1
415 # Create a new string from a null terminated char *.
416 init from_cstring
(str
: NativeString)
418 with_native
(str
,str
.cstring_length
)
421 # Creates a new Nit String from an existing CString
422 # Pretty much equals to from_cstring but copies instead
423 # of passing a reference
424 # Avoids manual/automatic dealloc problems when dealing with native C code
425 init copy_from_native
(str
: NativeString)
427 var temp_length
= str
.cstring_length
428 var new_str
= calloc_string
(temp_length
+ 1)
429 str
.copy_to
(new_str
, temp_length
, 0, 0)
430 new_str
[temp_length
] = '\0'
431 with_native
(new_str
, temp_length
)
434 # Return a null terminated char *
435 fun to_cstring
: NativeString
438 if _index_from
> 0 or _index_to
!= items
.cstring_length
- 1 then
439 var newItems
= calloc_string
(_length
+ 1)
440 self.items
.copy_to
(newItems
, _length
, _index_from
, 0)
441 newItems
[length
] = '\0'
449 if not other
isa String or other
is null then return false
451 if self.object_id
== other
.object_id
then return true
453 var my_length
= _length
455 if other
._length
!= my_length
then return false
457 var my_index
= _index_from
458 var its_index
= other
._index_from
460 var last_iteration
= my_index
+ my_length
462 var itsitems
= other
._items
463 var myitems
= self._items
465 while my_index
< last_iteration
do
466 if myitems
[my_index
] != itsitems
[its_index
] then return false
474 # The comparison between two strings is done on a lexicographical basis
475 # Eg : "aa" < "b" => true
478 if self.object_id
== other
.object_id
then return false
480 var my_curr_char
: Char
481 var its_curr_char
: Char
483 var curr_id_self
= self._index_from
484 var curr_id_other
= other
._index_from
486 var my_items
= self._items
487 var its_items
= other
._items
489 var my_length
= self._length
490 var its_length
= other
._length
492 var max_iterations
= curr_id_self
+ my_length
494 while curr_id_self
< max_iterations
do
495 my_curr_char
= my_items
[curr_id_self
]
496 its_curr_char
= its_items
[curr_id_other
]
498 if my_curr_char
!= its_curr_char
then
499 if my_curr_char
< its_curr_char
then return true
507 return my_length
< its_length
510 # The concatenation of `self' with `r'
511 fun +(s
: String): String
513 var my_length
= self._length
514 var its_length
= s
._length
516 var target_string
= calloc_string
(my_length
+ its_length
+ 1)
518 self._items
.copy_to
(target_string
, my_length
, _index_from
, 0)
519 s
._items
.copy_to
(target_string
, its_length
, s
._index_from
, my_length
)
521 target_string
[my_length
+ its_length
] = '\0'
523 return new String.with_native
(target_string
, my_length
+ its_length
)
526 # i repetitions of self
527 fun *(i
: Int): String
531 var my_length
= self._length
533 var final_length
= my_length
* i
535 var my_items
= self._items
537 var target_string
= calloc_string
((final_length
) + 1)
539 target_string
[final_length
] = '\0'
543 for iteration
in [1 .. i
] do
544 my_items
.copy_to
(target_string
, my_length
, 0, current_last
)
545 current_last
+= my_length
548 return new String.with_native
(target_string
, final_length
)
# A String is already its own textual representation: return `self' unchanged.
redef fun to_s
do
	return self
end
555 # djb2 hash algorithm
560 var strStart
= _index_from
564 while i
>= strStart
do
565 h
= (h
* 32) + h
+ self._items
[i
].ascii
573 # Mutable strings of characters.
578 super AbstractArray[Char]
580 redef type OTHER: String
582 redef fun []=(index
, item
)
584 if index
== length
then
588 assert index
>= 0 and index
< length
594 if _capacity
<= length
then enlarge
(length
+ 5)
599 redef fun enlarge
(cap
)
602 if cap
<= c
then return
603 while c
<= cap
do c
= c
* 2 + 2
604 var a
= calloc_string
(c
+1)
605 _items
.copy_to
(a
, length
, 0, 0)
614 if _capacity
< _length
+ sl
then enlarge
(_length
+ sl
)
615 s
.items
.copy_to
(_items
, sl
, s
._index_from
, _length
)
622 redef fun to_s
: String
625 var a
= calloc_string
(l
+1)
626 _items
.copy_to
(a
, l
, 0, 0)
628 # Ensure the afterlast byte is '\0' to nul-terminated char *
631 return new String.with_native
(a
, length
)
639 while i
< l1
and i
< l2
do
640 var c1
= self[i
].ascii
656 # Create a new empty string.
664 _capacity
= s
.length
+ 1
666 _items
= calloc_string
(_capacity
)
667 s
.items
.copy_to
(_items
, _length
, s
._index_from
, 0)
670 # Create a new empty string with a given capacity.
671 init with_capacity
(cap
: Int)
674 # _items = new NativeString.calloc(cap)
675 _items
= calloc_string
(cap
+1)
682 if not o
isa Buffer or o
is null then return false
684 if o
.length
!= l
then return false
689 if it
[i
] != oit
[i
] then return false
695 readable private var _capacity
: Int
698 ###############################################################################
700 ###############################################################################
# User readable representation of `self'.
# This definition simply returns the result of `inspect'.
fun to_s: String
do
	return inspect
end
706 # The class name of the object in NativeString format.
707 private fun native_class_name
: NativeString is intern
# The class name of the object.
# FIXME: real type information is not available at runtime.
# Therefore, for instance, an instance of List[Bool] has just
# "List" for class_name
fun class_name: String
do
	# Wrap the native (C string) class name into a Nit String.
	var native_name = native_class_name
	return new String.from_cstring(native_name)
end
715 # Developer readable representation of `self'.
716 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
719 return "<{inspect_head}>"
722 # Return "CLASSNAME:#OBJECTID".
723 # This function is mainly used with the redefinition of the inspect method
724 protected fun inspect_head
: String
726 return "{class_name}:#{object_id.to_hex}"
729 protected fun args
: Sequence[String]
747 fun fill_buffer
(s
: Buffer, base
: Int, signed
: Bool)
748 # Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if 'signed' and negative).
749 # assume < to_c max const of char
756 else if self == 0 then
763 var pos
= digit_count
(base
) - 1
764 while pos
>= 0 and n
> 0 do
765 s
[pos
] = (n
% base
).to_c
# Displayable form of `self': base 10, with sign handling enabled.
redef fun to_s
do
	return to_base(10, true)
end
# Displayable form of `self' in base 16; `signed' is passed as false to `to_base'.
fun to_hex: String
do
	return to_base(16, false)
end
777 # return displayable int in base base and signed
778 fun to_base
(base
: Int, signed
: Bool): String
780 var l
= digit_count
(base
)
781 var s
= new Buffer.from
(" " * l
)
782 fill_buffer
(s
, base
, signed
)
788 # Pretty print self, print needed decimals up to a max of 6.
790 var str
= to_precision
( 3 )
792 for i
in [0..len-1
] do
797 else if c
== '.' then
798 return str
.substring
( 0, j
+2 )
800 return str
.substring
( 0, j
+1 )
806 # `self' representation with `nb' digits after the '.'.
807 fun to_precision
(nb
: Int): String
809 if nb
== 0 then return self.to_i
.to_s
811 for i
in [0..nb
[ do f
= f
* 10.0
818 if i
== 0 then return "0.0"
822 var p1
= s
.substring
(0, s
.length-nb
)
823 var p2
= s
.substring
(s
.length-nb
, nb
)
826 return "0." + ("0"*(nb-sl
)) + s
830 fun to_precision_native
(nb
: Int): String import String::from_cstring
`{
834 size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
835 str = malloc(size + 1);
836 sprintf(str, "%.*f", (int)nb, recv );
838 return new_String_from_cstring( str );
845 var s
= new Buffer.with_capacity
(1)
850 # Returns true if the char is a numerical digit
853 if self >= '0' and self <= '9'
860 # Returns true if the char is an alpha digit
863 if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
867 # Returns true if the char is an alpha or a numeric digit
868 fun is_alphanumeric
: Bool
870 if self.is_numeric
or self.is_alpha
then return true
875 redef class Collection[E
]
876 # Concatenate elements.
880 for e
in self do if e
!= null then s
.append
(e
.to_s
)
884 # Concatenate and separate each element with `sep'.
885 fun join
(sep
: String): String
887 if is_empty
then return ""
889 var s
= new Buffer # Result
894 if e
!= null then s
.append
(e
.to_s
)
901 if e
!= null then s
.append
(e
.to_s
)
909 # Fast implementation
917 if e
!= null then s
.append
(e
.to_s
)
925 # Concatenate couple of 'key value'.
926 # key and value are separated by 'couple_sep'.
927 # each couple is separated from the next one with `sep'.
928 fun join
(sep
: String, couple_sep
: String): String
930 if is_empty
then return ""
932 var s
= new Buffer # Result
938 if e
!= null then s
.append
("{k}{couple_sep}{e}")
946 if e
!= null then s
.append
("{k}{couple_sep}{e}")
953 ###############################################################################
955 ###############################################################################
957 # Native strings are simple C char *
# Character stored at position `index'.
# Implemented natively (`is intern'); no bounds check is visible from here.
959 fun [](index
: Int): Char is intern
# Store `item' at position `index'.
# Implemented natively (`is intern'); no bounds check is visible from here.
960 fun []=(index
: Int, item
: Char) is intern
# Copy `length' characters of `self', starting at offset `from',
# into `dest' starting at offset `to'.
# (Parameter roles as used by the callers in this file, e.g. String::+;
# the implementation itself is intern and not visible here.)
961 fun copy_to
(dest
: NativeString, length
: Int, from
: Int, to
: Int) is intern
963 # Position of the first nul character.
964 fun cstring_length
: Int
967 while self[l
] != '\0' do l
+= 1
# Integer read from this native string (intern implementation).
# NOTE(review): presumably follows the semantics of C `atoi' - confirm in the runtime.
970 fun atoi
: Int is intern
# Float read from this native string.
# Bound to the external function named "atof".
971 fun atof
: Float is extern "atof"
974 # StringCapable objects can create native strings
975 interface StringCapable
# Allocate a native string able to hold `size' characters (intern implementation).
# Callers in this file pass the wanted length + 1 to keep room for the final '\0'.
# NOTE(review): the name suggests zero-filled memory (calloc) - confirm in the runtime.
976 protected fun calloc_string
(size
: Int): NativeString is intern
980 var _args_cache
: nullable Sequence[String]
982 redef fun args
: Sequence[String]
984 if _args_cache
== null then init_args
985 return _args_cache
.as(not null)
988 # The name of the program as given by the OS
989 fun program_name
: String
991 return new String.from_cstring
(native_argv
(0))
994 # Initialize `args' with the contents of `native_argc' and `native_argv'.
995 private fun init_args
997 var argc
= native_argc
998 var args
= new Array[String].with_capacity
(0)
1001 args
[i-1
] = new String.from_cstring
(native_argv
(i
))
1007 # First argument of the main C function.
1008 private fun native_argc
: Int is intern
1010 # Second argument of the main C function.
# Return the `i'-th entry as a native string (intern implementation).
# In this file, index 0 is read by `program_name' and the entries read by
# `init_args' are stored in `args' at index `i-1'.
1011 private fun native_argv
(i
: Int): NativeString is intern