1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
14 # Basic manipulations of strings of characters
17 intrude import collection
# FIXME should be collection::array
20 ###############################################################################
22 ###############################################################################
24 # Common subclass for String and Buffer
25 abstract class AbstractString
26 super AbstractArrayRead[Char]
28 readable private var _items
: NativeString
# Character at position `index', read straight from the native `_items' storage.
# No bounds check is performed in this definition.
30 redef fun [](index
) do return _items
[index
]
34 # "abcd".substring(1, 2) # --> "bc"
35 # "abcd".substring(-1, 2) # --> "a"
36 # "abcd".substring(1, 0) # --> ""
37 # "abcd".substring(2, 5) # --> "cd"
38 fun substring
(from
: Int, count
: Int): String
42 if from
< 0 then from
= 0
43 if count
> length
then count
= length
45 var r
= new Buffer.with_capacity
(count
- from
)
56 # Create a substring from `self' beginning at the 'from' position
58 # "abcd".substring_from(1) # --> "bcd"
59 # "abcd".substring_from(-1) # --> "abcd"
60 # "abcd".substring_from(2) # --> "cd"
61 fun substring_from
(from
: Int): String
64 return substring
(from
, length
- from
)
67 # Does self have a substring 'str' starting from position 'pos'?
69 # "abcd".has_substring("bc",1) # --> true
70 # "abcd".has_substring("bc",2) # --> false
71 fun has_substring
(str
: String, pos
: Int): Bool
73 var itsindex
= str
.length
- 1
74 var myindex
= pos
+ itsindex
76 var itsitems
= str
._items
77 if myindex
> length
or itsindex
> myindex
then return false
78 var its_index_from
= str
._index_from
79 itsindex
+= its_index_from
80 while itsindex
>= its_index_from
do
81 if myitems
[myindex
] != itsitems
[itsindex
] then return false
88 # Is this string prefixed by 'prefix'
90 # "abcd".has_prefix("abc") # --> true
91 # "abcd".has_prefix("bc") # --> false
92 fun has_prefix
(prefix
: String): Bool do return has_substring
(prefix
,0)
94 # Is this string suffixed by 'suffix'
96 # "abcd".has_suffix("abc") # --> false
97 # "abcd".has_suffix("bcd") # --> true
# Implemented as a has_substring test anchored at offset `length - suffix.length'.
# A suffix longer than `self' makes that offset negative; the has_substring
# definitions in this file answer false in that case (`itsindex > myindex' guard).
98 fun has_suffix
(suffix
: String): Bool do return has_substring
(suffix
, length
- suffix
.length
)
100 # If `self' contains only digits, return the corresponding integer
104 return to_s
.to_cstring
.atoi
107 # If `self' contains a float, return the corresponding float
111 return to_s
.to_cstring
.atof
114 # If `self' contains only digits and alpha <= 'f', return the corresponding integer.
115 fun to_hex
: Int do return a_to
(16)
117 # If `self' contains only digits and letters, return the corresponding integer in a given base
118 fun a_to
(base
: Int) : Int
145 # Returns true if the string contains only Numeric values (and one "," or one "." character)
148 var has_point_or_comma
= false
153 if (i
== '.' or i
== ',') and not has_point_or_comma
155 has_point_or_comma
= true
164 # An upper case version of `self'
167 var s
= new Buffer.with_capacity
(length
)
168 for i
in self do s
.add
(i
.to_upper
)
172 # A lower case version of `self'
173 fun to_lower
: String
175 var s
= new Buffer.with_capacity
(length
)
176 for i
in self do s
.add
(i
.to_lower
)
180 # Trims trailing and preceding white spaces
181 # A whitespace is defined as any character which ascii value is less than or equal to 32
184 if self._length
== 0 then return self.to_s
185 # find position of the first non white space char (ascii > 32) from the start of the string
187 while self[start_pos
].ascii
<= 32 do
189 if start_pos
== _length
then return ""
191 # find position of the first non white space char from the end of the string
192 var end_pos
= length
- 1
193 while self[end_pos
].ascii
<= 32 do
195 if end_pos
== start_pos
then return self[start_pos
].to_s
197 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
210 # Immutable strings of characters.
216 redef type OTHER: String
218 # Index in _items of the start of the string
219 readable var _index_from
: Int
221 # Index in _items of the last item of the string
222 readable var _index_to
: Int
224 ################################################
225 # AbstractString specific methods #
226 ################################################
228 # Access a character at index in String
230 redef fun [](index
) do
232 # Check that the index (+ index_from) is not larger than indexTo
233 # In other terms, if the index is valid
234 assert (index
+ _index_from
) <= _index_to
235 return _items
[index
+ _index_from
]
238 # Create a substring.
240 # "abcd".substring(1, 2) # --> "bc"
241 # "abcd".substring(-1, 2) # --> "a"
242 # "abcd".substring(1, 0) # --> ""
243 # "abcd".substring(2, 5) # --> "cd"
245 # A "from" index < 0 will be replaced by 0
246 # Unless a count value is > 0 at the same time
247 # In this case, from += count and count -= from
249 redef fun substring
(from
: Int, count
: Int): String
255 if count
< 0 then count
= 0
259 var realFrom
= _index_from
+ from
261 if (realFrom
+ count
) > _index_to
then return new String.from_substring
(realFrom
, _index_to
, _items
)
263 if count
== 0 then return ""
265 return new String.from_substring
(realFrom
, realFrom
+ count
- 1, _items
)
268 # Create a substring from `self' beginning at the 'from' position
270 # "abcd".substring_from(1) # --> "bcd"
271 # "abcd".substring_from(-1) # --> "abcd"
272 # "abcd".substring_from(2) # --> "cd"
274 # As with substring, a "from" index < 0 will be replaced by 0
276 redef fun substring_from
(from
: Int): String
278 if from
> _length
then return ""
279 if from
< 0 then from
= 0
280 return substring
(from
, _length
)
283 # Does self have a substring 'str' starting from position 'pos'?
285 # "abcd".has_substring("bc",1) # --> true
286 # "abcd".has_substring("bc",2) # --> false
287 redef fun has_substring
(str
: String, pos
: Int): Bool
289 var itsindex
= str
._length
- 1
291 var myindex
= pos
+ itsindex
294 var itsitems
= str
._items
296 if myindex
> _length
or itsindex
> myindex
then return false
298 var itsindexfrom
= str
.index_from
299 itsindex
+= itsindexfrom
300 myindex
+= index_from
302 while itsindex
>= itsindexfrom
do
303 if myitems
[myindex
] != itsitems
[itsindex
] then return false
311 # An upper case version of `self'
312 redef fun to_upper
: String
314 var outstr
= calloc_string
(self._length
+ 1)
317 var myitems
= self._items
318 var index_from
= self._index_from
319 var max
= self._index_to
321 while index_from
<= max
do
322 outstr
[out_index
] = myitems
[index_from
].to_upper
327 outstr
[self.length
] = '\0'
329 return new String.with_native
(outstr
, self._length
)
332 # A lower case version of `self'
333 redef fun to_lower
: String
335 var outstr
= calloc_string
(self._length
+ 1)
338 var myitems
= self._items
339 var index_from
= self._index_from
340 var max
= self._index_to
342 while index_from
<= max
do
343 outstr
[out_index
] = myitems
[index_from
].to_lower
348 outstr
[self.length
] = '\0'
350 return new String.with_native
(outstr
, self._length
)
353 redef fun trim
: String
355 if self._length
== 0 then return self
356 # find position of the first non white space char (ascii > 32) from the start of the string
357 var start_pos
= self._index_from
358 while _items
[start_pos
].ascii
<= 32 do
360 if start_pos
== _index_to
+ 1 then return ""
362 # find position of the first non white space char from the end of the string
363 var end_pos
= _index_to
364 while _items
[end_pos
].ascii
<= 32 do
366 if end_pos
== start_pos
then return _items
[start_pos
].to_s
368 start_pos
-= index_from
369 end_pos
-= index_from
370 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
375 var i
= self._index_from
376 var imax
= self._index_to
383 ##################################################
384 # String Specific Methods #
385 ##################################################
387 # Creates a String object as a substring of another String
389 # From : index to start at
391 # To : Index to stop at (from + count -1)
393 private init from_substring
(from
: Int, to
: Int, internalString
: NativeString)
395 _items
= internalString
398 _length
= to
- from
+ 1
401 # Create a new string from a given char *.
402 init with_native
(nat
: NativeString, size
: Int)
408 _index_to
= _length
- 1
411 # Create a new string from a null terminated char *.
412 init from_cstring
(str
: NativeString)
414 with_native
(str
,str
.cstring_length
)
417 # Creates a new Nit String from an existing CString
418 # Pretty much equal to from_cstring but copies instead
419 # of passing a reference
420 # Avoids manual/automatic dealloc problems when dealing with native C code
421 init copy_from_native
(str
: NativeString)
423 var temp_length
= str
.cstring_length
424 var new_str
= calloc_string
(temp_length
+ 1)
425 str
.copy_to
(new_str
, temp_length
, 0, 0)
426 new_str
[temp_length
] = '\0'
427 with_native
(new_str
, temp_length
)
430 # Return a null terminated char *
431 fun to_cstring
: NativeString
434 if _index_from
> 0 or _index_to
!= items
.cstring_length
- 1 then
435 var newItems
= calloc_string
(_length
+ 1)
436 self.items
.copy_to
(newItems
, _length
, _index_from
, 0)
437 newItems
[length
] = '\0'
445 if not other
isa String or other
is null then return false
447 if self.object_id
== other
.object_id
then return true
449 var my_length
= _length
451 if other
._length
!= my_length
then return false
453 var my_index
= _index_from
454 var its_index
= other
._index_from
456 var last_iteration
= my_index
+ my_length
458 var itsitems
= other
._items
459 var myitems
= self._items
461 while my_index
< last_iteration
do
462 if myitems
[my_index
] != itsitems
[its_index
] then return false
470 # The comparison between two strings is done on a lexicographical basis
471 # Eg : "aa" < "b" => true
474 if self.object_id
== other
.object_id
then return false
476 var my_curr_char
: Char
477 var its_curr_char
: Char
479 var curr_id_self
= self._index_from
480 var curr_id_other
= other
._index_from
482 var my_items
= self._items
483 var its_items
= other
._items
485 var my_length
= self._length
486 var its_length
= other
._length
488 var max_iterations
= curr_id_self
+ my_length
490 while curr_id_self
< max_iterations
do
491 my_curr_char
= my_items
[curr_id_self
]
492 its_curr_char
= its_items
[curr_id_other
]
494 if my_curr_char
!= its_curr_char
then
495 if my_curr_char
< its_curr_char
then return true
503 if my_length
!= its_length
then
504 if my_length
< its_length
then return true
511 # The concatenation of `self' with `s'
512 fun +(s
: String): String
514 var my_length
= self._length
515 var its_length
= s
._length
517 var target_string
= calloc_string
(my_length
+ its_length
+ 1)
519 self._items
.copy_to
(target_string
, my_length
, _index_from
, 0)
520 s
._items
.copy_to
(target_string
, its_length
, s
._index_from
, my_length
)
522 target_string
[my_length
+ its_length
] = '\0'
524 return new String.with_native
(target_string
, my_length
+ its_length
)
527 # i repetitions of self
528 fun *(i
: Int): String
532 var my_length
= self._length
534 var final_length
= my_length
* i
536 var my_items
= self._items
538 var target_string
= calloc_string
((final_length
) + 1)
540 target_string
[final_length
] = '\0'
544 for iteration
in [1 .. i
] do
545 my_items
.copy_to
(target_string
, my_length
, 0, current_last
)
546 current_last
+= my_length
549 return new String.with_native
(target_string
, final_length
)
# A String is its own textual representation: `self' is returned as-is, without copying.
552 redef fun to_s
do return self
556 # djb2 hash algorithm
561 var strStart
= _index_from
565 while i
>= strStart
do
566 h
= (h
* 32) + h
+ self._items
[i
].ascii
574 # Mutable strings of characters.
579 super AbstractArray[Char]
581 redef type OTHER: String
583 redef fun []=(index
, item
)
585 if index
== length
then
589 assert index
>= 0 and index
< length
595 if _capacity
<= length
then enlarge
(length
+ 5)
600 redef fun enlarge
(cap
)
603 if cap
<= c
then return
604 while c
<= cap
do c
= c
* 2 + 2
605 var a
= calloc_string
(c
+1)
606 _items
.copy_to
(a
, length
, 0, 0)
615 if _capacity
< _length
+ sl
then enlarge
(_length
+ sl
)
616 s
.items
.copy_to
(_items
, sl
, s
._index_from
, _length
)
623 redef fun to_s
: String
626 var a
= calloc_string
(l
+1)
627 _items
.copy_to
(a
, l
, 0, 0)
629 # Ensure the afterlast byte is '\0' to nul-terminated char *
632 return new String.with_native
(a
, length
)
640 while i
< l1
and i
< l2
do
641 var c1
= self[i
].ascii
657 # Create a new empty string.
665 _capacity
= s
.length
+ 1
667 _items
= calloc_string
(_capacity
)
668 s
.items
.copy_to
(_items
, _length
, s
._index_from
, 0)
671 # Create a new empty string with a given capacity.
672 init with_capacity
(cap
: Int)
675 # _items = new NativeString.calloc(cap)
676 _items
= calloc_string
(cap
+1)
683 if not o
isa Buffer or o
is null then return false
685 if o
.length
!= l
then return false
690 if it
[i
] != oit
[i
] then return false
696 readable private var _capacity
: Int
699 ###############################################################################
701 ###############################################################################
704 # User readable representation of `self'.
705 fun to_s
: String do return inspect
707 # The class name of the object in NativeString format.
708 private fun native_class_name
: NativeString is intern
710 # The class name of the object.
711 # FIXME: real type information is not available at runtime.
712 # Therefore, for instance, an instance of List[Bool] has just
713 # "List" for class_name
714 fun class_name
: String do return new String.from_cstring
(native_class_name
)
716 # Developer readable representation of `self'.
717 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
720 return "<{inspect_head}>"
723 # Return "CLASSNAME:#OBJECTID".
724 # This function is mainly used with the redefinition of the inspect method
725 protected fun inspect_head
: String
727 return "{class_name}:#{object_id.to_hex}"
730 protected fun args
: Sequence[String]
748 fun fill_buffer
(s
: Buffer, base
: Int, signed
: Bool)
749 # Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if 'signed' and negative).
750 # assume < to_c max const of char
757 else if self == 0 then
764 var pos
= digit_count
(base
) - 1
765 while pos
>= 0 and n
> 0 do
766 s
[pos
] = (n
% base
).to_c
772 # return displayable int in base 10 and signed
773 redef fun to_s
do return to_base
(10,true)
775 # return displayable int in hexadecimal (unsigned (not now))
776 fun to_hex
: String do return to_base
(16,false)
778 # return displayable int in base base and signed
779 fun to_base
(base
: Int, signed
: Bool): String
781 var l
= digit_count
(base
)
782 var s
= new Buffer.from
(" " * l
)
783 fill_buffer
(s
, base
, signed
)
789 # Pretty print self, print needed decimals up to a max of 6.
791 var str
= to_precision
( 6 )
793 for i
in [0..len-1
] do
798 else if c
== '.' then
799 return str
.substring
( 0, j
+2 )
801 return str
.substring
( 0, j
+1 )
807 # `self' representation with `nb' digits after the '.'.
808 fun to_precision
(nb
: Int): String import String::from_cstring
`{
812 size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
813 str = malloc(size + 1);
814 sprintf(str, "%.*f", (int)nb, recv );
816 return new_String_from_cstring( str );
823 var s
= new Buffer.with_capacity
(1)
828 # Returns true if the char is a numerical digit
831 if self >= '0' and self <= '9'
838 # Returns true if the char is an alpha digit
841 if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
845 # Returns true if the char is an alpha or a numeric digit
846 fun is_alphanumeric
: Bool
848 if self.is_numeric
or self.is_alpha
then return true
853 redef class Collection[E
]
854 # Concatenate elements.
858 for e
in self do if e
!= null then s
.append
(e
.to_s
)
862 # Concatenate and separate each element with `sep'.
863 fun join
(sep
: String): String
865 if is_empty
then return ""
867 var s
= new Buffer # Result
872 if e
!= null then s
.append
(e
.to_s
)
879 if e
!= null then s
.append
(e
.to_s
)
887 # Fast implementation
895 if e
!= null then s
.append
(e
.to_s
)
903 # Concatenate couples of 'key value'.
904 # The key and value of a couple are separated by `couple_sep'.
905 # Consecutive couples are separated by `sep'.
906 fun join
(sep
: String, couple_sep
: String): String
908 if is_empty
then return ""
910 var s
= new Buffer # Result
916 if e
!= null then s
.append
("{k}{couple_sep}{e}")
924 if e
!= null then s
.append
("{k}{couple_sep}{e}")
931 ###############################################################################
933 ###############################################################################
935 # Native strings are simple C char *
# Read the character stored at position `index' (intern primitive).
937 fun [](index
: Int): Char is intern
# Store `item' at position `index' (intern primitive).
938 fun []=(index
: Int, item
: Char) is intern
# Native copy of characters from `self' into `dest' (intern primitive).
# NOTE(review): judging from the call sites in this file (e.g. String::+), `length' is the
# number of characters copied, `from' the start offset in `self' and `to' the start offset
# in `dest' -- confirm against the intern implementation.
939 fun copy_to
(dest
: NativeString, length
: Int, from
: Int, to
: Int) is intern
941 # Position of the first nul character.
942 fun cstring_length
: Int
945 while self[l
] != '\0' do l
+= 1
# Integer value read from this native string (intern primitive).
# NOTE(review): presumably mirrors C `atoi' -- behaviour on non-numeric text is not visible here.
948 fun atoi
: Int is intern
# Float value read from this native string; bound to the external C function "atof".
949 fun atof
: Float is extern "atof"
952 # StringCapable objects can create native strings
953 interface StringCapable
# Allocate a native string able to hold `size' characters (intern primitive).
# String code in this file typically requests one extra character and writes the
# trailing '\0' itself.
# NOTE(review): whether the returned memory is zero-filled is not visible here -- confirm.
954 protected fun calloc_string
(size
: Int): NativeString is intern
958 var _args_cache
: nullable Sequence[String]
960 redef fun args
: Sequence[String]
962 if _args_cache
== null then init_args
963 return _args_cache
.as(not null)
966 # The name of the program as given by the OS
967 fun program_name
: String
969 return new String.from_cstring
(native_argv
(0))
972 # Initialize `args' with the contents of `native_argc' and `native_argv'.
973 private fun init_args
975 var argc
= native_argc
976 var args
= new Array[String].with_capacity
(0)
979 args
[i-1
] = new String.from_cstring
(native_argv
(i
))
985 private fun native_argc
: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.
987 private fun native_argv
(i
: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.