1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
14 # Basic manipulations of strings of characters
17 intrude import collection
# FIXME should be collection::array
20 ###############################################################################
22 ###############################################################################
24 # Common subclass for String and Buffer
25 abstract class AbstractString
26 super AbstractArrayRead[Char]
# Native character storage backing this string.
# Typed as `NativeString'; the `readable' modifier also exposes it through an `items' reader.
28 readable private var _items
: NativeString
# Character found at position `index' of the underlying native storage.
# No offset and no bounds check is applied at this level.
redef fun [](index)
do
	var char_at_index = _items[index]
	return char_at_index
end
34 # "abcd".substring(1, 2) # --> "bc"
35 # "abcd".substring(-1, 2) # --> "a"
36 # "abcd".substring(1, 0) # --> ""
37 # "abcd".substring(2, 5) # --> "cd"
38 fun substring
(from
: Int, count
: Int): String
42 if from
< 0 then from
= 0
43 if count
> length
then count
= length
45 var r
= new Buffer.with_capacity
(count
- from
)
56 # Create a substring from `self' beginning at the 'from' position
58 # "abcd".substring(1) # --> "bcd"
59 # "abcd".substring(-1) # --> "abcd"
60 # "abcd".substring(2) # --> "cd"
61 fun substring_from
(from
: Int): String
64 return substring
(from
, length
- from
)
67 # Does self have a substring 'str' starting from position 'pos'
69 # "abcd".has_substring("bc",1) # --> true
70 # "abcd".has_substring("bc",2) # --> false
71 fun has_substring
(str
: String, pos
: Int): Bool
73 var itsindex
= str
.length
- 1
74 var myindex
= pos
+ itsindex
76 var itsitems
= str
._items
77 if myindex
> length
or itsindex
> myindex
then return false
78 var its_index_from
= str
._index_from
79 itsindex
+= its_index_from
80 while itsindex
>= its_index_from
do
81 if myitems
[myindex
] != itsitems
[itsindex
] then return false
# Does `self' start with `prefix'?
#
# "abcd".has_prefix("abc")	# --> true
# "abcd".has_prefix("bc")	# --> false
#
# Delegates to `has_substring', anchored at position 0.
fun has_prefix(prefix: String): Bool
do
	return has_substring(prefix, 0)
end
# Does `self' end with `suffix'?
#
# "abcd".has_suffix("abc")	# --> false
# "abcd".has_suffix("bcd")	# --> true
#
# Delegates to `has_substring', anchored at the position where a
# suffix of that size would have to begin.
fun has_suffix(suffix: String): Bool
do
	var offset = length - suffix.length
	return has_substring(suffix, offset)
end
100 # If `self' contains only digits, return the corresponding integer
104 return to_s
.to_cstring
.atoi
107 # If `self' contains a float, return the corresponding float
111 return to_s
.to_cstring
.atof
# Integer value of `self' read as a base 16 number.
# Expects `self' to contain only digits and letters up to 'f'.
# Simply forwards to `a_to' with base 16.
fun to_hex: Int
do
	return a_to(16)
end
117 # If `self' contains only digits and letters, return the corresponding integer in a given base
118 fun a_to
(base
: Int) : Int
145 # Returns true if the string contains only numeric characters (a single "," or "." separator is allowed)
148 var has_point_or_comma
= false
153 if (i
== '.' or i
== ',') and not has_point_or_comma
155 has_point_or_comma
= true
164 # An upper case version of `self'
167 var s
= new Buffer.with_capacity
(length
)
168 for i
in self do s
.add
(i
.to_upper
)
172 # A lower case version of `self'
173 fun to_lower
: String
175 var s
= new Buffer.with_capacity
(length
)
176 for i
in self do s
.add
(i
.to_lower
)
191 # Immutable strings of characters.
197 redef type OTHER: String
199 # Index in _items of the start of the string
200 readable var _index_from
: Int
202 # Index in _items of the last item of the string
203 readable var _index_to
: Int
205 ################################################
206 # AbstractString specific methods #
207 ################################################
209 # Access a character at index in String
211 redef fun [](index
) do
213 # Check that the index (+ index_from) is not larger than indexTo
214 # In other terms, if the index is valid
215 assert (index
+ _index_from
) <= _index_to
216 return _items
[index
+ _index_from
]
219 # Create a substring.
221 # "abcd".substring(1, 2) # --> "bc"
222 # "abcd".substring(-1, 2) # --> "a"
223 # "abcd".substring(1, 0) # --> ""
224 # "abcd".substring(2, 5) # --> "cd"
226 # A "from" index < 0 will be replaced by 0
227 # Unless a count value is > 0 at the same time
228 # In this case, from += count and count -= from
230 redef fun substring
(from
: Int, count
: Int): String
236 if count
< 0 then count
= 0
240 var realFrom
= _index_from
+ from
242 if (realFrom
+ count
) > _index_to
then return new String.from_substring
(realFrom
, _index_to
, _items
)
244 if count
== 0 then return ""
246 return new String.from_substring
(realFrom
, realFrom
+ count
- 1, _items
)
249 # Create a substring from `self' beginning at the 'from' position
251 # "abcd".substring_from(1) # --> "bcd"
252 # "abcd".substring_from(-1) # --> "abcd"
253 # "abcd".substring_from(2) # --> "cd"
255 # As with substring, a "from" index < 0 will be replaced by 0
257 redef fun substring_from
(from
: Int): String
259 if from
> _length
then return ""
260 if from
< 0 then from
= 0
261 return substring
(from
, _length
)
264 # Does self have a substring 'str' starting from position 'pos'
266 # "abcd".has_substring("bc",1) # --> true
267 # "abcd".has_substring("bc",2) # --> false
268 redef fun has_substring
(str
: String, pos
: Int): Bool
270 var itsindex
= str
._length
- 1
272 var myindex
= pos
+ itsindex
275 var itsitems
= str
._items
277 if myindex
> _length
or itsindex
> myindex
then return false
279 var itsindexfrom
= str
.index_from
280 itsindex
+= itsindexfrom
281 myindex
+= index_from
283 while itsindex
>= itsindexfrom
do
284 if myitems
[myindex
] != itsitems
[itsindex
] then return false
292 # An upper case version of `self'
293 redef fun to_upper
: String
295 var outstr
= calloc_string
(self._length
+ 1)
298 var myitems
= self._items
299 var index_from
= self._index_from
300 var max
= self._index_to
302 while index_from
<= max
do
303 outstr
[out_index
] = myitems
[index_from
].to_upper
308 outstr
[self.length
] = '\0'
310 return new String.with_native
(outstr
, self._length
)
313 # A lower case version of `self'
314 redef fun to_lower
: String
316 var outstr
= calloc_string
(self._length
+ 1)
319 var myitems
= self._items
320 var index_from
= self._index_from
321 var max
= self._index_to
323 while index_from
<= max
do
324 outstr
[out_index
] = myitems
[index_from
].to_lower
329 outstr
[self.length
] = '\0'
331 return new String.with_native
(outstr
, self._length
)
336 var i
= self._index_from
337 var imax
= self._index_to
344 ##################################################
345 # String Specific Methods #
346 ##################################################
348 # Creates a String object as a substring of another String
350 # From : index to start at
352 # To : Index to stop at (from + count -1)
354 private init from_substring
(from
: Int, to
: Int, internalString
: NativeString)
356 _items
= internalString
359 _length
= to
- from
+ 1
362 # Create a new string from a given char *.
363 init with_native
(nat
: NativeString, size
: Int)
369 _index_to
= _length
- 1
372 # Create a new string from a null terminated char *.
373 init from_cstring
(str
: NativeString)
375 with_native
(str
,str
.cstring_length
)
378 # Return a null terminated char *
379 fun to_cstring
: NativeString
382 if _index_from
> 0 or _index_to
!= items
.cstring_length
- 1 then
383 var newItems
= calloc_string
(_length
+ 1)
384 self.items
.copy_to
(newItems
, _length
, _index_from
, 0)
385 newItems
[length
] = '\0'
393 if not other
isa String or other
is null then return false
395 if self.object_id
== other
.object_id
then return true
397 var my_length
= _length
399 if other
._length
!= my_length
then return false
401 var my_index
= _index_from
402 var its_index
= other
._index_from
404 var last_iteration
= my_index
+ my_length
406 var itsitems
= other
._items
407 var myitems
= self._items
409 while my_index
< last_iteration
do
410 if myitems
[my_index
] != itsitems
[its_index
] then return false
418 # The comparison between two strings is done on a lexicographical basis
419 # Eg : "aa" < "b" => true
422 if self.object_id
== other
.object_id
then return false
424 var my_curr_char
: Char
425 var its_curr_char
: Char
427 var curr_id_self
= self._index_from
428 var curr_id_other
= other
._index_from
430 var my_items
= self._items
431 var its_items
= other
._items
433 var my_length
= self._length
434 var its_length
= other
._length
436 var max_iterations
= curr_id_self
+ my_length
438 while curr_id_self
< max_iterations
do
439 my_curr_char
= my_items
[curr_id_self
]
440 its_curr_char
= its_items
[curr_id_other
]
442 if my_curr_char
!= its_curr_char
then
443 if my_curr_char
< its_curr_char
then return true
451 if my_length
!= its_length
then
452 if my_length
< its_length
then return true
459 # The concatenation of `self' with `s'
460 fun +(s
: String): String
462 var my_length
= self._length
463 var its_length
= s
._length
465 var target_string
= calloc_string
(my_length
+ its_length
+ 1)
467 self._items
.copy_to
(target_string
, my_length
, _index_from
, 0)
468 s
._items
.copy_to
(target_string
, its_length
, s
._index_from
, my_length
)
470 target_string
[my_length
+ its_length
] = '\0'
472 return new String.with_native
(target_string
, my_length
+ its_length
)
475 # i repetitions of self
476 fun *(i
: Int): String
480 var my_length
= self._length
482 var final_length
= my_length
* i
484 var my_items
= self._items
486 var target_string
= calloc_string
((final_length
) + 1)
488 target_string
[final_length
] = '\0'
492 for iteration
in [1 .. i
] do
493 my_items
.copy_to
(target_string
, my_length
, 0, current_last
)
494 current_last
+= my_length
497 return new String.with_native
(target_string
, final_length
)
# A String is already its own string representation: answer `self' unchanged.
redef fun to_s
do
	return self
end
504 # djb2 hash algorithm
509 var strStart
= _index_from
513 while i
>= strStart
do
514 h
= (h
* 32) + h
+ self._items
[i
].ascii
522 # Mutable strings of characters.
527 super AbstractArray[Char]
529 redef type OTHER: String
531 redef fun []=(index
, item
)
533 if index
== length
then
537 assert index
>= 0 and index
< length
543 if _capacity
<= length
then enlarge
(length
+ 5)
548 redef fun enlarge
(cap
)
551 if cap
<= c
then return
552 while c
<= cap
do c
= c
* 2 + 2
553 var a
= calloc_string
(c
+1)
554 _items
.copy_to
(a
, length
, 0, 0)
563 if _capacity
< _length
+ sl
then enlarge
(_length
+ sl
)
564 s
.items
.copy_to
(_items
, sl
, s
._index_from
, _length
)
571 redef fun to_s
: String
574 var a
= calloc_string
(l
+1)
575 _items
.copy_to
(a
, l
, 0, 0)
577 # Ensure the byte after the last character is '\0' so that the char * is nul-terminated
580 return new String.with_native
(a
, length
)
588 while i
< l1
and i
< l2
do
589 var c1
= self[i
].ascii
605 # Create a new empty string.
613 _capacity
= s
.length
+ 1
615 _items
= calloc_string
(_capacity
)
616 s
.items
.copy_to
(_items
, _length
, s
._index_from
, 0)
619 # Create a new empty string with a given capacity.
620 init with_capacity
(cap
: Int)
623 # _items = new NativeString.calloc(cap)
624 _items
= calloc_string
(cap
+1)
631 if not o
isa Buffer or o
is null then return false
633 if o
.length
!= l
then return false
638 if it
[i
] != oit
[i
] then return false
644 readable private var _capacity
: Int
647 ###############################################################################
649 ###############################################################################
# User readable representation of `self'.
# Unless redefined, this is the same text as the one produced by `inspect'.
fun to_s: String
do
	return inspect
end
655 # The class name of the object in NativeString format.
656 private fun native_class_name
: NativeString is intern
# The name of the class of the object, as a String.
# Built from the nul-terminated name given by `native_class_name'.
# FIXME: real type information is not available at runtime.
# Therefore, for instance, an instance of List[Bool] has just
# "List" for class_name
fun class_name: String
do
	var native_name = native_class_name
	return new String.from_cstring(native_name)
end
664 # Developer readable representation of `self'.
665 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
668 return "<{inspect_head}>"
671 # Return "CLASSNAME:#OBJECTID".
672 # This function is mainly used with the redefinition of the inspect method
673 protected fun inspect_head
: String
675 return "{class_name}:#{object_id.to_hex}"
678 protected fun args
: Sequence[String]
696 fun fill_buffer
(s
: Buffer, base
: Int, signed
: Bool)
697 # Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if 'signed' and negative).
698 # assume < to_c max const of char
705 else if self == 0 then
712 var pos
= digit_count
(base
) - 1
713 while pos
>= 0 and n
> 0 do
714 s
[pos
] = (n
% base
).to_c
# Displayable form of `self': base 10, with the `signed' flag of `to_base' set.
redef fun to_s
do
	return to_base(10, true)
end
# Displayable form of `self' in hexadecimal.
# Forwards to `to_base' with base 16 and the `signed' flag unset.
# The original note read "unsigned (not now)"; how negative values
# come out is decided by `to_base', not here.
fun to_hex: String
do
	return to_base(16, false)
end
726 # return displayable int in base base and signed
727 fun to_base
(base
: Int, signed
: Bool): String
729 var l
= digit_count
(base
)
730 var s
= new Buffer.from
(" " * l
)
731 fill_buffer
(s
, base
, signed
)
737 # Pretty print self, print needed decimals up to a max of 6.
739 var str
= to_precision
( 6 )
741 for i
in [0..len-1
] do
746 else if c
== '.' then
747 return str
.substring
( 0, j
+2 )
749 return str
.substring
( 0, j
+1 )
755 # `self' representation with `nb' digits after the '.'.
756 fun to_precision
(nb
: Int): String import String::from_cstring
`{
760 size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
761 str = malloc(size + 1);
762 sprintf(str, "%.*f", (int)nb, recv );
764 return new_String_from_cstring( str );
771 var s
= new Buffer.with_capacity
(1)
776 # Returns true if the char is a numerical digit
779 if self >= '0' and self <= '9'
786 # Returns true if the char is an alphabetic letter (a-z or A-Z)
789 if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
793 # Returns true if the char is an alpha or a numeric digit
794 fun is_alphanumeric
: Bool
796 if self.is_numeric
or self.is_alpha
then return true
801 redef class Collection[E
]
802 # Concatenate elements.
806 for e
in self do if e
!= null then s
.append
(e
.to_s
)
810 # Concatenate and separate each elements with `sep'.
811 fun join
(sep
: String): String
813 if is_empty
then return ""
815 var s
= new Buffer # Result
820 if e
!= null then s
.append
(e
.to_s
)
827 if e
!= null then s
.append
(e
.to_s
)
835 # Fast implementation
843 if e
!= null then s
.append
(e
.to_s
)
851 # Concatenate couple of 'key value'.
852 # key and value are separated by 'couple_sep'.
853 # Couples are separated from each other by `sep'.
854 fun join
(sep
: String, couple_sep
: String): String
856 if is_empty
then return ""
858 var s
= new Buffer # Result
864 if e
!= null then s
.append
("{k}{couple_sep}{e}")
872 if e
!= null then s
.append
("{k}{couple_sep}{e}")
879 ###############################################################################
881 ###############################################################################
883 # Native strings are simple C char *
# Read the character stored at position `index'.
# Declared `is intern': there is no Nit body, the implementation is supplied by the compiler/runtime.
885 fun [](index
: Int): Char is intern
# Store `item' at position `index'.
# Declared `is intern': there is no Nit body, the implementation is supplied by the compiler/runtime.
886 fun []=(index
: Int, item
: Char) is intern
# Copy characters from `self' into `dest'.
# Judging from the call sites in this file, `length' is the number of
# characters copied, `from' the starting offset inside `self' and `to'
# the starting offset inside `dest'.
# NOTE(review): inferred from callers only -- confirm against the intrinsic.
# Declared `is intern': there is no Nit body, the implementation is supplied by the compiler/runtime.
887 fun copy_to
(dest
: NativeString, length
: Int, from
: Int, to
: Int) is intern
889 # Position of the first nul character.
890 fun cstring_length
: Int
893 while self[l
] != '\0' do l
+= 1
# Integer value read from this native string.
# NOTE(review): presumably behaves like the C function of the same name -- confirm.
# Declared `is intern': there is no Nit body, the implementation is supplied by the compiler/runtime.
896 fun atoi
: Int is intern
# Float value read from this native string.
# Bound through `is extern' to the external function named "atof".
897 fun atof
: Float is extern "atof"
900 # StringCapable objects can create native strings
901 interface StringCapable
# Obtain a new native string sized for `size' characters.
# Callers in this file ask for one extra slot and write the trailing '\0' themselves.
# NOTE(review): whether the returned memory is zero-filled is not visible here -- confirm.
# Declared `is intern': there is no Nit body, the implementation is supplied by the compiler/runtime.
902 protected fun calloc_string
(size
: Int): NativeString is intern
# Cached sequence of program arguments, or null while not yet computed.
# `args' calls `init_args' when this is still null, then returns it as non-null.
906 var _args_cache
: nullable Sequence[String]
908 redef fun args
: Sequence[String]
910 if _args_cache
== null then init_args
911 return _args_cache
.as(not null)
914 # The name of the program as given by the OS
915 fun program_name
: String
917 return new String.from_cstring
(native_argv
(0))
920 # Initialize `args' with the contents of `native_argc' and `native_argv'.
921 private fun init_args
923 var argc
= native_argc
924 var args
= new Array[String].with_capacity
(0)
927 args
[i-1
] = new String.from_cstring
(native_argv
(i
))
933 private fun native_argc
: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.
935 private fun native_argv
(i
: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.