1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
14 # Basic manipulations of strings of characters
17 intrude import collection
# FIXME should be collection::array
20 ###############################################################################
22 ###############################################################################
24 # Common subclass for String and Buffer
25 abstract class AbstractString
26 super AbstractArrayRead[Char]
28 readable private var _items
: NativeString
30 redef fun [](index
) do return _items
[index
]
34 # "abcd".substring(1, 2) # --> "bc"
35 # "abcd".substring(-1, 2) # --> "a"
36 # "abcd".substring(1, 0) # --> ""
37 # "abcd".substring(2, 5) # --> "cd"
38 fun substring
(from
: Int, count
: Int): String
42 if from
< 0 then from
= 0
43 if count
> length
then count
= length
45 var r
= new Buffer.with_capacity
(count
- from
)
56 # Create a substring from `self' beginning at the 'from' position
58 # "abcd".substring_from(1) # --> "bcd"
59 # "abcd".substring_from(-1) # --> "abcd"
60 # "abcd".substring_from(2) # --> "cd"
61 fun substring_from
(from
: Int): String
64 return substring
(from
, length
- from
)
67 # Does self have a substring 'str' starting from position 'pos'?
69 # "abcd".has_substring("bc",1) # --> true
70 # "abcd".has_substring("bc",2) # --> false
71 fun has_substring
(str
: String, pos
: Int): Bool
73 var itsindex
= str
.length
- 1
74 var myindex
= pos
+ itsindex
76 var itsitems
= str
._items
77 if myindex
> length
or itsindex
> myindex
then return false
78 var its_index_from
= str
._index_from
79 itsindex
+= its_index_from
80 while itsindex
>= its_index_from
do
81 if myitems
[myindex
] != itsitems
[itsindex
] then return false
88 # Is this string prefixed by 'prefix'
90 # "abcd".has_prefix("abc") # --> true
91 # "abcd".has_prefix("bc") # --> false
92 fun has_prefix
(prefix
: String): Bool do return has_substring
(prefix
,0)
94 # Is this string suffixed by 'suffix'
96 # "abcd".has_suffix("abc") # --> false
97 # "abcd".has_suffix("bcd") # --> true
98 fun has_suffix
(suffix
: String): Bool do return has_substring
(suffix
, length
- suffix
.length
)
100 # If `self' contains only digits, return the corresponding integer
104 return to_s
.to_cstring
.atoi
107 # If `self' contains a float, return the corresponding float
111 return to_s
.to_cstring
.atof
114 # If `self' contains only digits and alpha <= 'f', return the corresponding integer.
115 fun to_hex
: Int do return a_to
(16)
117 # If `self' contains only digits and letters, return the corresponding integer in a given base
118 fun a_to
(base
: Int) : Int
145 # Returns true if the string contains only Numeric values (and one "," or one "." character)
148 var has_point_or_comma
= false
153 if (i
== '.' or i
== ',') and not has_point_or_comma
155 has_point_or_comma
= true
164 # An upper case version of `self'
167 var s
= new Buffer.with_capacity
(length
)
168 for i
in self do s
.add
(i
.to_upper
)
172 # A lower case version of `self'
173 fun to_lower
: String
175 var s
= new Buffer.with_capacity
(length
)
176 for i
in self do s
.add
(i
.to_lower
)
180 # Trims trailing and preceding white spaces
181 # A whitespace is defined as any character which ascii value is less than or equal to 32
184 if self._length
== 0 then return self.to_s
185 # find position of the first non white space char (ascii > 32) from the start of the string
187 while self[start_pos
].ascii
<= 32 do
189 if start_pos
== _length
then return ""
191 # find position of the first non white space char from the end of the string
192 var end_pos
= length
- 1
193 while self[end_pos
].ascii
<= 32 do
195 if end_pos
== start_pos
then return self[start_pos
].to_s
197 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
210 # Immutable strings of characters.
216 redef type OTHER: String
218 # Index in _items of the start of the string
219 readable var _index_from
: Int
221 # Index in _items of the last item of the string
222 readable var _index_to
: Int
224 ################################################
225 # AbstractString specific methods #
226 ################################################
228 # Access a character at index in String
230 redef fun [](index
) do
232 # Check that the index (+ index_from) is not larger than indexTo
233 # In other terms, if the index is valid
234 assert (index
+ _index_from
) <= _index_to
235 return _items
[index
+ _index_from
]
238 # Create a substring.
240 # "abcd".substring(1, 2) # --> "bc"
241 # "abcd".substring(-1, 2) # --> "a"
242 # "abcd".substring(1, 0) # --> ""
243 # "abcd".substring(2, 5) # --> "cd"
245 # A "from" index < 0 will be replaced by 0
246 # Unless a count value is > 0 at the same time
247 # In this case, from += count and count -= from
249 redef fun substring
(from
: Int, count
: Int): String
255 if count
< 0 then count
= 0
259 var realFrom
= _index_from
+ from
261 if (realFrom
+ count
) > _index_to
then return new String.from_substring
(realFrom
, _index_to
, _items
)
263 if count
== 0 then return ""
265 return new String.from_substring
(realFrom
, realFrom
+ count
- 1, _items
)
268 # Create a substring from `self' beginning at the 'from' position
270 # "abcd".substring_from(1) # --> "bcd"
271 # "abcd".substring_from(-1) # --> "abcd"
272 # "abcd".substring_from(2) # --> "cd"
274 # As with substring, a "from" index < 0 will be replaced by 0
276 redef fun substring_from
(from
: Int): String
278 if from
> _length
then return ""
279 if from
< 0 then from
= 0
280 return substring
(from
, _length
)
283 # Does self have a substring 'str' starting from position 'pos'?
285 # "abcd".has_substring("bc",1) # --> true
286 # "abcd".has_substring("bc",2) # --> false
287 redef fun has_substring
(str
: String, pos
: Int): Bool
289 var itsindex
= str
._length
- 1
291 var myindex
= pos
+ itsindex
294 var itsitems
= str
._items
296 if myindex
> _length
or itsindex
> myindex
then return false
298 var itsindexfrom
= str
.index_from
299 itsindex
+= itsindexfrom
300 myindex
+= index_from
302 while itsindex
>= itsindexfrom
do
303 if myitems
[myindex
] != itsitems
[itsindex
] then return false
311 # An upper case version of `self'
312 redef fun to_upper
: String
314 var outstr
= calloc_string
(self._length
+ 1)
317 var myitems
= self._items
318 var index_from
= self._index_from
319 var max
= self._index_to
321 while index_from
<= max
do
322 outstr
[out_index
] = myitems
[index_from
].to_upper
327 outstr
[self.length
] = '\0'
329 return new String.with_native
(outstr
, self._length
)
332 # A lower case version of `self'
333 redef fun to_lower
: String
335 var outstr
= calloc_string
(self._length
+ 1)
338 var myitems
= self._items
339 var index_from
= self._index_from
340 var max
= self._index_to
342 while index_from
<= max
do
343 outstr
[out_index
] = myitems
[index_from
].to_lower
348 outstr
[self.length
] = '\0'
350 return new String.with_native
(outstr
, self._length
)
353 redef fun trim
: String
355 if self._length
== 0 then return self
356 # find position of the first non white space char (ascii > 32) from the start of the string
357 var start_pos
= self._index_from
358 while _items
[start_pos
].ascii
<= 32 do
360 if start_pos
== _index_to
+ 1 then return ""
362 # find position of the first non white space char from the end of the string
363 var end_pos
= _index_to
364 while _items
[end_pos
].ascii
<= 32 do
366 if end_pos
== start_pos
then return _items
[start_pos
].to_s
368 start_pos
-= index_from
369 end_pos
-= index_from
370 return self.substring
(start_pos
, end_pos
- start_pos
+ 1)
375 var i
= self._index_from
376 var imax
= self._index_to
383 ##################################################
384 # String Specific Methods #
385 ##################################################
387 # Creates a String object as a substring of another String
389 # From : index to start at
391 # To : Index to stop at (from + count -1)
393 private init from_substring
(from
: Int, to
: Int, internalString
: NativeString)
395 _items
= internalString
398 _length
= to
- from
+ 1
401 # Create a new string from a given char *.
402 init with_native
(nat
: NativeString, size
: Int)
408 _index_to
= _length
- 1
411 # Create a new string from a null terminated char *.
412 init from_cstring
(str
: NativeString)
414 with_native
(str
,str
.cstring_length
)
417 # Creates a new Nit String from an existing CString
418 # Pretty much equals to from_cstring but copies instead
419 # of passing a reference
420 # Avoids manual/automatic dealloc problems when dealing with native C code
421 init copy_from_native
(str
: NativeString)
423 var temp_length
= str
.cstring_length
424 var new_str
= calloc_string
(temp_length
+ 1)
425 str
.copy_to
(new_str
, temp_length
, 0, 0)
426 new_str
[temp_length
] = '\0'
427 with_native
(new_str
, temp_length
)
430 # Return a null terminated char *
431 fun to_cstring
: NativeString
434 if _index_from
> 0 or _index_to
!= items
.cstring_length
- 1 then
435 var newItems
= calloc_string
(_length
+ 1)
436 self.items
.copy_to
(newItems
, _length
, _index_from
, 0)
437 newItems
[length
] = '\0'
445 if not other
isa String or other
is null then return false
447 if self.object_id
== other
.object_id
then return true
449 var my_length
= _length
451 if other
._length
!= my_length
then return false
453 var my_index
= _index_from
454 var its_index
= other
._index_from
456 var last_iteration
= my_index
+ my_length
458 var itsitems
= other
._items
459 var myitems
= self._items
461 while my_index
< last_iteration
do
462 if myitems
[my_index
] != itsitems
[its_index
] then return false
470 # The comparison between two strings is done on a lexicographical basis
471 # Eg : "aa" < "b" => true
474 if self.object_id
== other
.object_id
then return false
476 var my_curr_char
: Char
477 var its_curr_char
: Char
479 var curr_id_self
= self._index_from
480 var curr_id_other
= other
._index_from
482 var my_items
= self._items
483 var its_items
= other
._items
485 var my_length
= self._length
486 var its_length
= other
._length
488 var max_iterations
= curr_id_self
+ my_length
490 while curr_id_self
< max_iterations
do
491 my_curr_char
= my_items
[curr_id_self
]
492 its_curr_char
= its_items
[curr_id_other
]
494 if my_curr_char
!= its_curr_char
then
495 if my_curr_char
< its_curr_char
then return true
503 return my_length
< its_length
506 # The concatenation of `self' with `s'
507 fun +(s
: String): String
509 var my_length
= self._length
510 var its_length
= s
._length
512 var target_string
= calloc_string
(my_length
+ its_length
+ 1)
514 self._items
.copy_to
(target_string
, my_length
, _index_from
, 0)
515 s
._items
.copy_to
(target_string
, its_length
, s
._index_from
, my_length
)
517 target_string
[my_length
+ its_length
] = '\0'
519 return new String.with_native
(target_string
, my_length
+ its_length
)
522 # i repetitions of self
523 fun *(i
: Int): String
527 var my_length
= self._length
529 var final_length
= my_length
* i
531 var my_items
= self._items
533 var target_string
= calloc_string
((final_length
) + 1)
535 target_string
[final_length
] = '\0'
539 for iteration
in [1 .. i
] do
540 my_items
.copy_to
(target_string
, my_length
, 0, current_last
)
541 current_last
+= my_length
544 return new String.with_native
(target_string
, final_length
)
547 redef fun to_s
do return self
551 # djb2 hash algorithm
556 var strStart
= _index_from
560 while i
>= strStart
do
561 h
= (h
* 32) + h
+ self._items
[i
].ascii
569 # Mutable strings of characters.
574 super AbstractArray[Char]
576 redef type OTHER: String
578 redef fun []=(index
, item
)
580 if index
== length
then
584 assert index
>= 0 and index
< length
590 if _capacity
<= length
then enlarge
(length
+ 5)
595 redef fun enlarge
(cap
)
598 if cap
<= c
then return
599 while c
<= cap
do c
= c
* 2 + 2
600 var a
= calloc_string
(c
+1)
601 _items
.copy_to
(a
, length
, 0, 0)
610 if _capacity
< _length
+ sl
then enlarge
(_length
+ sl
)
611 s
.items
.copy_to
(_items
, sl
, s
._index_from
, _length
)
618 redef fun to_s
: String
621 var a
= calloc_string
(l
+1)
622 _items
.copy_to
(a
, l
, 0, 0)
624 # Ensure the byte after the last character is '\0' so that the char * is nul-terminated
627 return new String.with_native
(a
, length
)
635 while i
< l1
and i
< l2
do
636 var c1
= self[i
].ascii
652 # Create a new empty string.
660 _capacity
= s
.length
+ 1
662 _items
= calloc_string
(_capacity
)
663 s
.items
.copy_to
(_items
, _length
, s
._index_from
, 0)
666 # Create a new empty string with a given capacity.
667 init with_capacity
(cap
: Int)
670 # _items = new NativeString.calloc(cap)
671 _items
= calloc_string
(cap
+1)
678 if not o
isa Buffer or o
is null then return false
680 if o
.length
!= l
then return false
685 if it
[i
] != oit
[i
] then return false
691 readable private var _capacity
: Int
694 ###############################################################################
696 ###############################################################################
699 # User readable representation of `self'.
700 fun to_s
: String do return inspect
702 # The class name of the object in NativeString format.
703 private fun native_class_name
: NativeString is intern
705 # The class name of the object.
706 # FIXME: real type information is not available at runtime.
707 # Therefore, for instance, an instance of List[Bool] has just
708 # "List" for class_name
709 fun class_name
: String do return new String.from_cstring
(native_class_name
)
711 # Developer readable representation of `self'.
712 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
715 return "<{inspect_head}>"
718 # Return "CLASSNAME:#OBJECTID".
719 # This function is mainly used with the redefinition of the inspect method
720 protected fun inspect_head
: String
722 return "{class_name}:#{object_id.to_hex}"
725 protected fun args
: Sequence[String]
743 fun fill_buffer
(s
: Buffer, base
: Int, signed
: Bool)
744 # Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if 'signed' and negative).
745 # assume < to_c max const of char
752 else if self == 0 then
759 var pos
= digit_count
(base
) - 1
760 while pos
>= 0 and n
> 0 do
761 s
[pos
] = (n
% base
).to_c
767 # return displayable int in base 10 and signed
768 redef fun to_s
do return to_base
(10,true)
770 # return displayable int in hexadecimal (unsigned (not now))
771 fun to_hex
: String do return to_base
(16,false)
773 # return displayable int in base base and signed
774 fun to_base
(base
: Int, signed
: Bool): String
776 var l
= digit_count
(base
)
777 var s
= new Buffer.from
(" " * l
)
778 fill_buffer
(s
, base
, signed
)
784 # Pretty print self, print needed decimals up to a max of 6.
786 var str
= to_precision
( 6 )
788 for i
in [0..len-1
] do
793 else if c
== '.' then
794 return str
.substring
( 0, j
+2 )
796 return str
.substring
( 0, j
+1 )
802 # `self' representation with `nb' digits after the '.'.
803 fun to_precision
(nb
: Int): String import String::from_cstring
`{
807 size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
808 str = malloc(size + 1);
809 sprintf(str, "%.*f", (int)nb, recv );
811 return new_String_from_cstring( str );
818 var s
= new Buffer.with_capacity
(1)
823 # Returns true if the char is a numerical digit
826 if self >= '0' and self <= '9'
833 # Returns true if the char is an alpha digit
836 if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
840 # Returns true if the char is an alpha or a numeric digit
841 fun is_alphanumeric
: Bool
843 if self.is_numeric
or self.is_alpha
then return true
848 redef class Collection[E
]
849 # Concatenate elements.
853 for e
in self do if e
!= null then s
.append
(e
.to_s
)
857 # Concatenate the elements, separating each element with `sep'.
858 fun join
(sep
: String): String
860 if is_empty
then return ""
862 var s
= new Buffer # Result
867 if e
!= null then s
.append
(e
.to_s
)
874 if e
!= null then s
.append
(e
.to_s
)
882 # Fast implementation
890 if e
!= null then s
.append
(e
.to_s
)
898 # Concatenate couple of 'key value'.
899 # key and value are separated by 'couple_sep'.
900 # each couple is separated from the next one with `sep'.
901 fun join
(sep
: String, couple_sep
: String): String
903 if is_empty
then return ""
905 var s
= new Buffer # Result
911 if e
!= null then s
.append
("{k}{couple_sep}{e}")
919 if e
!= null then s
.append
("{k}{couple_sep}{e}")
926 ###############################################################################
928 ###############################################################################
930 # Native strings are simple C char *
932 fun [](index
: Int): Char is intern
933 fun []=(index
: Int, item
: Char) is intern
934 fun copy_to
(dest
: NativeString, length
: Int, from
: Int, to
: Int) is intern
936 # Position of the first nul character.
937 fun cstring_length
: Int
940 while self[l
] != '\0' do l
+= 1
943 fun atoi
: Int is intern
944 fun atof
: Float is extern "atof"
947 # StringCapable objects can create native strings
948 interface StringCapable
949 protected fun calloc_string
(size
: Int): NativeString is intern
953 var _args_cache
: nullable Sequence[String]
955 redef fun args
: Sequence[String]
957 if _args_cache
== null then init_args
958 return _args_cache
.as(not null)
961 # The name of the program as given by the OS
962 fun program_name
: String
964 return new String.from_cstring
(native_argv
(0))
967 # Initialize `args' with the contents of `native_argc' and `native_argv'.
968 private fun init_args
970 var argc
= native_argc
971 var args
= new Array[String].with_capacity
(0)
974 args
[i-1
] = new String.from_cstring
(native_argv
(i
))
980 private fun native_argc
: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.
982 private fun native_argv
(i
: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.