stdlib: Char, added functions is_alpha and is_alphanumeric (used for several operatio...
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # Basic manipulations of strings of characters
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
# Common subclass for String and Buffer.
#
# Gathers the read-only operations shared by both kinds of strings.
# Characters are stored in the native `_items' and are read through it.
abstract class AbstractString
	super AbstractArrayRead[Char]

	# Native storage of the characters
	readable private var _items: NativeString

	redef fun [](index) do return _items[index]

	# Create a substring of at most `count' characters starting at `from'.
	# The requested range is clipped to the bounds of `self'.
	# `count' must not be negative.
	#
	#     "abcd".substring(1, 2)	# --> "bc"
	#     "abcd".substring(-1, 2)	# --> "a"
	#     "abcd".substring(1, 0)	# --> ""
	#     "abcd".substring(2, 5)	# --> "cd"
	fun substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From now on, `count' is the (exclusive) end position
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from `self' beginning at the `from' position.
	# `from' must be lower than `length'.
	#
	#     "abcd".substring_from(1)	# --> "bcd"
	#     "abcd".substring_from(-1)	# --> "abcd"
	#     "abcd".substring_from(2)	# --> "cd"
	fun substring_from(from: Int): String
	do
		assert from < length
		return substring(from, length - from)
	end

	# Does `self' contain the string `str' exactly at position `pos'?
	#
	#     "abcd".has_substring("bc", 1)	# --> true
	#     "abcd".has_substring("bc", 2)	# --> false
	fun has_substring(str: String, pos: Int): Bool
	do
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `myindex' is the last position of `self' that will be read, so it
		# must be a valid index (strictly lower than `length').
		# `itsindex > myindex' is true exactly when `pos' is negative.
		if myindex >= length or itsindex > myindex then return false
		# Compare backward, from the last character of `str' to its first
		while itsindex >= 0 do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by `prefix'?
	#
	#     "abcd".has_prefix("abc")	# --> true
	#     "abcd".has_prefix("bc")	# --> false
	fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by `suffix'?
	#
	#     "abcd".has_suffix("abc")	# --> false
	#     "abcd".has_suffix("bcd")	# --> true
	fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer.
	# The conversion is delegated to the native `atoi'.
	fun to_i: Int
	do
		# Shortcut
		return to_s.to_cstring.atoi
	end

	# If `self' contains a float, return the corresponding float.
	# The conversion is delegated to the native `atof'.
	fun to_f: Float
	do
		# Shortcut
		return to_s.to_cstring.atof
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	fun to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base.
	#
	# Parsing stops at the first character whose value (according to
	# `Char::to_i', defined outside this module) is not a digit of `base'.
	# A character with a negative value makes the result negative.
	fun a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			# Valid digits of a base are strictly lower than the base
			if v >= base then
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# Returns true if the string contains only Numeric values (and one "," or one "." character).
	# Note that an empty string is therefore considered numeric.
	fun is_numeric: Bool
	do
		var has_point_or_comma = false
		for i in self
		do
			if not i.is_numeric
			then
				# A single decimal separator is tolerated
				if (i == '.' or i == ',') and not has_point_or_comma
				then
					has_point_or_comma = true
				else
					return false
				end
			end
		end
		return true
	end

	# A upper case version of `self'
	fun to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# A lower case version of `self'
	fun to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end

	# Output each character of `self', in order
	redef fun output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
188
# Immutable strings of characters.
#
# The characters live in a native string that is never modified
# through this class.
class String
	super Comparable
	super AbstractString

	redef type OTHER: String

	# Create a new string from a given char * holding `size' characters.
	# `size' must not be negative.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_length = size
		_items = nat
	end

	# Create a new string from a null terminated char *.
	# The length is the position of the first nul character.
	init from_cstring(str: NativeString)
	do
		_items = str
		_length = str.cstring_length
	end

	# Return a null terminated char *
	# (the native storage itself, not a copy).
	fun to_cstring: NativeString do return _items

	# Two strings are equal if they have the same length and the same
	# characters at the same positions.
	redef fun ==(o)
	do
		if not o isa String or o is null then return false
		var len = length
		if len != o.length then return false
		var mine = _items
		var theirs = o._items
		# Skip the common prefix; equal iff it covers the whole string
		var pos = 0
		while pos < len and mine[pos] == theirs[pos] do pos += 1
		return pos == len
	end

	# Lexicographic comparison based on the ascii value of the characters.
	# A strict prefix is lower than the longer string.
	redef fun <(s)
	do
		var mine = _items
		var theirs = s._items
		var my_len = length
		var its_len = s.length
		# Only the positions present in both strings can be compared
		var common = my_len
		if its_len < common then common = its_len
		var pos = 0
		while pos < common do
			var a = mine[pos].ascii
			var b = theirs[pos].ascii
			if a != b then return a < b
			pos += 1
		end
		return my_len < its_len
	end

	# The concatenation of `self' with `s'
	fun +(s: String): String
	do
		var total = length + s.length
		var res = new Buffer.with_capacity(total)
		res.append(self)
		res.append(s)
		return res.to_s
	end

	# `i' repetitions of `self'.
	# `i' must not be negative.
	fun *(i: Int): String
	do
		assert i >= 0
		var res = new Buffer.with_capacity(length * i)
		var done = 0
		while done < i do
			res.append(self)
			done += 1
		end
		return res.to_s
	end

	redef fun to_s do return self

	# djb2 hash algorithm, fed with the characters from the last to the first
	redef fun hash
	do
		var chars = _items
		var h = 5381
		var pos = _length
		while pos > 0 do
			pos -= 1
			h = (h * 32) + h + chars[pos].ascii
		end
		return h
	end
end
294
# Mutable strings of characters.
#
# The native storage is always allocated with one more character than
# the announced capacity.
class Buffer
	super AbstractString
	super Comparable
	super StringCapable
	super AbstractArray[Char]

	redef type OTHER: String

	# Replace the character at `index'.
	# Writing at position `length' appends the character instead.
	redef fun []=(index, item)
	do
		if index != length then
			assert index >= 0 and index < length
			_items[index] = item
		else
			add(item)
		end
	end

	# Append the character `c', enlarging the storage when it is full
	redef fun add(c)
	do
		var len = length
		if _capacity <= len then enlarge(len + 5)
		_items[len] = c
		_length = len + 1
	end

	# Ensure that the capacity is greater than `cap'.
	# Nothing is done if the current capacity is already at least `cap'.
	redef fun enlarge(cap)
	do
		var new_cap = _capacity
		if new_cap >= cap then return
		# Grow geometrically until `cap' is exceeded
		while new_cap <= cap do new_cap = new_cap * 2 + 2
		var fresh = calloc_string(new_cap + 1)
		_items.copy_to(fresh, length, 0, 0)
		_items = fresh
		_capacity = new_cap
	end

	# Append the content of `s'.
	# A String is copied natively in one go; other collections use the
	# inherited implementation.
	redef fun append(s)
	do
		if s isa String then
			var extra = s.length
			var needed = length + extra
			if _capacity < needed then enlarge(needed)
			s.items.copy_to(_items, extra, 0, length)
			_length = needed
		else
			super
		end
	end

	# A new immutable string holding a copy of the current content
	redef fun to_s: String
	do
		var len = length
		var copy = calloc_string(len + 1)
		_items.copy_to(copy, len, 0, 0)

		# The char * handed to the String must be nul-terminated
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Lexicographic comparison based on the ascii value of the characters.
	# A strict prefix is lower than the longer string.
	redef fun <(s)
	do
		var my_len = length
		var its_len = s.length
		# Only the positions present in both strings can be compared
		var common = my_len
		if its_len < common then common = its_len
		var pos = 0
		while pos < common do
			var a = self[pos].ascii
			var b = s[pos].ascii
			if a != b then return a < b
			pos += 1
		end
		return my_len < its_len
	end

	# Create a new empty string (with a capacity of 5).
	init
	do
		with_capacity(5)
	end

	# Create a new buffer holding a copy of the characters of `s'
	init from(s: String)
	do
		var len = s.length
		_capacity = len + 1
		_length = len
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, 0, 0)
	end

	# Create a new empty string with a given capacity.
	# `cap' must not be negative.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		# _items = new NativeString.calloc(cap)
		_length = 0
		_capacity = cap
		_items = calloc_string(cap+1)
	end

	# Two buffers are equal if they have the same length and the same
	# characters at the same positions.
	redef fun ==(o)
	do
		if not o isa Buffer or o is null then return false
		var len = length
		if len != o.length then return false
		var mine = _items
		var theirs = o._items
		# Skip the common prefix; equal iff it covers the whole content
		var pos = 0
		while pos < len and mine[pos] == theirs[pos] do pos += 1
		return pos == len
	end

	# Number of characters the buffer is considered able to hold before
	# `add' or `append' enlarge the native storage
	readable private var _capacity: Int
end
419
420 ###############################################################################
421 # Refinement #
422 ###############################################################################
423
redef class Object
	# User readable representation of `self'.
	# By default, this is the same as `inspect'.
	fun to_s: String
	do
		return inspect
	end

	# The class name of the object in NativeString format.
	private fun native_class_name: NativeString is intern

	# The class name of the object.
	# FIXME: real type information is not available at runtime.
	# Therefore, for instance, an instance of List[Bool] has just
	# "List" for class_name
	fun class_name: String
	do
		var native_name = native_class_name
		return new String.from_cstring(native_name)
	end

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	fun inspect: String do return "<{inspect_head}>"

	# Return "CLASSNAME:#OBJECTID" (the object id is written in hexadecimal).
	# This function is mainly used with the redefinition of the inspect method
	protected fun inspect_head: String do return "{class_name}:#{object_id.to_hex}"

	# The arguments of the program, as provided by `sys'
	protected fun args: Sequence[String] do return sys.args
end
456
redef class Bool
	# Either "true" or "false".
	# Each literal is evaluated only once.
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
467
redef class Int
	# Fill `s' with the digits in base `base' of `self' (and with the '-' sign if negative).
	# assume < to_c max const of char
	#
	# Digits are written from the position `digit_count(base) - 1' down
	# to the front of `s'.
	# Note that `signed' is currently not consulted.
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single digit
		if self == 0 then
			s[0] = '0'
			return
		end
		# Sign
		var n = self
		if self < 0 then
			n = - self
			s[0] = '-'
		end
		# Fill digits, least significant first
		var pos = digit_count(base) - 1
		while pos >= 0 and n > 0 do
			s[pos] = (n % base).to_c
			n = n / base # /
			pos -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s
	do
		return to_base(10, true)
	end

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String
	do
		return to_base(16, false)
	end

	# return displayable int in base base and signed
	fun to_base(base: Int, signed: Bool): String
	do
		# Start from a buffer of spaces that has the final width
		var width = digit_count(base)
		var buf = new Buffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
508
redef class Float
	# `self' representation with 6 digits after the '.'.
	redef fun to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	#
	# The fractional part is truncated (not rounded) and is padded with
	# leading zeros so that it always has at least `nb' digits.
	# With `nb == 0', only the integer part is returned.
	#
	#     1.0625.to_precision(4)	# --> "1.0625"
	#     2.5.to_precision(1)	# --> "2.5"
	fun to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		# Work on the magnitude so that the sign is written only once,
		# in front of the integer part (even when that part is 0).
		var neg = self < 0.0
		var mag = self
		if neg then mag = 0.0 - self

		var i = mag.to_i

		# dec = 10^nb
		var dec = 1.0
		var left = nb
		while left > 0 do
			dec = dec * 10.0
			left -= 1
		end

		# The `nb' first decimals, as an integer
		var d = ((mag - i.to_f) * dec).to_i

		# Restore the leading zeros lost by the integer conversion
		var ds = d.to_s
		var missing = nb - ds.length
		if missing > 0 then ds = "0" * missing + ds

		if neg then return "-{i}.{ds}"
		return "{i}.{ds}"
	end
end
527
redef class Char
	# A string made of this single character
	redef fun to_s
	do
		var buf = new Buffer.with_capacity(1)
		buf.add(self)
		return buf.to_s
	end

	# Returns true if the char is a numerical digit ('0' to '9')
	fun is_numeric: Bool do return self >= '0' and self <= '9'

	# Returns true if the char is an alpha digit
	# ('a' to 'z' or 'A' to 'Z')
	fun is_alpha: Bool
	do
		if self >= 'a' and self <= 'z' then return true
		return self >= 'A' and self <= 'Z'
	end

	# Returns true if the char is an alpha or a numeric digit
	fun is_alphanumeric: Bool do return is_numeric or is_alpha
end
560
redef class Collection[E]
	# Concatenate elements.
	# Null elements are skipped.
	redef fun to_s
	do
		var res = new Buffer
		var it = iterator
		while it.is_ok do
			var e = it.item
			if e != null then res.append(e.to_s)
			it.next
		end
		return res.to_s
	end

	# Concatenate and separate each elements with `sep'.
	# Null elements contribute nothing, but their separator is still written.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer # Result
		var it = iterator
		var first = true
		while it.is_ok do
			# No separator before the first item
			if first then
				first = false
			else
				res.append(sep)
			end
			var e = it.item
			if e != null then res.append(e.to_s)
			it.next
		end
		return res.to_s
	end
end
593
redef class Array[E]
	# Concatenate elements (null ones are skipped).
	# Fast implementation based on a direct indexed access.
	redef fun to_s
	do
		var res = new Buffer
		var count = length
		var pos = 0
		while pos < count do
			var elt = self[pos]
			pos += 1
			if elt != null then res.append(elt.to_s)
		end
		return res.to_s
	end
end
609
redef class Map[K,V]
	# Concatenate couple of 'key value'.
	# key and value are separated by 'couple_sep'.
	# each couple is separated from the next one with `sep'.
	# Couples with a null value contribute nothing, but their separator
	# is still written.
	fun join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer # Result
		var it = iterator
		var first = true
		while it.is_ok do
			# No separator before the first couple
			if first then
				first = false
			else
				res.append(sep)
			end
			var k = it.key
			var e = it.item
			if e != null then res.append("{k}{couple_sep}{e}")
			it.next
		end
		return res.to_s
	end
end
638
639 ###############################################################################
640 # Native classes #
641 ###############################################################################
642
# Native strings are simple C char *
class NativeString
	# The character at position `index'
	fun [](index: Int): Char is intern

	# Set the character at position `index' to `item'
	fun []=(index: Int, item: Char) is intern

	# Copy `length' characters of `self', starting at `from', into `dest' at position `to'.
	# NOTE(review): meaning deduced from the parameter names and from the
	# callers in this module; the implementation is intern.
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	fun cstring_length: Int
	do
		var pos = 0
		var c = self[0]
		while c != '\0' do
			pos += 1
			c = self[pos]
		end
		return pos
	end

	# Conversion to an integer (intern implementation)
	fun atoi: Int is intern

	# Conversion to a float, performed by the extern C function "atof"
	fun atof: Float is extern "atof"
end
659
# StringCapable objects can create native strings.
interface StringCapable
	# Create a new native string of `size' characters (intern implementation).
	# NOTE(review): the name suggests a zero-filled, calloc-like allocation;
	# this cannot be confirmed from this module.
	protected fun calloc_string(size: Int): NativeString is intern
end
664
redef class Sys
	# Cache of the program arguments, filled by `init_args' on the first call of `args'
	var _args_cache: nullable Sequence[String]

	# The arguments of the program (the program name is not included).
	# They are read from the native argv only once.
	redef fun args: Sequence[String]
	do
		if _args_cache == null then init_args
		return _args_cache.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String do return new String.from_cstring(native_argv(0))

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	private fun init_args
	do
		var count = native_argc
		var collected = new Array[String].with_capacity(0)
		# native_argv(0) is the program name: arguments start at 1
		var idx = 0
		while idx < count - 1 do
			collected[idx] = new String.from_cstring(native_argv(idx + 1))
			idx += 1
		end
		_args_cache = collected
	end

	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.

	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.
end
697