stdlib: Modified strings for performances with substring operations
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # Basic manipulations of strings of characters
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
24 # Common subclass for String and Buffer
25 abstract class AbstractString
26 super AbstractArrayRead[Char]
27
28 readable private var _items: NativeString
29
30 redef fun [](index) do return _items[index]
31
32 # Create a substring.
33 #
34 # "abcd".substring(1, 2) # --> "bc"
35 # "abcd".substring(-1, 2) # --> "a"
36 # "abcd".substring(1, 0) # --> ""
37 # "abcd".substring(2, 5) # --> "cd"
38 fun substring(from: Int, count: Int): String
39 do
40 assert count >= 0
41 count += from
42 if from < 0 then from = 0
43 if count > length then count = length
44 if from < count then
45 var r = new Buffer.with_capacity(count - from)
46 while from < count do
47 r.push(_items[from])
48 from += 1
49 end
50 return r.to_s
51 else
52 return ""
53 end
54 end
55
56 # Create a substring from `self' beginning at the 'from' position
57 #
58 # "abcd".substring(1) # --> "bcd"
59 # "abcd".substring(-1) # --> "abcd"
60 # "abcd".substring(2) # --> "cd"
61 fun substring_from(from: Int): String
62 do
63 assert from < length
64 return substring(from, length - from)
65 end
66
67 # Is `self' a substring of the `str' string from pos `pos'
68 #
69 # "bc".is_substring("abcd",1) # --> true
70 # "bc".is_substring("abcd",2) # --> false
71 fun has_substring(str: String, pos: Int): Bool
72 do
73 var itsindex = str.length - 1
74 var myindex = pos + itsindex
75 var myitems = _items
76 var itsitems = str._items
77 if myindex > length or itsindex > myindex then return false
78 var its_index_from = str._indexFrom
79 itsindex += its_index_from
80 while itsindex >= its_index_from do
81 if myitems[myindex] != itsitems[itsindex] then return false
82 myindex -= 1
83 itsindex -= 1
84 end
85 return true
86 end
87
88 # Is this string prefixed by 'prefix'
89 #
90 # "abc".is_prefix("abcd") # --> true
91 # "bc".is_prefix("abcd") # --> false
92 fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)
93
94 # Is this string suffixed by 'suffix'
95 #
96 # "abcd".has_suffix("abc") # --> false
97 # "abcd".has_suffix("bcd") # --> true
98 fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)
99
100 # If `self' contains only digits, return the corresponding integer
101 fun to_i: Int
102 do
103 # Shortcut
104 return to_s.to_cstring.atoi
105 end
106
107 # If `self' contains a float, return the corresponding float
108 fun to_f: Float
109 do
110 # Shortcut
111 return to_s.to_cstring.atof
112 end
113
114 # If `self' contains only digits and alpha <= 'f', return the corresponding integer.
115 fun to_hex: Int do return a_to(16)
116
117 # If `self' contains only digits and letters, return the corresponding integer in a given base
118 fun a_to(base: Int) : Int
119 do
120 var i = 0
121 var neg = false
122
123 for c in self
124 do
125 var v = c.to_i
126 if v > base then
127 if neg then
128 return -i
129 else
130 return i
131 end
132 else if v < 0 then
133 neg = true
134 else
135 i = i * base + v
136 end
137 end
138 if neg then
139 return -i
140 else
141 return i
142 end
143 end
144
145 # A upper case version of `self'
146 fun to_upper: String
147 do
148 var s = new Buffer.with_capacity(length)
149 for i in self do s.add(i.to_upper)
150 return s.to_s
151 end
152
153 # A lower case version of `self'
154 fun to_lower : String
155 do
156 var s = new Buffer.with_capacity(length)
157 for i in self do s.add(i.to_lower)
158 return s.to_s
159 end
160
161
162 redef fun output
163 do
164 var i = 0
165 while i < length do
166 _items[i].output
167 i += 1
168 end
169 end
170 end
171
172 # Immutable strings of characters.
173 class String
174 super Comparable
175 super AbstractString
176 super StringCapable
177
178 redef type OTHER: String
179
180 readable var _indexFrom: Int
181 readable var _indexTo: Int
182
183 ################################################
184 # AbstractString specific methods #
185 ################################################
186
187 # Access a character at index in String
188 #
189 redef fun [](index) do
190 assert index >= 0
191 assert (index + _indexFrom) < (_indexFrom + _length)
192 return items[index + _indexFrom]
193 end
194
195 # Create a substring.
196 #
197 # "abcd".substring(1, 2) # --> "bc"
198 # "abcd".substring(-1, 2) # --> "a"
199 # "abcd".substring(1, 0) # --> ""
200 # "abcd".substring(2, 5) # --> "cd"
201 redef fun substring(from: Int, count: Int): String
202 do
203 assert count >= 0
204
205 if from < 0 then
206 count += from
207 if count < 0 then count = 0
208 from = 0
209 end
210
211 var realFrom = _indexFrom + from
212
213 if (realFrom + count) > _indexTo then return new String.from_substring(realFrom, _indexTo, _items)
214
215 if count == 0 then return ""
216
217 return new String.from_substring(realFrom, realFrom + count - 1, _items)
218 end
219
220 # Create a substring from `self' beginning at the 'from' position
221 #
222 # "abcd".substring(1) # --> "bcd"
223 # "abcd".substring(-1) # --> "abcd"
224 # "abcd".substring(2) # --> "cd"
225 redef fun substring_from(from: Int): String
226 do
227 if from > _length then return ""
228 if from < 0 then from = 0
229 return substring(from, _length)
230 end
231
232 # Is `self' a substring of the `str' string from pos `pos'
233 #
234 # "bc".is_substring("abcd",1) # --> true
235 # "bc".is_substring("abcd",2) # --> false
236 redef fun has_substring(str: String, pos: Int): Bool
237 do
238 var itsindex = str._length - 1
239
240 var myindex = pos + itsindex
241 var myitems = _items
242
243 var itsitems = str._items
244
245 if myindex > _length or itsindex > myindex then return false
246
247 var itsindexfrom = str.indexFrom
248 itsindex += itsindexfrom
249 myindex += indexFrom
250
251 while itsindex >= itsindexfrom do
252 if myitems[myindex] != itsitems[itsindex] then return false
253 myindex -= 1
254 itsindex -= 1
255 end
256
257 return true
258 end
259
260 # A upper case version of `self'
261 redef fun to_upper: String
262 do
263 var outstr = calloc_string(self._length + 1)
264 var index = 0
265
266 var myitems = self._items
267 var index_from = self._indexFrom
268 var max = self._indexTo
269
270 while index_from <= max do
271 outstr[index] = myitems[index_from].to_upper
272 index += 1
273 index_from += 1
274 end
275
276 outstr[self.length] = '\0'
277
278 return new String.with_native(outstr, self._length)
279 end
280
281 # A lower case version of `self'
282 redef fun to_lower : String
283 do
284 var outstr = calloc_string(self._length + 1)
285 var index = 0
286
287 var myitems = self._items
288 var index_from = self._indexFrom
289 var max = self._indexTo
290
291 while index_from <= max do
292 outstr[index] = myitems[index_from].to_lower
293 index += 1
294 index_from += 1
295 end
296
297 outstr[self.length] = '\0'
298
299 return new String.with_native(outstr, self._length)
300 end
301
302 redef fun output
303 do
304 var i = self._indexFrom
305 while i < length do
306 _items[i].output
307 i += 1
308 end
309 end
310
311 ##################################################
312 # String Specific Methods #
313 ##################################################
314
315 # Creates a String object as a substring of another String
316 private init from_substring(from: Int, to: Int, internalString: NativeString)
317 do
318 _items = internalString
319 _indexFrom = from
320 _indexTo = to
321 _length = to - from + 1
322 end
323
324 # Create a new string from a given char *.
325 init with_native(nat: NativeString, size: Int)
326 do
327 assert size >= 0
328 _items = nat
329 _length = size
330 _indexFrom = 0
331 _indexTo = size - 1
332 end
333
334 # Create a new string from a null terminated char *.
335 init from_cstring(str: NativeString)
336 do
337 with_native(str,str.cstring_length)
338 end
339
340 # Return a null terminated char *
341 fun to_cstring: NativeString
342 do
343 #return items
344 if _indexFrom > 0 or _indexTo != items.cstring_length-1 then
345 var newItems = calloc_string(length+1)
346 self.items.copy_to(newItems, _length, _indexFrom, 0)
347 newItems[length] = '\0'
348 return newItems
349 end
350 return _items
351 end
352
353 redef fun ==(o)
354 do
355 if not o isa String or o is null then return false
356
357 if self.object_id == o.object_id then return true
358
359 var l = _length
360
361 if o._length != l then return false
362
363 var i = _indexFrom
364 var j = o._indexFrom
365 var max = l + _indexFrom
366 var itsitems = o._items
367 var myitems = self._items
368
369 while i < max do
370 if myitems[i] != itsitems[j] then return false
371 i += 1
372 j += 1
373 end
374
375 return true
376 end
377
378 redef fun <(s)
379 do
380 if self.object_id == s.object_id then return false
381
382 var c1 : Int
383 var c2 : Int
384 var currIdSelf = self._indexFrom
385 var currIdOther = s._indexFrom
386 var my_items = self._items
387 var its_items = s._items
388
389 if self._length < s._length then
390 return true
391 else if self.length > s._length then
392 return false
393 end
394
395 var self_upper_bound = self._length + currIdSelf
396 var other_upper_bound = s._length + currIdOther
397
398 while currIdSelf < self_upper_bound and currIdOther < other_upper_bound do
399 c1 = my_items[currIdSelf].ascii
400 c2 = its_items[currIdOther].ascii
401
402 if c1 < c2 then
403 return true
404 else if c2 < c1 then
405 return false
406 end
407
408 currIdSelf += 1
409 currIdOther += 1
410 end
411
412 return false
413 end
414
415 # The concatenation of `self' with `r'
416 fun +(s: String): String
417 do
418 var r = new Buffer.with_capacity(length + s.length)
419 r.append(self)
420 r.append(s)
421 return r.to_s
422 end
423
424 # i repetitions of self
425 fun *(i: Int): String
426 do
427 assert i >= 0
428 var r = new Buffer.with_capacity(length * i)
429 while i > 0 do
430 r.append(self)
431 i -= 1
432 end
433 return r.to_s
434 end
435
436 redef fun to_s do return self
437
438 redef fun hash
439 do
440 # djb2 hash algorythm
441 var h = 5381
442 var i = _length - 1
443
444 var myitems = self.items
445 var index_from = self._indexFrom
446
447 i += index_from
448
449 while i >= index_from do
450 h = (h * 32) + h + self._items[i].ascii
451 i -= 1
452 end
453
454 return h
455 end
456 end
457
458 # Mutable strings of characters.
459 class Buffer
460 super AbstractString
461 super Comparable
462 super StringCapable
463 super AbstractArray[Char]
464
465 redef type OTHER: String
466
467 redef fun []=(index, item)
468 do
469 if index == length then
470 add(item)
471 return
472 end
473 assert index >= 0 and index < length
474 _items[index] = item
475 end
476
477 redef fun add(c)
478 do
479 if _capacity <= length then enlarge(length + 5)
480 _items[length] = c
481 _length += 1
482 end
483
484 redef fun enlarge(cap)
485 do
486 var c = _capacity
487 if cap <= c then return
488 while c <= cap do c = c * 2 + 2
489 var a = calloc_string(c+1)
490 _items.copy_to(a, length, 0, 0)
491 _items = a
492 _capacity = c
493 end
494
495 redef fun append(s)
496 do
497 if s isa String then
498 var sl = s.length
499 if _capacity < _length + sl then enlarge(_length + sl)
500 s.items.copy_to(_items, sl, s._indexFrom, _length)
501 _length += sl
502 else
503 super
504 end
505 end
506
507 redef fun to_s: String
508 do
509 var l = length
510 var a = calloc_string(l+1)
511 _items.copy_to(a, l, 0, 0)
512
513 # Ensure the afterlast byte is '\0' to nul-terminated char *
514 a[length] = '\0'
515
516 return new String.with_native(a, length)
517 end
518
519 redef fun <(s)
520 do
521 var i = 0
522 var l1 = length
523 var l2 = s.length
524 while i < l1 and i < l2 do
525 var c1 = self[i].ascii
526 var c2 = s[i].ascii
527 if c1 < c2 then
528 return true
529 else if c2 < c1 then
530 return false
531 end
532 i += 1
533 end
534 if l1 < l2 then
535 return true
536 else
537 return false
538 end
539 end
540
541 # Create a new empty string.
542 init
543 do
544 with_capacity(5)
545 end
546
547 init from(s: String)
548 do
549 _capacity = s.length + 1
550 _length = s.length
551 _items = calloc_string(_capacity)
552 s.items.copy_to(_items, _length, s._indexFrom, 0)
553 end
554
555 # Create a new empty string with a given capacity.
556 init with_capacity(cap: Int)
557 do
558 assert cap >= 0
559 # _items = new NativeString.calloc(cap)
560 _items = calloc_string(cap+1)
561 _capacity = cap
562 _length = 0
563 end
564
565 redef fun ==(o)
566 do
567 if not o isa Buffer or o is null then return false
568 var l = length
569 if o.length != l then return false
570 var i = 0
571 var it = _items
572 var oit = o._items
573 while i < l do
574 if it[i] != oit[i] then return false
575 i += 1
576 end
577 return true
578 end
579
580 readable private var _capacity: Int
581 end
582
583 ###############################################################################
584 # Refinement #
585 ###############################################################################
586
587 redef class Object
588 # User readable representation of `self'.
589 fun to_s: String do return inspect
590
591 # The class name of the object in NativeString format.
592 private fun native_class_name: NativeString is intern
593
594 # The class name of the object.
595 # FIXME: real type information is not available at runtime.
596 # Therefore, for instance, an instance of List[Bool] has just
597 # "List" for class_name
598 fun class_name: String do return new String.from_cstring(native_class_name)
599
600 # Developer readable representation of `self'.
601 # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
602 fun inspect: String
603 do
604 return "<{inspect_head}>"
605 end
606
607 # Return "CLASSNAME:#OBJECTID".
608 # This function is mainly used with the redefinition of the inspect method
609 protected fun inspect_head: String
610 do
611 return "{class_name}:#{object_id.to_hex}"
612 end
613
614 protected fun args: Sequence[String]
615 do
616 return sys.args
617 end
618 end
619
620 redef class Bool
621 redef fun to_s
622 do
623 if self then
624 return once "true"
625 else
626 return once "false"
627 end
628 end
629 end
630
631 redef class Int
632 fun fill_buffer(s: Buffer, base: Int, signed: Bool)
633 # Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if 'signed' and negative).
634 # assume < to_c max const of char
635 do
636 var n: Int
637 # Sign
638 if self < 0 then
639 n = - self
640 s[0] = '-'
641 else if self == 0 then
642 s[0] = '0'
643 return
644 else
645 n = self
646 end
647 # Fill digits
648 var pos = digit_count(base) - 1
649 while pos >= 0 and n > 0 do
650 s[pos] = (n % base).to_c
651 n = n / base # /
652 pos -= 1
653 end
654 end
655
656 # return displayable int in base 10 and signed
657 redef fun to_s do return to_base(10,true)
658
659 # return displayable int in hexadecimal (unsigned (not now))
660 fun to_hex: String do return to_base(16,false)
661
662 # return displayable int in base base and signed
663 fun to_base(base: Int, signed: Bool): String
664 do
665 var l = digit_count(base)
666 var s = new Buffer.from(" " * l)
667 fill_buffer(s, base, signed)
668 return s.to_s
669 end
670 end
671
672 redef class Float
673 redef fun to_s do return to_precision(6)
674
675 # `self' representation with `nb' digits after the '.'.
676 fun to_precision(nb: Int): String
677 do
678 if nb == 0 then return to_i.to_s
679
680 var i = to_i
681 var dec = 1.0
682 while nb > 0 do
683 dec = dec * 10.0
684 nb -= 1
685 end
686 var d = ((self-i.to_f)*dec).to_i
687 return "{i}.{d}"
688 end
689 end
690
691 redef class Char
692 redef fun to_s
693 do
694 var s = new Buffer.with_capacity(1)
695 s[0] = self
696 return s.to_s
697 end
698 end
699
700 redef class Collection[E]
701 # Concatenate elements.
702 redef fun to_s
703 do
704 var s = new Buffer
705 for e in self do if e != null then s.append(e.to_s)
706 return s.to_s
707 end
708
709 # Concatenate and separate each elements with `sep'.
710 fun join(sep: String): String
711 do
712 if is_empty then return ""
713
714 var s = new Buffer # Result
715
716 # Concat first item
717 var i = iterator
718 var e = i.item
719 if e != null then s.append(e.to_s)
720
721 # Concat other items
722 i.next
723 while i.is_ok do
724 s.append(sep)
725 e = i.item
726 if e != null then s.append(e.to_s)
727 i.next
728 end
729 return s.to_s
730 end
731 end
732
733 redef class Array[E]
734 # Fast implementation
735 redef fun to_s
736 do
737 var s = new Buffer
738 var i = 0
739 var l = length
740 while i < l do
741 var e = self[i]
742 if e != null then s.append(e.to_s)
743 i += 1
744 end
745 return s.to_s
746 end
747 end
748
749 redef class Map[K,V]
750 # Concatenate couple of 'key value'.
751 # key and value are separated by 'couple_sep'.
752 # each couple is separated each couple with `sep'.
753 fun join(sep: String, couple_sep: String): String
754 do
755 if is_empty then return ""
756
757 var s = new Buffer # Result
758
759 # Concat first item
760 var i = iterator
761 var k = i.key
762 var e = i.item
763 if e != null then s.append("{k}{couple_sep}{e}")
764
765 # Concat other items
766 i.next
767 while i.is_ok do
768 s.append(sep)
769 k = i.key
770 e = i.item
771 if e != null then s.append("{k}{couple_sep}{e}")
772 i.next
773 end
774 return s.to_s
775 end
776 end
777
778 ###############################################################################
779 # Native classes #
780 ###############################################################################
781
782 # Native strings are simple C char *
783 class NativeString
784 fun [](index: Int): Char is intern
785 fun []=(index: Int, item: Char) is intern
786 fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern
787
788 # Position of the first nul character.
789 fun cstring_length: Int
790 do
791 var l = 0
792 while self[l] != '\0' do l += 1
793 return l
794 end
795 fun atoi: Int is intern
796 fun atof: Float is extern "atof"
797 end
798
799 # StringCapable objects can create native strings
800 interface StringCapable
801 protected fun calloc_string(size: Int): NativeString is intern
802 end
803
804 redef class Sys
805 var _args_cache: nullable Sequence[String]
806
807 redef fun args: Sequence[String]
808 do
809 if _args_cache == null then init_args
810 return _args_cache.as(not null)
811 end
812
813 # The name of the program as given by the OS
814 fun program_name: String
815 do
816 return new String.from_cstring(native_argv(0))
817 end
818
819 # Initialize `args' with the contents of `native_argc' and `native_argv'.
820 private fun init_args
821 do
822 var argc = native_argc
823 var args = new Array[String].with_capacity(0)
824 var i = 1
825 while i < argc do
826 args[i-1] = new String.from_cstring(native_argv(i))
827 i += 1
828 end
829 _args_cache = args
830 end
831
832 private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.
833
834 private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.
835 end
836