Merge branch 'alexandre/typo-in-nit-reference' into wip
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # Basic manipulations of strings of characters
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
# Common superclass for String and Buffer
abstract class AbstractString
	super AbstractArrayRead[Char]

	# The native character storage backing `self'.
	readable private var _items: NativeString

	# The character at position `index', read directly from the native storage.
	redef fun [](index) do return _items[index]

	# Create a substring of at most `count' characters starting at `from'.
	# The requested range is clamped to the bounds of `self'.
	#
	# "abcd".substring(1, 2) # --> "bc"
	# "abcd".substring(-1, 2) # --> "a"
	# "abcd".substring(1, 0) # --> ""
	# "abcd".substring(2, 5) # --> "cd"
	fun substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From here on, `count' is the (exclusive) end position
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from `self' beginning at the `from' position
	# `from' must be lower than `length'.
	#
	# "abcd".substring_from(1) # --> "bcd"
	# "abcd".substring_from(-1) # --> "abcd"
	# "abcd".substring_from(2) # --> "cd"
	fun substring_from(from: Int): String
	do
		assert from < length
		return substring(from, length - from)
	end

	# Does `self' contain the string `str' exactly at position `pos'?
	#
	# "abcd".has_substring("bc",1) # --> true
	# "abcd".has_substring("bc",2) # --> false
	fun has_substring(str: String, pos: Int): Bool
	do
		# Characters are compared backward, from the last one of `str'
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `myindex' is the last position read in `self', so it must be a valid
		# index (strictly lower than `length'); `itsindex > myindex' rejects a negative `pos'
		if myindex >= length or itsindex > myindex then return false
		while itsindex >= 0 do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by `prefix'
	#
	# "abcd".has_prefix("abc") # --> true
	# "abcd".has_prefix("bc") # --> false
	fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by `suffix'
	#
	# "abcd".has_suffix("abc") # --> false
	# "abcd".has_suffix("bcd") # --> true
	fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer
	fun to_i: Int
	do
		# Shortcut: delegate the parsing to the native `atoi'
		return to_s.to_cstring.atoi
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	fun to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base
	#
	# The conversion stops at the first character whose value is not a digit of `base'.
	# A character with a negative value (presumably '-', TODO confirm against `Char::to_i')
	# makes the result negative.
	fun a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			if v >= base then
				# Valid digits are 0..base-1: stop here and return what was read so far
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# A upper case version of `self'
	fun to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# A lower case version of `self'
	fun to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end

	# Output each character of `self', in order.
	redef fun output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
162
# Immutable strings of characters.
class String
	super Comparable
	super AbstractString

	redef type OTHER: String

	# Create a new string made of the `size' first characters of a given char *.
	# `nat' is stored as is, without copy.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_length = size
		_items = nat
	end

	# Create a new string from a null terminated char *.
	# `str' is stored as is, without copy.
	init from_cstring(str: NativeString)
	do
		_items = str
		_length = str.cstring_length
	end

	# Return the native storage of `self', without copy.
	# It is expected to be a null terminated char *.
	fun to_cstring: NativeString do return _items

	# Two strings are equal if they have the same length and the same characters.
	redef fun ==(o)
	do
		if not o isa String or o is null then return false
		var len = length
		if len != o.length then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Lexicographic comparison on the ascii value of the characters.
	# A strict prefix is lower than the longer string.
	redef fun <(s)
	do
		var len1 = length
		var len2 = s.length
		var chars1 = _items
		var chars2 = s._items
		var pos = 0
		while pos < len1 and pos < len2 do
			var a = chars1[pos].ascii
			var b = chars2[pos].ascii
			# The first differing character decides
			if a != b then return a < b
			pos += 1
		end
		return len1 < len2
	end

	# The concatenation of `self' with `s'
	fun +(s: String): String
	do
		var buf = new Buffer.with_capacity(length + s.length)
		buf.append(self)
		buf.append(s)
		return buf.to_s
	end

	# i repetitions of self
	fun *(i: Int): String
	do
		assert i >= 0
		var buf = new Buffer.with_capacity(length * i)
		var left = i
		while left > 0 do
			buf.append(self)
			left -= 1
		end
		return buf.to_s
	end

	# A string is its own string representation.
	redef fun to_s do return self

	redef fun hash
	do
		# djb2 hash algorithm, applied from the last character to the first
		var h = 5381
		var chars = _items
		var pos = _length - 1
		while pos >= 0 do
			h = (h * 32) + h + chars[pos].ascii
			pos -= 1
		end
		return h
	end

	# The float value of `self', computed by an extern (native) method.
	fun to_f : Float is extern import String::to_cstring
end
270
# Mutable strings of characters.
class Buffer
	super AbstractString
	super Comparable
	super StringCapable
	super AbstractArray[Char]

	redef type OTHER: String

	# Replace the character at `index', or append `item' when `index' is `length'.
	redef fun []=(index, item)
	do
		if index != length then
			assert index >= 0 and index < length
			_items[index] = item
		else
			add(item)
		end
	end

	# Append the character `c', growing the storage when it is full.
	redef fun add(c)
	do
		if length >= _capacity then enlarge(length + 5)
		_items[length] = c
		_length += 1
	end

	# Ensure the capacity is greater than `cap'.
	# Nothing is done when the current capacity is already at least `cap'.
	redef fun enlarge(cap)
	do
		var new_cap = _capacity
		if new_cap >= cap then return
		while new_cap <= cap do new_cap = new_cap * 2 + 2
		# One extra character is allocated beyond the capacity
		var storage = calloc_string(new_cap + 1)
		_items.copy_to(storage, length, 0, 0)
		_items = storage
		_capacity = new_cap
	end

	# Append `s'; a String is copied natively in a single operation.
	redef fun append(s)
	do
		if not s isa String then
			super
		else
			var added = s.length
			if length + added > _capacity then enlarge(length + added)
			s.items.copy_to(_items, added, 0, length)
			_length += added
		end
	end

	# A new String holding a copy of the current characters.
	redef fun to_s: String
	do
		var len = length
		var copy = calloc_string(len + 1)
		_items.copy_to(copy, len, 0, 0)

		# Ensure the afterlast byte is '\0' to nul-terminated char *
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Lexicographic comparison on the ascii value of the characters.
	# A strict prefix is lower than the longer string.
	redef fun <(s)
	do
		var len1 = length
		var len2 = s.length
		var pos = 0
		while pos < len1 and pos < len2 do
			var a = self[pos].ascii
			var b = s[pos].ascii
			# The first differing character decides
			if a != b then return a < b
			pos += 1
		end
		return len1 < len2
	end

	# Create a new empty string.
	init
	do
		with_capacity(5)
	end

	# Create a new buffer holding a copy of the characters of `s'.
	init from(s: String)
	do
		var len = s.length
		_capacity = len + 1
		_length = len
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, 0, 0)
	end

	# Create a new empty string with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		# One extra character is allocated beyond the capacity
		_items = calloc_string(cap + 1)
		_capacity = cap
		_length = 0
	end

	# Two buffers are equal if they have the same length and the same characters.
	redef fun ==(o)
	do
		if not o isa Buffer or o is null then return false
		var len = length
		if len != o.length then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Number of characters that can be stored before the storage must grow.
	readable private var _capacity: Int
end
395
396 ###############################################################################
397 # Refinement #
398 ###############################################################################
399
redef class Object
	# User readable representation of `self'.
	# By default, it is the same as `inspect'.
	fun to_s: String
	do
		return inspect
	end

	# The class name of the object in NativeString format.
	private fun native_class_name: NativeString is intern

	# The class name of the object.
	# FIXME: real type information is not available at runtime.
	# Therefore, for instance, an instance of List[Bool] has just
	# "List" for class_name
	fun class_name: String
	do
		var native = native_class_name
		return new String.from_cstring(native)
	end

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	fun inspect: String do return "<{inspect_head}>"

	# Return "CLASSNAME:#OBJECTID".
	# This function is mainly used with the redefinition of the inspect method
	protected fun inspect_head: String do return "{class_name}:#{object_id.to_hex}"

	# The arguments given by `sys.args'.
	protected fun args: Sequence[String] do return sys.args
end
432
redef class Bool
	# "true" or "false", according to the value of `self'.
	# Each literal is built only once.
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
443
redef class Int
	# Fill `s' with the digits in base `base' of `self' (and with the '-' sign if negative).
	# NOTE: `signed' is currently not consulted, the sign is always written for a negative value.
	# Each digit is converted with `to_c'; presumably `base' must be small
	# enough for every digit to have a character (TODO confirm against `Int::to_c').
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single digit and needs no conversion loop
		if self == 0 then
			s[0] = '0'
			return
		end

		# Sign
		var n = self
		if self < 0 then
			n = - self
			s[0] = '-'
		end

		# Fill digits, from the least significant one at the last position
		var pos = digit_count(base) - 1
		while pos >= 0 and n > 0 do
			s[pos] = (n % base).to_c
			n = n / base
			pos -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s
	do
		return to_base(10,true)
	end

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String
	do
		return to_base(16,false)
	end

	# return displayable int in base base and signed
	fun to_base(base: Int, signed: Bool): String
	do
		# Start with a buffer of the final size, then overwrite each position
		var width = digit_count(base)
		var buf = new Buffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
484
redef class Float
	# `self' representation with 6 digits after the '.'.
	redef fun to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	#
	# The decimal part is truncated (not rounded) and is left-padded with '0'
	# so that exactly `nb' digits are displayed.
	# With `nb' equal to 0, only the integer part is returned.
	#
	# 2.25.to_precision(2) # --> "2.25"
	# 1.0625.to_precision(3) # --> "1.062"
	# (-0.5).to_precision(1) # --> "-0.5"
	fun to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		var i = to_i
		# Work on the absolute value of the fractional part:
		# the sign is carried by the integer part only
		var neg = self < 0.0
		var frac = self - i.to_f
		if neg then frac = -frac

		var dec = 1.0
		var left = nb
		while left > 0 do
			dec = dec * 10.0
			left -= 1
		end
		var d = (frac * dec).to_i

		# Restore the leading zeros lost by the integer conversion (e.g. 0.05 -> "05")
		var ds = d.to_s
		if ds.length < nb then ds = ("0" * (nb - ds.length)) + ds

		# An integer part of 0 cannot carry the sign by itself
		if neg and i == 0 and d > 0 then return "-{i}.{ds}"
		return "{i}.{ds}"
	end
end
503
redef class Char
	# A new string made of the single character `self'.
	redef fun to_s
	do
		var buf = new Buffer.with_capacity(1)
		# Writing at index 0 of an empty buffer appends the character
		buf[0] = self
		return buf.to_s
	end
end
512
redef class Collection[E]
	# Concatenate elements.
	# Null elements are skipped.
	redef fun to_s
	do
		var buf = new Buffer
		for item in self do
			if item != null then buf.append(item.to_s)
		end
		return buf.to_s
	end

	# Concatenate and separate each elements with `sep'.
	# A null element adds nothing, but its separator is still added.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var buf = new Buffer # Result
		var it = iterator

		# The first item is not preceded by a separator
		var item = it.item
		if item != null then buf.append(item.to_s)
		it.next

		# Each remaining item is preceded by a separator
		while it.is_ok do
			buf.append(sep)
			item = it.item
			if item != null then buf.append(item.to_s)
			it.next
		end
		return buf.to_s
	end
end
545
redef class Array[E]
	# Concatenate elements, skipping the null ones.
	# Fast implementation, based on direct indexing.
	redef fun to_s
	do
		var buf = new Buffer
		var len = length
		var pos = 0
		while pos < len do
			var item = self[pos]
			if item != null then buf.append(item.to_s)
			pos += 1
		end
		return buf.to_s
	end
end
561
redef class Map[K,V]
	# Concatenate couples of 'key value'.
	# key and value are separated by `couple_sep'.
	# couples are separated from each other by `sep'.
	# A couple with a null value adds nothing, but its separator is still added.
	fun join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var buf = new Buffer # Result
		var it = iterator

		# The first couple is not preceded by a separator
		var k = it.key
		var e = it.item
		if e != null then buf.append("{k}{couple_sep}{e}")
		it.next

		# Each remaining couple is preceded by a separator
		while it.is_ok do
			buf.append(sep)
			k = it.key
			e = it.item
			if e != null then buf.append("{k}{couple_sep}{e}")
			it.next
		end
		return buf.to_s
	end
end
590
591 ###############################################################################
592 # Native classes #
593 ###############################################################################
594
# Native strings are simple C char *
class NativeString
	# The character at position `index'.
	fun [](index: Int): Char is intern

	# Store the character `item' at position `index'.
	fun []=(index: Int, item: Char) is intern

	# Copy `length' characters of `self', starting at position `from',
	# into `dest', starting at position `to'.
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	fun cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do
			pos += 1
		end
		return pos
	end

	# The integer value of `self' (intern; presumably the C `atoi', TODO confirm).
	fun atoi: Int is intern
end
610
# StringCapable objects can create native strings
interface StringCapable
	# A new native string of `size' characters (intern).
	# NOTE(review): callers in this module rely on the content being usable
	# without prior initialization; presumably it is zero-filled like C `calloc' - confirm.
	protected fun calloc_string(size: Int): NativeString is intern
end
615
redef class Sys
	# Cache of the program arguments, filled on the first call to `args'.
	var _args_cache: nullable Sequence[String]

	# The arguments of the program, without the program name.
	# They are built once, on the first call, then cached.
	redef fun args: Sequence[String]
	do
		if _args_cache == null then init_args
		return _args_cache.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String
	do
		var native = native_argv(0)
		return new String.from_cstring(native)
	end

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	private fun init_args
	do
		var count = native_argc
		var list = new Array[String].with_capacity(0)
		# Native argument 0 is the program name: start at 1
		var pos = 1
		while pos < count do
			list[pos-1] = new String.from_cstring(native_argv(pos))
			pos += 1
		end
		_args_cache = list
	end

	# First argument of the main C function.
	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0"

	# Second argument of the main C function.
	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1"
end
648