update NOTICE and LICENSE
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # This module is about character strings.
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
# Read-only services shared by `String' and `Buffer'.
#
# The characters are stored in the native string `_items'; only the first
# `length' of them belong to the string.
abstract class AbstractString
	super AbstractArrayRead[Char]
	readable private var _items: NativeString

	redef fun [](index) do return _items[index]

	# Create a substring of at most `count' characters starting at `from'.
	#
	# The requested window is clipped to the bounds of `self': a negative
	# `from' shortens the result, and characters past the end are ignored.
	# `count' must not be negative.
	#
	#     "abcd".substring(1, 2)   # --> "bc"
	#     "abcd".substring(-1, 2)  # --> "a"
	#     "abcd".substring(1, 0)   # --> ""
	#     "abcd".substring(2, 5)   # --> "cd"
	fun substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From here on, `count' is the (exclusive) end position of the window
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring that starts at `from' and runs to the end of `self'.
	#
	# A `from' at or past the end of the string yields the empty string.
	#
	#     "abcd".substring_from(1)   # --> "bcd"
	#     "abcd".substring_from(-1)  # --> "abcd"
	#     "abcd".substring_from(2)   # --> "cd"
	#     "abcd".substring_from(4)   # --> ""
	fun substring_from(from: Int): String
	do
		# Nothing left to extract (also covers the empty receiver)
		if from >= length then return ""
		return substring(from, length - from)
	end

	# Does `self' contain `str' at position `pos'?
	#
	#     "abcd".has_substring("bc", 1)  # --> true
	#     "abcd".has_substring("bc", 2)  # --> false
	fun has_substring(str: String, pos: Int): Bool
	do
		# Characters are compared backward, from the last one of `str'
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `myindex' is the last position of `self' that is read: it must be a
		# valid index (hence `>='). `itsindex > myindex' means `pos' is negative.
		if myindex >= length or itsindex > myindex then return false
		while itsindex >= 0 do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Does `self' start with `prefix'?
	#
	#     "abcd".has_prefix("abc")  # --> true
	#     "abcd".has_prefix("bc")   # --> false
	fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Does `self' end with `suffix'?
	#
	#     "abcd".has_suffix("abc")  # --> false
	#     "abcd".has_suffix("bcd")  # --> true
	fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer.
	#
	# The conversion is delegated to `NativeString::atoi' on the C string of `self'.
	fun to_i: Int
	do
		# Shortcut
		return to_s.to_cstring.atoi
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	fun to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base.
	#
	# Parsing stops at the first character whose value (as given by `Char::to_i')
	# is not a digit of `base', i.e. is greater than or equal to `base'.
	# A character with a negative value marks the result as negative
	# (presumably the '-' sign; `Char::to_i' is defined outside this module).
	fun a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			if v >= base then
				# Valid digits of base `base' are 0 .. base-1: stop here
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# A copy of `self' with each character converted by `Char::to_upper'.
	fun to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for c in self do s.add(c.to_upper)
		return s.to_s
	end

	# A copy of `self' with each character converted by `Char::to_lower'.
	fun to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for c in self do s.add(c.to_lower)
		return s.to_s
	end

	# Output each character of `self', in order.
	redef fun output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
160
161
# Character string wrapping a native `char *' together with its length.
#
# Provides equality, lexicographic order, concatenation, repetition and hashing.
class String
	super Comparable
	super AbstractString
	redef type OTHER: String

	# Create a new string from the native `nat', using its first `size' characters.
	# `size' must not be negative.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_items = nat
		_length = size
	end

	# Create a new string from a nul-terminated char *.
	# The length is the position of the first nul character of `str'.
	init from_cstring(str: NativeString)
	do
		_items = str
		_length = str.cstring_length
	end

	# Return the underlying native string (the very same object, not a copy).
	fun to_cstring: NativeString do return _items

	# Two strings are equal if they have the same length and the same characters.
	redef fun ==(o)
	do
		if not o isa String or o is null then return false
		var len = length
		if len != o.length then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Lexicographic order on the `ascii' value of the characters.
	# A strict prefix is lower than the longer string.
	redef fun <(s)
	do
		var len1 = length
		var len2 = s.length
		var mine = _items
		var theirs = s._items
		var pos = 0
		while pos < len1 and pos < len2 do
			var a = mine[pos].ascii
			var b = theirs[pos].ascii
			# The first differing character decides
			if a != b then return a < b
			pos += 1
		end
		# All the common positions are equal: the shorter string is the lower
		return len1 < len2
	end

	# The concatenation of `self' with `s'
	fun +(s: String): String
	do
		var res = new Buffer.with_capacity(length + s.length)
		res.append(self)
		res.append(s)
		return res.to_s
	end

	# `i' repetitions of `self'. `i' must not be negative.
	fun *(i: Int): String
	do
		assert i >= 0
		var res = new Buffer.with_capacity(length * i)
		var remaining = i
		while remaining > 0 do
			res.append(self)
			remaining -= 1
		end
		return res.to_s
	end

	redef fun to_s do return self

	# djb2 hash algorithm, fed with the characters from the last to the first.
	redef fun hash
	do
		var acc = 5381
		var chars = _items
		var pos = _length - 1
		while pos >= 0 do
			acc = (acc * 32) + acc + chars[pos].ascii
			pos -= 1
		end
		return acc
	end
end
265
# Growable sequence of characters, used to build strings piece by piece.
#
# The native storage always holds one more character than `capacity', so
# that `to_s' can produce nul-terminated strings.
class Buffer
	super AbstractString
	super Comparable
	super StringCapable
	super AbstractArray[Char]

	redef type OTHER: String

	# Replace the character at `index'; writing at `length' appends `item'.
	redef fun []=(index, item)
	do
		if index == length then
			add(item)
		else
			assert index >= 0 and index < length
			_items[index] = item
		end
	end

	# Append the character `c', enlarging the storage when it is full.
	redef fun add(c)
	do
		var len = length
		if _capacity <= len then enlarge(len + 5)
		_items[len] = c
		_length += 1
	end

	# Ensure that the capacity is at least `cap'.
	# The storage grows geometrically and the current characters are copied.
	redef fun enlarge(cap)
	do
		var new_cap = _capacity
		if cap <= new_cap then return
		while new_cap <= cap do new_cap = new_cap * 2 + 2
		# One extra character is allocated on top of the capacity
		var fresh = calloc_string(new_cap + 1)
		_items.copy_to(fresh, length, 0, 0)
		_items = fresh
		_capacity = new_cap
	end

	# Append all the characters of `s'.
	# A `String' is copied natively in one step; anything else goes through `super'.
	redef fun append(s)
	do
		if not s isa String then
			super
			return
		end
		var added = s.length
		if _capacity < length + added then enlarge(length + added)
		s.items.copy_to(_items, added, 0, length)
		_length += added
	end

	# A new `String' holding a copy of the current characters.
	redef fun to_s: String
	do
		var len = length
		var copy = calloc_string(len + 1)
		_items.copy_to(copy, len, 0, 0)

		# Ensure the afterlast byte is '\0' to get a nul-terminated char *
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Lexicographic order on the `ascii' value of the characters.
	# A strict prefix is lower than the longer string.
	redef fun <(s)
	do
		var len1 = length
		var len2 = s.length
		var pos = 0
		while pos < len1 and pos < len2 do
			var a = self[pos].ascii
			var b = s[pos].ascii
			# The first differing character decides
			if a != b then return a < b
			pos += 1
		end
		return len1 < len2
	end

	# Create a new empty buffer (with a capacity of 5).
	init
	do
		with_capacity(5)
	end

	# Create a new buffer holding a copy of the characters of `s'.
	init from(s: String)
	do
		var len = s.length
		_capacity = len + 1
		_length = len
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, 0, 0)
	end

	# Create a new empty buffer with a given capacity.
	# `cap' must not be negative.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		_items = calloc_string(cap + 1)
		_capacity = cap
		_length = 0
	end

	# Two buffers are equal if they have the same length and the same characters.
	redef fun ==(o)
	do
		if not o isa Buffer or o is null then return false
		var len = length
		if len != o.length then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Number of characters that can be stored before the storage must be enlarged.
	readable private var _capacity: Int
end
390
391 ###############################################################################
392 # Refinement #
393 ###############################################################################
394
redef class Object
	# fun class_name: String is extern intern # The name of the class

	# User readable representation of `self'.
	# By default, it is the same as `inspect'.
	fun to_s: String do return inspect

	# The class name of the object in NativeString format.
	private fun native_class_name: NativeString is intern

	# The class name of the object.
	# FIXME: real type information is not available at runtime. Therefore, for instance, an instance of List[Bool] has just "List" for classname
	fun class_name: String
	do
		return new String.from_cstring(native_class_name)
	end

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	fun inspect: String do return "<{inspect_head}>"

	# Return "CLASSNAME:#OBJECTID".
	# This function is mainly used with the redefinition of the inspect method
	protected fun inspect_head: String do return "{class_name}:#{object_id.to_hex}"

	# The arguments of the program, as provided by `sys'.
	protected fun args: Sequence[String] do return sys.args
end
427
redef class Bool
	# "true" or "false", according to the value of `self'.
	# Each of the two strings is created only once.
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
438
redef class Int
	# Fill `s' with the digits in base `base' of `self' (and with the '-' sign if negative).
	#
	# The digits are written from the right, ending at position `digit_count(base) - 1'.
	# `signed' is currently not consulted: the sign is written whenever `self' is negative.
	# Assumes that each digit value is accepted by `to_c'.
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single digit
		if self == 0 then
			s[0] = '0'
			return
		end

		# Sign
		var n = self
		if self < 0 then
			n = - self
			s[0] = '-'
		end

		# Fill digits, the least significant one first
		var pos = digit_count(base) - 1
		while pos >= 0 and n > 0 do
			s[pos] = (n % base).to_c
			n = n / base
			pos -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s do return to_base(10,true)

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String do return to_base(16,false)

	# return displayable int in base `base'
	#
	# A buffer of `digit_count(base)' spaces is allocated then overwritten by `fill_buffer'.
	fun to_base(base: Int, signed: Bool): String
	do
		var buf = new Buffer.from(" " * digit_count(base))
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
479
redef class Float
	# `self' with 6 digits after the '.'.
	redef fun to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	#
	# The value is truncated (not rounded) to `nb' decimals, and the decimal
	# part is padded with leading zeros so that it always has `nb' digits.
	# With `nb == 0', only the integer part is returned, without any '.'.
	#
	#     3.0625.to_precision(4)  # --> "3.0625"
	#     1.5.to_precision(2)     # --> "1.50"
	fun to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		# Integer part
		var i = to_i

		# dec = 10^nb
		var dec = 1.0
		var n = nb
		while n > 0 do
			dec = dec * 10.0
			n -= 1
		end

		# Decimal part, as an integer of at most `nb' digits
		var d = ((self - i.to_f) * dec).to_i

		# For negative numbers, the decimal part is negative too: display its
		# magnitude only. The sign comes from the integer part, except
		# when that part is 0 and so cannot carry it.
		var sign = ""
		if d < 0 then
			d = -d
			if i == 0 then sign = "-"
		end

		# Restore the leading zeros lost by the integer conversion (e.g. .0625 -> 625)
		var digits = d.to_s
		var missing = nb - digits.length
		var zeros = ""
		if missing > 0 then zeros = "0" * missing

		return "{sign}{i}.{zeros}{digits}"
	end
end
498
redef class Char
	# A new string made of the single character `self'.
	redef fun to_s
	do
		var buf = new Buffer.with_capacity(1)
		buf.add(self)
		return buf.to_s
	end
end
507
redef class Collection[E]
	# Concatenate the `to_s' of the elements. Null elements are skipped.
	redef fun to_s
	do
		var res = new Buffer
		for e in self do
			if e != null then res.append(e.to_s)
		end
		return res.to_s
	end

	# Concatenate and separate each elements with `sep'.
	#
	# A null element contributes nothing, but its separator is still inserted.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer # Result
		var it = iterator
		# No separator before the first item
		var is_first = true
		while it.is_ok do
			if is_first then
				is_first = false
			else
				res.append(sep)
			end
			var e = it.item
			if e != null then res.append(e.to_s)
			it.next
		end
		return res.to_s
	end
end
540
redef class Array[E]
	# Concatenate the `to_s' of the elements. Null elements are skipped.
	# Fast implementation: direct indexed access instead of an iterator.
	redef fun to_s
	do
		var res = new Buffer
		var len = length
		var pos = 0
		while pos < len do
			var item = self[pos]
			if item != null then res.append(item.to_s)
			pos += 1
		end
		return res.to_s
	end
end
556
redef class Map[K,V]
	# Concatenate couple of 'key value' separate by 'couple_sep' and separate each couple with `sep'.
	#
	# A couple whose value is null contributes nothing, but its `sep' is still inserted.
	fun map_join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer # Result
		var it = iterator
		# No separator before the first couple
		var is_first = true
		while it.is_ok do
			if is_first then
				is_first = false
			else
				res.append(sep)
			end
			var k = it.key
			var e = it.item
			if e != null then res.append("{k}{couple_sep}{e}")
			it.next
		end
		return res.to_s
	end
end
583
584 ###############################################################################
585 # Native classes #
586 ###############################################################################
587
# Native strings are simple C char *
class NativeString
	# The character at position `index'.
	fun [](index: Int): Char is intern

	# Set the character at position `index' to `item'.
	fun []=(index: Int, item: Char) is intern

	# Copy characters of `self' into `dest'.
	# NOTE(review): from its uses in this module, it copies `length' characters
	# starting at `from' in `self' to position `to' in `dest' -- confirm against the intern implementation.
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	fun cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do pos += 1
		return pos
	end

	# The integer parsed from `self' (intern; presumably C `atoi' -- confirm).
	fun atoi: Int is intern
end
603
# StringCapable objects can create native strings
class StringCapable
	# A new native string of `size' characters (intern).
	# NOTE(review): users in this module rely on the content being zeroed, as the name suggests -- confirm.
	protected fun calloc_string(size: Int): NativeString is intern
end
608
redef class Sys
	# The program arguments, once computed by `init_args' (null before the first call to `args').
	var _args_cache: nullable Sequence[String]

	# The arguments of the program, without the program name.
	# They are computed at the first call, then cached.
	redef fun args: Sequence[String]
	do
		if _args_cache == null then init_args
		return _args_cache.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String do return new String.from_cstring(native_argv(0))

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	# The native argument 0 (the program name) is skipped.
	private fun init_args
	do
		var count = native_argc - 1
		var list = new Array[String].with_capacity(0)
		var idx = 0
		while idx < count do
			# Argument `idx' of the program is the native argument `idx + 1'
			list[idx] = new String.from_cstring(native_argv(idx + 1))
			idx += 1
		end
		_args_cache = list
	end

	# First argument of the main C function.
	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0"

	# Second argument of the main C function.
	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1"
end
641