nullable: convert lib, tools and tests
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # This module is about character strings.
15 package string
16
17 intrude import array
18
19 ###############################################################################
20 # String #
21 ###############################################################################
22
# Services shared by the character string classes of this module
# (`String' and `Buffer' both specialize it).
#
# The characters are stored in the native string `_items'.
abstract class AbstractString
special AbstractArrayRead[Char]
	# The native string that holds the characters.
	readable private attr _items: NativeString

	# The character at position `index' (read directly from `_items').
	redef meth [](index) do return _items[index]

	# Create a substring of at most `count' characters starting at `from'.
	# Positions outside of the string are silently clipped.
	#
	#     "abcd".substring(1, 2)	# --> "bc"
	#     "abcd".substring(-1, 2)	# --> "a"
	#     "abcd".substring(1, 0)	# --> ""
	#     "abcd".substring(2, 5)	# --> "cd"
	#
	# `count' must not be negative.
	meth substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From now on, `count' is the (exclusive) end position
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from the position `from' up to the end of the string.
	#
	#     "abcd".substring_from(1)	# --> "bcd"
	#     "abcd".substring_from(-1)	# --> "abcd"
	#     "abcd".substring_from(2)	# --> "cd"
	#     "abcd".substring_from(4)	# --> ""
	meth substring_from(from: Int): String
	do
		# Nothing remains at or after the end (this also covers the empty string);
		# the guard keeps `length - from' non-negative for `substring'.
		if from >= length then return ""
		return substring(from, length - from)
	end

	# Does `self' contain the string `str' starting at position `pos'?
	#
	#     "abcd".has_substring("bc", 1)	# --> true
	#     "abcd".has_substring("bc", 2)	# --> false
	#
	# Return false when `pos' is negative or when `str' does not fit in `self'
	# from `pos'.
	meth has_substring(str: String, pos: Int): Bool
	do
		# Characters are compared backward, from the last one of `str'
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `itsindex > myindex' means that `pos' is negative
		if myindex >= length or itsindex > myindex then return false
		while itsindex >= 0 do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by `prefix'?
	#
	#     "abcd".has_prefix("abc")	# --> true
	#     "abcd".has_prefix("bc")	# --> false
	meth has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by `suffix'?
	#
	#     "abcd".has_suffix("abc")	# --> false
	#     "abcd".has_suffix("bcd")	# --> true
	meth has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer
	meth to_i: Int
	do
		# Shortcut: delegate to the native `atoi'
		return to_s.to_cstring.atoi
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	meth to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base
	#
	# The conversion stops at the first character whose value is not a valid
	# digit in `base'.
	meth a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			if v >= base then
				# Not a digit of this base (valid digits are 0..base-1): stop here
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				# A negative value is taken as a minus sign
				# NOTE(review): relies on Char::to_i (defined elsewhere) -- confirm
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# String to upper case
	meth to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# String to lower case
	meth to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end

	# Output each character of the string, in order.
	redef meth output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
159
160
# Character strings built on top of a native string.
# This class defines no service that modifies the characters.
class String
special Comparable
special AbstractString
	redef type OTHER: String

	# Create a new string from a given char *.
	# `nat' is stored as is and `size' becomes the length of the string.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_length = size
		_items = nat
	end

	# Create a new string from a null terminated char *.
	# The length is the position of the first nul character of `str'.
	init from_cstring(str: NativeString)
	do
		_items = str
		_length = str.cstring_length
	end

	# Return a null terminated char *
	# (the native string of `self' is returned directly).
	meth to_cstring: NativeString do return _items

	# Two strings are equal if they have the same length and the same characters.
	redef meth ==(o)
	do
		if not o isa String or o is null then return false
		var len = length
		if o.length != len then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Character by character comparison of the ascii codes.
	# When one string is a prefix of the other, the shorter one is the lesser.
	redef meth <(s)
	do
		var len1 = length
		var len2 = s.length
		var chars1 = _items
		var chars2 = s._items
		var pos = 0
		while pos < len1 and pos < len2 do
			var code1 = chars1[pos].ascii
			var code2 = chars2[pos].ascii
			if code1 < code2 then return true
			if code2 < code1 then return false
			pos += 1
		end
		return len1 < len2
	end

	# The concatenation of `self' with `s'
	meth +(s: String): String
	do
		var buf = new Buffer.with_capacity(length + s.length)
		buf.append(self)
		buf.append(s)
		return buf.to_s
	end

	# `i' repetitions of `self'
	# `i' must not be negative.
	meth *(i: Int): String
	do
		assert i >= 0
		var buf = new Buffer.with_capacity(length * i)
		var left = i
		while left > 0 do
			buf.append(self)
			left -= 1
		end
		return buf.to_s
	end

	# A string is its own string representation.
	redef meth to_s do return self
end
250
# Character strings that can be modified and extended.
# The native storage is reallocated when more room is needed.
class Buffer
special AbstractString
special Comparable
special StringCapable
special AbstractArray[Char]

	redef type OTHER: String

	# Number of characters that fit before the native storage must be enlarged.
	readable private attr _capacity: Int

	# Create a new empty string.
	init
	do
		with_capacity(5)
	end

	# Create a new buffer with a copy of the characters of `s'.
	init from(s: String)
	do
		var len = s.length
		_length = len
		_capacity = len + 1
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, 0, 0)
	end

	# Create a new empty string with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		_length = 0
		_capacity = cap
		# One extra character is allocated on top of the capacity
		_items = calloc_string(cap+1)
	end

	# Replace the character at `index', or append `item' when `index' is the length.
	redef meth []=(index, item)
	do
		if index == length then
			add(item)
		else
			assert index >= 0 and index < length
			_items[index] = item
		end
	end

	# Append the character `c', enlarging the storage when it is full.
	redef meth add(c)
	do
		var len = length
		if _capacity <= len then enlarge(len + 5)
		_items[len] = c
		_length += 1
	end

	# Ensure that the capacity is greater than `cap'.
	# Nothing is done when the current capacity is already at least `cap'.
	redef meth enlarge(cap)
	do
		var new_cap = _capacity
		if cap <= new_cap then return
		while new_cap <= cap do new_cap = new_cap * 2 + 2
		# Move the current characters into a bigger native string
		var fresh = calloc_string(new_cap + 1)
		_items.copy_to(fresh, length, 0, 0)
		_items = fresh
		_capacity = new_cap
	end

	# Append all the characters of `s'.
	# A native copy is used when `s' is a String.
	redef meth append(s)
	do
		if s isa String then
			var added = s.length
			var needed = length + added
			if _capacity < needed then enlarge(needed)
			s.items.copy_to(_items, added, 0, length)
			_length += added
		else
			super
		end
	end

	# A new String with a copy of the current characters.
	redef meth to_s: String
	do
		var len = length
		var copy = calloc_string(len + 1)
		_items.copy_to(copy, len, 0, 0)

		# Ensure the afterlast byte is '\0' to nul-terminated char *
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Character by character comparison of the ascii codes.
	# When one string is a prefix of the other, the shorter one is the lesser.
	redef meth <(s)
	do
		var len1 = length
		var len2 = s.length
		var pos = 0
		while pos < len1 and pos < len2 do
			var code1 = self[pos].ascii
			var code2 = s[pos].ascii
			if code1 < code2 then return true
			if code2 < code1 then return false
			pos += 1
		end
		return len1 < len2
	end

	# Two buffers are equal if they have the same length and the same characters.
	redef meth ==(o)
	do
		if not o isa Buffer or o is null then return false
		var len = length
		if o.length != len then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end
end
375
376 ###############################################################################
377 # Refinement #
378 ###############################################################################
379
redef class Object
	# meth class_name: String is extern intern # The name of the class

	# User readable representation of `self'.
	# By default, it is the result of `inspect'.
	meth to_s: String do return inspect

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>".
	# This default implementation only returns the result of `inspect_head'
	# (no closing '>' is added).
	meth inspect: String do return inspect_head

	# Return "<" followed by the object id in hexadecimal.
	# This function is mainly used with the redefinition of the inspect(0) method
	protected meth inspect_head: String do return "<{object_id.to_hex}"

	# The arguments of the program, as provided by `sys'.
	protected meth args: IndexedCollection[String] do return sys.args
end
407
redef class Bool
	# "true" or "false", according to the value of `self'.
	redef meth to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
418
redef class Int
	# Fill `s' with the digits in base `base' of `self'.
	# A '-' is written at position 0 when `self' is negative.
	# The digits are written backward, from position `digit_count(base) - 1'.
	# assume < to_c max const of char
	#
	# Note: `signed' is currently not read by the implementation.
	meth fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single digit
		if self == 0 then
			s[0] = '0'
			return
		end

		# Sign
		var n = self
		if self < 0 then
			n = - self
			s[0] = '-'
		end

		# Fill digits, least significant first
		var pos = digit_count(base) - 1
		while pos >= 0 and n > 0 do
			s[pos] = (n % base).to_c
			n = n / base
			pos -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef meth to_s do return to_base(10,true)

	# return displayable int in hexadecimal (unsigned (not now))
	meth to_hex: String do return to_base(16,false)

	# return displayable int in base base and signed
	# A buffer of `digit_count(base)' spaces is allocated then filled by `fill_buffer'.
	meth to_base(base: Int, signed: Bool): String
	do
		var buf = new Buffer.from(" " * digit_count(base))
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
459
redef class Float
	# `self' representation with 6 digits after the '.'.
	redef meth to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	#
	# The fractional part is truncated (not rounded) and is left-padded with
	# zeros so that exactly `nb' digits are written when `nb' is positive:
	#
	#     1.05.to_precision(2)	# --> "1.05"
	#     1.5.to_precision(3)	# --> "1.500"
	#
	# With `nb == 0', only the integer part is returned.
	meth to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		# dec = 10^nb
		var dec = 1.0
		var n = nb
		while n > 0 do
			dec = dec * 10.0
			n -= 1
		end

		var i = to_i
		var d = ((self-i.to_f)*dec).to_i

		# Write the sign once, in front, then work on absolute values.
		# NOTE(review): assumes Float::to_i truncates toward zero -- confirm
		var neg = i < 0 or d < 0
		if i < 0 then i = - i
		if d < 0 then d = - d

		var frac = d.to_s
		var r = new Buffer
		if neg then r.add('-')
		r.append(i.to_s)
		r.add('.')
		# Restore the leading zeros dropped by the integer conversion
		var pad = nb - frac.length
		if pad > 0 then r.append("0" * pad)
		r.append(frac)
		return r.to_s
	end
end
478
redef class Char
	# A new string that contains only `self'.
	redef meth to_s
	do
		var buf = new Buffer.with_capacity(1)
		buf.add(self)
		return buf.to_s
	end
end
487
redef class Collection[E]
	# Concatenate elements.
	# Null elements are skipped.
	redef meth to_s
	do
		var buf = new Buffer
		for e in self do
			if e != null then buf.append(e.to_s)
		end
		return buf.to_s
	end

	# Concatenate and separate each elements with `sep'.
	# A null element contributes nothing, but its separator is still written.
	meth join(sep: String): String
	do
		if is_empty then return ""

		var buf = new Buffer # Result
		var it = iterator

		# The first item is not preceded by a separator
		var first = it.item
		if first != null then buf.append(first.to_s)
		it.next

		# Each other item is preceded by a separator
		while it.is_ok do
			buf.append(sep)
			var e = it.item
			if e != null then buf.append(e.to_s)
			it.next
		end
		return buf.to_s
	end
end
520
redef class Array[E]
	# Concatenate elements (null elements are skipped).
	# Fast implementation: elements are accessed by index instead of with an iterator.
	redef meth to_s
	do
		var buf = new Buffer
		var len = length
		var pos = 0
		while pos < len do
			var e = self[pos]
			if e != null then buf.append(e.to_s)
			pos += 1
		end
		return buf.to_s
	end
end
536
redef class Map[K,V]
	# Concatenate couples of 'key value' separated by `couple_sep', and separate each couple with `sep'.
	# A couple whose item is null is not written, but its separator `sep' still is.
	meth map_join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var buf = new Buffer # Result
		var it = iterator

		# The first couple is not preceded by a separator
		var k = it.key
		var e = it.item
		if e != null then buf.append("{k}{couple_sep}{e}")
		it.next

		# Each other couple is preceded by a separator
		while it.is_ok do
			buf.append(sep)
			k = it.key
			e = it.item
			if e != null then buf.append("{k}{couple_sep}{e}")
			it.next
		end
		return buf.to_s
	end
end
563
564 ###############################################################################
565 # Native classes #
566 ###############################################################################
567
# Native strings are simple C char *
class NativeString
	# The character at position `index'.
	meth [](index: Int): Char is intern

	# Set the character at position `index' to `item'.
	meth []=(index: Int, item: Char) is intern

	# Native copy of characters into `dest'.
	# NOTE(review): judging from the callers in this module, `length' characters
	# are copied from position `from' of `self' to position `to' of `dest' -- confirm
	meth copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	meth cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do pos += 1
		return pos
	end

	# Native conversion to an integer.
	meth atoi: Int is intern
end
583
# StringCapable objects can create native strings
class StringCapable
	# Create a new native string for `size' characters.
	# NOTE(review): implemented natively; the name suggests calloc-like (zeroed) memory -- confirm
	protected meth calloc_string(size: Int): NativeString is intern
end
588
redef class Sys
	# The program arguments, once computed by `init_args' (null before).
	attr _args_cache: nullable IndexedCollection[String]

	# The arguments of the program.
	# They are computed on the first call, then cached.
	redef meth args: IndexedCollection[String]
	do
		if _args_cache == null then init_args
		return _args_cache.as(not null)
	end

	# The name of the program as given by the OS
	meth program_name: String do return new String.from_cstring(native_argv(0))

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	# The native argument 0 (see `program_name') is not part of `args'.
	private meth init_args
	do
		var count = native_argc
		var res = new Array[String].with_capacity(0)
		# `idx' is the position in `res'; the matching native argument is `idx + 1'
		var idx = 0
		while idx + 1 < count do
			res[idx] = new String.from_cstring(native_argv(idx + 1))
			idx += 1
		end
		_args_cache = res
	end

	# First argument of the main C function.
	private meth native_argc: Int is extern "kernel_Sys_Sys_native_argc_0"

	# Second argument of the main C function.
	private meth native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1"
end
621