lib: Split collections into readable and writable
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # This module is about character strings.
15 package string
16
17 intrude import array
18
19 ###############################################################################
20 # String #
21 ###############################################################################
22
abstract class AbstractString
special AbstractArrayRead[Char]
	# Native storage holding the characters of the string.
	readable private attr _items: NativeString

	# The character stored at `index' in the native storage.
	redef meth [](index) do return _items[index]

	# Create a substring of at most `count' characters starting at `from'.
	# A negative `from' is clamped to 0 (which shortens the result) and the
	# end position is clamped to `length'.
	#
	#     "abcd".substring(1, 2)	# --> "bc"
	#     "abcd".substring(-1, 2)	# --> "a"
	#     "abcd".substring(1, 0)	# --> ""
	#     "abcd".substring(2, 5)	# --> "cd"
	meth substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From here on, `count' holds the (exclusive) end position.
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from the position `from' up to the end of the string.
	# `from' must be lower than `length'.
	#
	#     "abcd".substring_from(1)	# --> "bcd"
	#     "abcd".substring_from(-1)	# --> "abcd"
	#     "abcd".substring_from(2)	# --> "cd"
	meth substring_from(from: Int): String
	do
		assert from < length
		return substring(from, length - from)
	end

	# Does `self' contain the string `str' starting exactly at position `pos'?
	#
	#     "abcd".has_substring("bc", 1)	# --> true
	#     "abcd".has_substring("bc", 2)	# --> false
	meth has_substring(str: String, pos: Int): Bool
	do
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# Reject when `str' would run past the end of `self' (first test)
		# or when `pos' is negative (second test).
		if myindex >= length or itsindex > myindex then return false
		# Compare backward, from the last character of `str' down to its first.
		while itsindex >= 0 do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by `prefix'?
	#
	#     "abcd".has_prefix("abc")	# --> true
	#     "abcd".has_prefix("bc")	# --> false
	meth has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by `suffix'?
	#
	#     "abcd".has_suffix("abc")	# --> false
	#     "abcd".has_suffix("bcd")	# --> true
	meth has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer.
	meth to_i: Int
	do
		# Shortcut: delegate the parsing to the native `atoi'.
		return to_s.to_cstring.atoi
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	meth to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base.
	# Parsing stops at the first character whose value is not a valid digit
	# in `base'; the value accumulated so far is returned.
	# A character with a negative value makes the result negative
	# (presumably '-', depends on `Char::to_i' -- to be confirmed).
	meth a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			# Valid digits in base `base' are 0 .. base-1.
			if v >= base then
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# A new string with each character of `self' converted with `Char::to_upper'.
	meth to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# A new string with each character of `self' converted with `Char::to_lower'.
	meth to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end

	# Output each character of `self', in order.
	redef meth output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
159
160
# Character strings built on top of a native char *.
class String
special Comparable
special AbstractString
	redef type OTHER: String

	# Create a new string from a given char * holding `size' characters.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_items = nat
		_length = size
	end

	# Create a new string from a null terminated char *.
	# The length is the position of the first nul character of `str'.
	init from_cstring(str: NativeString)
	do
		_items = str
		_length = str.cstring_length
	end

	# Return a null terminated char *
	meth to_cstring: NativeString do return _items

	# Two strings are equal if they have the same length and the same characters.
	redef meth ==(o)
	do
		if not o isa String or o is null then return false
		assert o isa String
		var len = length
		if o.length != len then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Character by character comparison using the `ascii' value of characters.
	# When one string is a prefix of the other, the shorter one is the lower.
	redef meth <(s)
	do
		var l1 = length
		var l2 = s.length
		var mine = _items
		var theirs = s._items
		var pos = 0
		while pos < l1 and pos < l2 do
			var c1 = mine[pos].ascii
			var c2 = theirs[pos].ascii
			# The first differing character decides.
			if c1 != c2 then return c1 < c2
			pos += 1
		end
		return l1 < l2
	end

	# The concatenation of `self' with `s'
	meth +(s: String): String
	do
		var buf = new Buffer.with_capacity(length + s.length)
		buf.append(self)
		buf.append(s)
		return buf.to_s
	end

	# i repetitions of self
	meth *(i: Int): String
	do
		assert i >= 0
		var buf = new Buffer.with_capacity(length * i)
		var left = i
		while left > 0 do
			buf.append(self)
			left -= 1
		end
		return buf.to_s
	end

	# A string is its own string representation.
	redef meth to_s do return self
end
251
# Buffers are growable, writable sequences of characters used to build strings.
class Buffer
special AbstractString
special Comparable
special StringCapable
special AbstractArray[Char]

	redef type OTHER: String

	# Set the character at `index'.
	# Writing at `index == length' appends the character.
	redef meth []=(index, item)
	do
		if index == length then
			add(item)
			return
		end
		assert index >= 0 and index < length
		_items[index] = item
	end

	# Append the character `c', growing the native storage when it is full.
	redef meth add(c)
	do
		if _capacity <= length then enlarge(length + 5)
		_items[length] = c
		_length += 1
	end

	# Ensure that the capacity is greater than `cap'.
	# Nothing is done when the current capacity is already at least `cap'.
	redef meth enlarge(cap)
	do
		var newcap = _capacity
		if cap <= newcap then return
		while newcap <= cap do
			newcap = newcap * 2 + 2
		end
		# One extra slot is requested beyond the capacity.
		var fresh = calloc_string(newcap+1)
		_items.copy_to(fresh, length, 0, 0)
		_items = fresh
		_capacity = newcap
	end

	# Append all the characters of `s'.
	# Strings are copied in one native block copy; other collections
	# go through the inherited implementation.
	redef meth append(s)
	do
		if s isa String then
			var added = s.length
			if _capacity < length + added then enlarge(length + added)
			s.items.copy_to(_items, added, 0, length)
			_length += added
		else
			super
		end
	end

	# A new String holding a copy of the current content.
	redef meth to_s: String
	do
		var len = length
		var copy = calloc_string(len+1)
		_items.copy_to(copy, len, 0, 0)

		# Ensure the afterlast byte is '\0' to nul-terminated char *
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Character by character comparison using the `ascii' value of characters.
	# When one is a prefix of the other, the shorter one is the lower.
	redef meth <(s)
	do
		var l1 = length
		var l2 = s.length
		var pos = 0
		while pos < l1 and pos < l2 do
			var c1 = self[pos].ascii
			var c2 = s[pos].ascii
			# The first differing character decides.
			if c1 != c2 then return c1 < c2
			pos += 1
		end
		return l1 < l2
	end

	# Create a new empty buffer (with a capacity of 5).
	init
	do
		with_capacity(5)
	end

	# Create a new buffer holding a copy of the characters of `s'.
	init from(s: String)
	do
		_capacity = s.length + 1
		_length = s.length
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, 0, 0)
	end

	# Create a new empty buffer with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		# _items = new NativeString.calloc(cap)
		_items = calloc_string(cap+1)
		_capacity = cap
		_length = 0
	end

	# Two buffers are equal if they have the same length and the same characters.
	# Anything that is not a Buffer is never equal to `self'.
	redef meth ==(o)
	do
		if not o isa Buffer or o is null then return false
		assert o isa Buffer
		var len = length
		if o.length != len then return false
		var mine = _items
		var theirs = o._items
		var pos = 0
		while pos < len do
			if mine[pos] != theirs[pos] then return false
			pos += 1
		end
		return true
	end

	# Number of characters the native storage is accounted to hold.
	readable private attr _capacity: Int
end
377
378 ###############################################################################
379 # Refinement #
380 ###############################################################################
381
redef class Object
	# meth class_name: String is extern intern # The name of the class

	# User readable representation of `self'.
	# By default this is the same as `inspect'.
	meth to_s: String do return inspect

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
	meth inspect: String
	do
		# Only the head is returned for now; no closing '>' is appended.
		return inspect_head
	end

	# Return '<' followed by the object identifier in hexadecimal.
	# This function is mainly used with the redefinition of the `inspect' method
	protected meth inspect_head: String
	do
		var id = object_id.to_hex
		return "<{id}"
	end

	# The arguments of the program, as provided by `sys'.
	protected meth args: IndexedCollection[String]
	do
		return sys.args
	end
end
409
redef class Bool
	# "true" or "false"; each literal is built only once.
	redef meth to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
420
redef class Int
	# Fill `s' with the digits of `self' written in base `base'.
	# A '-' is written at position 0 when `self' is negative; the `signed'
	# parameter is currently not consulted by this method.
	# Digits are written backward from position `digit_count(base) - 1',
	# so `s' should already hold that many characters (as done in `to_base').
	# Each digit is converted with `to_c'.
	meth fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero is a single '0' digit.
		if self == 0 then
			s[0] = '0'
			return
		end

		# Sign
		var rest = self
		if self < 0 then
			rest = - self
			s[0] = '-'
		end

		# Fill digits, least significant first.
		var at = digit_count(base) - 1
		while at >= 0 and rest > 0 do
			s[at] = (rest % base).to_c
			rest = rest / base
			at -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef meth to_s do return to_base(10,true)

	# return displayable int in hexadecimal (unsigned (not now))
	meth to_hex: String do return to_base(16,false)

	# return displayable int in base `base'
	# (`signed' is only forwarded to `fill_buffer')
	meth to_base(base: Int, signed: Bool): String
	do
		# Start from a buffer of blanks that has exactly the final size.
		var blank = " " * digit_count(base)
		var buf = new Buffer.from(blank)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
461
redef class Float
	# Default representation: six digits after the '.'.
	redef meth to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	# The fractional part is truncated (not rounded) to `nb' digits and is
	# padded with leading zeros so that it always has `nb' digits.
	#
	#     1.05.to_precision(2)	# --> "1.05"
	#     1.5.to_precision(3)	# --> "1.500"
	meth to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		var i = to_i

		# dec = 10^nb
		var dec = 1.0
		var n = nb
		while n > 0 do
			dec = dec * 10.0
			n -= 1
		end

		# Fractional part scaled to an integer of at most `nb' digits.
		var d = ((self-i.to_f)*dec).to_i

		# For negative numbers the sign is carried by the integer part;
		# it has to be written explicitly only when that part is 0.
		var sign = ""
		if d < 0 then
			d = -d
			if i == 0 then sign = "-"
		end

		# Restore the leading zeros lost by the integer conversion.
		var digits = d.to_s
		var missing = nb - digits.length
		if missing > 0 then digits = ("0" * missing) + digits

		return "{sign}{i}.{digits}"
	end
end
480
redef class Char
	# A new string made of the single character `self'.
	redef meth to_s
	do
		var buf = new Buffer.with_capacity(1)
		# Writing at index 0 of an empty buffer appends the character.
		buf[0] = self
		return buf.to_s
	end
end
489
redef class Collection[E]
	# Concatenate the `to_s' of each element; null elements are skipped.
	redef meth to_s
	do
		var buf = new Buffer
		for e in self do
			if e != null then buf.append(e.to_s)
		end
		return buf.to_s
	end

	# Concatenate and separate each elements with `sep'.
	# A null element contributes nothing, but its separator is still written.
	meth join(sep: String): String
	do
		if is_empty then return ""

		var buf = new Buffer # Result
		var it = iterator
		var first = true
		while it.is_ok do
			# The separator goes before every item except the first one.
			if first then
				first = false
			else
				buf.append(sep)
			end
			var e = it.item
			if e != null then buf.append(e.to_s)
			it.next
		end
		return buf.to_s
	end
end
522
redef class Array[E]
	# Index-based concatenation of the `to_s' of each element.
	# Null elements are skipped.
	redef meth to_s
	do
		var buf = new Buffer
		var len = length
		var pos = 0
		while pos < len do
			var e = self[pos]
			if e != null then
				buf.append(e.to_s)
			end
			pos += 1
		end
		return buf.to_s
	end
end
538
redef class Map[K,V]
	# Concatenate couples of 'key value' separated by `couple_sep', each couple being separated with `sep'.
	# A couple whose value is null is not written, but its separator still is.
	meth map_join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var buf = new Buffer # Result
		var it = iterator
		var first = true
		while it.is_ok do
			# The separator goes before every couple except the first one.
			if first then
				first = false
			else
				buf.append(sep)
			end
			var k = it.key
			var e = it.item
			if e != null then buf.append("{k}{couple_sep}{e}")
			it.next
		end
		return buf.to_s
	end
end
565
566 ###############################################################################
567 # Native classes #
568 ###############################################################################
569
# Native strings are simple C char *
class NativeString
	# The character at position `index' (native implementation).
	meth [](index: Int): Char is intern

	# Set the character at position `index' (native implementation).
	meth []=(index: Int, item: Char) is intern

	# Copy `length' characters starting at `from' into `dest' starting at `to'
	# (native implementation).
	meth copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	meth cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do
			pos += 1
		end
		return pos
	end

	# The integer parsed from `self' (native implementation).
	meth atoi: Int is intern
end
585
586 # StringCapable objects can create native strings.
587 class StringCapable
588 protected meth calloc_string(size: Int): NativeString is intern # A new native string for `size' characters; native implementation (presumably zero-filled like C calloc -- to be confirmed)
589 end
590
redef class Sys
	# Cache of the program arguments, filled on the first call to `args'.
	attr _args_cache: IndexedCollection[String]

	# The arguments of the program (the program name is not included).
	redef meth args: IndexedCollection[String]
	do
		if _args_cache == null then init_args
		return _args_cache
	end

	# The name of the program as given by the OS
	meth program_name: String
	do
		var name = native_argv(0)
		return new String.from_cstring(name)
	end

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	private meth init_args
	do
		var count = native_argc
		var res = new Array[String].with_capacity(0)
		# Native argument 0 is the program name: it is skipped.
		var pos = 0
		while pos + 1 < count do
			res[pos] = new String.from_cstring(native_argv(pos + 1))
			pos += 1
		end
		_args_cache = res
	end

	private meth native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.

	private meth native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.
end
623