metamodel: rename 'universal' to 'enum'
[nit.git] / lib / standard / string.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2008 Jean Privat <jean@pryen.org>
4 # Copyright 2006-2008 Floréal Morandat <morandat@lirmm.fr>
5 #
6 # This file is free software, which comes along with NIT. This software is
7 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
8 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
9 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
10 # is kept unaltered, and a notification of the changes is added.
11 # You are allowed to redistribute it and sell it, alone or is a part of
12 # another product.
13
14 # This module is about character strings.
15 package string
16
17 intrude import collection # FIXME should be collection::array
18 import hash
19
20 ###############################################################################
21 # String #
22 ###############################################################################
23
# Read-only services shared by every kind of character string.
#
# The characters are stored in a NativeString (`_items'); `String' and
# `Buffer' both specialize this class.
abstract class AbstractString
	super AbstractArrayRead[Char]
	# The native storage holding the characters of the string.
	readable private var _items: NativeString

	# The character at position `index', read directly from the native storage.
	redef fun [](index) do return _items[index]

	# Create a substring.
	#
	#     "abcd".substring(1, 2)	# --> "bc"
	#     "abcd".substring(-1, 2)	# --> "a"
	#     "abcd".substring(1, 0)	# --> ""
	#     "abcd".substring(2, 5)	# --> "cd"
	#
	# `count' must not be negative.
	# Positions outside of the string are silently clipped.
	fun substring(from: Int, count: Int): String
	do
		assert count >= 0
		# From here on, `count' is the (exclusive) end position
		count += from
		if from < 0 then from = 0
		if count > length then count = length
		if from < count then
			var r = new Buffer.with_capacity(count - from)
			while from < count do
				r.push(_items[from])
				from += 1
			end
			return r.to_s
		else
			return ""
		end
	end

	# Create a substring from the 'from' position up to the end of the string.
	#
	#     "abcd".substring_from(1)	# --> "bcd"
	#     "abcd".substring_from(-1)	# --> "abcd"
	#     "abcd".substring_from(2)	# --> "cd"
	#     "abcd".substring_from(4)	# --> ""
	fun substring_from(from: Int): String
	do
		# `from == length' is accepted: it designates the empty tail.
		assert from <= length
		return substring(from, length - from)
	end

	# Does `str' appear in `self' at the position `pos'?
	#
	#     "abcd".has_substring("bc",1)	# --> true
	#     "abcd".has_substring("bc",2)	# --> false
	fun has_substring(str: String, pos: Int): Bool
	do
		# The comparison is done backward, from the last character of `str'
		var itsindex = str.length - 1
		var myindex = pos + itsindex
		var myitems = _items
		var itsitems = str._items
		# `myindex' is read below, so it must be a valid position (< length).
		# `itsindex > myindex' holds exactly when `pos' is negative.
		if myindex >= length or itsindex > myindex then return false
		while itsindex >= 0 do
			if myitems[myindex] != itsitems[itsindex] then return false
			myindex -= 1
			itsindex -= 1
		end
		return true
	end

	# Is this string prefixed by 'prefix'
	#
	#     "abcd".has_prefix("abc")	# --> true
	#     "abcd".has_prefix("bc")	# --> false
	fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)

	# Is this string suffixed by 'suffix'
	#
	#     "abcd".has_suffix("abc")	# --> false
	#     "abcd".has_suffix("bcd")	# --> true
	fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)

	# If `self' contains only digits, return the corresponding integer
	fun to_i: Int
	do
		# Shortcut: delegate the parsing to the native `atoi'
		return to_s.to_cstring.atoi
	end

	# If `self' contains only digits and alpha <= 'f', return the corresponding integer.
	fun to_hex: Int do return a_to(16)

	# If `self' contains only digits and letters, return the corresponding integer in a given base
	#
	# The conversion stops at the first character whose value (`Char::to_i')
	# is not a digit of `base'; what was read so far is returned.
	# A character with a negative value (presumably '-', to be confirmed
	# against `Char::to_i') makes the result negative.
	fun a_to(base: Int) : Int
	do
		var i = 0
		var neg = false

		for c in self
		do
			var v = c.to_i
			# Digits of a base are in [0, base[, so `base' itself is already invalid
			if v >= base then
				if neg then
					return -i
				else
					return i
				end
			else if v < 0 then
				neg = true
			else
				i = i * base + v
			end
		end
		if neg then
			return -i
		else
			return i
		end
	end

	# String to upper case
	fun to_upper: String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_upper)
		return s.to_s
	end

	# String to lower case
	fun to_lower : String
	do
		var s = new Buffer.with_capacity(length)
		for i in self do s.add(i.to_lower)
		return s.to_s
	end


	# Output the characters of the string one by one.
	redef fun output
	do
		var i = 0
		while i < length do
			_items[i].output
			i += 1
		end
	end
end
160
161
# Character strings wrapping a native `char *' and a length.
class String
	super Comparable
	super AbstractString
	redef type OTHER: String

	# Create a new string from a given char * and the number of characters to consider.
	# `size' must not be negative.
	init with_native(nat: NativeString, size: Int)
	do
		assert size >= 0
		_length = size
		_items = nat
	end

	# Create a new string from a null terminated char *.
	# The length is the position of the first nul character.
	init from_cstring(str: NativeString)
	do
		_items = str
		_length = str.cstring_length
	end

	# Return the underlying char * (the very same native string, not a copy)
	fun to_cstring: NativeString do return _items

	# Two strings are equal if they have the same length and the same characters.
	redef fun ==(o)
	do
		if not o isa String or o is null then return false
		var n = length
		if n != o.length then return false
		var mine = _items
		var theirs = o._items
		var k = 0
		while k < n do
			if mine[k] != theirs[k] then return false
			k += 1
		end
		return true
	end

	# Lexicographic order based on the ascii value of the characters.
	# A strict prefix is lower than the whole string.
	redef fun <(s)
	do
		var mylen = length
		var itslen = s.length
		var mine = _items
		var theirs = s._items
		var k = 0
		while k < mylen and k < itslen do
			var a = mine[k].ascii
			var b = theirs[k].ascii
			if a < b then return true
			if b < a then return false
			k += 1
		end
		# All the common positions are equal: the shortest string is the lowest
		return mylen < itslen
	end

	# The concatenation of `self' with `s'
	fun +(s: String): String
	do
		var buf = new Buffer.with_capacity(length + s.length)
		buf.append(self)
		buf.append(s)
		return buf.to_s
	end

	# i repetitions of self
	# `i' must not be negative.
	fun *(i: Int): String
	do
		assert i >= 0
		var buf = new Buffer.with_capacity(length * i)
		var left = i
		while left > 0 do
			buf.append(self)
			left -= 1
		end
		return buf.to_s
	end

	redef fun to_s do return self

	redef fun hash
	do
		# djb2 hash algorithm, fed from the last character to the first one
		var h = 5381
		var chars = _items
		var k = _length - 1
		while k >= 0 do
			h = (h * 32) + h + chars[k].ascii
			k -= 1
		end
		return h
	end
end
265
# Buffers are growable and modifiable arrays of characters.
# Use `to_s' to get a String holding a copy of the current content.
class Buffer
	super AbstractString
	super Comparable
	super StringCapable
	super AbstractArray[Char]

	redef type OTHER: String

	# Set the character at `index'.
	# Writing at the position `length' appends the character.
	redef fun []=(index, item)
	do
		if index != length then
			assert index >= 0 and index < length
			_items[index] = item
		else
			add(item)
		end
	end

	# Append the character `c', growing the storage if it is full.
	redef fun add(c)
	do
		var len = length
		if _capacity <= len then enlarge(len + 5)
		_items[len] = c
		_length += 1
	end

	# Ensure that the capacity is at least `cap'.
	# Nothing is done if the current capacity is already sufficient.
	redef fun enlarge(cap)
	do
		var newcap = _capacity
		if newcap >= cap then return
		while newcap <= cap do newcap = newcap * 2 + 2
		# One extra character is requested in addition to the capacity
		var fresh = calloc_string(newcap + 1)
		_items.copy_to(fresh, length, 0, 0)
		_items = fresh
		_capacity = newcap
	end

	# Append the content of `s'.
	# Strings are copied in one native operation, other collections use the inherited implementation.
	redef fun append(s)
	do
		if s isa String then
			var extra = s.length
			var needed = length + extra
			if _capacity < needed then enlarge(needed)
			s.items.copy_to(_items, extra, 0, length)
			_length += extra
		else
			super
		end
	end

	# A new String holding a copy of the characters of the buffer.
	redef fun to_s: String
	do
		var len = length
		var copy = calloc_string(len + 1)
		_items.copy_to(copy, len, 0, 0)

		# The character after the last one is set to '\0' to get a nul-terminated char *
		copy[len] = '\0'

		return new String.with_native(copy, len)
	end

	# Lexicographic order based on the ascii value of the characters.
	redef fun <(s)
	do
		var mylen = length
		var itslen = s.length
		var k = 0
		while k < mylen and k < itslen do
			var a = self[k].ascii
			var b = s[k].ascii
			if a < b then return true
			if b < a then return false
			k += 1
		end
		# All the common positions are equal: the shortest one is the lowest
		return mylen < itslen
	end

	# Create a new empty buffer (with a capacity of 5 characters).
	init
	do
		with_capacity(5)
	end

	# Create a new buffer holding a copy of the characters of `s'.
	init from(s: String)
	do
		var len = s.length
		_length = len
		_capacity = len + 1
		_items = calloc_string(_capacity)
		s.items.copy_to(_items, _length, 0, 0)
	end

	# Create a new empty buffer with a given capacity.
	# `cap' must not be negative.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		_length = 0
		_capacity = cap
		# One extra character is requested in addition to the capacity
		_items = calloc_string(cap + 1)
	end

	# Two buffers are equal if they have the same length and the same characters.
	# Only another Buffer can be equal to a buffer.
	redef fun ==(o)
	do
		if not o isa Buffer or o is null then return false
		var n = length
		if n != o.length then return false
		var mine = _items
		var theirs = o._items
		var k = 0
		while k < n do
			if mine[k] != theirs[k] then return false
			k += 1
		end
		return true
	end

	# Number of characters that can be stored before the storage has to grow.
	readable private var _capacity: Int
end
390
391 ###############################################################################
392 # Refinement #
393 ###############################################################################
394
redef class Object
	# fun class_name: String is extern intern # The name of the class

	# User readable representation of `self'.
	# By default, it is the same as `inspect'.
	fun to_s: String do return inspect

	# Developer readable representation of `self'.
	# Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>".
	# By default, only the result of `inspect_head' is returned (without any closing '>').
	fun inspect: String
	do
		# r.add('>')
		return inspect_head
	end

	# Return the beginning of the representation: '<' followed by `object_id' in hexadecimal.
	# This function is mainly used with the redefinition of the `inspect' method
	protected fun inspect_head: String
	do
		return "<" + object_id.to_hex
	end

	# The arguments of the program, as known by `sys'.
	protected fun args: Sequence[String] do return sys.args
end
422
redef class Bool
	# "true" or "false", according to the value of `self'.
	# Each string is created only once.
	redef fun to_s
	do
		if not self then return once "false"
		return once "true"
	end
end
433
redef class Int
	# Fill `s' with the digits in base 'base' of `self' (and with the '-' sign if negative).
	# The digits are written from the position `digit_count(base) - 1' down to the first one.
	# assume < to_c max const of char
	# NOTE: `signed' is currently not read: the '-' is written for any negative value.
	fun fill_buffer(s: Buffer, base: Int, signed: Bool)
	do
		# Zero has a single digit
		if self == 0 then
			s[0] = '0'
			return
		end
		# Sign
		var rest = self
		if self < 0 then
			rest = -self
			s[0] = '-'
		end
		# Fill digits, the least significant one first
		var idx = digit_count(base) - 1
		while idx >= 0 and rest > 0 do
			s[idx] = (rest % base).to_c
			rest = rest / base
			idx -= 1
		end
	end

	# return displayable int in base 10 and signed
	redef fun to_s do return to_base(10,true)

	# return displayable int in hexadecimal (unsigned (not now))
	fun to_hex: String do return to_base(16,false)

	# return displayable int in base base and signed
	fun to_base(base: Int, signed: Bool): String
	do
		# Start with a buffer of spaces of the right length, then overwrite it
		var buf = new Buffer.from(" " * digit_count(base))
		fill_buffer(buf, base, signed)
		return buf.to_s
	end
end
474
redef class Float
	# `self' representation with 6 digits after the '.'.
	redef fun to_s do return to_precision(6)

	# `self' representation with `nb' digits after the '.'.
	#
	#     1.5.to_precision(1)	# --> "1.5"
	#     1.05.to_precision(2)	# --> "1.05"
	#     2.0.to_precision(3)	# --> "2.000"
	#
	# With `nb == 0', only the integer part (`to_i') is returned.
	# No rounding is done beyond what `to_i' does on the scaled fractional part.
	fun to_precision(nb: Int): String
	do
		if nb == 0 then return to_i.to_s

		var i = to_i

		# Scale factor: 10 to the power of `nb'
		var dec = 1.0
		var n = nb
		while n > 0 do
			dec = dec * 10.0
			n -= 1
		end

		# Fractional part, made positive so that the sign is not repeated after the '.'
		var frac = self - i.to_f
		if frac < 0.0 then frac = 0.0 - frac
		var d = (frac * dec).to_i.to_s

		# The conversion to Int drops the leading zeros (0.05 --> 5): put them back
		var missing = nb - d.length
		if missing > 0 then d = ("0" * missing) + d

		# The sign of a negative value is lost when its integer part is 0
		if i == 0 and self < 0.0 then return "-0.{d}"
		return "{i}.{d}"
	end
end
493
redef class Char
	# A new string made of the single character `self'.
	redef fun to_s
	do
		var buf = new Buffer.with_capacity(1)
		buf.add(self)
		return buf.to_s
	end
end
502
redef class Collection[E]
	# Concatenate the `to_s' of each element.
	# Null elements are skipped.
	redef fun to_s
	do
		var res = new Buffer
		for e in self do
			if e != null then res.append(e.to_s)
		end
		return res.to_s
	end

	# Concatenate and separate each element with `sep'.
	# An empty collection gives an empty string.
	# A null element contributes nothing, but its separator is still added.
	fun join(sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer
		var it = iterator

		# The first item is not preceded by the separator
		var first = it.item
		if first != null then res.append(first.to_s)
		it.next

		# Each following item is
		while it.is_ok do
			res.append(sep)
			var cur = it.item
			if cur != null then res.append(cur.to_s)
			it.next
		end
		return res.to_s
	end
end
535
redef class Array[E]
	# Concatenate the `to_s' of each element (null elements are skipped).
	# Fast implementation based on a direct access by index.
	redef fun to_s
	do
		var res = new Buffer
		var n = length
		var k = 0
		while k < n do
			var e = self[k]
			if e != null then res.append(e.to_s)
			k += 1
		end
		return res.to_s
	end
end
551
redef class Map[K,V]
	# Concatenate the couples 'key value' separated by `couple_sep', and separate each couple with `sep'.
	# An empty map gives an empty string.
	# A couple with a null value contributes nothing, but its `sep' is still added.
	fun map_join(sep: String, couple_sep: String): String
	do
		if is_empty then return ""

		var res = new Buffer
		var it = iterator

		# The first couple is not preceded by `sep'
		var k = it.key
		var e = it.item
		if e != null then res.append("{k}{couple_sep}{e}")
		it.next

		# Each following couple is
		while it.is_ok do
			res.append(sep)
			k = it.key
			e = it.item
			if e != null then res.append("{k}{couple_sep}{e}")
			it.next
		end
		return res.to_s
	end
end
578
579 ###############################################################################
# Native classes                                                              #
581 ###############################################################################
582
# Native strings are simple C char *
class NativeString
	# The character at position `index'.
	fun [](index: Int): Char is intern

	# Set the character at position `index'.
	fun []=(index: Int, item: Char) is intern

	# Copy characters of `self' into `dest'.
	# NOTE(review): from its callers, this looks like "copy `length' characters,
	# starting at `from' in `self', to the position `to' in `dest'" -- confirm
	# against the intern implementation.
	fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern

	# Position of the first nul character.
	# The string must contain one: the scan has no other stop condition.
	fun cstring_length: Int
	do
		var pos = 0
		while self[pos] != '\0' do pos += 1
		return pos
	end

	# The integer value parsed from the string by the intern implementation.
	fun atoi: Int is intern
end
598
# StringCapable objects can create native strings
class StringCapable
	# Create a new native string able to hold `size' characters.
	# NOTE(review): the name suggests a zero-filled allocation (like C `calloc') -- confirm
	# against the intern implementation.
	protected fun calloc_string(size: Int): NativeString is intern
end
603
redef class Sys
	# The arguments of the program, once computed (null until the first call of `args')
	var _args_cache: nullable Sequence[String]

	# The arguments of the program (the program name is not included).
	# They are computed on the first call, then kept in a cache.
	redef fun args: Sequence[String]
	do
		if _args_cache == null then init_args
		return _args_cache.as(not null)
	end

	# The name of the program as given by the OS
	fun program_name: String do return new String.from_cstring(native_argv(0))

	# Initialize `args' with the contents of `native_argc' and `native_argv'.
	# The native argument 0 (the program name) is skipped.
	private fun init_args
	do
		var count = native_argc
		var res = new Array[String].with_capacity(0)
		var pos = 1
		while pos < count do
			res[pos - 1] = new String.from_cstring(native_argv(pos))
			pos += 1
		end
		_args_cache = res
	end

	private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function.

	private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function.
end
636