1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Copyright 2005-2008 Jean Privat <jean@pryen.org>
5 # This file is free software, which comes along with NIT. This software is
6 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
7 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
9 # is kept unaltered, and a notification of the changes is added.
10 # You are allowed to redistribute it and sell it, alone or is a part of
13 # Basic string search, match and replace.
18 # Patterns are abstract string motifs (include `String` and `Char`).
20 # Search `self` into `s` from a certain position.
21 # Return the position of the first character of the matching section.
22 # Return -1 if not found.
24 # assert 'l'.search_index_in("hello world", 0) == 2
25 # assert 'l'.search_index_in("hello world", 3) == 3
26 # assert 'z'.search_index_in("hello world", 0) == -1
28 # This method is usually faster than `search_in` if what is
29 # required is only the index.
30 # Note: in most implementation, `search_in` is implemented with this method.
31 protected fun search_index_in
(s
: Text, from
: Int): Int is abstract
33 # Search `self` into `s` from a certain position.
34 # Return null if not found.
36 # assert 'l'.search_in("hello world", 0).from == 2
37 # assert 'l'.search_in("hello world", 3).from == 3
38 # assert 'z'.search_in("hello world", 0) == null
40 # If only the index of the first character if required, see `search_index_in`.
42 # Note: Is used by `String::search`, `String::search_from`, and others.
43 protected fun search_in
(s
: Text, from
: Int): nullable Match is abstract
45 # Search all `self` occurrences into `s`.
47 # assert 'l'.search_all_in("hello world").length == 3
48 # assert 'z'.search_all_in("hello world").length == 0
50 # Note: Is used by `String::search_all`.
51 protected fun search_all_in
(s
: Text): Array[Match]
53 var res
= new Array[Match] # Result
54 var match
= search_in
(s
, 0)
55 while match
!= null do
57 match
= search_in
(s
, match
.after
)
62 # Split `s` using `self` is separator.
64 # Returns an array of matches that are between each occurence of `self`.
65 # If self is not present, an array with a single match on `s` is retunred.
67 # assert 'l'.split_in("hello world").join("|") == "he||o wor|d"
68 # assert 'z'.split_in("hello world").join("|") == "hello world"
70 # Note: is used by `String::split`
71 protected fun split_in
(s
: Text): Array[Match]
73 var res
= new Array[Match] # Result
75 var match
= search_in
(s
, 0)
76 while match
!= null do
77 # Compute the splited part length
78 var len
= match
.from
- i
79 res
.add
(new Match(s
.to_s
, i
, len
))
81 match
= search_in
(s
, i
)
84 res
.add
(new Match(s
.to_s
, i
, s
.length
- i
))
89 protected fun is_in
(s
: Text): Bool do return search_index_in
(s
, 0) != -1
92 # BM_Pattern are pre-compiled string motif for the Boyer-Moore algorithm.
93 # (cf. A Fast String Searching Algorithm, with R.S. Boyer. Communications
94 # of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.)
95 # http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
97 # var pat = new BM_Pattern("hello")
98 # assert "I say hello to the world!".search(pat).from == 6
99 # assert "I say goodbye to the world!".search(pat) == null
103 redef fun to_s
do return _motif
105 # boyer-moore search gives the position of the first occurrence of a pattern starting at position `from`
106 redef fun search_index_in
(s
, from
)
113 while j
< n
- m
+ 1 do
114 var i
= m
- 1 # Cursor in the pattern
115 while i
>= 0 and _motif
.chars
[i
] == s
.chars
[i
+ j
] do i
-= 1
119 var gs
= _gs
[i
] # Good shift
120 var bc
= bc
(s
.chars
[i
+j
]) - m
+ 1 + i
# Bad char
121 # Both are true, do move to the best
129 return -1 # found nothing
132 # boyer-moore search. Return null if not found
133 redef fun search_in
(s
, from
)
135 var to
= search_index_in
(s
, from
)
139 return new Match(s
.to_s
, to
, _length
)
145 _length
= _motif
.length
146 _gs
= new Array[Int].with_capacity
(_length
)
152 private var motif
: String
154 # length of the motif
155 private var length
: Int is noinit
157 private fun bc
(e
: Char): Int
159 if _bc_table
.has_key
(e
) then
167 private var gs
: Array[Int] is noinit
170 private var bc_table
= new ArrayMap[Char, Int]
172 private fun compute_bc
178 _bc_table
[x
.chars
[i
]] = m
- i
- 1
183 private fun suffixes
: Array[Int]
187 var suff
= new Array[Int].filled_with
(m
, m
)
193 if i
> g
and suff
[i
+m-1-f
] < i
- g
then
194 suff
[i
] = suff
[i
+m-1-f
]
198 while g
>= 0 and x
.chars
[g
] == x
.chars
[g
+ m
- 1 - f
] do g
-= 1
206 private fun compute_gs
218 if i
== -1 or suff
[i
] == i
+ 1 then
219 while j
< m
- 1 - i
do
220 if _gs
[j
] == m
then _gs
[j
] = m
- 1 - i
228 _gs
[m
- 1 - suff
[i
]] = m
- 1 - i
233 redef fun hash
do return _motif
.hash
234 redef fun ==(o
) do return o
isa BM_Pattern and o
._motif
== _motif
237 # Matches are a part of a `Text` found by a `Pattern`.
239 # The base string matched
242 # var m = "hello world".search("lo")
243 # assert m.string == "hello world"
247 # The starting position in the string
250 # var m = "hello world".search("lo")
255 # The length of the matching part
258 # var m = "hello world".search("lo")
259 # assert m.length == 2
263 # The position of the first character just after the matching part.
264 # May be out of the base string
267 # var m = "hello world".search("lo")
268 # assert m.after == 5
270 fun after
: Int do return from
+ length
272 # The contents of the matching part
275 # var m = "hello world".search("lo")
276 # assert m.to_s == "lo"
278 redef fun to_s
do return string
.substring
(from
,length
)
280 # The content of `string` before the match
283 # var m = "hello world".search("lo")
284 # assert m.text_before == "hel"
286 fun text_before
: String do return string
.substring
(0, from
)
288 # The content of `string` after the match
291 # var m = "hello world".search("lo")
292 # assert m.text_after == " world"
294 fun text_after
: String do return string
.substring_from
(after
)
298 assert positive_length
: length
>= 0
299 assert valid_from
: from
>= 0
300 assert valid_after
: from
+ length
<= string
.length
307 redef fun search_index_in
(s
, from
)
311 if s
.chars
[from
] == self then return from
317 redef fun search_in
(s
, from
)
319 var pos
= search_index_in
(s
, from
)
323 return new Match(s
.to_s
, pos
, 1)
331 redef fun search_index_in
(s
, from
)
334 var stop
= s
.length
- length
+ 1
337 while i
>= 0 and self.chars
[i
] == s
.chars
[i
+ from
] do i
-= 1
339 if i
< 0 then return from
340 # Not found so try next one
346 redef fun search_in
(s
, from
)
348 var pos
= search_index_in
(s
, from
)
352 return new Match(s
.to_s
, pos
, length
)
356 # Search the first occurence of `pattern`.
357 # Return null if not found.
359 # assert "I say hello to the world!".search("hello").from == 6
360 # assert "I say goodbye to the world!".search("hello") == null
361 fun search
(pattern
: Pattern): nullable Match do return pattern
.search_in
(self, 0)
363 # Search the first occurence of `pattern` after `from`.
364 # The search starts at `from`.
365 # Return null if not found.
367 # assert "I say hello to the world!".search_from("hello",4).from == 6
368 # assert "I say hello to the world!".search_from("hello",7) == null
369 fun search_from
(pattern
: Pattern, from
: Int): nullable Match do return pattern
.search_in
(self, from
)
371 # Search the last occurence of the text `t`.
373 # assert "bob".search_last("b").from == 2
374 # assert "bob".search_last("bo").from == 0
375 # assert "bob".search_last("ob").from == 1
376 # assert "bobob".search_last("ob").from == 3
377 # assert "bobbob".search_last("bb").from == 2
378 # assert "bobbob".search_last("bob").from == 3
379 # assert "bob".search_last("z") == null
380 # assert "".search_last("b") == null
381 fun search_last
(t
: Text): nullable Match do
382 return search_last_up_to
(t
, length
)
385 # Search the last occurence of the text `t` before `up_to`.
387 # assert "bobbob".search_last_up_to("b", 3).from == 2
388 # assert "bobbob".search_last_up_to("b", 6).from == 5
389 # assert "bobbob".search_last_up_to("b", 0) == null
390 fun search_last_up_to
(t
: Text, up_to
: Int): nullable Match do
391 var i
= up_to
- t
.length
394 if substring
(i
, t
.length
) == t
then
395 return new Match(self.to_s
, i
, t
.length
)
402 # Extract a given prefix, if any.
405 # var p = "hello world".prefix("hello")
407 # assert p.text_after == " world"
409 fun prefix
(t
: Text): nullable Match do
411 if substring
(0, len
) == t
then
412 return new Match(self.to_s
, 0, len
)
417 # Extract a given suffix, if any.
420 # var p = "hello world".suffix("world")
422 # assert p.text_before == "hello "
424 fun suffix
(t
: Text): nullable Match do
426 var from
= length
- len
427 if substring
(from
, len
) == t
then
428 return new Match(self.to_s
, from
, len
)
433 # Search all occurrences of `pattern` into self.
435 # var a = new Array[Int]
436 # for i in "hello world".search_all('o') do
440 fun search_all
(pattern
: Pattern): Array[Match] do return pattern
.search_all_in
(self)
442 # Split `self` using `pattern` as separator.
444 # assert "hello world".split('o') == ["hell", " w", "rld"]
445 fun split
(pattern
: Pattern): Array[String]
447 var matches
= pattern
.split_in
(self)
448 var res
= new Array[String].with_capacity
(matches
.length
)
449 for m
in matches
do res
.add
(m
.to_s
)
453 # @deprecated alias for `split`
454 fun split_with
(pattern
: Pattern): Array[String] do return self.split
(pattern
)
456 # Split `self` on the first occurence of `pattern`
458 # assert "hello".split_once_on('l') == ["he", "lo"]
459 # assert "a, b, c, d, e".split_once_on(", ") == ["a", "b, c, d, e"]
460 fun split_once_on
(pattern
: Pattern): Array[SELFTYPE]
462 var m
= pattern
.search_in
(self, 0)
463 var res
= new Array[SELFTYPE]
467 res
.add substring
(0, m
.from
)
468 res
.add substring_from
(m
.after
)
473 # Replace all occurences of `pattern` with `string`
475 # assert "hlelo".replace("le", "el") == "hello"
476 # assert "hello".replace('l', "") == "heo"
477 fun replace
(pattern
: Pattern, string
: SELFTYPE): String
479 return self.split_with
(pattern
).join
(string
)
482 # Does `self` contains at least one instance of `pattern`?
484 # assert "hello".has('l')
485 # assert "hello".has("ll")
486 # assert not "hello".has("lll")
487 fun has
(pattern
: Pattern): Bool do return pattern
.is_in
(self)
489 # Returns a copy of `self` minus all occurences of `pattern`
491 # assert "__init__".remove_all('_') == "init"
492 # assert "abcd".remove_all("bc") == "ad"
493 # assert "abcd".remove_all("[ad]".to_re) == "bc"
494 fun remove_all
(pattern
: Pattern): String do return split
(pattern
).join