# You are allowed to redistribute it and sell it, alone or as a part of
# another product.
-# This module is about string search and matching.
-# It includes some good features
-package string_search
+# Basic string search, match and replace.
+module string_search
import string
-# Patterns are string motifs.
+# Patterns are abstract string motifs (including `String` and `Char`).
interface Pattern
- # Search `self' into `s' from a certain position.
+ # Search `self` into `s` from a certain position.
# Return the position of the first character of the matching section.
# Return -1 if not found.
fun search_index_in(s: String, from: Int): Int is abstract
- # Search `self' into `s' from a certain position.
+ # Search `self` into `s` from a certain position.
# Return null if not found.
fun search_in(s: String, from: Int): nullable Match is abstract
- # Search all `self' occucences into `s'.
+ # Search all `self` occurrences into `s`.
fun search_all_in(s: String): Array[Match]
do
var res = new Array[Match] # Result
return res
end
- # Split `s' using `self' is separator.
+ # Split `s` using `self` as separator.
fun split_in(s: String): Array[Match]
do
var res = new Array[Match] # Result
end
end
-# BM_Pattern are precompiled string motif for the Boyer-Moore Fast String Searching Algorithm
-# (cf. A Fast String Searching Algorithm, with R.S. Boyer.
-# Communications of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.)
-# see also http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
+# BM_Pattern are pre-compiled string motifs for the Boyer-Moore algorithm.
+# (cf. A Fast String Searching Algorithm, with R.S. Boyer. Communications
+# of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.)
+# http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
class BM_Pattern
super Pattern
+
redef fun to_s
do
	# The textual form of a compiled pattern is its motif.
	return _motif
end
- # boyer-moore search gives the position of the first occurence of a pattern starting at position `from'
+ # boyer-moore search gives the position of the first occurrence of a pattern starting at position `from`
redef fun search_index_in(s, from)
do
assert from >= 0
while j < n - m + 1 do
var i = m - 1 # Cursor in the pattern
while i >= 0 and _motif[i] == s[i + j] do i -= 1
- if (i < 0) then
+ if i < 0 then
return j
else
var gs = _gs[i] # Good shift
return -1 # found nothing
end
- # boyer-moore search. Return null if not found
+ # boyer-moore search. Return null if not found
redef fun search_in(s, from)
do
var to = search_index_in(s, from)
end
end
- # Compile a new motif
+ # Compile a new motif
init(motif: String)
do
_motif = motif
i += 1
end
end
+
+ redef fun hash do return _motif.hash
+ redef fun ==(o) do return o isa BM_Pattern and o._motif == _motif
end
# Matches are a part of a string.
# The starting position in the string
readable var _from: Int
- # The length of the mathching part
+ # The length of the matching part
readable var _length: Int
# The position of the first character just after the matching part.
# May be out of the base string
fun after: Int
do
	# Start offset plus length gives the index just past the match.
	var past_end = _from + _length
	return past_end
end
- # The contents of the mathing part
+ # The contents of the matching part
redef fun to_s
do
	# Extract `_length` characters of the base string starting at `_from`.
	var matched = _string.substring(_from, _length)
	return matched
end
- # Matches `len' characters of `s' from `f'.
+ # Matches `len` characters of `s` from `f`.
init(s: String, f: Int, len: Int)
do
assert positive_length: len >= 0
redef class Char
super Pattern
+
redef fun search_index_in(s, from)
do
var stop = s.length
redef class String
super Pattern
+
redef fun search_index_in(s, from)
do
assert from >= 0
end
end
- # Like `search_from' but from the first chararter.
+ # Like `search_from` but from the first character.
fun search(p: Pattern): nullable Match
do
	# Delegate to the pattern, scanning `self` from index 0.
	return p.search_in(self, 0)
end
# Search the given pattern into self from a certain position.
- # The search starts at `from'.
+ # The search starts at `from`.
# Return null if not found.
fun search_from(p: Pattern, from: Int): nullable Match
do
	# Delegate to the pattern, scanning `self` from index `from`.
	return p.search_in(self, from)
end
- # Search all occurences of p into self.
+ # Search all occurrences of p into self.
#
- # var a = new Array[Int]
- # for i in "hello world".searches('o') do
- # a.add(i.from)
- # end
- # a # -> [4, 7]
+ # var a = new Array[Int]
+ # for i in "hello world".search_all('o') do
+ # a.add(i.from)
+ # end
+ # assert a == [4, 7]
fun search_all(p: Pattern): Array[Match]
do
	# The pattern itself collects every match found in `self`.
	return p.search_all_in(self)
end
- # Split self using p is separator.
- # "hello world".split('o') # -> ["hell", " w", "rld"]
- fun split_with(p: Pattern): Array[String]
+ # Split `self` using `p` as separator.
+ #
+ # assert "hello world".split('o') == ["hell", " w", "rld"]
+ fun split(p: Pattern): Array[String]
do
var matches = p.split_in(self)
var res = new Array[String].with_capacity(matches.length)
return res
end
- # Split self using '\n' is separator.
- # "hello\nworld".split # -> ["hello","world"]
- fun split: Array[String] do return split_with('\n')
+ # @deprecated alias for `split`
+ fun split_with(p: Pattern): Array[String] do return self.split(p)
# Replace all occurrences of a pattern with a string
#
- # "hlelo".replace("le", "el") # -> "hello"
- # "hello".replace('l', "") # -> "heo"
+ # assert "hlelo".replace("le", "el") == "hello"
+ # assert "hello".replace('l', "") == "heo"
fun replace(p: Pattern, string: String): String
do
return self.split_with(p).join(string)
# Escape the four characters < > & and " with their html counterpart
#
- # "a&b->\"x\"".html_escape # -> "a&b->"x""
+ # assert "a&b->\"x\"".html_escape == "a&amp;b-&gt;&quot;x&quot;"
fun html_escape: String
do
var ret = self