core :: BM_Pattern
(cf. A Fast String Searching Algorithm, with R.S. Boyer. Communications of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.) http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
var pat = new BM_Pattern("hello")
assert "I say hello to the world!".search(pat).from == 6
assert "I say goodbye to the world!".search(pat) == null
core :: BM_Pattern :: defaultinit
core $ BM_Pattern :: SELF
Type of this instance, automatically specialized in every classcore $ BM_Pattern :: init
core $ BM_Pattern :: search_index_in
boyer-moore search gives the position of the first occurrence of a pattern starting at positionfrom
core :: Object :: class_factory
Implementation used byget_class
to create the specific class.
core :: BM_Pattern :: defaultinit
core :: Pattern :: defaultinit
core :: Object :: defaultinit
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
core :: Object :: output_class_name
Display class name on stdout (debug only).core :: Pattern :: search_all_in
Search allself
occurrences into s
.
core :: Pattern :: search_index_in
Searchself
into s
from a certain position.
# BM_Pattern are pre-compiled string motif for the Boyer-Moore algorithm.
# (cf. A Fast String Searching Algorithm, with R.S. Boyer. Communications
# of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.)
# http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
#
# var pat = new BM_Pattern("hello")
# assert "I say hello to the world!".search(pat).from == 6
# assert "I say goodbye to the world!".search(pat) == null
class BM_Pattern
super Pattern
redef fun to_s do return _motif
# boyer-moore search gives the position of the first occurrence of a pattern starting at position `from`
redef fun search_index_in(s, from)
do
assert from >= 0
var n = s.length
var m = _length
var j = from
while j < n - m + 1 do
var i = m - 1 # Cursor in the pattern
while i >= 0 and _motif[i] == s[i + j] do i -= 1
if i < 0 then
return j
else
var gs = _gs[i] # Good shift
var bc = bc(s[i+j]) - m + 1 + i # Bad char
# Both are true, do move to the best
if gs > bc then
j += gs
else
j += bc
end
end
end
return -1 # found nothing
end
# boyer-moore search. Return null if not found
redef fun search_in(s, from)
do
var to = search_index_in(s, from)
if to < 0 then
return null
else
return new Match(s.to_s, to, _length)
end
end
init
do
_length = _motif.length
_gs = new Array[Int].with_capacity(_length)
compute_gs
compute_bc
end
# searched motif
private var motif: String
# length of the motif
private var length: Int is noinit
private fun bc(e: Char): Int
do
if _bc_table.has_key(e) then
return _bc_table[e]
else
return _length
end
end
# good shifts
private var gs: Array[Int] is noinit
# bad characters
private var bc_table = new ArrayMap[Char, Int]
private fun compute_bc
do
var x = _motif
var m = _length
var i = 0
while i < m - 1 do
_bc_table[x.chars[i]] = m - i - 1
i += 1
end
end
private fun suffixes: Array[Int]
do
var x = _motif
var m = _length
var suff = new Array[Int].filled_with(m, m)
var f = 0
var g = m - 1
var i = m - 2
while i >= 0 do
if i > g and suff[i+m-1-f] < i - g then
suff[i] = suff[i+m-1-f]
else
if i < g then g = i
f = i
while g >= 0 and x.chars[g] == x.chars[g + m - 1 - f] do g -= 1
suff[i] = f - g
end
i -= 1
end
return suff
end
private fun compute_gs
do
var m = _length
var suff = suffixes
var i = 0
while i < m do
_gs[i] = m
i += 1
end
var j = 0
i = m - 1
while i >= -1 do
if i == -1 or suff[i] == i + 1 then
while j < m - 1 - i do
if _gs[j] == m then _gs[j] = m - 1 - i
j += 1
end
end
i -= 1
end
i = 0
while i < m - 1 do
_gs[m - 1 - suff[i]] = m - 1 - i
i += 1
end
end
redef fun hash do return _motif.hash
redef fun ==(o) do return o isa BM_Pattern and o._motif == _motif
end
lib/core/text/string_search.nit:92,1--235,3