Property definitions

core $ BM_Pattern :: defaultinit
# BM_Pattern are pre-compiled string motif for the Boyer-Moore algorithm.
# (cf. A Fast String Searching Algorithm, with R.S. Boyer.  Communications
# of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.)
# http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
#
#     var pat = new BM_Pattern("hello")
#     assert "I say hello to the world!".search(pat).from  == 6
#     assert "I say goodbye to the world!".search(pat)     == null
class BM_Pattern
	super Pattern

	redef fun to_s do return _motif

	# boyer-moore search gives the position of the first occurrence of a pattern starting at position `from`
	redef fun search_index_in(s, from)
	do
		assert from >= 0
		var n = s.length
		var m = _length

		var j = from
		while j < n - m + 1 do
			var i = m - 1 # Cursor in the pattern
			while i >= 0 and _motif[i] == s[i + j] do i -= 1
			if i < 0 then
				return j
			else
				var gs = _gs[i] # Good shift
				var bc = bc(s[i+j]) - m + 1 + i # Bad char
				# Both are true, do move to the best
				if gs > bc then
					j += gs
				else
					j += bc
				end
			end
		end
		return -1 # found nothing
	end

	# boyer-moore search. Return null if not found
	redef fun search_in(s, from)
	do
		var to = search_index_in(s, from)
		if to < 0 then
			return null
		else
			return new Match(s.to_s, to, _length)
		end
	end

	init
	do
		_length = _motif.length
		_gs = new Array[Int].with_capacity(_length)
		compute_gs
		compute_bc
	end

	# searched motif
	private var motif: String

	# length of the motif
	private var length: Int is noinit

	private fun bc(e: Char): Int
	do
		if _bc_table.has_key(e) then
			return _bc_table[e]
		else
			return _length
		end
	end

	# good shifts
	private var gs: Array[Int] is noinit

	# bad characters
	private var bc_table = new ArrayMap[Char, Int]

	private fun compute_bc
	do
		var x = _motif
		var m = _length
		var i = 0
		while i < m - 1 do
			_bc_table[x.chars[i]] = m - i - 1
			i += 1
		end
	end

	private fun suffixes: Array[Int]
	do
		var x = _motif
		var m = _length
		var suff = new Array[Int].filled_with(m, m)

		var f = 0
		var g = m - 1
		var i = m - 2
		while i >= 0 do
			if i > g and suff[i+m-1-f] < i - g then
				suff[i] = suff[i+m-1-f]
			else
				if i < g then g = i
				f = i
				while g >= 0 and x.chars[g] == x.chars[g + m - 1 - f] do g -= 1
				suff[i] = f - g
			end
			i -= 1
		end
		return suff
	end

	private fun compute_gs
	do
		var m = _length
		var suff = suffixes
		var i = 0
		while i < m do
			_gs[i] = m
			i += 1
		end
		var j = 0
		i = m - 1
		while i >= -1 do
			if i == -1 or suff[i] == i + 1 then
				while j < m - 1 - i do
					if _gs[j] == m then _gs[j] = m - 1 - i
					j += 1
				end
			end
			i -= 1
		end
		i = 0
		while i < m - 1 do
			_gs[m - 1 - suff[i]] = m - 1 - i
			i += 1
		end
	end

	redef fun hash do return _motif.hash
	redef fun ==(o) do return o isa BM_Pattern and o._motif == _motif
end
lib/core/text/string_search.nit:92,1--235,3