BM_Pattern are pre-compiled string motif for the Boyer-Moore algorithm.

(cf. A Fast String Searching Algorithm, with R.S. Boyer. Communications of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.) http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html

var pat = new BM_Pattern("hello")
assert "I say hello to the world!".search(pat).from  == 6
assert "I say goodbye to the world!".search(pat)     == null

Introduced properties

Redefined properties

redef fun ==(o: nullable Object): Bool

core $ BM_Pattern :: ==

Have self and other the same value?
redef type SELF: BM_Pattern

core $ BM_Pattern :: SELF

Type of this instance, automatically specialized in every class
redef fun hash: Int

core $ BM_Pattern :: hash

The hash code of the object.
redef init init

core $ BM_Pattern :: init

redef fun search_in(s: Text, from: Int): nullable Match

core $ BM_Pattern :: search_in

boyer-moore search. Return null if not found
redef fun search_index_in(s: Text, from: Int): Int

core $ BM_Pattern :: search_index_in

boyer-moore search gives the position of the first occurrence of a pattern starting at position from
redef fun to_s: String

core $ BM_Pattern :: to_s

User readable representation of self.

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
protected fun is_in(s: Text): Bool

core :: Pattern :: is_in

Is self in s?
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
protected fun search_all_in(s: Text): Array[Match]

core :: Pattern :: search_all_in

Search all self occurrences into s.
protected abstract fun search_in(s: Text, from: Int): nullable Match

core :: Pattern :: search_in

Search self into s from a certain position.
protected abstract fun search_index_in(s: Text, from: Int): Int

core :: Pattern :: search_index_in

Search self into s from a certain position.
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value use for serialization
protected fun split_in(s: Text): Array[Match]

core :: Pattern :: split_in

Split s using self is separator.
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram core::BM_Pattern BM_Pattern core::Pattern Pattern core::BM_Pattern->core::Pattern core::Object Object core::Pattern->core::Object ...core::Object ... ...core::Object->core::Object

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

interface Pattern

core :: Pattern

Patterns are abstract string motifs (include String and Char).

Class definitions

core $ BM_Pattern
# BM_Pattern are pre-compiled string motif for the Boyer-Moore algorithm.
# (cf. A Fast String Searching Algorithm, with R.S. Boyer.  Communications
# of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.)
# http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
#
#     var pat = new BM_Pattern("hello")
#     assert "I say hello to the world!".search(pat).from  == 6
#     assert "I say goodbye to the world!".search(pat)     == null
class BM_Pattern
	super Pattern

	redef fun to_s do return _motif

	# boyer-moore search gives the position of the first occurrence of a pattern starting at position `from`
	redef fun search_index_in(s, from)
	do
		assert from >= 0
		var n = s.length
		var m = _length

		var j = from
		while j < n - m + 1 do
			var i = m - 1 # Cursor in the pattern
			while i >= 0 and _motif[i] == s[i + j] do i -= 1
			if i < 0 then
				return j
			else
				var gs = _gs[i] # Good shift
				var bc = bc(s[i+j]) - m + 1 + i # Bad char
				# Both are true, do move to the best
				if gs > bc then
					j += gs
				else
					j += bc
				end
			end
		end
		return -1 # found nothing
	end

	# boyer-moore search. Return null if not found
	redef fun search_in(s, from)
	do
		var to = search_index_in(s, from)
		if to < 0 then
			return null
		else
			return new Match(s.to_s, to, _length)
		end
	end

	init
	do
		_length = _motif.length
		_gs = new Array[Int].with_capacity(_length)
		compute_gs
		compute_bc
	end

	# searched motif
	private var motif: String

	# length of the motif
	private var length: Int is noinit

	private fun bc(e: Char): Int
	do
		if _bc_table.has_key(e) then
			return _bc_table[e]
		else
			return _length
		end
	end

	# good shifts
	private var gs: Array[Int] is noinit

	# bad characters
	private var bc_table = new ArrayMap[Char, Int]

	private fun compute_bc
	do
		var x = _motif
		var m = _length
		var i = 0
		while i < m - 1 do
			_bc_table[x.chars[i]] = m - i - 1
			i += 1
		end
	end

	private fun suffixes: Array[Int]
	do
		var x = _motif
		var m = _length
		var suff = new Array[Int].filled_with(m, m)

		var f = 0
		var g = m - 1
		var i = m - 2
		while i >= 0 do
			if i > g and suff[i+m-1-f] < i - g then
				suff[i] = suff[i+m-1-f]
			else
				if i < g then g = i
				f = i
				while g >= 0 and x.chars[g] == x.chars[g + m - 1 - f] do g -= 1
				suff[i] = f - g
			end
			i -= 1
		end
		return suff
	end

	private fun compute_gs
	do
		var m = _length
		var suff = suffixes
		var i = 0
		while i < m do
			_gs[i] = m
			i += 1
		end
		var j = 0
		i = m - 1
		while i >= -1 do
			if i == -1 or suff[i] == i + 1 then
				while j < m - 1 - i do
					if _gs[j] == m then _gs[j] = m - 1 - i
					j += 1
				end
			end
			i -= 1
		end
		i = 0
		while i < m - 1 do
			_gs[m - 1 - suff[i]] = m - 1 - i
			i += 1
		end
	end

	redef fun hash do return _motif.hash
	redef fun ==(o) do return o isa BM_Pattern and o._motif == _motif
end
lib/core/text/string_search.nit:92,1--235,3