A regular expression pattern

Used as a Pattern on instances of Text to call has, search_all, replace, etc.

Example:

var re = "ab+a".to_re
assert "aabbbbaaaaba".search_all(re).join(", ") == "abbbba, aba"
assert "aabbbbaaaaba".has(re)
assert "aabbbbaaaaba".replace(re, "+") == "a+aa+"
assert "aabbbbaaaaba".split(re) == ["a", "aa", ""]

Introduced properties

fun compile: nullable Error

core :: Regex :: compile

Compile the regular expression, if needed
init defaultinit(string: String)

core :: Regex :: defaultinit

fun extended: Bool

core :: Regex :: extended

Treat the pattern as a POSIX extended regular expression (the default)
fun extended=(extended: Bool)

core :: Regex :: extended=

Treat the pattern as a POSIX extended regular expression (the default)
fun ignore_case: Bool

core :: Regex :: ignore_case

Ignore case when matching letters
fun ignore_case=(ignore_case: Bool)

core :: Regex :: ignore_case=

Ignore case when matching letters
fun newline: Bool

core :: Regex :: newline

Treat a newline in string as dividing string into multiple lines
fun newline=(newline: Bool)

core :: Regex :: newline=

Treat a newline in string as dividing string into multiple lines
fun not_bol: Bool

core :: Regex :: not_bol

Do not regard the beginning of the specified string as the beginning of a line
fun not_bol=(not_bol: Bool)

core :: Regex :: not_bol=

Do not regard the beginning of the specified string as the beginning of a line
fun not_eol: Bool

core :: Regex :: not_eol

Do not regard the end of the specified string as the end of a line
fun not_eol=(not_eol: Bool)

core :: Regex :: not_eol=

Do not regard the end of the specified string as the end of a line
fun optimize_has: Bool

core :: Regex :: optimize_has

Optimize self for String::has and is_in, but do not support searches
fun optimize_has=(optimize_has: Bool)

core :: Regex :: optimize_has=

Optimize self for String::has and is_in, but do not support searches
fun string: String

core :: Regex :: string

The String source of this regular expression
fun string=(string: String)

core :: Regex :: string=

The String source of this regular expression

Redefined properties

redef type SELF: Regex

core $ Regex :: SELF

Type of this instance, automatically specialized in every class
redef fun finalize

core $ Regex :: finalize

Liberate any resources held by self before the memory holding self is freed
redef fun is_in(text: Text): Bool

core $ Regex :: is_in

assert "ab".to_re.is_in("abcd")
redef fun search_all_in(text: Text): Array[Match]

core $ Regex :: search_all_in

require: not optimize_has
redef fun search_in(text: Text, charfrom: Int): nullable Match

core $ Regex :: search_in

require: not optimize_has
redef fun search_index_in(text: Text, from: Int): Int

core $ Regex :: search_index_in

require: not optimize_has
redef fun to_s: String

core $ Regex :: to_s

User readable representation of self.

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun compile: nullable Error

core :: Regex :: compile

Compile the regular expression, if needed
init defaultinit(string: String)

core :: Regex :: defaultinit

fun extended: Bool

core :: Regex :: extended

Treat the pattern as a POSIX extended regular expression (the default)
fun extended=(extended: Bool)

core :: Regex :: extended=

Treat the pattern as a POSIX extended regular expression (the default)
fun finalize

core :: Finalizable :: finalize

Liberate any resources held by self before the memory holding self is freed
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
fun ignore_case: Bool

core :: Regex :: ignore_case

Ignore case when matching letters
fun ignore_case=(ignore_case: Bool)

core :: Regex :: ignore_case=

Ignore case when matching letters
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
protected fun is_in(s: Text): Bool

core :: Pattern :: is_in

Is self in s?
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
fun newline: Bool

core :: Regex :: newline

Treat a newline in string as dividing string into multiple lines
fun newline=(newline: Bool)

core :: Regex :: newline=

Treat a newline in string as dividing string into multiple lines
fun not_bol: Bool

core :: Regex :: not_bol

Do not regard the beginning of the specified string as the beginning of a line
fun not_bol=(not_bol: Bool)

core :: Regex :: not_bol=

Do not regard the beginning of the specified string as the beginning of a line
fun not_eol: Bool

core :: Regex :: not_eol

Do not regard the end of the specified string as the end of a line
fun not_eol=(not_eol: Bool)

core :: Regex :: not_eol=

Do not regard the end of the specified string as the end of a line
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun optimize_has: Bool

core :: Regex :: optimize_has

Optimize self for String::has and is_in, but do not support searches
fun optimize_has=(optimize_has: Bool)

core :: Regex :: optimize_has=

Optimize self for String::has and is_in, but do not support searches
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
protected fun search_all_in(s: Text): Array[Match]

core :: Pattern :: search_all_in

Search all self occurrences into s.
protected abstract fun search_in(s: Text, from: Int): nullable Match

core :: Pattern :: search_in

Search self into s from a certain position.
protected abstract fun search_index_in(s: Text, from: Int): Int

core :: Pattern :: search_index_in

Search self into s from a certain position.
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
protected fun split_in(s: Text): Array[Match]

core :: Pattern :: split_in

Split s using self as separator.
fun string: String

core :: Regex :: string

The String source of this regular expression
fun string=(string: String)

core :: Regex :: string=

The String source of this regular expression
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram core::Regex Regex core::Finalizable Finalizable core::Regex->core::Finalizable core::Pattern Pattern core::Regex->core::Pattern core::Object Object core::Finalizable->core::Object core::Pattern->core::Object ...core::Object ... ...core::Object->core::Object

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

class Finalizable

core :: Finalizable

An object needing finalization
interface Pattern

core :: Pattern

Patterns are abstract string motifs (include String and Char).

Class definitions

core $ Regex
# A regular expression pattern
#
# Used as a `Pattern` on instances of `Text` to call `has`, `search_all`, `replace`, etc.
#
# Example:
#
#     var re = "ab+a".to_re
#     assert "aabbbbaaaaba".search_all(re).join(", ") == "abbbba, aba"
#     assert "aabbbbaaaaba".has(re)
#     assert "aabbbbaaaaba".replace(re, "+") == "a+aa+"
#     assert "aabbbbaaaaba".split(re) == ["a", "aa", ""]
class Regex
	super Finalizable
	super Pattern

	# The `String` source of this regular expression
	var string: String is writable

	# Treat the pattern as a POSIX extended regular expression (the default)
	#
	# If `false`, it is treated as a POSIX basic regular expression (BRE).
	#
	# The extended syntax supports `?`, `+` and `|`. Also, `\` causes the following
	# character to be used as literal.
	var extended = true is writable

	# Ignore case when matching letters
	var ignore_case = false is writable

	# Optimize `self` for `String::has` and `is_in`, but do not support searches
	#
	# If `true`, `self` cannot be used with `String::search_all`, `String::replace`
	# or `String::split`.
	var optimize_has = false is writable

	# Treat a newline in string as dividing string into multiple lines
	#
	# So that `$` can match before the newline and `^` can match after.
	# Also, don’t permit `.` to match a newline, and don’t permit `[^…]` to match a newline.
	#
	# Otherwise, newline acts like any other ordinary character.
	var newline = false is writable

	# Do not regard the beginning of the specified string as the beginning of a line
	#
	# More generally, don’t make any assumptions about what text might precede it.
	var not_bol = false is writable

	# Do not regard the end of the specified string as the end of a line
	#
	# More generally, don’t make any assumptions about what text might follow it.
	var not_eol = false is writable

	# Cache of the last used compiled regular expression
	#
	# It holds a compiled pattern (one that must be released with `regfree`)
	# only while `string_cache` is not `null`.
	private var native: nullable NativeRegex = null

	# Cache of the `regmatch_t` entries to prevent many calls to `malloc`
	#
	# Allocated at first use with room for the whole match and each of the
	# `re_nsub` sub-expressions. `compile` reallocates it after each
	# recompilation so it always fits the current pattern.
	private var native_match: NativeMatchArray is lazy do
		native_match_is_init = true
		return new NativeMatchArray.malloc(native.as(not null).re_nsub+1)
	end

	# Is `native_match` currently holding an allocated buffer to free?
	private var native_match_is_init = false

	# `cflags` of the last successful `compile`
	private var cflags_cache = 0

	# `string` of the last successful `compile`
	#
	# `null` if there is no valid compiled pattern in `native`.
	private var string_cache: nullable String = null

	# Compile the regular expression, if needed
	#
	# Return `null` on success and an `Error` otherwise.
	#
	# This method is always called by `is_in`, `search_in`, `search_index_in`
	# and `search_all_in`, but the user should call it to check for errors.
	#
	# Nothing is done if neither `string` nor the compilation flags
	# (`extended`, `ignore_case`, `optimize_has` and `newline`) changed since
	# the last successful compilation.
	#
	#     assert "ab".to_re.compile == null
	#     assert "[ab".to_re.compile.message == "Unmatched [ or [^"
	fun compile: nullable Error
	do
		var cflags = 0
		if extended then cflags |= flag_extended
		if ignore_case then cflags |= flag_icase
		if optimize_has then cflags |= flag_nosub
		if newline then cflags |= flag_newline

		var native = self.native
		var need_compilation = native == null or cflags != cflags_cache or string != string_cache
		if not need_compilation then return null

		if native == null then
			# Initial allocation
			native = new NativeRegex.malloc
			self.native = native
		else if string_cache != null then
			# Release the previously compiled pattern before overwriting it,
			# otherwise its internal buffers are leaked.
			native.regfree
		end

		# No valid compiled pattern until `regcomp` succeeds, so a failure
		# here cannot be mistaken for an up-to-date cache later on.
		self.string_cache = null

		var res = native.regcomp(string.to_cstring, cflags)
		if res != 0 then
			var error_cstr = native.regerror(res)

			# We leave it to the lib to decide how to allocate the string that we keep
			var error_str = error_cstr.to_s
			error_cstr.free

			return new Error(error_str)
		end

		# All is good, we store these to know if we need to recompile or not
		self.cflags_cache = cflags
		self.string_cache = string

		# The match buffer was sized for the previous pattern,
		# the new one may have more sub-expressions.
		if native_match_is_init then
			self.native_match.free

			# With `optimize_has` no search is allowed and `re_nsub` may not
			# be set by the lib, a single entry is then enough.
			var nsub = 0
			if not optimize_has then nsub = native.re_nsub
			self.native_match = new NativeMatchArray.malloc(nsub + 1)
		end

		return null
	end

	# Free the native regular expression and the cached match buffer
	redef fun finalize
	do
		var native = self.native
		if native != null then
			# Only a successfully compiled pattern may be passed to `regfree`
			if string_cache != null then native.regfree
			native.free
			self.native = null
			self.string_cache = null

			if native_match_is_init then
				self.native_match.free
				native_match_is_init = false
			end
		end
	end

	# Execution flags (`eflags`) built from `not_bol` and `not_eol`
	private fun gather_eflags: Int
	do
		var eflags = 0
		if not_bol then eflags |= flag_notbol
		if not_eol then eflags |= flag_noteol
		return eflags
	end

	# Message describing the error code `errcode` returned by the native lib
	#
	# require: `native != null`
	private fun get_error(errcode: Int): String
	do
		var native = native
		assert native != null

		# Error, should be out of memory but we cover any possible error anyway
		var error_cstr = native.regerror(errcode)

		# We leave it to the lib to decide how to allocate the string that we keep
		var error_str = error_cstr.to_s
		error_cstr.free

		return error_str
	end

	# Does `self` match somewhere in `text`?
	#
	# Aborts if the compilation or the execution of the expression fails.
	#
	#     assert "ab".to_re.is_in("abcd")
	#     assert "ab".to_re.is_in("cdab")
	#     assert not "ab".to_re.is_in("acdb")
	#     assert "ab".to_re.is_in("ab")
	redef fun is_in(text)
	do
		var comp_res = compile
		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output

		var native = native
		assert native != null

		# Actually execute
		var eflags = gather_eflags
		var res = native.regexec_match_only(text.to_cstring, eflags)

		# Got a match?
		if res == 0 then return true

		# Got no match, not an error?
		if res.is_nomatch then return false

		# Error, should be out of memory but we cover any possible error anyway
		var error_str = get_error(res)
		"Regex search failed with: {error_str}\n".output
		abort
	end

	# Index, in chars, of the first match in `text` starting at char `from`
	#
	# Return -1 if there is no match.
	# A `from` outside of `text` is brought back to the nearest bound.
	#
	# require: not optimize_has
	#
	#     assert "l".to_re.search_index_in("hello world", 0) == 2
	#     assert "el+o".to_re.search_index_in("hello world", 0) == 1
	#     assert "l+".to_re.search_index_in("hello world", 3) == 3
	#     assert "z".to_re.search_index_in("hello world", 0) == -1
	#     assert "b".to_re.search_index_in("ééb", 0) == 2
	redef fun search_index_in(text, from)
	do
		assert not optimize_has

		var comp_res = compile
		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output

		var native = native
		assert native != null

		# Keep the start within `text`, the index conversions below walk the
		# raw bytes and do not check bounds themselves.
		var charfrom = from
		if charfrom < 0 then charfrom = 0
		if charfrom > text.length then charfrom = text.length

		# Actually execute
		var cstr = text.to_cstring
		var bytefrom = cstr.char_to_byte_index_cached(charfrom, 0, 0)
		var subcstr = cstr.fast_cstring(bytefrom)
		var eflags = gather_eflags
		var match = self.native_match

		var res = native.regexec(subcstr, 1, match, eflags)

		# Found one? `rm_so` is a byte offset from `subcstr`,
		# it must be converted back to a char index in `text`.
		if res == 0 then
			return cstr.byte_to_char_index_cached(match.rm_so + bytefrom, charfrom, bytefrom)
		end

		# No more match?
		if res.is_nomatch then return -1

		# Error, should be out of memory but we cover any possible error anyway
		var error_str = get_error(res)
		"Regex search failed with: {error_str}\n".output
		abort
	end

	# First match in `text` starting at char `charfrom`, or `null` if none
	#
	# The sub-expressions of the match are available in `Match::subs`,
	# `null` is stored for the ones that did not participate in the match.
	#
	# require: not optimize_has
	#
	#     assert "l".to_re.search_in("hello world", 0).from == 2
	#     assert "el+o".to_re.search_in("hello world", 0).from == 1
	#     assert "l+".to_re.search_in("hello world", 3).from == 3
	#     assert "z".to_re.search_in("hello world", 0) == null
	#     assert "cd(e)".to_re.search_in("abcdef", 2)[1].to_s == "e"
	redef fun search_in(text, charfrom)
	do
		assert not optimize_has

		var comp_res = compile
		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output

		var native = native
		assert native != null

		# Actually execute
		var cstr = text.to_cstring
		var rets = cstr.to_s_unsafe(text.byte_length, copy=false)
		var bytefrom = cstr.char_to_byte_index_cached(charfrom, 0, 0)
		var subcstr = cstr.fast_cstring(bytefrom)
		var eflags = gather_eflags
		var native_match = self.native_match

		var nsub = native.re_nsub
		var res = native.regexec(subcstr, nsub + 1, native_match, eflags)

		# Found one?
		if res == 0 then
			# Offsets reported by the lib are in bytes and relative to `subcstr`
			var bfrom = native_match.rm_so + bytefrom
			var bto = native_match.rm_eo - 1 + bytefrom
			var cpos = cstr.byte_to_char_index_cached(bfrom, charfrom, bytefrom)
			var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
			var match = new Match(rets, cpos, len)
			var subs = match.subs

			# Add sub expressions
			for i in [1 .. nsub] do
				if native_match[i].rm_so < 0 then
					subs.add null
					continue
				end
				var sub_bfrom = native_match[i].rm_so + bytefrom
				var sub_bto = native_match[i].rm_eo - 1 + bytefrom
				var sub_cpos = cstr.byte_to_char_index_cached(sub_bfrom, cpos, bfrom)
				var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
				subs.add(new Match(rets, sub_cpos, sub_len))
			end

			return match
		end

		# No more match?
		if res.is_nomatch then return null

		# Error, should be out of memory but we cover any possible error anyway
		var error_str = get_error(res)
		"Regex search failed with: {error_str}\n".output
		abort
	end

	# All the non-overlapping matches of `self` in `text`, from left to right
	#
	# After an empty match, the search resumes one character further so
	# patterns that can match the empty string do terminate.
	#
	# require: not optimize_has
	#
	#     assert "ab".to_re.search_all_in("abbab").join(", ") == "ab, ab"
	#     assert "b+".to_re.search_all_in("abbabaabbbbbcab").join(", ") == "bb, b, bbbbb, b"
	#     assert "a*".to_re.search_all_in("ba").length == 3
	redef fun search_all_in(text)
	do
		assert not optimize_has

		var comp_res = compile
		assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output

		var native = native
		assert native != null

		# Actually execute
		var cstr = text.to_cstring
		var subcstr = cstr
		var byte_length = text.byte_length
		var rets = cstr.to_s_unsafe(byte_length, copy=false)
		var eflags = gather_eflags
		var eflags_or_notbol = eflags | flag_notbol
		var native_match = self.native_match
		var matches = new Array[Match]

		var nsub = native.re_nsub
		var res = native.regexec(subcstr, nsub + 1, native_match, eflags)

		# Position in `text`, in bytes and in chars, where `subcstr` begins
		var bytesub = 0
		var charsub = 0
		while res == 0 do
			var bfrom = native_match.rm_so + bytesub
			var bto = native_match.rm_eo - 1 + bytesub
			var cstart = cstr.byte_to_char_index_cached(bfrom, charsub, bytesub)
			var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
			var match = new Match(rets, cstart, len)
			matches.add match
			var subs = match.subs

			# Add sub expressions
			for i in [1 .. nsub] do
				if native_match[i].rm_so < 0 then
					subs.add null
					continue
				end
				var sub_bfrom = native_match[i].rm_so + bytesub
				var sub_bto = native_match[i].rm_eo - 1 + bytesub
				var sub_cstart = cstr.byte_to_char_index_cached(sub_bfrom, cstart, bfrom)
				var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
				subs.add(new Match(rets, sub_cstart, sub_len))
			end

			# Resume right after the match
			bytesub = bto + 1
			charsub = cstart + len

			if bto < bfrom then
				# Empty match: searching again from the same position would
				# find it forever, so step over one character, or stop if
				# the end of `text` is reached.
				if bytesub >= byte_length then return matches
				bytesub = cstr.char_to_byte_index_cached(charsub + 1, charsub, bytesub)
				charsub += 1
			end

			subcstr = cstr.fast_cstring(bytesub)
			res = native.regexec(subcstr, nsub + 1, native_match, eflags_or_notbol)
		end

		# No more match?
		if res.is_nomatch then return matches

		# Error, should be out of memory but we cover any possible error anyway
		var error_str = get_error(res)
		"Regex search failed with: {error_str}\n".output
		abort
	end

	# The source `string` enclosed in slashes
	redef fun to_s do return "/{string}/"
end
lib/core/re.nit:127,1--479,3