X-Git-Url: http://nitlanguage.org

Review note: in `Regex::search_in`, the offsets of the sub-expressions are now
shifted by `from`, exactly like the offset of the whole match, because
`regexec` reports every offset relative to the start of the searched C string
(`text.substring_from(from)`). `Regex::search_all` already did so with `d`.
The post-image blob id on the `index` line predates this adjustment.

diff --git a/lib/standard/re.nit b/lib/standard/re.nit
index 7baabd2..24637cb 100644
--- a/lib/standard/re.nit
+++ b/lib/standard/re.nit
@@ -42,7 +42,7 @@ private extern class NativeRegex `{ regex_t* `}
 
 	# Compile the regular expression `regex` into a form that is suitable for subsequent `regexec` searches
 	fun regcomp(regex: NativeString, cflags: Int): Int `{
-		return regcomp(recv, regex, cflags);
+		return regcomp(self, regex, cflags);
 	`}
 
 	# Match `string` against the precompiled pattern buffer of `self`, locating matches
@@ -50,32 +50,32 @@ private extern class NativeRegex `{ regex_t* `}
 	# `nmatch` and `pmatch` are used to provide information regarding the location of any matches.
 	# `eflags` may be the bitwise-or of one or both of `flag_notbol` and `flag_noteol`.
 	fun regexec(string: NativeString, nmatch: Int, pmatch: NativeMatchArray, eflags: Int): Int `{
-		return regexec(recv, string, nmatch, pmatch, eflags);
+		return regexec(self, string, nmatch, pmatch, eflags);
 	`}
 
 	# Match `string` against the precompiled pattern buffer of `self`, do not locate matches
 	#
 	# `eflags` may be the bitwise-or of one or both of `flag_notbol` and `flag_noteol`.
 	fun regexec_match_only(string: NativeString, eflags: Int): Int `{
-		return regexec(recv, string, 0, NULL, eflags);
+		return regexec(self, string, 0, NULL, eflags);
 	`}
 
 	# Free the memory allocated to the pattern buffer by the compiling process
 	#
 	# Does not free the memory holding `self`, use `free` for this purpose.
-	fun regfree `{ regfree(recv); `}
+	fun regfree `{ regfree(self); `}
 
 	# Turn the error codes that can be returned by both `regcomp` and `regexec` into error message strings
 	fun regerror(errcode: Int): NativeString `{
-		size_t len = regerror(errcode, recv, NULL, 0);
+		size_t len = regerror(errcode, self, NULL, 0);
 		char *message = malloc(len);
-		regerror(errcode, recv, message, len);
+		regerror(errcode, self, message, len);
 
 		return message;
 	`}
 
-	# This field holds the number of parenthetical subexpressions in the regular expression that was compiled.
-	fun re_nsub: Int `{ return recv->re_nsub; `}
+	# Number of parenthetical subexpressions in this compiled regular expression
+	fun re_nsub: Int `{ return self->re_nsub; `}
 end
 
 # Flags for `NativeRegex::regcomp`
@@ -96,7 +96,7 @@ private fun error_nomatch: Int `{ return REG_NOMATCH; `}
 private fun error_espace: Int `{ return REG_ESPACE; `}
 
 redef universal Int
-	private fun is_nomatch: Bool `{ return recv == REG_NOMATCH; `}
+	private fun is_nomatch: Bool `{ return self == REG_NOMATCH; `}
 end
 
 # An array of `regmatch_t` or a pointer to one
@@ -105,17 +105,17 @@ private extern class NativeMatchArray `{ regmatch_t* `}
 	new malloc(length: Int) `{ return malloc(length * sizeof(regmatch_t)); `}
 
 	# The offset in string of the beginning of a substring
-	fun rm_so: Int `{ return recv->rm_so; `}
+	fun rm_so: Int `{ return self->rm_so; `}
 
 	# The offset in string of the end of the substring
-	fun rm_eo: Int `{ return recv->rm_eo; `}
+	fun rm_eo: Int `{ return self->rm_eo; `}
 
 	# Get a pointer to the element at `index`, can also be used as a subarray
-	fun [](index: Int): NativeMatchArray `{ return recv + index; `}
+	fun [](index: Int): NativeMatchArray `{ return self + index; `}
 end
 
 redef extern class NativeString
-	private fun substring_from(index: Int): NativeString `{ return recv + index; `}
+	private fun substring_from(index: Int): NativeString `{ return self + index; `}
 end
 
 redef class Text
@@ -180,7 +180,12 @@ class Regex
 	private var native: nullable NativeRegex = null
 
 	# Cache of a single `regmatch_t` to prevent many calls to `malloc`
-	private var native_match = new NativeMatchArray.malloc(0) is lazy
+	private var native_match: NativeMatchArray is lazy do
+		native_match_is_init = true
+		return new NativeMatchArray.malloc(native.re_nsub+1)
+	end
+
+	private var native_match_is_init = false
 
 	# `cflags` of the last successful `compile`
 	private var cflags_cache = 0
@@ -249,7 +254,10 @@ class Regex
 			native.regfree
 			native.free
 			self.native = null
-			self.native_match.free
+
+			if native_match_is_init then
+				self.native_match.free
+			end
 		end
 	end
 
@@ -348,13 +356,27 @@ class Regex
 		text = text.to_s
 		var cstr = text.substring_from(from).to_cstring
 		var eflags = gather_eflags
-		var match = self.native_match
-		var matches = new Array[Match]
+		var native_match = self.native_match
 
-		var res = native.regexec(cstr, 1, match, eflags)
+		var nsub = native.re_nsub
+		var res = native.regexec(cstr, nsub+1, native_match, eflags)
 
 		# Found one?
-		if res == 0 then return new Match(text, from + match.rm_so, match.rm_eo - match.rm_so)
+		if res == 0 then
+			var match = new Match(text,
+				from + native_match.rm_so,
+				native_match.rm_eo - native_match.rm_so)
+
+			# Add sub expressions
+			# `regexec` offsets are relative to `cstr`, so shift them by `from`
+			for i in [1..nsub] do
+				match.subs.add new Match( text,
+					from + native_match[i].rm_so,
+					native_match[i].rm_eo - native_match[i].rm_so)
+			end
+
+			return match
+		end
 
 		# No more match?
 		if res.is_nomatch then return null
@@ -381,18 +403,30 @@ class Regex
 		var cstr = text.to_cstring
 		var eflags = gather_eflags
 		var eflags_or_notbol = eflags.bin_or(flag_notbol)
-		var match = self.native_match
+		var native_match = self.native_match
 		var matches = new Array[Match]
 
-		var res = native.regexec(cstr, 1, match, eflags)
+		var nsub = native.re_nsub
+		var res = native.regexec(cstr, nsub+1, native_match, eflags)
 		var d = 0
 		while res == 0 do
-			matches.add new Match(text, d + match.rm_so, match.rm_eo - match.rm_so)
-			if d == match.rm_eo then
+			var match = new Match(text,
+				d + native_match.rm_so,
+				native_match.rm_eo - native_match.rm_so)
+			matches.add match
+
+			# Add sub expressions
+			for i in [1..nsub] do
+				match.subs.add new Match( text,
+					d + native_match[i].rm_so,
+					native_match[i].rm_eo - native_match[i].rm_so)
+			end
+
+			if d == native_match.rm_eo then
 				d += 1
-			else d = d + match.rm_eo
-			cstr = cstr.substring_from(match.rm_eo)
-			res = native.regexec(cstr, 1, match, eflags_or_notbol)
+			else d = d + native_match.rm_eo
+			cstr = cstr.substring_from(native_match.rm_eo)
+			res = native.regexec(cstr, nsub+1, native_match, eflags_or_notbol)
 		end
 
 		# No more match?
@@ -406,3 +440,34 @@ class Regex
 
 	redef fun to_s do return "/{string}/"
 end
+
+redef class Match
+	# Parenthesized subexpressions in this match
+	#
+	# ~~~
+	# var re = "c (d e+) f".to_re
+	# var match = "a b c d eee f g".search(re)
+	# assert match.subs.length == 1
+	# assert match.subs.first.to_s == "d eee"
+	# ~~~
+	var subs = new Array[Match] is lazy
+
+	# Get the `n`th expression in this match
+	#
+	# `n == 0` returns this match, and a greater `n` returns the corresponding
+	# subexpression.
+	#
+	# Require: `n >= 0 and n <= subs.length`
+	#
+	# ~~~
+	# var re = "c (d e+) f".to_re
+	# var match = "a b c d eee f g".search(re)
+	# assert match[0].to_s == "c d eee f"
+	# assert match[1].to_s == "d eee"
+	# ~~~
+	fun [](n: Int): Match do
+		if n == 0 then return self
+		assert n > 0 and n <= subs.length
+		return subs[n-1]
+	end
+end