Pattern
Implemented using libc regular expressions.
The main entities are Text::to_re
and Regex
.
core :: union_find
union–find algorithm using an efficient disjoint-set data structurebucketed_game :: bucketed_game
Game framework with an emphasis on efficient event coordinationaccept_scroll_and_zoom
gamnit :: camera_control_android
Two fingers camera manipulation, pinch to zoom and slide to scrollgamnit :: camera_control_linux
Mouse wheel and middle mouse button to control camerapthreads :: concurrent_array_and_barrier
A basic usage example of the modulespthreads
and pthreads::cocurrent_collections
pthreads :: concurrent_collections
Introduces thread-safe concurrent collectionsserialization :: custom_serialization
Example of an ad hoc serializer that is tailored to transform business specific objects into customized representation.egl
, sdl
and x11
FileServer
action, which is a standard and minimal file server
cocoa :: foundation
The Foundation Kit provides basic Objective-C classes and structuresfunctional_types.nit
functional :: functional_types
This module provides functional type to represents various function forms.gtk :: gtk_assistant
gtk :: gtk_dialogs
HttpRequest
class and services to create it
app::http_request
main service AsyncHttpRequest
Serializable::inspect
to show more useful information
Iterator
.
actors :: mandelbrot
Example implemented from "The computer Language Benchmarks Game" - Mandelbrotmarkdown2 :: markdown_html_rendering
HTML rendering of Markdown documentsmarkdown2 :: markdown_latex_rendering
LaTeX rendering of Markdown documentsmarkdown2 :: markdown_man_rendering
Manpages rendering of Markdown documentsmarkdown2 :: markdown_md_rendering
Markdown rendering of Markdown documentsmore_collections :: more_collections
Highly specific, but useful, collections-related classes.mpi :: mpi_simple
curl :: native_curl
Binding of C libCurl which allow us to interact with network.app.nit
on Android using a custom Java entry point
nitcc_runtime :: nitcc_runtime
Runtime library required by parsers and lexers generated by nitccnlp :: nlp_server
glesv2 :: opengles2_hello_triangle
Basic example of OpenGL ES 2.0 usage using SDL 2performance_analysis :: performance_analysis
Services to gather information on the performance of events by categoriesrestful
annotation documented at lib/nitcorn/restful.nit
sax :: sax_locator
Interface for associating a SAX event with a document location.Locator
.
msgpack :: serialization_common
Serialization services forserialization_write
and serialization_read
serialization :: serialization_core
Abstract services to serialize Nit objects to different formatsdeserialize_json
and JsonDeserializer
serialize_to_json
and JsonSerializer
msgpack :: serialization_write
Serialize full Nit objects to MessagePack formatroot
to execute
agent_simulation
by refining the Agent class to make
socket :: socket_simple_server
Simple server example using a non-blockingTCPServer
EulerCamera
and App::frame_core_draw
to get a stereoscopic view
gamnit :: texture_atlas_parser
Tool to parse XML texture atlas and generated Nit code to access subtextures
# Regular expression support for all services based on `Pattern`
#
# Implemented using libc regular expressions.
#
# The main entities are `Text::to_re` and `Regex`.
module re
import text
import text::flat
import gc
import error
in "C Header" `{
#include <sys/types.h>
#include <regex.h>
`}
# Main extern class to wrap libc regular expression support
#
# It is recommanded to use the higher level API offered by the class `Regex`,
# but it can still be used for advanced purpose or in optimized code.
#
# To use this class and other `private` entities of this module, use `intrude import core::re`
private extern class NativeRegex `{ regex_t* `}
# Allocate a new `NativeRegex`, it must then be compiled using `regcomp` before calling `regexec`
new malloc `{ return malloc(sizeof(regex_t)); `}
# Compile the regular expression `regex` into a form that is suitable for subsequent `regexec` searches
fun regcomp(regex: CString, cflags: Int): Int `{
return regcomp(self, regex, cflags);
`}
# Match `string` against the precompiled pattern buffer of `self`, locating matches
#
# `nmatch` and `pmatch` are used to provide information regarding the location of any matches.
# `eflags` may be the bitwise-or of one or both of `flag_notbol` and `flag_noteol`.
fun regexec(string: CString, nmatch: Int, pmatch: NativeMatchArray, eflags: Int): Int `{
return regexec(self, string, nmatch, pmatch, eflags);
`}
# Match `string` against the precompiled pattern buffer of `self`, do not locate matches
#
# `eflags` may be the bitwise-or of one or both of `flag_notbol` and `flag_noteol`.
fun regexec_match_only(string: CString, eflags: Int): Int `{
return regexec(self, string, 0, NULL, eflags);
`}
# Free the memory allocated to the pattern buffer by the compiling process
#
# Does not free the memory holding `self`, use `free` for this purpose.
fun regfree `{ regfree(self); `}
# Turn the error codes that can be returned by both `regcomp` and `regexec` into error message strings
fun regerror(errcode: Int): CString `{
size_t len = regerror(errcode, self, NULL, 0);
char *message = malloc(len);
regerror(errcode, self, message, len);
return message;
`}
# Number of parenthetical subexpressions in this compiled regular expression
fun re_nsub: Int `{ return self->re_nsub; `}
end
# Flags for `NativeRegex::regcomp`
private fun flag_extended: Int `{ return REG_EXTENDED; `}
private fun flag_icase: Int `{ return REG_ICASE; `}
private fun flag_nosub: Int `{ return REG_NOSUB; `}
private fun flag_newline: Int `{ return REG_NEWLINE; `}
# Flags for `NativeRegex::regexec`
private fun flag_notbol: Int `{ return REG_NOTBOL; `}
private fun flag_noteol: Int `{ return REG_NOTEOL; `}
# Errors of `NativeRegex::regexec`
private fun error_nomatch: Int `{ return REG_NOMATCH; `}
private fun error_espace: Int `{ return REG_ESPACE; `}
redef universal Int
private fun is_nomatch: Bool `{ return self == REG_NOMATCH; `}
end
# An array of `regmatch_t` or a pointer to one
private extern class NativeMatchArray `{ regmatch_t* `}
# Allocate a new array of `length` `regmatch_t`
new malloc(length: Int) `{ return malloc(length * sizeof(regmatch_t)); `}
# The offset in string of the beginning of a substring
fun rm_so: Int `{ return self->rm_so; `}
# The offset in string of the end of the substring
fun rm_eo: Int `{ return self->rm_eo; `}
# Get a pointer to the element at `index`, can also be used as a subarray
fun [](index: Int): NativeMatchArray `{ return self + index; `}
end
redef extern class CString
private fun substring_from(index: Int): CString `{ return self + index; `}
end
redef class Text
# Get a `Regex` instance from `self`
fun to_re: Regex do return new Regex(self.to_s)
end
# A regular expression pattern
#
# Used as a `Pattern` on intances of `Text` to call `has`, `search_all`, `replace`, etc.
#
# Example:
#
# var re = "ab+a".to_re
# assert "aabbbbaaaaba".search_all(re).join(", ") == "abbbba, aba"
# assert "aabbbbaaaaba".has(re)
# assert "aabbbbaaaaba".replace(re, "+") == "a+aa+"
# assert "aabbbbaaaaba".split(re) == ["a", "aa", ""]
class Regex
super Finalizable
super Pattern
# The `String` source of this regular expression
var string: String is writable
# Treat the pattern as a POSIX extended regular expression (the default)
#
# If `false`, it is treated as a POSIX basic regular expression (BRE).
#
# The extended syntax supports `?`, `+` and `|`. Also, `\` causes the following
# character to be used as literal.
var extended = true is writable
# Ignore case when matching letters
var ignore_case = false is writable
# Optimize `self` for `String::has` and `is_in`, but do not support searches
#
# If `true`, `self` cannont be used with `String::search_all`, `String::replace`
# or `String::split`.
var optimize_has = false is writable
# Treat a newline in string as dividing string into multiple lines
#
# So that `$` can match before the newline and `^` can match after.
# Also, don’t permit `.` to match a newline, and don’t permit `[^…]` to match a newline.
#
# Otherwise, newline acts like any other ordinary character.
var newline = false is writable
# Do not regard the beginning of the specified string as the beginning of a line
#
# More generally, don’t make any assumptions about what text might precede it.
var not_bol = false is writable
# Do not regard the end of the specified string as the end of a line
#
# More generally, don’t make any assumptions about what text might follow it.
var not_eol = false is writable
# Cache of the last used compiled regular expression
private var native: nullable NativeRegex = null
# Cache of a single `regmatch_t` to prevent many calls to `malloc`
private var native_match: NativeMatchArray is lazy do
native_match_is_init = true
return new NativeMatchArray.malloc(native.as(not null).re_nsub+1)
end
private var native_match_is_init = false
# `cflags` of the last successful `compile`
private var cflags_cache = 0
# `string` of the last successful `compile`
private var string_cache: nullable String = null
# Compile the regular expression, if needed
#
# Return `null` on success and an `Error` otherwise.
#
# This method is always called by `get_match` and `has_match`, but the user
# should call it to check for errors.
#
# assert "ab".to_re.compile == null
# assert "[ab".to_re.compile.message.has_prefix("Unmatched [")
fun compile: nullable Error
do
var cflags = 0
if extended then cflags |= flag_extended
if ignore_case then cflags |= flag_icase
if optimize_has then cflags |= flag_nosub
if newline then cflags |= flag_newline
var native = self.native
var need_compilation = native == null or cflags != cflags_cache or string != string_cache
if need_compilation then
# Initial allocation
if native == null then
native = new NativeRegex.malloc
self.native = native
end
var res = native.regcomp(string.to_cstring, cflags)
# All is good
if res == 0 then
# Update the cache
self.native = native
# We store these to know if we need to recompile or not
self.cflags_cache = cflags
self.string_cache = string
return null
end
var error_cstr = native.regerror(res)
# We leave it to the lib to decide how to allocate the string that we keep
var error_str = error_cstr.to_s
error_cstr.free
return new Error(error_str)
end
return null
end
redef fun finalize
do
var native = self.native
if native != null then
native.regfree
native.free
self.native = null
if native_match_is_init then
self.native_match.free
end
end
end
private fun gather_eflags: Int
do
var eflags = 0
if not_bol then eflags |= flag_notbol
if not_eol then eflags |= flag_noteol
return eflags
end
private fun get_error(errcode: Int): String
do
var native = native
assert native != null
# Error, should be out of memory but we cover any possible error anyway
var error_cstr = native.regerror(errcode)
# We leave it to the lib to decide how to allocate the string that we keep
var error_str = error_cstr.to_s
error_cstr.free
return error_str
end
# assert "ab".to_re.is_in("abcd")
# assert "ab".to_re.is_in("cdab")
# assert not "ab".to_re.is_in("acdb")
# assert "ab".to_re.is_in("ab")
redef fun is_in(text)
do
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
var native = native
assert native != null
# Actually execute
var eflags = gather_eflags
var res = native.regexec_match_only(text.to_cstring, eflags)
# Got a match?
if res == 0 then return true
# Got no match, not an error?
if res.is_nomatch then return false
# Error, should be out of memory but we cover any possible error anyway
var error_str = get_error(res)
"Regex search failed with: {error_str}\n".output
abort
end
# require: not optimize_has
#
# assert "l".to_re.search_index_in("hello world", 0) == 2
# assert "el+o".to_re.search_index_in("hello world", 0) == 1
# assert "l+".to_re.search_index_in("hello world", 3) == 3
# assert "z".to_re.search_index_in("hello world", 0) == -1
redef fun search_index_in(text, from)
do
assert not optimize_has
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
var native = native
assert native != null
# Actually execute
text = text.to_s
var cstr = text.substring_from(from).to_cstring
var eflags = gather_eflags
var match = self.native_match
var res = native.regexec(cstr, 1, match, eflags)
# Found one?
if res == 0 then return match.rm_so + from
# No more match?
if res.is_nomatch then return -1
# Error, should be out of memory but we cover any possible error anyway
var error_str = get_error(res)
"Regex search failed with: {error_str}\n".output
abort
end
# require: not optimize_has
#
# assert "l".to_re.search_in("hello world", 0).from == 2
# assert "el+o".to_re.search_in("hello world", 0).from == 1
# assert "l+".to_re.search_in("hello world", 3).from == 3
# assert "z".to_re.search_in("hello world", 0) == null
# assert "cd(e)".to_re.search_in("abcdef", 2)[1].to_s == "e"
redef fun search_in(text, charfrom)
do
assert not optimize_has
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
var native = native
assert native != null
# Actually execute
var cstr = text.to_cstring
var rets = cstr.to_s_unsafe(text.byte_length, copy=false)
var bytefrom = cstr.char_to_byte_index_cached(charfrom, 0, 0)
var subcstr = cstr.fast_cstring(bytefrom)
var eflags = gather_eflags
var native_match = self.native_match
var nsub = native.re_nsub
var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
# Found one?
if res == 0 then
var bfrom = native_match.rm_so + bytefrom
var bto = native_match.rm_eo - 1 + bytefrom
var cpos = cstr.byte_to_char_index_cached(bfrom, charfrom, bytefrom)
var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
var match = new Match(rets, cpos, len)
var subs = match.subs
# Add sub expressions
for i in [1 .. nsub] do
if native_match[i].rm_so < 0 then
subs.add null
continue
end
var sub_bfrom = native_match[i].rm_so + bytefrom
var sub_bto = native_match[i].rm_eo - 1 + bytefrom
var sub_cpos = cstr.byte_to_char_index_cached(sub_bfrom, cpos, bfrom)
var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
subs.add(new Match(rets, sub_cpos, sub_len))
end
return match
end
# No more match?
if res.is_nomatch then return null
# Error, should be out of memory but we cover any possible error anyway
var error_str = get_error(res)
"Regex search failed with: {error_str}\n".output
abort
end
# require: not optimize_has
#
# assert "ab".to_re.search_all_in("abbab").join(", ") == "ab, ab"
# assert "b+".to_re.search_all_in("abbabaabbbbbcab").join(", ") == "bb, b, bbbbb, b"
redef fun search_all_in(text)
do
assert not optimize_has
var comp_res = compile
assert comp_res == null else "Regex compilation failed with: {comp_res.message}\n".output
var native = native
assert native != null
# Actually execute
var cstr = text.to_cstring
var subcstr = cstr
var rets = cstr.to_s_unsafe(text.byte_length, copy=false)
var eflags = gather_eflags
var eflags_or_notbol = eflags | flag_notbol
var native_match = self.native_match
var matches = new Array[Match]
var nsub = native.re_nsub
var res = native.regexec(subcstr, nsub + 1, native_match, eflags)
var bytesub = 0
var charsub = 0
while res == 0 do
var bfrom = native_match.rm_so + bytesub
var bto = native_match.rm_eo - 1 + bytesub
var cstart = cstr.byte_to_char_index_cached(bfrom, charsub, bytesub)
var len = cstr.utf8_length(bfrom, bto - bfrom + 1)
var match = new Match(rets, cstart, len)
matches.add match
var subs = match.subs
# Add sub expressions
for i in [1 .. nsub] do
if native_match[i].rm_so < 0 then
subs.add null
continue
end
var sub_bfrom = native_match[i].rm_so + bytesub
var sub_bto = native_match[i].rm_eo - 1 + bytesub
var sub_cstart = cstr.byte_to_char_index_cached(sub_bfrom, cstart, bfrom)
var sub_len = cstr.utf8_length(sub_bfrom, sub_bto - sub_bfrom + 1)
subs.add(new Match(rets, sub_cstart, sub_len))
end
bytesub = bto + 1
charsub = cstart + len
subcstr = cstr.fast_cstring(bytesub)
res = native.regexec(subcstr, nsub + 1, native_match, eflags_or_notbol)
end
# No more match?
if res.is_nomatch then return matches
# Error, should be out of memory but we cover any possible error anyway
var error_str = get_error(res)
"Regex search failed with: {error_str}\n".output
abort
end
redef fun to_s do return "/{string}/"
end
redef class Match
# Parenthesized subexpressions in this match
#
# ~~~
# var re = "c (d e+) f".to_re
# var match = "a b c d eee f g".search(re)
# assert match.subs.length == 1
# assert match.subs.first.to_s == "d eee"
# ~~~
var subs = new Array[nullable Match] is lazy
# Get the `n`th expression in this match
#
# `n == 0` returns this match, and a greater `n` returns the corresponding
# subexpression.
#
# Require: `n >= 0 and n <= subs.length`
#
# ~~~
# var re = "c (d e+) f".to_re
# var match = "a b c d eee f g".search(re)
# assert match[0].to_s == "c d eee f"
# assert match[1].to_s == "d eee"
# ~~~
fun [](n: Int): nullable Match do
if n == 0 then return self
assert n > 0 and n <= subs.length
return subs[n-1]
end
end
lib/core/re.nit:17,1--510,3