X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string_search.nit b/lib/standard/string_search.nit index 65f3292..437341f 100644 --- a/lib/standard/string_search.nit +++ b/lib/standard/string_search.nit @@ -84,6 +84,9 @@ interface Pattern res.add(new Match(s.to_s, i, s.length - i)) return res end + + # Is `self` in `s`? + protected fun is_in(s: Text): Bool do return search_index_in(s, 0) != -1 end # BM_Pattern are pre-compiled string motif for the Boyer-Moore algorithm. @@ -137,22 +140,19 @@ class BM_Pattern end end - # Compile a new motif - init(motif: String) + init do - _motif = motif _length = _motif.length _gs = new Array[Int].with_capacity(_length) - _bc_table = new ArrayMap[Char, Int] compute_gs compute_bc end # searched motif - var _motif: String + private var motif: String # length of the motif - var _length: Int + private var length: Int is noinit private fun bc(e: Char): Int do @@ -164,10 +164,10 @@ class BM_Pattern end # good shifts - var _gs: Array[Int] + private var gs: Array[Int] is noinit # bad characters - var _bc_table: Map[Char, Int] + private var bc_table = new ArrayMap[Char, Int] private fun compute_bc do @@ -205,7 +205,6 @@ class BM_Pattern private fun compute_gs do - var x = _motif var m = _length var suff = suffixes var i = 0 @@ -238,30 +237,26 @@ end # Matches are a part of a `Text` found by a `Pattern`. class Match # The base string matched - readable var _string: String + var string: String # The starting position in the string - readable var _from: Int + var from: Int # The length of the matching part - readable var _length: Int + var length: Int # The position of the first character just after the matching part. # May be out of the base string - fun after: Int do return _from + _length + fun after: Int do return from + length # The contents of the matching part - redef fun to_s do return _string.substring(_from,_length) + redef fun to_s do return string.substring(from,length) - # Matches `len` characters of `s` from `f`. 
- init(s: String, f: Int, len: Int) + init do - assert positive_length: len >= 0 - assert valid_from: f >= 0 - assert valid_after: f + len <= s.length - _string = s - _from = f - _length = len + assert positive_length: length >= 0 + assert valid_from: from >= 0 + assert valid_after: from + length <= string.length end end @@ -332,6 +327,37 @@ redef class Text # assert "I say hello to the world!".search_from("hello",7) == null fun search_from(p: Pattern, from: Int): nullable Match do return p.search_in(self, from) + # Search the last occurrence of the text `t`. + # + # assert "bob".search_last("b").from == 2 + # assert "bob".search_last("bo").from == 0 + # assert "bob".search_last("ob").from == 1 + # assert "bobob".search_last("ob").from == 3 + # assert "bobbob".search_last("bb").from == 2 + # assert "bobbob".search_last("bob").from == 3 + # assert "bob".search_last("z") == null + # assert "".search_last("b") == null + fun search_last(t: Text): nullable Match do + return search_last_up_to(t, length) + end + + # Search the last occurrence of the text `t` before `up_to`. + # + # assert "bobbob".search_last_up_to("b", 3).from == 2 + # assert "bobbob".search_last_up_to("b", 6).from == 5 + # assert "bobbob".search_last_up_to("b", 0) == null + fun search_last_up_to(t: Text, up_to: Int): nullable Match do + var i = up_to - t.length + + while i >= 0 do + if substring(i, t.length) == t then + return new Match(self.to_s, i, t.length) + end + i -= 1 + end + return null + end + # Search all occurrences of p into self. # # var a = new Array[Int] @@ -344,36 +370,47 @@ redef class Text # Split `self` using `p` as separator. 
# # assert "hello world".split('o') == ["hell", " w", "rld"] - fun split(p: Pattern): Array[SELFTYPE] + fun split(p: Pattern): Array[String] do var matches = p.split_in(self) - var res = new Array[SELFTYPE].with_capacity(matches.length) + var res = new Array[String].with_capacity(matches.length) for m in matches do res.add(m.to_s) return res end # @deprecated alias for `split` - fun split_with(p: Pattern): Array[SELFTYPE] do return self.split(p) + fun split_with(p: Pattern): Array[String] do return self.split(p) + + # Split `self` on the first occurrence of `p` + # + # assert "hello".split_once_on('l') == ["he", "lo"] + # assert "a, b, c, d, e".split_once_on(", ") == ["a", "b, c, d, e"] + fun split_once_on(p: Pattern): Array[SELFTYPE] + do + var m = p.search_in(self, 0) + var res = new Array[SELFTYPE] + if m == null then + res.add self + else + res.add substring(0, m.from) + res.add substring_from(m.after) + end + return res + end # Replace all occurences of a pattern with a string # # assert "hlelo".replace("le", "el") == "hello" # assert "hello".replace('l', "") == "heo" - fun replace(p: Pattern, string: SELFTYPE): SELFTYPE + fun replace(p: Pattern, string: SELFTYPE): String do return self.split_with(p).join(string) end - # Escape the four characters `<`, `>`, `&`, and `"` with their html counterpart + # Does `self` contain at least one instance of `pattern`? # - # assert "a&b->\"x\"".html_escape == "a&amp;b-&gt;&quot;x&quot;" - fun html_escape: SELFTYPE - do - var ret = self - if ret.chars.has('&') then ret = ret.replace('&', "&amp;") - if ret.chars.has('<') then ret = ret.replace('<', "&lt;") - if ret.chars.has('>') then ret = ret.replace('>', "&gt;") - if ret.chars.has('"') then ret = ret.replace('"', "&quot;") - return ret - end + # assert "hello".has('l') + # assert "hello".has("ll") + # assert not "hello".has("lll") + fun has(pattern: Pattern): Bool do return pattern.is_in(self) end