X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string_search.nit b/lib/standard/string_search.nit index 22d0bc9..8dcf90a 100644 --- a/lib/standard/string_search.nit +++ b/lib/standard/string_search.nit @@ -4,7 +4,7 @@ # # This file is free software, which comes along with NIT. This software is # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. You can modify it is you want, provided this header # is kept unaltered, and a notification of the changes is added. # You are allowed to redistribute it and sell it, alone or is a part of @@ -20,14 +20,35 @@ interface Pattern # Search `self` into `s` from a certain position. # Return the position of the first character of the matching section. # Return -1 if not found. - fun search_index_in(s: String, from: Int): Int is abstract + # + # assert 'l'.search_index_in("hello world", 0) == 2 + # assert 'l'.search_index_in("hello world", 3) == 3 + # assert 'z'.search_index_in("hello world", 0) == -1 + # + # This method is usually faster than `search_in` if what is + # required is only the index. + # Note: in most implementation, `search_in` is implemented with this method. + protected fun search_index_in(s: Text, from: Int): Int is abstract # Search `self` into `s` from a certain position. # Return null if not found. - fun search_in(s: String, from: Int): nullable Match is abstract + # + # assert 'l'.search_in("hello world", 0).from == 2 + # assert 'l'.search_in("hello world", 3).from == 3 + # assert 'z'.search_in("hello world", 0) == null + # + # If only the index of the first character if required, see `search_index_in`. + # + # Note: Is used by `String::search`, `String::search_from`, and others. + protected fun search_in(s: Text, from: Int): nullable Match is abstract # Search all `self` occurrences into `s`. 
- fun search_all_in(s: String): Array[Match] + # + # assert 'l'.search_all_in("hello world").length == 3 + # assert 'z'.search_all_in("hello world").length == 0 + # + # Note: Is used by `String::search_all`. + protected fun search_all_in(s: Text): Array[Match] do var res = new Array[Match] # Result var match = search_in(s, 0) @@ -39,7 +60,15 @@ interface Pattern end # Split `s` using `self` is separator. - fun split_in(s: String): Array[Match] + # + # Returns an array of matches that are between each occurence of `self`. + # If self is not present, an array with a single match on `s` is retunred. + # + # assert 'l'.split_in("hello world").join("|") == "he||o wor|d" + # assert 'z'.split_in("hello world").join("|") == "hello world" + # + # Note: is used by `String::split` + protected fun split_in(s: Text): Array[Match] do var res = new Array[Match] # Result var i = 0 # Cursor @@ -47,12 +76,12 @@ interface Pattern while match != null do # Compute the splited part length var len = match.from - i - res.add(new Match(s, i, len)) + res.add(new Match(s.to_s, i, len)) i = match.after match = search_in(s, i) end # Add the last part - res.add(new Match(s, i, s.length - i)) + res.add(new Match(s.to_s, i, s.length - i)) return res end end @@ -61,6 +90,10 @@ end # (cf. A Fast String Searching Algorithm, with R.S. Boyer. Communications # of the Association for Computing Machinery, 20(10), 1977, pp. 762-772.) 
# http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html +# +# var pat = new BM_Pattern("hello") +# assert "I say hello to the world!".search(pat).from == 6 +# assert "I say goodbye to the world!".search(pat) == null class BM_Pattern super Pattern @@ -100,7 +133,7 @@ class BM_Pattern if to < 0 then return null else - return new Match(s, to, _length) + return new Match(s.to_s, to, _length) end end @@ -108,7 +141,7 @@ class BM_Pattern init(motif: String) do _motif = motif - _length = motif.length + _length = _motif.length _gs = new Array[Int].with_capacity(_length) _bc_table = new ArrayMap[Char, Int] compute_gs @@ -122,7 +155,7 @@ class BM_Pattern var _length: Int private fun bc(e: Char): Int - do + do if _bc_table.has_key(e) then return _bc_table[e] else @@ -202,23 +235,23 @@ class BM_Pattern redef fun ==(o) do return o isa BM_Pattern and o._motif == _motif end -# Matches are a part of a string. +# Matches are a part of a `Text` found by a `Pattern`. class Match # The base string matched - readable var _string: String + var string: String # The starting position in the string - readable var _from: Int + var from: Int # The length of the matching part - readable var _length: Int + var length: Int # The position of the first character just after the matching part. # May be out of the base string - fun after: Int do return _from + _length + fun after: Int do return from + length # The contents of the matching part - redef fun to_s do return _string.substring(_from, _length) + redef fun to_s do return string.substring(from,length) # Matches `len` characters of `s` from `f`. 
init(s: String, f: Int, len: Int) @@ -226,9 +259,9 @@ class Match assert positive_length: len >= 0 assert valid_from: f >= 0 assert valid_after: f + len <= s.length - _string = s - _from = f - _length = len + string = s + from = f + length = len end end @@ -251,12 +284,12 @@ redef class Char if pos < 0 then return null else - return new Match(s, pos, 1) + return new Match(s.to_s, pos, 1) end end end -redef class String +redef class Text super Pattern redef fun search_index_in(s, from) @@ -280,16 +313,23 @@ redef class String if pos < 0 then return null else - return new Match(s, pos, length) + return new Match(s.to_s, pos, length) end end - # Like `search_from` but from the first character. + # Search the first occurence of the pattern `p`. + # Return null if not found. + # + # assert "I say hello to the world!".search("hello").from == 6 + # assert "I say goodbye to the world!".search("hello") == null fun search(p: Pattern): nullable Match do return p.search_in(self, 0) - # Search the given pattern into self from a. + # Search the first occurence of the pattern `p` after `from`. # The search starts at `from`. # Return null if not found. + # + # assert "I say hello to the world!".search_from("hello",4).from == 6 + # assert "I say hello to the world!".search_from("hello",7) == null fun search_from(p: Pattern, from: Int): nullable Match do return p.search_in(self, from) # Search all occurrences of p into self. @@ -304,30 +344,30 @@ redef class String # Split `self` using `p` as separator. 
# # assert "hello world".split('o') == ["hell", " w", "rld"] - fun split(p: Pattern): Array[String] + fun split(p: Pattern): Array[SELFTYPE] do var matches = p.split_in(self) - var res = new Array[String].with_capacity(matches.length) + var res = new Array[SELFTYPE].with_capacity(matches.length) for m in matches do res.add(m.to_s) return res end # @deprecated alias for `split` - fun split_with(p: Pattern): Array[String] do return self.split(p) + fun split_with(p: Pattern): Array[SELFTYPE] do return self.split(p) # Replace all occurences of a pattern with a string # # assert "hlelo".replace("le", "el") == "hello" # assert "hello".replace('l', "") == "heo" - fun replace(p: Pattern, string: String): String + fun replace(p: Pattern, string: SELFTYPE): SELFTYPE do return self.split_with(p).join(string) end - # Escape the four characters < > & and " with their html counterpart + # Escape the four characters `<`, `>`, `&`, and `"` with their html counterpart # # assert "a&b->\"x\"".html_escape == "a&amp;b-&gt;&quot;x&quot;" - fun html_escape: String + fun html_escape: SELFTYPE do var ret = self if ret.chars.has('&') then ret = ret.replace('&', "&amp;")