X-Git-Url: http://nitlanguage.org diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit index 6cf79bf..e07e0d4 100644 --- a/lib/core/text/abstract_text.nit +++ b/lib/core/text/abstract_text.nit @@ -68,7 +68,7 @@ abstract class Text fun substring(from: Int, count: Int): SELFTYPE is abstract # Iterates on the substrings of self if any - fun substrings: Iterator[FlatText] is abstract + private fun substrings: Iterator[FlatText] is abstract # Is the current Text empty (== "") # @@ -146,15 +146,7 @@ abstract class Text # Returns -1 if not found # # DEPRECATED : Use self.chars.last_index_of_from instead - fun last_index_of_from(item: Char, pos: Int): Int - do - var iter = self.chars.reverse_iterator_from(pos) - while iter.is_ok do - if iter.item == item then return iter.index - iter.next - end - return -1 - end + fun last_index_of_from(item: Char, pos: Int): Int do return chars.last_index_of_from(item, pos) # Gets an iterator on the chars of self # @@ -256,7 +248,17 @@ abstract class Text # If `self` contains only digits and alpha <= 'f', return the corresponding integer. # # assert "ff".to_hex == 255 - fun to_hex: Int do return a_to(16) + fun to_hex(pos, ln: nullable Int): Int do + var res = 0 + if pos == null then pos = 0 + if ln == null then ln = length - pos + var max = pos + ln + for i in [pos .. max[ do + res <<= 4 + res += self[i].from_hex + end + return res + end # If `self` contains only digits <= '7', return the corresponding integer. # @@ -303,26 +305,32 @@ abstract class Text end end - # Returns `true` if the string contains only Numeric values (and one "," or one "." character) + # Is this string in a valid numeric format compatible with `to_f`? # # assert "123".is_numeric == true # assert "1.2".is_numeric == true - # assert "1,2".is_numeric == true + # assert "-1.2".is_numeric == true + # assert "-1.23e-2".is_numeric == true # assert "1..2".is_numeric == false + # assert "".is_numeric == false fun is_numeric: Bool do - var has_point_or_comma = false + var has_point = false + var e_index = -1 for i in [0..length[ do var c = chars[i] if not c.is_numeric then - if (c == '.' or c == ',') and not has_point_or_comma then - has_point_or_comma = true + if c == '.' and not has_point then + has_point = true + else if c == 'e' and e_index == -1 and i > 0 and i < length - 1 and chars[i-1] != '-' then + e_index = i + else if c == '-' and i == e_index + 1 and i < length - 1 then else return false end end end - return true + return not is_empty end # Returns `true` if the string contains only Hex chars @@ -487,18 +495,21 @@ abstract class Text end end - # Justify a self in a space of `length` + # Justify `self` in a space of `length` # # `left` is the space ratio on the left side. # * 0.0 for left-justified (no space at the left) # * 1.0 for right-justified (all spaces at the left) # * 0.5 for centered (half the spaces at the left) # + # `char`, or `' '` by default, is repeated to pad the empty space. + # # Examples # # assert "hello".justify(10, 0.0) == "hello " # assert "hello".justify(10, 1.0) == " hello" # assert "hello".justify(10, 0.5) == " hello " + # assert "hello".justify(10, 0.5, '.') == "..hello..." # # If `length` is not enough, `self` is returned as is. # @@ -507,13 +518,14 @@ abstract class Text # REQUIRE: `left >= 0.0 and left <= 1.0` # ENSURE: `self.length <= length implies result.length == length` # ENSURE: `self.length >= length implies result == self` - fun justify(length: Int, left: Float): String + fun justify(length: Int, left: Float, char: nullable Char): String do + var pad = (char or else ' ').to_s var diff = length - self.length if diff <= 0 then return to_s assert left >= 0.0 and left <= 1.0 var before = (diff.to_f * left).to_i - return " " * before + self + " " * (diff-before) + return pad * before + self + pad * (diff-before) end # Mangle a string to be a unique string only made of alphanumeric characters and underscores. @@ -543,7 +555,7 @@ abstract class Text if c >= '0' and c <= '9' then res.add('_') - res.append(c.ascii.to_s) + res.append(c.code_point.to_s) res.add('d') start = 1 end @@ -555,7 +567,7 @@ abstract class Text continue end if underscore then - res.append('_'.ascii.to_s) + res.append('_'.code_point.to_s) res.add('d') end if c >= '0' and c <= '9' then @@ -566,28 +578,31 @@ abstract class Text underscore = true else res.add('_') - res.append(c.ascii.to_s) + res.append(c.code_point.to_s) res.add('d') underscore = false end end if underscore then - res.append('_'.ascii.to_s) + res.append('_'.code_point.to_s) res.add('d') end return res.to_s end - # Escape " \ ' and non printable characters using the rules of literal C strings and characters + # Escape `"` `\` `'`, trigraphs and non printable characters using the rules of literal C strings and characters # - # assert "abAB12<>&".escape_to_c == "abAB12<>&" + # assert "abAB12<>&".escape_to_c == "abAB12<>&" # assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\" + # assert "allo???!".escape_to_c == "allo??\\?!" + # assert "??=??/??'??(??)".escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)" + # assert "??!????-".escape_to_c == "?\\?!?\\??\\?-" # # Most non-printable characters (bellow ASCII 32) are escaped to an octal form `\nnn`. # Three digits are always used to avoid following digits to be interpreted as an element # of the octal sequence. # - # assert "{0.ascii}{1.ascii}{8.ascii}{31.ascii}{32.ascii}".escape_to_c == "\\000\\001\\010\\037 " + # assert "{0.code_point}{1.code_point}{8.code_point}{31.code_point}{32.code_point}".escape_to_c == "\\000\\001\\010\\037 " # # The exceptions are the common `\t` and `\n`. fun escape_to_c: String @@ -605,9 +620,27 @@ abstract class Text b.append("\\\'") else if c == '\\' then b.append("\\\\") - else if c.ascii < 32 then + else if c == '?' then + # Escape if it is the last question mark of a ANSI C trigraph. + var j = i + 1 + if j < length then + var next = chars[j] + # We ignore `??'` because it will be escaped as `??\'`. + if + next == '!' or + next == '(' or + next == ')' or + next == '-' or + next == '/' or + next == '<' or + next == '=' or + next == '>' + then b.add('\\') + end + b.add('?') + else if c.code_point < 32 then b.add('\\') - var oct = c.ascii.to_base(8, false) + var oct = c.code_point.to_base(8) # Force 3 octal digits since it is the # maximum allowed in the C specification if oct.length == 1 then @@ -628,6 +661,7 @@ abstract class Text # The result might no be legal in C but be used in other languages # # assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}" + # assert "allo???!".escape_more_to_c("") == "allo??\\?!" fun escape_more_to_c(chars: String): String do var b = new Buffer @@ -680,8 +714,8 @@ abstract class Text else if c == ':' or c == ' ' or c == '#' then b.add('\\') b.add(c) - else if c.ascii < 32 or c == ';' or c == '|' or c == '\\' or c == '=' then - b.append("?{c.ascii.to_base(16, false)}") + else if c.code_point < 32 or c == ';' or c == '|' or c == '\\' or c == '=' then + b.append("?{c.code_point.to_base(16)}") else b.add(c) end @@ -695,7 +729,7 @@ abstract class Text # assert s.length == 2 # var u = s.unescape_nit # assert u.length == 1 - # assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character) + # assert u.chars[0].code_point == 10 # (the ASCII value of the "new line" character) fun unescape_nit: String do var res = new Buffer.with_cap(self.length) @@ -726,6 +760,49 @@ abstract class Text return res.to_s end + # Returns `self` with all characters escaped with their UTF-16 representation + # + # assert "Aèあ𐏓".escape_to_utf16 == "\\u0041\\u00e8\\u3042\\ud800\\udfd3" + fun escape_to_utf16: String do + var buf = new Buffer + for i in chars do buf.append i.escape_to_utf16 + return buf.to_s + end + + # Returns the Unicode char escaped by `self` + # + # assert "\\u0041".from_utf16_escape == 'A' + # assert "\\ud800\\udfd3".from_utf16_escape == '𐏓' + # assert "\\u00e8".from_utf16_escape == 'è' + # assert "\\u3042".from_utf16_escape == 'あ' + fun from_utf16_escape(pos, ln: nullable Int): Char do + if pos == null then pos = 0 + if ln == null then ln = length - pos + if ln < 6 then return 0xFFFD.code_point + var cp = from_utf16_digit(pos + 2) + if cp < 0xD800 then return cp.code_point + if cp > 0xDFFF then return cp.code_point + if cp > 0xDBFF then return 0xFFFD.code_point + if ln == 6 then return 0xFFFD.code_point + if ln < 12 then return 0xFFFD.code_point + cp <<= 16 + cp += from_utf16_digit(pos + 8) + var cplo = cp & 0xFFFF + if cplo < 0xDC00 then return 0xFFFD.code_point + if cplo > 0xDFFF then return 0xFFFD.code_point + return cp.from_utf16_surr.code_point + end + + # Returns a UTF-16 escape value + # + # var s = "\\ud800\\udfd3" + # assert s.from_utf16_digit(2) == 0xD800 + # assert s.from_utf16_digit(8) == 0xDFD3 + fun from_utf16_digit(pos: nullable Int): Int do + if pos == null then pos = 0 + return to_hex(pos, 4) + end + # Encode `self` to percent (or URL) encoding # # assert "aBc09-._~".to_percent_encoding == "aBc09-._~" @@ -787,7 +864,7 @@ abstract class Text if c == '%' then if i + 2 >= length then # What follows % has been cut off - buf[l] = '?'.ascii.to_b + buf[l] = '?'.ascii else i += 1 var hex_s = substring(i, 2) @@ -797,17 +874,17 @@ abstract class Text i += 1 else # What follows a % is not Hex - buf[l] = '?'.ascii.to_b + buf[l] = '?'.ascii i -= 1 end end - else buf[l] = c.ascii.to_b + else buf[l] = c.ascii i += 1 l += 1 end - return buf.to_s_with_length(l) + return buf.to_s_unsafe(l) end # Escape the characters `<`, `>`, `&`, `"`, `'` and `/` as HTML/XML entity references. @@ -905,7 +982,7 @@ abstract class Text for i in [0..length[ do var char = chars[i] - h = (h << 5) + h + char.ascii + h = (h << 5) + h + char.code_point end hash_cache = h @@ -913,36 +990,46 @@ abstract class Text return hash_cache.as(not null) end - # Gives the formatted string back as a Nit string with `args` in place + # Format `self` by replacing each `%n` with the `n`th item of `args` # - # assert "This %1 is a %2.".format("String", "formatted String") == "This String is a formatted String." - # assert "\\%1 This string".format("String") == "\\%1 This string" + # The character `%` followed by something other than a number are left as is. + # To represent a `%` followed by a number, double the `%`, as in `%%7`. + # + # assert "This %0 is a %1.".format("String", "formatted String") == "This String is a formatted String." + # assert "Do not escape % nor %%1".format("unused") == "Do not escape % nor %1" fun format(args: Object...): String do var s = new Array[Text] var curr_st = 0 var i = 0 while i < length do - # Skip escaped characters - if self[i] == '\\' then - i += 1 - # In case of format - else if self[i] == '%' then + if self[i] == '%' then var fmt_st = i i += 1 var ciph_st = i while i < length and self[i].is_numeric do i += 1 end - i -= 1 - var fmt_end = i - var ciph_len = fmt_end - ciph_st + 1 - var arg_index = substring(ciph_st, ciph_len).to_i - 1 + var ciph_len = i - ciph_st + if ciph_len == 0 then + # What follows '%' is not a number. + s.push substring(curr_st, i - curr_st) + if i < length and self[i] == '%' then + # Skip the next `%` + i += 1 + end + curr_st = i + continue + end + + var arg_index = substring(ciph_st, ciph_len).to_i if arg_index >= args.length then continue s.push substring(curr_st, fmt_st - curr_st) s.push args[arg_index].to_s - curr_st = i + 1 + + curr_st = i + i -= 1 end i += 1 end @@ -950,6 +1037,65 @@ abstract class Text return s.plain_to_s end + # Return the Levenshtein distance between two strings + # + # ~~~ + # assert "abcd".levenshtein_distance("abcd") == 0 + # assert "".levenshtein_distance("abcd") == 4 + # assert "abcd".levenshtein_distance("") == 4 + # assert "abcd".levenshtein_distance("xyz") == 4 + # assert "abcd".levenshtein_distance("xbdy") == 3 + # ~~~ + fun levenshtein_distance(other: String): Int + do + var slen = self.length + var olen = other.length + + # fast cases + if slen == 0 then return olen + if olen == 0 then return slen + if self == other then return 0 + + # previous row of distances + var v0 = new Array[Int].with_capacity(olen+1) + + # current row of distances + var v1 = new Array[Int].with_capacity(olen+1) + + for j in [0..olen] do + # prefix insert cost + v0[j] = j + end + + for i in [0..slen[ do + + # prefix delete cost + v1[0] = i + 1 + + for j in [0..olen[ do + # delete cost + var cost1 = v1[j] + 1 + # insert cost + var cost2 = v0[j + 1] + 1 + # same char cost (+0) + var cost3 = v0[j] + # change cost + if self[i] != other[j] then cost3 += 1 + # keep the min + v1[j+1] = cost1.min(cost2).min(cost3) + end + + # Switch columns: + # * v1 become v0 in the next iteration + # * old v0 is reused as the new v1 + var tmp = v1 + v1 = v0 + v0 = tmp + end + + return v0[olen] + end + # Copies `n` bytes from `self` at `src_offset` into `dest` starting at `dest_offset` # # Basically a high-level synonym of NativeString::copy_to @@ -958,7 +1104,7 @@ abstract class Text # # var ns = new NativeString(8) # "Text is String".copy_to_native(ns, 8, 2, 0) - # assert ns.to_s_with_length(8) == "xt is St" + # assert ns.to_s_unsafe(8) == "xt is St" # fun copy_to_native(dest: NativeString, n, src_offset, dest_offset: Int) do var mypos = src_offset @@ -971,6 +1117,39 @@ abstract class Text end end + # Packs the content of a string in packs of `ln` chars. + # This variant ensures that only the last element might be smaller than `ln` + # + # ~~~nit + # var s = "abcdefghijklmnopqrstuvwxyz" + # assert s.pack_l(4) == ["abcd","efgh","ijkl","mnop","qrst","uvwx","yz"] + # ~~~ + fun pack_l(ln: Int): Array[Text] do + var st = 0 + var retarr = new Array[Text].with_capacity(length / ln + length % ln) + while st < length do + retarr.add(substring(st, ln)) + st += ln + end + return retarr + end + + # Packs the content of a string in packs of `ln` chars. + # This variant ensures that only the first element might be smaller than `ln` + # + # ~~~nit + # var s = "abcdefghijklmnopqrstuvwxyz" + # assert s.pack_r(4) == ["ab","cdef","ghij","klmn","opqr","stuv","wxyz"] + # ~~~ + fun pack_r(ln: Int): Array[Text] do + var st = length + var retarr = new Array[Text].with_capacity(length / ln + length % ln) + while st >= 0 do + retarr.add(substring(st - ln, ln)) + st -= ln + end + return retarr.reversed + end end # All kinds of array-based text representations. @@ -981,10 +1160,7 @@ abstract class FlatText # # Warning : Might be void in some subclasses, be sure to check # if set before using it. - private var items: NativeString is noinit - - # Real items, used as cache for to_cstring is called - private var real_items: nullable NativeString = null + var items: NativeString is noinit # Returns a char* starting at position `first_byte` # @@ -1001,7 +1177,7 @@ abstract class FlatText # # As always, do not modify the content of the String in C code, if this is what you want # copy locally the char* as Nit Strings are immutable. - private fun fast_cstring: NativeString is abstract + fun fast_cstring: NativeString is abstract redef var length = 0 @@ -1206,30 +1382,19 @@ abstract class String # Letters that follow a letter are lowercased # Letters that follow a non-letter are upcased. # + # If `keep_upper = true`, already uppercase letters are not lowercased. + # # SEE : `Char::is_letter` for the definition of letter. # # assert "jAVASCRIPT".capitalized == "Javascript" # assert "i am root".capitalized == "I Am Root" # assert "ab_c -ab0c ab\nc".capitalized == "Ab_C -Ab0C Ab\nC" - fun capitalized: SELFTYPE do + # assert "preserve my ACRONYMS".capitalized(keep_upper=true) == "Preserve My ACRONYMS" + fun capitalized(keep_upper: nullable Bool): SELFTYPE do if length == 0 then return self var buf = new Buffer.with_cap(length) - - var curr = chars[0].to_upper - var prev = curr - buf[0] = curr - - for i in [1 .. length[ do - prev = curr - curr = self[i] - if prev.is_letter then - buf[i] = curr.to_lower - else - buf[i] = curr.to_upper - end - end - + buf.capitalize(keep_upper=keep_upper, src=self) return buf.to_s end end @@ -1324,6 +1489,13 @@ abstract class Buffer # Letters that follow a letter are lowercased # Letters that follow a non-letter are upcased. # + # If `keep_upper = true`, uppercase letters are not lowercased. + # + # When `src` is specified, this method reads from `src` instead of `self` + # but it still writes the result to the beginning of `self`. + # This requires `self` to have the capacity to receive all of the + # capitalized content of `src`. + # # SEE: `Char::is_letter` for the definition of a letter. # # var b = new FlatBuffer.from("jAVAsCriPt") @@ -1335,16 +1507,32 @@ abstract class Buffer # b = new FlatBuffer.from("ab_c -ab0c ab\nc") # b.capitalize # assert b == "Ab_C -Ab0C Ab\nC" - fun capitalize do + # + # b = new FlatBuffer.from("12345") + # b.capitalize(src="foo") + # assert b == "Foo45" + # + # b = new FlatBuffer.from("preserve my ACRONYMS") + # b.capitalize(keep_upper=true) + # assert b == "Preserve My ACRONYMS" + fun capitalize(keep_upper: nullable Bool, src: nullable Text) do + src = src or else self + var length = src.length if length == 0 then return - var c = self[0].to_upper + keep_upper = keep_upper or else false + + var c = src[0].to_upper self[0] = c var prev = c for i in [1 .. length[ do prev = c - c = self[i] + c = src[i] if prev.is_letter then - self[i] = c.to_lower + if keep_upper then + self[i] = c + else + self[i] = c.to_lower + end else self[i] = c.to_upper end @@ -1360,6 +1548,42 @@ abstract class Buffer # In Buffers, the internal sequence of character is mutable # Thus, `chars` can be used to modify the buffer. redef fun chars: Sequence[Char] is abstract + + # Appends `length` chars from `s` starting at index `from` + # + # ~~~nit + # var b = new Buffer + # b.append_substring("abcde", 1, 2) + # assert b == "bc" + # b.append_substring("vwxyz", 2, 3) + # assert b == "bcxyz" + # b.append_substring("ABCDE", 4, 300) + # assert b == "bcxyzE" + # b.append_substring("VWXYZ", 400, 1) + # assert b == "bcxyzE" + # ~~~ + fun append_substring(s: Text, from, length: Int) do + if from < 0 then + length += from + from = 0 + end + var ln = s.length + if (length + from) > ln then length = ln - from + if length <= 0 then return + append_substring_impl(s, from, length) + end + + # Unsafe version of `append_substring` for performance + # + # NOTE: Use only if sure about `from` and `length`, no checks + # or bound recalculation is done + fun append_substring_impl(s: Text, from, length: Int) do + var pos = from + for i in [0 .. length[ do + self.add s[pos] + pos += 1 + end + end end # View for chars on Buffer objects, extends Sequence @@ -1440,21 +1664,21 @@ redef class Byte var ns = new NativeString(nslen + 1) ns[nslen] = 0u8 native_byte_to_s(ns, nslen + 1) - return ns.to_s_with_length(nslen) + return ns.to_s_unsafe(nslen) end end redef class Int # Wrapper of strerror C function - private fun strerror_ext: NativeString `{ return strerror(self); `} + private fun strerror_ext: NativeString `{ return strerror((int)self); `} # Returns a string describing error number fun strerror: String do return strerror_ext.to_s - # Fill `s` with the digits in base `base` of `self` (and with the '-' sign if 'signed' and negative). + # Fill `s` with the digits in base `base` of `self` (and with the '-' sign if negative). # assume < to_c max const of char - private fun fill_buffer(s: Buffer, base: Int, signed: Bool) + private fun fill_buffer(s: Buffer, base: Int) do var n: Int # Sign @@ -1486,14 +1710,30 @@ redef class Int snprintf(nstr, strlen, "%ld", self); `} - # return displayable int in base base and signed - fun to_base(base: Int, signed: Bool): String is abstract + # String representation of `self` in the given `base` + # + # ~~~ + # assert 15.to_base(10) == "15" + # assert 15.to_base(16) == "f" + # assert 15.to_base(2) == "1111" + # assert (-10).to_base(3) == "-101" + # ~~~ + fun to_base(base: Int): String + do + var l = digit_count(base) + var s = new Buffer + s.enlarge(l) + for x in [0..l[ do s.add(' ') + fill_buffer(s, base) + return s.to_s + end + # return displayable int in hexadecimal # # assert 1.to_hex == "1" # assert (-255).to_hex == "-ff" - fun to_hex: String do return to_base(16,false) + fun to_hex: String do return to_base(16) end redef class Float @@ -1573,9 +1813,33 @@ end redef class Char + # Returns a sequence with the UTF-8 bytes of `self` + # + # assert 'a'.bytes == [0x61u8] + # assert 'ま'.bytes == [0xE3u8, 0x81u8, 0xBEu8] + fun bytes: SequenceRead[Byte] do return to_s.bytes + + # Is `self` an UTF-16 surrogate pair ? + fun is_surrogate: Bool do + var cp = code_point + return cp >= 0xD800 and cp <= 0xDFFF + end + + # Is `self` a UTF-16 high surrogate ? + fun is_hi_surrogate: Bool do + var cp = code_point + return cp >= 0xD800 and cp <= 0xDBFF + end + + # Is `self` a UTF-16 low surrogate ? + fun is_lo_surrogate: Bool do + var cp = code_point + return cp >= 0xDC00 and cp <= 0xDFFF + end + # Length of `self` in a UTF-8 String - private fun u8char_len: Int do - var c = self.ascii + fun u8char_len: Int do + var c = self.code_point if c < 0x80 then return 1 if c <= 0x7FF then return 2 if c <= 0xFFFF then return 3 @@ -1589,7 +1853,47 @@ redef class Char var ln = u8char_len var ns = new NativeString(ln + 1) u8char_tos(ns, ln) - return ns.to_s_with_length(ln) + return ns.to_s_unsafe(ln) + end + + # Returns `self` escaped to UTF-16 + # + # i.e. Represents `self`.`code_point` using UTF-16 codets escaped + # with a `\u` + # + # assert 'A'.escape_to_utf16 == "\\u0041" + # assert 'è'.escape_to_utf16 == "\\u00e8" + # assert 'あ'.escape_to_utf16 == "\\u3042" + # assert '𐏓'.escape_to_utf16 == "\\ud800\\udfd3" + fun escape_to_utf16: String do + var cp = code_point + var buf: Buffer + if cp < 0xD800 or (cp >= 0xE000 and cp <= 0xFFFF) then + buf = new Buffer.with_cap(6) + buf.append("\\u0000") + var hx = cp.to_hex + var outid = 5 + for i in hx.chars.reverse_iterator do + buf[outid] = i + outid -= 1 + end + else + buf = new Buffer.with_cap(12) + buf.append("\\u0000\\u0000") + var lo = (((cp - 0x10000) & 0x3FF) + 0xDC00).to_hex + var hi = ((((cp - 0x10000) & 0xFFC00) >> 10) + 0xD800).to_hex + var out = 2 + for i in hi do + buf[out] = i + out += 1 + end + out = 8 + for i in lo do + buf[out] = i + out += 1 + end + end + return buf.to_s end private fun u8char_tos(r: NativeString, len: Int) `{ @@ -1642,6 +1946,16 @@ redef class Char return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') end + # Is `self` an hexadecimal digit ? + # + # assert 'A'.is_hexdigit + # assert not 'G'.is_hexdigit + # assert 'a'.is_hexdigit + # assert not 'g'.is_hexdigit + # assert '5'.is_hexdigit + fun is_hexdigit: Bool do return (self >= '0' and self <= '9') or (self >= 'A' and self <= 'F') or + (self >= 'a' and self <= 'f') + # Returns true if the char is an alpha or a numeric digit # # assert 'a'.is_alphanumeric @@ -1655,6 +1969,19 @@ redef class Char do return self.is_numeric or self.is_alpha end + + # Returns `self` to its int value + # + # REQUIRE: `is_hexdigit` + fun from_hex: Int do + if self >= '0' and self <= '9' then return code_point - 0x30 + if self >= 'A' and self <= 'F' then return code_point - 0x37 + if self >= 'a' and self <= 'f' then return code_point - 0x57 + # Happens if self is not a hexdigit + assert self.is_hexdigit + # To make flow analysis happy + abort + end end redef class Collection[E] @@ -1695,7 +2022,11 @@ redef class Collection[E] # assert [1, 2, 3].join(":") == "1:2:3" # assert [1..3].join(":") == "1:2:3" # assert [1..3].join == "123" - fun join(separator: nullable Text): String + # + # if `last_separator` is given, then it is used to separate the last element. + # + # assert [1, 2, 3, 4].join(", ", " and ") == "1, 2, 3 and 4" + fun join(separator: nullable Text, last_separator: nullable Text): String do if is_empty then return "" @@ -1706,13 +2037,19 @@ redef class Collection[E] var e = i.item if e != null then s.append(e.to_s) + if last_separator == null then last_separator = separator + # Concat other items i.next while i.is_ok do - if separator != null then s.append(separator) e = i.item - if e != null then s.append(e.to_s) i.next + if i.is_ok then + if separator != null then s.append(separator) + else + if last_separator != null then s.append(last_separator) + end + if e != null then s.append(e.to_s) end return s.to_s end @@ -1799,7 +2136,12 @@ end # see `alpha_comparator` private class AlphaComparator super Comparator - redef fun compare(a, b) do return a.to_s <=> b.to_s + redef fun compare(a, b) do + if a == b then return 0 + if a == null then return -1 + if b == null then return 1 + return a.to_s <=> b.to_s + end end # Stateless comparator that naively use `to_s` to compare things. @@ -1819,17 +2161,49 @@ do end redef class NativeString - # Returns `self` as a new String. + # Get a `String` from the data at `self` copied into Nit memory + # + # Require: `self` is a null-terminated string. fun to_s_with_copy: String is abstract - # Returns `self` as a String of `length`. + # Get a `String` from `length` bytes at `self` + # + # The result may point to the data at `self` or + # it may make a copy in Nit controlled memory. + # This method should only be used when `self` was allocated by the Nit GC, + # or when manually controlling the deallocation of `self`. fun to_s_with_length(length: Int): String is abstract - # Returns `self` as a String with `bytelen` and `length` set + # Get a `String` from the raw `length` bytes at `self` # - # SEE: `abstract_text::Text` for more infos on the difference - # between `Text::bytelen` and `Text::length` + # The default value of `length` is the number of bytes before + # the first null character. + # + # The created `String` points to the data at `self`. + # This method should be used when `self` was allocated by the Nit GC, + # or when manually controlling the deallocation of `self`. + # + # /!\: This service does not clean the items for compliance with UTF-8, + # use only when the data has already been verified as valid UTF-8. + fun to_s_unsafe(length: nullable Int): String is abstract + + # Get a `String` from the raw `bytelen` bytes at `self` with `unilen` Unicode characters + # + # The created `String` points to the data at `self`. + # This method should be used when `self` was allocated by the Nit GC, + # or when manually controlling the deallocation of `self`. + # + # /!\: This service does not clean the items for compliance with UTF-8, + # use only when the data has already been verified as valid UTF-8. + # + # SEE: `abstract_text::Text` for more info on the difference + # between `Text::bytelen` and `Text::length`. fun to_s_full(bytelen, unilen: Int): String is abstract + + # Copies the content of `src` to `self` + # + # NOTE: `self` must be large enough to withold `self.bytelen` bytes + fun fill_from(src: Text) do src.copy_to_native(self, src.bytelen, 0, 0) end redef class NativeArray[E]