X-Git-Url: http://nitlanguage.org diff --git a/lib/core/bytes.nit b/lib/core/bytes.nit index 73802a7..f386c47 100644 --- a/lib/core/bytes.nit +++ b/lib/core/bytes.nit @@ -22,83 +22,83 @@ intrude import text::flat # Any kind of entity which can be searched for in a Sequence of Byte interface BytePattern # Return the first occurence of `self` in `b`, or -1 if not found - fun first_index_in(b: SequenceRead[Byte]): Int do return first_index_in_from(b, 0) + fun first_index_in(b: SequenceRead[Int]): Int do return first_index_in_from(b, 0) # Return the first occurence of `self` in `b` starting at `from`, or -1 if not found - fun first_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract + fun first_index_in_from(b: SequenceRead[Int], from: Int): Int is abstract # Return the last occurence of `self` in `b`, or -1 if not found - fun last_index_in(b: SequenceRead[Byte]): Int do return last_index_in_from(b, b.length - 1) + fun last_index_in(b: SequenceRead[Int]): Int do return last_index_in_from(b, b.length - 1) # Return the last occurence of `self` in `b`, or -1 if not found - fun last_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract + fun last_index_in_from(b: SequenceRead[Int], from: Int): Int is abstract # Returns the indexes of all the occurences of `self` in `b` - fun search_all_in(b: SequenceRead[Byte]): SequenceRead[Int] is abstract + fun search_all_in(b: SequenceRead[Int]): SequenceRead[Int] is abstract # Length of the pattern fun pattern_length: Int is abstract # Appends `self` to `b` - fun append_to(b: Sequence[Byte]) is abstract + fun append_to(b: Sequence[Int]) is abstract # Is `self` a prefix for `b` ? - fun is_prefix(b: SequenceRead[Byte]): Bool is abstract + fun is_prefix(b: SequenceRead[Int]): Bool is abstract # Is `self` a suffix for `b` ? - fun is_suffix(b: SequenceRead[Byte]): Bool is abstract + fun is_suffix(b: SequenceRead[Int]): Bool is abstract end -redef class Byte +redef class Int super BytePattern # Write self as a string into `ns` at position `pos` private fun add_digest_at(ns: CString, pos: Int) do - var tmp = (0xF0u8 & self) >> 4 - ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8 - tmp = 0x0Fu8 & self - ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8 + var tmp = (0xF0 & self) >> 4 + ns[pos] = if tmp >= 0x0A then tmp + 0x37 else tmp + 0x30 + tmp = 0x0F & self + ns[pos + 1] = if tmp >= 0x0A then tmp + 0x37 else tmp + 0x30 end # Is `self` a valid hexadecimal digit (in ASCII) # # ~~~nit # intrude import core::bytes - # assert not '/'.ascii.is_valid_hexdigit - # assert '0'.ascii.is_valid_hexdigit - # assert '9'.ascii.is_valid_hexdigit - # assert not ':'.ascii.is_valid_hexdigit - # assert not '@'.ascii.is_valid_hexdigit - # assert 'A'.ascii.is_valid_hexdigit - # assert 'F'.ascii.is_valid_hexdigit - # assert not 'G'.ascii.is_valid_hexdigit - # assert not '`'.ascii.is_valid_hexdigit - # assert 'a'.ascii.is_valid_hexdigit - # assert 'f'.ascii.is_valid_hexdigit - # assert not 'g'.ascii.is_valid_hexdigit + # assert not u'/'.is_valid_hexdigit + # assert u'0'.is_valid_hexdigit + # assert u'9'.is_valid_hexdigit + # assert not u':'.is_valid_hexdigit + # assert not u'@'.is_valid_hexdigit + # assert u'A'.is_valid_hexdigit + # assert u'F'.is_valid_hexdigit + # assert not u'G'.is_valid_hexdigit + # assert not u'`'.is_valid_hexdigit + # assert u'a'.is_valid_hexdigit + # assert u'f'.is_valid_hexdigit + # assert not u'g'.is_valid_hexdigit # ~~~ private fun is_valid_hexdigit: Bool do - return (self >= 0x30u8 and self <= 0x39u8) or - (self >= 0x41u8 and self <= 0x46u8) or - (self >= 0x61u8 and self <= 0x66u8) + return (self >= 0x30 and self <= 0x39) or + (self >= 0x41 and self <= 0x46) or + (self >= 0x61 and self <= 0x66) end # `self` as a hexdigit to its byte value # # ~~~nit # intrude import core::bytes - # assert 0x39u8.hexdigit_to_byteval == 0x09u8 - # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8 + # assert 0x39.hexdigit_to_byteval == 0x09 + # assert 0x43.hexdigit_to_byteval == 0x0C # ~~~ # # REQUIRE: `self.is_valid_hexdigit` - private fun hexdigit_to_byteval: Byte do - if self >= 0x30u8 and self <= 0x39u8 then - return self - 0x30u8 - else if self >= 0x41u8 and self <= 0x46u8 then - return self - 0x37u8 - else if self >= 0x61u8 and self <= 0x66u8 then - return self - 0x57u8 + private fun hexdigit_to_byteval: Int do + if self >= 0x30 and self <= 0x39 then + return self - 0x30 + else if self >= 0x41 and self <= 0x46 then + return self - 0x37 + else if self >= 0x61 and self <= 0x66 then + return self - 0x57 end # Happens only if the requirement is not met. # i.e. this abort is here to please the compiler @@ -130,20 +130,113 @@ redef class Byte redef fun append_to(b) do b.push self - # assert 'b'.ascii.is_suffix("baqsdb".to_bytes) - # assert not 'b'.ascii.is_suffix("baqsd".to_bytes) + # assert u'b'.is_suffix("baqsdb".to_bytes) + # assert not u'b'.is_suffix("baqsd".to_bytes) redef fun is_suffix(b) do return b.length != 0 and b.last == self - # assert 'b'.ascii.is_prefix("baqsdb".to_bytes) - # assert not 'b'.ascii.is_prefix("aqsdb".to_bytes) + # assert u'b'.is_prefix("baqsdb".to_bytes) + # assert not u'b'.is_prefix("aqsdb".to_bytes) redef fun is_prefix(b) do return b.length != 0 and b.first == self + + # A signed big-endian representation of `self` + # + # ~~~ + # assert 1.to_bytes.hexdigest == "01" + # assert 255.to_bytes.hexdigest == "FF" + # assert 256.to_bytes.hexdigest == "0100" + # assert 65535.to_bytes.hexdigest == "FFFF" + # assert 65536.to_bytes.hexdigest == "010000" + # ~~~ + # + # Negative values are converted to their two's complement. + # Be careful as the result can be ambiguous. + # + # ~~~ + # assert (-1).to_bytes.hexdigest == "FF" + # assert (-32).to_bytes.hexdigest == "E0" + # assert (-512).to_bytes.hexdigest == "FE00" + # assert (-65794).to_bytes.hexdigest == "FEFEFE" + # ~~~ + # + # Optionally, set `n_bytes` to the desired number of bytes in the output. + # This setting can disambiguate the result between positive and negative + # integers. Be careful with this parameter as the result may overflow. + # + # ~~~ + # assert 1.to_bytes(2).hexdigest == "0001" + # assert 65535.to_bytes(2).hexdigest == "FFFF" + # assert (-1).to_bytes(2).hexdigest == "FFFF" + # assert (-512).to_bytes(4).hexdigest == "FFFFFE00" + # assert 0x123456.to_bytes(2).hexdigest == "3456" + # ~~~ + # + # For 0, a Bytes object with single nul byte is returned (instead of an empty Bytes object). + # + # ~~~ + # assert 0.to_bytes.hexdigest == "00" + # ~~~ + # + # For positive integers, `Bytes::to_i` can reverse the operation. + # + # ~~~ + # assert 1234.to_bytes.to_i == 1234 + # ~~~ + # + # Require self >= 0 + fun to_bytes(n_bytes: nullable Int): Bytes do + + # If 0, force using at least one byte + if self == 0 and n_bytes == null then n_bytes = 1 + + # Compute the len (log256) + var len = 1 + var max = 256 + var s = self.abs + while s >= max do + len += 1 + max *= 256 + end + + # Two's complement + s = self + if self < 0 then + var ff = 0 + for j in [0..len[ do + ff *= 0x100 + ff += 0xFF + end + + s = ((-self) ^ ff) + 1 + end + + # Cut long values + if n_bytes != null and len > n_bytes then len = n_bytes + + # Allocate the buffer + var cap = n_bytes or else len + var res = new Bytes.with_capacity(cap) + + var filler = if self < 0 then 0xFF else 0 + for i in [0..cap[ do res[i] = filler + + # Fill it starting with the end + var i = cap + var sum = s + while i > cap - len do + i -= 1 + res[i] = sum % 256 + sum /= 256 + end + + return res + end end # A buffer containing Byte-manipulation facilities # # Uses Copy-On-Write when persisted class Bytes - super AbstractArray[Byte] + super AbstractArray[Int] super BytePattern # A CString being a char*, it can be used as underlying representation here. @@ -178,8 +271,8 @@ class Bytes redef fun is_empty do return length == 0 # var b = new Bytes.empty - # b.add 101u8 - # assert b[0] == 101u8 + # b.add 101 + # assert b[0] == 101 redef fun [](i) do assert i >= 0 assert i < length @@ -202,13 +295,13 @@ class Bytes fun trim: Bytes do var st = 0 while st < length do - if self[st] > 0x20u8 then break + if self[st] > 0x20 then break st += 1 end if st >= length then return new Bytes.empty var ed = length - 1 while ed > 0 do - if self[ed] > 0x20u8 then break + if self[ed] > 0x20 then break ed -= 1 end return slice(st, ed - st + 1) @@ -304,8 +397,8 @@ class Bytes var i = 0 var oi = 0 while i < length do - ns[oi] = 0x5Cu8 # b'\\' - ns[oi+1] = 0x78u8 # b'x' + ns[oi] = u'\\' + ns[oi+1] = u'x' self[i].add_digest_at(ns, oi+2) i += 1 oi += 4 @@ -328,12 +421,12 @@ class Bytes var oi = 0 while i < length do var c = self[i] - var b = 128u8 - while b > 0u8 do - if c & b == 0u8 then - ns[oi] = 0x30u8 # b'0' + var b = 128 + while b > 0 do + if c & b == 0 then + ns[oi] = u'0' else - ns[oi] = 0x31u8 # b'1' + ns[oi] = u'1' end oi += 1 b = b >> 1 @@ -394,7 +487,7 @@ class Bytes end # Two's complement is `signed` - if signed == true and not_empty and first > 0x80u8 then + if signed == true and not_empty and first > 0x80 then var ff = 0 for j in [0..length[ do ff *= 0x100 @@ -408,7 +501,7 @@ class Bytes end # var b = new Bytes.with_capacity(1) - # b[0] = 101u8 + # b[0] = 101 # assert b.to_s == "e" redef fun []=(i, v) do if persisted then regen @@ -419,7 +512,7 @@ class Bytes end # var b = new Bytes.empty - # b.add 101u8 + # b.add 101 # assert b.to_s == "e" redef fun add(c) do if persisted then regen @@ -446,7 +539,7 @@ class Bytes end # var b = new Bytes.empty - # b.append([104u8, 101u8, 108u8, 108u8, 111u8]) + # b.append([104, 101, 108, 108, 111]) # assert b.to_s == "hello" redef fun append(arr) do if arr isa Bytes then @@ -457,7 +550,7 @@ class Bytes end # var b = new Bytes.empty - # b.append([0x41u8, 0x41u8, 0x18u8]) + # b.append([0x41, 0x41, 0x18]) # b.pop # assert b.to_s == "AA" redef fun pop do @@ -556,7 +649,7 @@ class Bytes # Splits the content on self when encountering `b` # - # var a = "String is string".to_bytes.split_with('s'.ascii) + # var a = "String is string".to_bytes.split_with(u's') # assert a.length == 3 # assert a[0].hexdigest == "537472696E672069" # assert a[1].hexdigest == "20" @@ -576,7 +669,7 @@ class Bytes # Splits `self` in two parts at the first occurence of `b` # - # var a = "String is string".to_bytes.split_once_on('s'.ascii) + # var a = "String is string".to_bytes.split_once_on(u's') # assert a[0].hexdigest == "537472696E672069" # assert a[1].hexdigest == "20737472696E67" fun split_once_on(b: BytePattern): Array[Bytes] do @@ -590,7 +683,7 @@ class Bytes # Replaces all the occurences of `this` in `self` by `by` # - # var b = "String is string".to_bytes.replace(0x20u8, 0x41u8) + # var b = "String is string".to_bytes.replace(0x20, 0x41) # assert b.hexdigest == "537472696E6741697341737472696E67" fun replace(pattern: BytePattern, bytes: BytePattern): Bytes do if is_empty then return new Bytes.empty @@ -624,20 +717,20 @@ class Bytes var pos = 0 while pos < length do var b = self[pos] - if b != '%'.ascii then + if b != u'%' then tmp.add b pos += 1 continue end if length - pos < 2 then - tmp.add '%'.ascii + tmp.add u'%' pos += 1 continue end var bn = self[pos + 1] var bnn = self[pos + 2] if not bn.is_valid_hexdigit or not bnn.is_valid_hexdigit then - tmp.add '%'.ascii + tmp.add u'%' pos += 1 continue end @@ -673,7 +766,7 @@ class Bytes end private class BytesIterator - super IndexedIterator[Byte] + super IndexedIterator[Int] var tgt: CString @@ -690,107 +783,12 @@ private class BytesIterator redef fun item do return tgt[index] end -redef class Int - # A signed big-endian representation of `self` - # - # ~~~ - # assert 1.to_bytes.hexdigest == "01" - # assert 255.to_bytes.hexdigest == "FF" - # assert 256.to_bytes.hexdigest == "0100" - # assert 65535.to_bytes.hexdigest == "FFFF" - # assert 65536.to_bytes.hexdigest == "010000" - # ~~~ - # - # Negative values are converted to their two's complement. - # Be careful as the result can be ambiguous. - # - # ~~~ - # assert (-1).to_bytes.hexdigest == "FF" - # assert (-32).to_bytes.hexdigest == "E0" - # assert (-512).to_bytes.hexdigest == "FE00" - # assert (-65794).to_bytes.hexdigest == "FEFEFE" - # ~~~ - # - # Optionally, set `n_bytes` to the desired number of bytes in the output. - # This setting can disambiguate the result between positive and negative - # integers. Be careful with this parameter as the result may overflow. - # - # ~~~ - # assert 1.to_bytes(2).hexdigest == "0001" - # assert 65535.to_bytes(2).hexdigest == "FFFF" - # assert (-1).to_bytes(2).hexdigest == "FFFF" - # assert (-512).to_bytes(4).hexdigest == "FFFFFE00" - # assert 0x123456.to_bytes(2).hexdigest == "3456" - # ~~~ - # - # For 0, a Bytes object with single nul byte is returned (instead of an empty Bytes object). - # - # ~~~ - # assert 0.to_bytes.hexdigest == "00" - # ~~~ - # - # For positive integers, `Bytes::to_i` can reverse the operation. - # - # ~~~ - # assert 1234.to_bytes.to_i == 1234 - # ~~~ - # - # Require self >= 0 - fun to_bytes(n_bytes: nullable Int): Bytes do - - # If 0, force using at least one byte - if self == 0 and n_bytes == null then n_bytes = 1 - - # Compute the len (log256) - var len = 1 - var max = 256 - var s = self.abs - while s >= max do - len += 1 - max *= 256 - end - - # Two's complement - s = self - if self < 0 then - var ff = 0 - for j in [0..len[ do - ff *= 0x100 - ff += 0xFF - end - - s = ((-self) ^ ff) + 1 - end - - # Cut long values - if n_bytes != null and len > n_bytes then len = n_bytes - - # Allocate the buffer - var cap = n_bytes or else len - var res = new Bytes.with_capacity(cap) - - var filler = if self < 0 then 0xFFu8 else 0u8 - for i in [0..cap[ do res[i] = filler - - # Fill it starting with the end - var i = cap - var sum = s - while i > cap - len do - i -= 1 - res[i] = (sum % 256).to_b - sum /= 256 - end - - return res - end -end - redef class Text # Returns a mutable copy of `self`'s bytes # # ~~~nit # assert "String".to_bytes isa Bytes - # assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8] + # assert "String".to_bytes == [83, 116, 114, 105, 110, 103] # ~~~ fun to_bytes: Bytes do var b = new Bytes.with_capacity(byte_length) @@ -817,7 +815,7 @@ redef class Text # Returns a new `Bytes` instance with the digest as content # - # assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8] + # assert "0B1F4D".hexdigest_to_bytes == [0x0B, 0x1F, 0x4D] # assert "0B1F4D".hexdigest_to_bytes.hexdigest == "0B1F4D" # # Characters that are not hexadecimal digits are ignored. @@ -852,7 +850,7 @@ redef class Text var ret = new Bytes.with_capacity((dlength+1) / 2) var i = (dlength+1) % 2 # current hex digit (1=high, 0=low) - var byte = 0u8 # current accumulated byte value + var byte = 0 # current accumulated byte value pos = 0 while pos < max do @@ -864,7 +862,7 @@ redef class Text # Last digit known: store and restart ret.add byte i = 1 - byte = 0u8 + byte = 0 end end pos += 1 @@ -922,7 +920,7 @@ redef class Text else if c == 'x' or c == 'X' then var hx = substring(i + 1, 2) if hx.is_hex then - res.add(hx.to_hex.to_b) + res.add hx.to_hex else res.add_char(c) end @@ -975,23 +973,23 @@ redef class Text while pos < max do var c = b[pos] pos += 1 - if c == 0x30u8 or c == 0x31u8 then bitlen += 1 # b'0' or b'1' + if c == u'0' or c == u'1' then bitlen += 1 end # Allocate (and take care of the padding) var ret = new Bytes.with_capacity((bitlen+7) / 8) var i = (bitlen+7) % 8 # current bit (7th=128, 0th=1) - var byte = 0u8 # current accumulated byte value + var byte = 0 # current accumulated byte value pos = 0 while pos < max do var c = b[pos] pos += 1 - if c == 0x30u8 then # b'0' + if c == u'0' then byte = byte << 1 - else if c == 0x31u8 then # b'1' - byte = byte << 1 | 1u8 + else if c == u'1' then + byte = byte << 1 | 1 else continue end @@ -1001,7 +999,7 @@ redef class Text # Last bit known: store and restart ret.add byte i = 7 - byte = 0u8 + byte = 0 end end return ret @@ -1037,7 +1035,7 @@ end # Joins an array of bytes `arr` separated by `sep` # -# assert join_bytes(["String".to_bytes, "is".to_bytes, "string".to_bytes], ' '.ascii).hexdigest == "537472696E6720697320737472696E67" +# assert join_bytes(["String".to_bytes, "is".to_bytes, "string".to_bytes], u' ').hexdigest == "537472696E6720697320737472696E67" fun join_bytes(arr: Array[Bytes], sep: nullable BytePattern): Bytes do if arr.is_empty then return new Bytes.empty sep = sep or else new Bytes.empty