X-Git-Url: http://nitlanguage.org
diff --git a/lib/base64.nit b/lib/base64.nit
index 2d941e9..26e1207 100644
--- a/lib/base64.nit
+++ b/lib/base64.nit
@@ -17,120 +17,226 @@
 # Offers the base 64 encoding and decoding algorithms
 module base64
 
-redef class String
+redef class Char
+	# Is `self` a valid Base64 character ?
+	fun is_base64_char: Bool do
+		if code_point >= 127 then return false
+		return ascii.is_base64_char
+	end
+end
-
-	# Alphabet used by the base64 algorithm
-	private fun base64_chars : String
-	do
-		return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+redef class Byte
+	# Is `self` a valid Base64 character ?
+	fun is_base64_char: Bool do
+		if self == b'+' then return true
+		if self == b'/' then return true
+		if self > b'Z' then
+			if self < b'a' then return false
+			if self <= b'z' then return true
+			return false
+		end
+		if self >= b'A' then return true
+		if self <= b'9' and self >= b'0' then return true
+		return false
 	end
-	private fun inverted_base64_chars : HashMap[Char,Int]
-	do
-		var inv_base64_chars = new HashMap[Char,Int]
-		for k in [0..base64_chars.length[ do
-			inv_base64_chars[ base64_chars.chars[k] ] = k
+
+	# Returns the `base64` equivalent of `self`
+	#
+	# REQUIRE `self`.`is_base64_char`
+	fun to_base64_char: Byte do
+		if self == b'+' then return 62u8
+		if self == b'/' then return 63u8
+		if self > b'Z' then
+			if self < b'a' then abort
+			if self <= b'z' then return self - 71u8
+			abort
 		end
-		return inv_base64_chars
+		if self >= b'A' then return self - 0x41u8
+		if self <= b'9' and self >= b'0' then return self + 4u8
+		abort
 	end
+end
 
-	# Encodes the receiver string to base64.
-	# By default, uses "=" for padding.
-	fun encode_base64 : String do return encode_base64_custom_padding( '=' )
-	fun encode_base64_custom_padding( padding : Char ) : String
+redef class CString
+	# Alphabet used by the base64 algorithm
+	private fun base64_chars : Bytes
 	do
-		var base64_chars = once base64_chars
-		var length = length
+		return b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+	end
 
+	# Encodes the first `length` bytes of `self` to base64.
+	#
+	# The output is padded with `=` up to a multiple of 4 bytes.
+	#
+	#     assert "string".encode_base64 == "c3RyaW5n"
+	private fun encode_base64(length: Int): Bytes do
+		var base64_bytes = once base64_chars
 		var steps = length / 3
-		var chars_in_last_step = length % 3
-		var result_length = steps*4
-		if chars_in_last_step > 0 then result_length += 4
-		var result = (padding.to_s*result_length).to_cstring
+		var bytes_in_last_step = length % 3
+		var result_length = steps * 4
+		if bytes_in_last_step > 0 then result_length += 4
+		var result = new Bytes.with_capacity(result_length)
+
+		var in_off = 0
+		for s in [0 .. steps[ do
+			var ind = ((self[in_off] & 0b1111_1100u8) >> 2).to_i
+			result.add base64_bytes[ind]
+			ind = ((self[in_off] & 0b0000_0011u8) << 4).to_i | ((self[in_off + 1] & 0b1111_0000u8) >> 4).to_i
+			result.add base64_bytes[ind]
+			ind = ((self[in_off + 1] & 0b0000_1111u8) << 2).to_i | ((self[in_off + 2] & 0b1100_0000u8) >> 6).to_i
+			result.add base64_bytes[ind]
+			ind = (self[in_off + 2] & 0b0011_1111u8).to_i
+			result.add base64_bytes[ind]
+			in_off += 3
+		end
+		if bytes_in_last_step == 1 then
+			result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
+			result.add base64_bytes[((self[in_off] & 0b0000_0011u8) << 4).to_i]
+		else if bytes_in_last_step == 2 then
+			result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
+			result.add base64_bytes[(((self[in_off] & 0b0000_0011u8) << 4) | ((self[in_off + 1] & 0b1111_0000u8) >> 4)).to_i]
+			result.add base64_bytes[((self[in_off + 1] & 0b0000_1111u8) << 2).to_i]
+		end
+		var rempad = if bytes_in_last_step > 0 then 3 - bytes_in_last_step else 0
+		for i in [0 .. rempad[ do result.add b'='
 
-		var mask_6bit = 63
+		return result
+	end
 
-		for s in [0..steps[ do
-			var e = 0
-			for ss in [0..3[ do
-				e += self.chars[s*3+ss].ascii.lshift((2-ss)*8)
+	# Decodes the first `length` bytes of `self` from base64
+	#
+	#     assert "c3RyaW5n".decode_base64.to_s == "string"
+	#     assert "c3Rya\nW5n".decode_base64.to_s == "string"
+	#     assert "c3RyaW5nCg==".decode_base64.to_s == "string\n"
+	#     assert "c3RyaW5nCg".decode_base64.to_s == "string\n"
+	#     assert "c3RyaW5neQo=".decode_base64.to_s == "stringy\n"
+	#     assert "c3RyaW5neQo".decode_base64.to_s == "stringy\n"
+	#
+	private fun decode_base64(length: Int): Bytes do
+		if length == 0 then return new Bytes.empty
+
+		# Avoids constant unboxing
+		var pad = b'='
+
+		var result = new Bytes.with_capacity((length / 4 + 1) * 3)
+
+		var curr = 0
+		var cnt = 0
+		for i in [0 .. length[ do
+			var b = self[i]
+			if b == pad then
+				# Padding ends the data: nothing after it is decoded
+				break
 			end
-			for ss in [0..4[ do
-				result[s*4+3-ss] = base64_chars.chars[ e.rshift(ss*6).bin_and( mask_6bit ) ]
+			# Ignore whitespaces
+			if b <= 0x20u8 then continue
+			if not b.is_base64_char then continue
+			curr <<= 6
+			curr += b.to_base64_char.to_i
+			cnt += 1
+			if cnt == 4 then
+				result.add(((curr & 0xFF0000) >> 16).to_b)
+				result.add(((curr & 0xFF00) >> 8).to_b)
+				result.add((curr & 0xFF).to_b)
+				curr = 0
+				cnt = 0
 			end
 		end
-
-		if chars_in_last_step == 1 then
-			var e = self.chars[length-1].ascii.lshift(16)
-			for ss in [0..2[ do
-				result[steps*4+1-ss] = base64_chars.chars[ e.rshift((ss+2)*6).bin_and( mask_6bit ) ]
-			end
-		else if chars_in_last_step == 2 then
-			var e = self.chars[length-2].ascii.lshift(16) +
-				self.chars[length-1].ascii.lshift(8)
-			for ss in [0..3[ do
-				result[steps*4+2-ss] = base64_chars.chars[ e.rshift((ss+1)*6).bin_and( mask_6bit ) ]
-			end
+		# Flush the trailing partial group: 2 sextets hold 1 byte, 3 sextets hold 2 bytes.
+		# A lone leftover sextet (`cnt == 1`) holds no complete byte and is dropped.
+		if cnt == 2 then
+			curr >>= 4
+			result.add(curr.to_b)
+		else if cnt == 3 then
+			curr >>= 2
+			result.add(((curr & 0xFF00) >> 8).to_b)
+			result.add((curr & 0xFF).to_b)
 		end
-
-		return result.to_s
+		return result
 	end
 
-	# Decodes the receiver string from base64.
-	# By default, uses "=" for padding.
-	fun decode_base64 : String do return decode_base64_custom_padding( '=' )
-	fun decode_base64_custom_padding( padding : Char ) : String
-	do
-		var inverted_base64_chars = once inverted_base64_chars
-		var length = length
-		assert length % 4 == 0 else print "base64::decode_base64 only supports strings of length multiple of 4"
-
-		var steps = length / 4
-		var result_length = steps*3
-
-		var padding_begin = self.search(padding)
-		var padding_count : Int
-		if padding_begin == null then
-			padding_count = 0
-		else
-			padding_count = length - padding_begin.from
-			steps -= 1
-			result_length -= padding_count
+	# Is `self` a well-formed Base64 entity ?
+	#
+	# ~~~nit
+	# assert "Qn03".is_base64
+	# assert not "#sd=".is_base64
+	# ~~~
+	fun is_base64(length: Int): Bool do return check_base64(length) == null
+
+	# Is `self` a well-formed Base64 entity ?
+	#
+	# Will return an Error otherwise with info on which part is erroneous.
+	fun check_base64(length: Int): nullable Error do
+		var rlen = 0
+		var opos = length
+		for i in [0 .. length[ do
+			if self[i] == b'=' then
+				opos = i
+				break
+			end
+			if self[i].is_whitespace then continue
+			if not self[i].is_base64_char then return new Error("Invalid Base64 character at position {i}: {self[i].ascii}")
+			rlen += 1
+			if rlen > 4 then rlen -= 4
 		end
+		var pad = 0
+		for i in [opos .. length[ do
+			if self[i].is_whitespace then continue
+			if self[i] != b'=' then return new Error("Invalid padding character {self[i].ascii} at position {i}")
+			pad += 1
+		end
+		if rlen + pad != 4 then return new Error("Invalid padding length")
+		return null
+	end
+end
 
-		var result = ("#"*result_length).to_cstring
+redef class Bytes
 
-		var mask_8bit = 255
+	# Encodes the receiver to base64, padding the output with `=`.
+	#
+	# The result is returned as raw `Bytes`.
+	fun encode_base64: Bytes do return items.encode_base64(length)
 
-		for s in [0..steps[ do
-			var e = 0
-			for ss in [0..4[ do
-				e += inverted_base64_chars[self.chars[s*4+ss]].lshift((3-ss)*6)
-			end
+	# Decodes the receiver from base64.
+	#
+	# Decoding stops at the first `=`; whitespace and other non-Base64 bytes are skipped.
+	fun decode_base64: Bytes do return items.decode_base64(length)
 
-			for ss in [0..3[ do
-				result[s*3+ss] = e.rshift((2-ss)*8).bin_and( mask_8bit ).ascii
-			end
-		end
+	# Is `self` a well-formed Base64 entity ?
+	fun is_base64: Bool do return items.is_base64(length)
 
-		var s = steps
-		if padding_count == 1 then
-			var e = 0
-			for ss in [0..3[ do
-				e += inverted_base64_chars[self.chars[s*4+ss]].lshift((3-ss)*6)
-			end
+	# Is `self` a well-formed Base64 entity ?
+	#
+	# Will return an Error otherwise with info on which part is erroneous.
+	fun check_base64: nullable Error do return items.check_base64(length)
+end
 
-			for ss in [0..2[ do
-				result[s*3+ss] = e.rshift((2-ss)*8).bin_and( mask_8bit ).ascii
-			end
-		else if padding_count == 2 then
-			var e = 0
-			for ss in [0..2[ do
-				e += inverted_base64_chars[self.chars[s*4+ss]].lshift((3-ss)*6)
-			end
+redef class Text
 
-			result[s*3] = e.rshift(2*8).bin_and( mask_8bit ).ascii
-		end
+	# Encodes the receiver to base64, padding the output with `=`.
+	#
+	# The result is returned as a `String`.
+	fun encode_base64: String do return to_cstring.encode_base64(byte_length).to_s
 
-		return result.to_s
-	end
+	# Decodes the receiver from base64.
+	#
+	# Decoding stops at the first `=`; whitespace and other non-Base64 bytes are skipped.
+	fun decode_base64: Bytes do return to_cstring.decode_base64(byte_length)
+
+	# Is `self` a well-formed Base64 entity ?
+	fun is_base64: Bool do return to_cstring.is_base64(byte_length)
+
+	# Is `self` a well-formed Base64 entity ?
+	#
+	# Will return an Error otherwise with info on which part is erroneous.
+	fun check_base64: nullable Error do return to_cstring.check_base64(byte_length)
+end
+
+redef class FlatText
+	redef fun encode_base64 do return fast_cstring.encode_base64(byte_length).to_s
+
+	redef fun decode_base64 do return fast_cstring.decode_base64(byte_length)
+
+	redef fun is_base64 do return fast_cstring.is_base64(byte_length)
+
+	redef fun check_base64 do return fast_cstring.check_base64(byte_length)
 end