Merge: Added contributing guidelines and link from readme
[nit.git] / lib / base64.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2013 Alexis Laferrière <alexis.laf@xymus.net>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Offers the base 64 encoding and decoding algorithms
18 module base64
19
redef class Char
	# Does `self` belong to the Base64 alphabet ?
	#
	# Characters whose code point is 127 or above are rejected outright;
	# any other character is checked through its `Byte` value.
	fun is_base64_char: Bool do
		return code_point < 127 and ascii.is_base64_char
	end
end
27
redef class Byte
	# Does `self` belong to the Base64 alphabet ?
	#
	# The alphabet is made of `A`-`Z`, `a`-`z`, `0`-`9`, `+` and `/`.
	# The padding byte `=` is not part of it.
	fun is_base64_char: Bool do
		if self >= b'A' and self <= b'Z' then return true
		if self >= b'a' and self <= b'z' then return true
		if self >= b'0' and self <= b'9' then return true
		return self == b'+' or self == b'/'
	end

	# 6-bit value that `self` stands for in the Base64 alphabet
	#
	# `A`-`Z` map to 0-25, `a`-`z` to 26-51, `0`-`9` to 52-61,
	# `+` to 62 and `/` to 63.
	#
	# REQUIRE `self`.`is_base64_char`, aborts otherwise
	fun to_base64_char: Byte do
		# 'A' is 0x41, hence 'A' -> 0
		if self >= b'A' and self <= b'Z' then return self - 0x41u8
		# 'a' is 97, hence 'a' -> 26
		if self >= b'a' and self <= b'z' then return self - 71u8
		# '0' is 48, hence '0' -> 52
		if self >= b'0' and self <= b'9' then return self + 4u8
		if self == b'+' then return 62u8
		if self == b'/' then return 63u8
		abort
	end
end
59
redef class NativeString
	# Alphabet used by the base64 algorithm
	#
	# The byte at index `i` is the symbol that encodes the 6-bit value `i`.
	private fun base64_chars: Bytes
	do
		return b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
	end

	# Encodes the first `length` bytes of `self` to base64.
	#
	# The result is padded with `=` so that its length is a multiple of 4.
	#
	# ~~~nit
	# assert "string".encode_base64 == "c3RyaW5n"
	# ~~~
	private fun encode_base64(length: Int): Bytes do
		var base64_bytes = once base64_chars
		# Number of complete 3-byte groups, and size of the trailing partial group
		var steps = length / 3
		var bytes_in_last_step = length % 3
		var result_length = steps * 4
		if bytes_in_last_step > 0 then result_length += 4
		var result = new Bytes.with_capacity(result_length)

		# Each complete group of 3 input bytes yields 4 output symbols
		var in_off = 0
		for s in [0 .. steps[ do
			var ind = ((self[in_off] & 0b1111_1100u8) >> 2).to_i
			result.add base64_bytes[ind]
			ind = ((self[in_off] & 0b0000_0011u8) << 4).to_i | ((self[in_off + 1] & 0b1111_0000u8) >> 4).to_i
			result.add base64_bytes[ind]
			ind = ((self[in_off + 1] & 0b0000_1111u8) << 2).to_i | ((self[in_off + 2] & 0b1100_0000u8) >> 6).to_i
			result.add base64_bytes[ind]
			ind = (self[in_off + 2] & 0b0011_1111u8).to_i
			result.add base64_bytes[ind]
			in_off += 3
		end

		# Trailing group: 1 byte yields 2 symbols, 2 bytes yield 3 symbols,
		# the missing low bits being left at zero
		if bytes_in_last_step == 1 then
			result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
			result.add base64_bytes[((self[in_off] & 0b0000_0011u8) << 4).to_i]
		else if bytes_in_last_step == 2 then
			result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
			result.add base64_bytes[(((self[in_off] & 0b0000_0011u8) << 4) | ((self[in_off + 1] & 0b1111_0000u8) >> 4)).to_i]
			result.add base64_bytes[((self[in_off + 1] & 0b0000_1111u8) << 2).to_i]
		end

		# Complete the last group of 4 symbols with `=`
		var rempad = if bytes_in_last_step > 0 then 3 - bytes_in_last_step else 0
		for i in [0 .. rempad[ do result.add b'='

		return result
	end

	# Decodes the first `length` bytes of `self` from base64
	#
	# Decoding stops at the first `=`.
	# Bytes up to 0x20 (whitespace and control bytes) and bytes that are
	# not Base64 characters are skipped.
	# Final padding is optional.
	# A trailing group reduced to a single symbol does not hold a full byte
	# and is dropped.
	#
	# ~~~nit
	# assert "c3RyaW5n".decode_base64.to_s == "string"
	# assert "c3Rya\nW5n".decode_base64.to_s == "string"
	# assert "c3RyaW5nCg==".decode_base64.to_s == "string\n"
	# assert "c3RyaW5nCg".decode_base64.to_s == "string\n"
	# assert "c3RyaW5neQo=".decode_base64.to_s == "stringy\n"
	# assert "c3RyaW5neQo".decode_base64.to_s == "stringy\n"
	# ~~~
	private fun decode_base64(length: Int): Bytes do
		if length == 0 then return new Bytes.empty

		# Avoids constant unboxing
		var pad = b'='

		var result = new Bytes.with_capacity((length / 4 + 1) * 3)

		# `curr` accumulates 6 bits per symbol,
		# `cnt` is the number of symbols in the current group
		var curr = 0
		var cnt = 0
		for i in [0 .. length[ do
			var b = self[i]
			# The first padding byte marks the end of the payload
			if b == pad then break
			# Ignore whitespaces
			if b <= 0x20u8 then continue
			if not b.is_base64_char then continue
			curr <<= 6
			curr += b.to_base64_char.to_i
			cnt += 1
			if cnt == 4 then
				# 4 symbols = 24 bits = 3 bytes
				result.add(((curr & 0xFF0000) >> 16).to_b)
				result.add(((curr & 0xFF00) >> 8).to_b)
				result.add((curr & 0xFF).to_b)
				curr = 0
				cnt = 0
			end
		end

		# Flush the trailing partial group, if any.
		# The bytes following the first `=` are never read: the previous
		# implementation scanned them to count paddings it did not use and,
		# on unpadded input, started that scan at index -1.
		if cnt == 2 then
			# 12 bits read: 1 byte followed by 4 filler bits
			curr >>= 4
			result.add(curr.to_b)
		else if cnt == 3 then
			# 18 bits read: 2 bytes followed by 2 filler bits
			curr >>= 2
			result.add(((curr & 0xFF00) >> 8).to_b)
			result.add((curr & 0xFF).to_b)
		end
		return result
	end

	# Are the first `length` bytes of `self` a well-formed Base64 entity ?
	#
	# ~~~nit
	# assert "Qn03".is_base64
	# assert not "#sd=".is_base64
	# ~~~
	fun is_base64(length: Int): Bool do return check_base64(length) == null

	# Are the first `length` bytes of `self` a well-formed Base64 entity ?
	#
	# Returns `null` if so.
	# Will return an Error otherwise with info on which part is erroneous.
	#
	# Whitespaces are ignored.
	# Contrary to `decode_base64`, padding is mandatory here: the symbols of
	# the last group plus the `=` that follow must add up to 4, so an empty
	# input is reported as having an invalid padding length.
	fun check_base64(length: Int): nullable Error do
		# Number of symbols in the last (possibly complete) group, between 0 and 4
		var rlen = 0
		# Position of the first `=`, or `length` if there is none
		var opos = length
		for i in [0 .. length[ do
			if self[i] == b'=' then
				opos = i
				break
			end
			if self[i].is_whitespace then continue
			if not self[i].is_base64_char then return new Error("Invalid Base64 character at position {i}: {self[i].ascii}")
			rlen += 1
			if rlen > 4 then rlen -= 4
		end

		# Past the first `=`, only `=` and whitespaces are accepted
		var pad = 0
		for i in [opos .. length[ do
			if self[i].is_whitespace then continue
			if self[i] != b'=' then return new Error("Invalid padding character {self[i].ascii} at position {i}")
			pad += 1
		end
		if rlen + pad != 4 then return new Error("Invalid padding length")
		return null
	end
end
199
redef class Bytes

	# Encodes the content of `self` to base64.
	#
	# The result is returned as a new `Bytes`.
	fun encode_base64: Bytes do
		var native = items
		return native.encode_base64(length)
	end

	# Decodes the content of `self` from base64.
	#
	# The result is returned as a new `Bytes`.
	fun decode_base64: Bytes do
		var native = items
		return native.decode_base64(length)
	end

	# Is `self` a well-formed Base64 entity ?
	fun is_base64: Bool do
		var native = items
		return native.is_base64(length)
	end

	# Is `self` a well-formed Base64 entity ?
	#
	# Returns `null` if so, or an Error with info on which part is erroneous.
	fun check_base64: nullable Error do
		var native = items
		return native.check_base64(length)
	end
end
220
redef class Text

	# Encodes the bytes of `self` to base64.
	#
	# The result is returned as a `String`.
	fun encode_base64: String do
		var encoded = to_cstring.encode_base64(bytelen)
		return encoded.to_s
	end

	# Decodes `self` from base64.
	#
	# The result is returned as `Bytes`.
	fun decode_base64: Bytes do
		var native = to_cstring
		return native.decode_base64(bytelen)
	end

	# Is `self` a well-formed Base64 entity ?
	fun is_base64: Bool do
		var native = to_cstring
		return native.is_base64(bytelen)
	end

	# Is `self` a well-formed Base64 entity ?
	#
	# Returns `null` if so, or an Error with info on which part is erroneous.
	fun check_base64: nullable Error do
		var native = to_cstring
		return native.check_base64(bytelen)
	end
end
241
redef class FlatText
	# Same as the inherited version, but reads the bytes through `fast_cstring`
	redef fun encode_base64 do
		var encoded = fast_cstring.encode_base64(bytelen)
		return encoded.to_s
	end

	# Same as the inherited version, but reads the bytes through `fast_cstring`
	redef fun decode_base64 do
		var native = fast_cstring
		return native.decode_base64(bytelen)
	end

	# Same as the inherited version, but reads the bytes through `fast_cstring`
	redef fun is_base64 do
		var native = fast_cstring
		return native.is_base64(bytelen)
	end

	# Same as the inherited version, but reads the bytes through `fast_cstring`
	redef fun check_base64 do
		var native = fast_cstring
		return native.check_base64(bytelen)
	end
end