lib/core: Added validation method for hexdigest
[nit.git] / lib / core / bytes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Services for byte streams and arrays
16 module bytes
17
18 import kernel
19 import collection::array
20 intrude import text::flat
21
redef class Byte
	# Does `self` hold the ASCII code of a hexadecimal digit?
	#
	# Accepted codes are those of `0`-`9`, `A`-`F` and `a`-`f`.
	#
	# ~~~nit
	# intrude import core::bytes
	# assert not '/'.ascii.to_b.is_valid_hexdigit
	# assert '0'.ascii.to_b.is_valid_hexdigit
	# assert '9'.ascii.to_b.is_valid_hexdigit
	# assert not ':'.ascii.to_b.is_valid_hexdigit
	# assert not '@'.ascii.to_b.is_valid_hexdigit
	# assert 'A'.ascii.to_b.is_valid_hexdigit
	# assert 'F'.ascii.to_b.is_valid_hexdigit
	# assert not 'G'.ascii.to_b.is_valid_hexdigit
	# assert not '`'.ascii.to_b.is_valid_hexdigit
	# assert 'a'.ascii.to_b.is_valid_hexdigit
	# assert 'f'.ascii.to_b.is_valid_hexdigit
	# assert not 'g'.ascii.to_b.is_valid_hexdigit
	# ~~~
	private fun is_valid_hexdigit: Bool do
		# 0x30 .. 0x39 is '0' .. '9'
		if self >= 0x30u8 and self <= 0x39u8 then return true
		# 0x41 .. 0x46 is 'A' .. 'F'
		if self >= 0x41u8 and self <= 0x46u8 then return true
		# 0x61 .. 0x66 is 'a' .. 'f'
		return self >= 0x61u8 and self <= 0x66u8
	end
end
46
# A buffer containing Byte-manipulation facilities
#
# Uses Copy-On-Write when persisted
class Bytes
	super AbstractArray[Byte]

	# A NativeString being a char*, it can be used as underlying representation here.
	private var items: NativeString

	# Number of bytes in the array
	redef var length

	# Capacity of the array
	private var capacity: Int

	# Has this buffer been persisted (to_s'd)?
	#
	# Used for Copy-On-Write
	private var persisted = false

	# Init an empty `Bytes` with a capacity of 0
	#
	#     var b = new Bytes.empty
	#     assert b.to_s == ""
	init empty do
		var ns = new NativeString(0)
		init(ns, 0, 0)
	end

	# Init a `Bytes` with capacity `cap`
	init with_capacity(cap: Int) do
		var ns = new NativeString(cap)
		init(ns, 0, cap)
	end

	# Does `self` contain no byte at all?
	#
	#     var b = new Bytes.empty
	#     assert b.is_empty
	#     b.add 101u8
	#     assert not b.is_empty
	redef fun is_empty do return length == 0

	# Returns the byte at index `i`
	#
	# `i` must be within `[0, length[`.
	#
	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b[0] == 101u8
	redef fun [](i) do
		assert i >= 0
		assert i < length
		return items[i]
	end

	# Sets the byte at index `i` to `v`
	#
	# `i` must be within `[0, length]`; when `i == length`, `v` is appended.
	#
	#     var b = new Bytes.with_capacity(1)
	#     b[0] = 101u8
	#     assert b.to_s == "e"
	redef fun []=(i, v) do
		if persisted then regen
		assert i >= 0
		assert i <= length
		if i == length then
			# `add` takes care of growing the storage and of `length`
			add(v)
		else
			items[i] = v
		end
	end

	# Appends the byte `c` at the end of `self`
	#
	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b.to_s == "e"
	redef fun add(c) do
		if persisted then regen
		if length >= capacity then
			# Room for one more byte is needed: asking `enlarge` for
			# only `length` would be a no-op when the buffer is full.
			enlarge(length + 1)
		end
		items[length] = c
		length += 1
	end

	# Appends every byte of `arr` at the end of `self`
	#
	#     var b = new Bytes.empty
	#     b.append([104u8, 101u8, 108u8, 108u8, 111u8])
	#     assert b.to_s == "hello"
	redef fun append(arr) do
		if arr isa Bytes then
			# Another `Bytes`: copy its storage in one go
			append_ns(arr.items, arr.length)
		else
			for i in arr do add i
		end
	end

	# Removes and returns the last byte of `self`
	#
	# `self` must not be empty.
	#
	#     var b = new Bytes.empty
	#     b.append([0x41u8, 0x41u8, 0x18u8])
	#     b.pop
	#     assert b.to_s == "AA"
	redef fun pop do
		assert length >= 1
		length -= 1
		return items[length]
	end

	# Drops all the bytes of `self` (the storage is kept as is)
	redef fun clear do length = 0

	# Regenerates the buffer, necessary when it was persisted
	#
	# The current content is copied to a fresh `NativeString` which becomes
	# the new storage, so that later writes no longer touch the old one.
	private fun regen do
		var nns = new NativeString(capacity)
		items.copy_to(nns, length, 0, 0)
		items = nns
		persisted = false
	end

	# Appends the `ln` first bytes of `ns` to self
	fun append_ns(ns: NativeString, ln: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, 0, length)
		length += ln
	end

	# Appends `ln` bytes from `ns` starting at index `from` to self
	fun append_ns_from(ns: NativeString, ln, from: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, from, length)
		length += ln
	end

	# Grows the storage so that `capacity >= sz`
	#
	# Does nothing when the capacity is already sufficient.
	# Otherwise the content is moved to a newly allocated storage,
	# which also means that `self` is no longer persisted.
	redef fun enlarge(sz) do
		if capacity >= sz then return
		persisted = false
		while capacity < sz do capacity = capacity * 2 + 2
		var ns = new NativeString(capacity)
		items.copy_to(ns, length, 0, 0)
		items = ns
	end

	# Returns the content of `self` as a String
	#
	# When the content is valid UTF-8 (see `is_utf8`), the String is built
	# on the storage of `self`, which is then marked as persisted.
	# Otherwise the String is built on a cleaned copy (see `clean_utf8`)
	# and `self` is left unpersisted.
	redef fun to_s do
		persisted = true
		var b = self
		if not is_utf8 then
			b = clean_utf8
			# The String uses the storage of the copy, not ours
			persisted = false
		end
		return new FlatString.with_infos(b.items, b.length, 0, b.length - 1)
	end

	redef fun iterator do return new BytesIterator.with_buffer(self)

	# Is the byte collection valid UTF-8 ?
	fun is_utf8: Bool do
		# (mask, expected value) pairs for the first byte of a sequence
		# of 1, 2, 3 and 4 bytes respectively
		var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
		# Lowest and highest code point accepted for each sequence length
		var lobounds = once [0, 0x80, 0x800, 0x10000]
		var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
		var pos = 0
		var len = length
		var mits = items
		while pos < len do
			var nxst = mits.length_of_char_at(pos)
			var charst_index = (nxst - 1) * 2
			if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
				var c = mits.char_at(pos)
				var cp = c.ascii
				if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
					# 0xD800 .. 0xDFFF, 0xFFFE and 0xFFFF are refused
					if cp >= 0xD800 and cp <= 0xDFFF or
					   cp == 0xFFFE or cp == 0xFFFF then return false
				else
					return false
				end
			else
				return false
			end
			pos += nxst
		end
		return true
	end

	# Cleans the bytes of `self` to be UTF-8 compliant
	#
	# Returns a new `Bytes`, `self` is left untouched.
	# Each byte that starts a sequence refused by the checks of `is_utf8`
	# is replaced by the bytes `0xEF 0xBF 0xBD` (U+FFFD in UTF-8), then the
	# scan resumes at the very next byte.
	private fun clean_utf8: Bytes do
		# (mask, expected value) pairs for the first byte of a sequence
		# of 1, 2, 3 and 4 bytes respectively
		var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
		var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
		# Lowest and highest code point accepted for each sequence length
		var lobounds = once [0, 0x80, 0x800, 0x10000]
		var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
		var pos = 0
		var len = length
		var ret = new Bytes.with_capacity(len)
		var mits = items
		while pos < len do
			var nxst = mits.length_of_char_at(pos)
			var charst_index = (nxst - 1) * 2
			if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
				var c = mits.char_at(pos)
				var cp = c.ascii
				if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
					if cp >= 0xD800 and cp <= 0xDFFF or
					   cp == 0xFFFE or cp == 0xFFFF then
						ret.append badchar
						pos += 1
					else
						# Valid sequence: copy its bytes verbatim
						var pend = pos + nxst
						for i in [pos .. pend[ do ret.add mits[i]
						pos += nxst
					end
				else
					ret.append badchar
					pos += 1
				end
			else
				ret.append badchar
				pos += 1
			end
		end
		return ret
	end
end
249
# Iterator over the bytes of a `Bytes`
private class BytesIterator
	super IndexedIterator[Byte]

	# Storage the bytes are read from
	var tgt: NativeString

	redef var index

	# Index of the last byte to yield (inclusive)
	var max: Int

	# Iterates over all the bytes of `b`, from index 0 to `b.length - 1`
	init with_buffer(b: Bytes) do init(b.items, 0, b.length - 1)

	# `max` is inclusive, hence `<=`: the last byte must be yielded too
	redef fun is_ok do return index <= max

	redef fun next do index += 1

	redef fun item do return tgt[index]
end
267
redef class Text
	# Copies the bytes of `self` into a newly created, mutable `Bytes`
	#
	# ~~~nit
	# assert "String".to_bytes isa Bytes
	# assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
	# ~~~
	fun to_bytes: Bytes do
		var res = new Bytes.with_capacity(bytelen)
		append_to_bytes(res)
		return res
	end

	# Is every byte of `self` a hexadecimal digit?
	#
	#     assert "0B1d3F".is_valid_hexdigest
	#     assert not "5G".is_valid_hexdigest
	fun is_valid_hexdigest: Bool do
		for digit in bytes do
			if digit.is_valid_hexdigit then continue
			return false
		end
		return true
	end

	# Appends `self.bytes` to `b`
	#
	# The bytes are taken substring by substring.
	fun append_to_bytes(b: Bytes) do
		for chunk in substrings do
			# Only a `FlatString` may start past the beginning of its `items`
			var start = 0
			if chunk isa FlatString then start = chunk.first_byte
			b.append_ns_from(chunk.items, chunk.bytelen, start)
		end
	end
end
298
redef class FlatText
	# Appends the bytes of `self` to `b` with a single `append_ns_from` call on `items`
	redef fun append_to_bytes(b) do
		# Only a `FlatString` may start past the beginning of its `items`
		var start = 0
		if self isa FlatString then start = first_byte
		b.append_ns_from(items, bytelen, start)
	end
end
305
redef class NativeString
	# Wraps `self` in a new `Bytes` whose length and capacity are both `cstring_length`
	#
	# No copy is made here: `self` is handed over as the storage of the result.
	fun to_bytes: Bytes do
		var size = cstring_length
		return new Bytes(self, size, size)
	end
end
312 end