1cac7ac61477ec2cc323dc177c691a1860cf417a
[nit.git] / lib / core / bytes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Services for byte streams and arrays
16 module bytes
17
18 import kernel
19 import collection::array
20 intrude import text::flat
21
redef class Byte
	# Writes the two hexadecimal digits of `self` into `ns`
	#
	# The digit of the high nibble is stored at `pos` and the digit of the
	# low nibble at `pos + 1`. Values above 9 are written as uppercase
	# letters (`A` to `F`).
	private fun add_digest_at(ns: NativeString, pos: Int) do
		# '0' is 0x30; 'A' is 0x41, i.e. 0x37 + 0x0A
		var high = (self & 0xF0u8) >> 4
		if high >= 0x0Au8 then
			ns[pos] = high + 0x37u8
		else
			ns[pos] = high + 0x30u8
		end
		var low = self & 0x0Fu8
		if low >= 0x0Au8 then
			ns[pos + 1] = low + 0x37u8
		else
			ns[pos + 1] = low + 0x30u8
		end
	end

	# Is `self` the ASCII code of a hexadecimal digit?
	#
	# Accepted ranges are `0`-`9`, `A`-`F` and `a`-`f`.
	#
	# ~~~nit
	# intrude import core::bytes
	# assert not '/'.ascii.is_valid_hexdigit
	# assert '0'.ascii.is_valid_hexdigit
	# assert '9'.ascii.is_valid_hexdigit
	# assert not ':'.ascii.is_valid_hexdigit
	# assert not '@'.ascii.is_valid_hexdigit
	# assert 'A'.ascii.is_valid_hexdigit
	# assert 'F'.ascii.is_valid_hexdigit
	# assert not 'G'.ascii.is_valid_hexdigit
	# assert not '`'.ascii.is_valid_hexdigit
	# assert 'a'.ascii.is_valid_hexdigit
	# assert 'f'.ascii.is_valid_hexdigit
	# assert not 'g'.ascii.is_valid_hexdigit
	# ~~~
	private fun is_valid_hexdigit: Bool do
		# Walk the byte range upward, alternating rejected and accepted spans
		if self < 0x30u8 then return false
		if self <= 0x39u8 then return true
		if self < 0x41u8 then return false
		if self <= 0x46u8 then return true
		if self < 0x61u8 then return false
		return self <= 0x66u8
	end

	# Numeric value (0 to 15) of the hexadecimal digit whose ASCII code is `self`
	#
	# ~~~nit
	# intrude import core::bytes
	# assert 0x39u8.hexdigit_to_byteval == 0x09u8
	# assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
	# ~~~
	#
	# REQUIRE: `self.is_valid_hexdigit`
	private fun hexdigit_to_byteval: Byte do
		var is_decimal = self >= 0x30u8 and self <= 0x39u8
		if is_decimal then return self - 0x30u8
		var is_upper = self >= 0x41u8 and self <= 0x46u8
		if is_upper then return self - 0x37u8
		var is_lower = self >= 0x61u8 and self <= 0x66u8
		if is_lower then return self - 0x57u8
		# Only reached when the requirement is violated;
		# the abort also satisfies the compiler's return analysis.
		abort
	end
end
76
# A buffer containing Byte-manipulation facilities
#
# Uses Copy-On-Write when persisted: once `to_s` has produced a string that
# shares `items`, the next mutation first moves the content to a fresh buffer
# (see `regen`) so that the string is left untouched.
class Bytes
	super AbstractArray[Byte]

	# A NativeString being a char*, it can be used as underlying representation here.
	var items: NativeString

	# Number of bytes in the array
	redef var length

	# Capacity of the array
	private var capacity: Int

	# Has this buffer been persisted (to_s'd)?
	#
	# Used for Copy-On-Write
	private var persisted = false

	# Init an empty `Bytes` with no preallocated room
	#
	#     var b = new Bytes.empty
	#     assert b.to_s == ""
	init empty do
		var ns = new NativeString(0)
		init(ns, 0, 0)
	end

	# Init a `Bytes` with capacity `cap`
	init with_capacity(cap: Int) do
		var ns = new NativeString(cap)
		init(ns, 0, cap)
	end

	# Is there no byte in `self`?
	#
	#     var b = new Bytes.empty
	#     assert b.is_empty
	#     b.add 101u8
	#     assert not b.is_empty
	redef fun is_empty do return length == 0

	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b[0] == 101u8
	redef fun [](i) do
		assert i >= 0
		assert i < length
		return items[i]
	end

	# Returns self as a hexadecimal digest
	#
	# Each byte yields two uppercase hexadecimal digits.
	fun hexdigest: String do
		var elen = length * 2
		var ns = new NativeString(elen)
		var i = 0
		var oi = 0
		while i < length do
			self[i].add_digest_at(ns, oi)
			i += 1
			oi += 2
		end
		return new FlatString.full(ns, elen, 0, elen - 1, elen)
	end

	# Sets the byte at index `i` to `v`
	#
	# `i` may be equal to `length`, in which case `v` is appended.
	#
	#     var b = new Bytes.with_capacity(1)
	#     b[0] = 101u8
	#     assert b.to_s == "e"
	redef fun []=(i, v) do
		assert i >= 0
		assert i <= length
		if i == length then
			# `add` takes care of Copy-On-Write and of growing the buffer
			add(v)
			return
		end
		if persisted then regen
		items[i] = v
	end

	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b.to_s == "e"
	redef fun add(c) do
		if persisted then regen
		# Ask for one more slot than currently used: `enlarge(length)` would
		# be a no-op when `capacity == length` and the write below would
		# land past the end of `items`.
		if length >= capacity then enlarge(length + 1)
		items[length] = c
		length += 1
	end

	# Adds the UTF-8 representation of `c` to `self`
	#
	#     var b = new Bytes.empty
	#     b.add_char('A')
	#     b.add_char('キ')
	#     assert b.hexdigest == "41E382AD"
	fun add_char(c: Char) do
		if persisted then regen
		var cln = c.u8char_len
		var ln = length
		enlarge(ln + cln)
		items.set_char_at(length, c)
		length += cln
	end

	#     var b = new Bytes.empty
	#     b.append([104u8, 101u8, 108u8, 108u8, 111u8])
	#     assert b.to_s == "hello"
	redef fun append(arr) do
		if arr isa Bytes then
			# Bulk copy of the underlying buffer
			append_ns(arr.items, arr.length)
		else
			for i in arr do add i
		end
	end

	#     var b = new Bytes.empty
	#     b.append([0x41u8, 0x41u8, 0x18u8])
	#     b.pop
	#     assert b.to_s == "AA"
	redef fun pop do
		assert length >= 1
		length -= 1
		return items[length]
	end

	# Forgets the content; the allocated buffer is kept
	redef fun clear do length = 0

	# Regenerates the buffer, necessary when it was persisted
	#
	# The content is copied to a freshly allocated buffer that becomes the
	# new `items`, so later writes no longer reach the persisted string.
	private fun regen do
		var nns = new NativeString(capacity)
		items.copy_to(nns, length, 0, 0)
		items = nns
		persisted = false
	end

	# Appends the `ln` first bytes of `ns` to self
	fun append_ns(ns: NativeString, ln: Int) do
		append_ns_from(ns, ln, 0)
	end

	# Appends `ln` bytes from `ns` starting at index `from` to self
	fun append_ns_from(ns: NativeString, ln, from: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, from, length)
		length += ln
	end

	# Makes sure that at least `sz` bytes can be stored
	#
	# Does nothing when `capacity` is already large enough. Otherwise the
	# content is moved to a bigger buffer, which is not shared with any
	# string, hence the reset of `persisted`.
	redef fun enlarge(sz) do
		if capacity >= sz then return
		persisted = false
		while capacity < sz do capacity = capacity * 2 + 2
		var ns = new NativeString(capacity)
		items.copy_to(ns, length, 0, 0)
		items = ns
	end

	# Returns the content of `self` as a String
	#
	# When the returned string is backed by `items` itself, `self` is
	# flagged as persisted so that the next mutation works on a copy.
	redef fun to_s do
		var r: String = items.to_s_with_length(length)
		# Compare the backing buffers: a String and a NativeString are
		# never equal, so comparing `r` itself with `items` would always
		# disable the Copy-On-Write.
		persisted = r isa FlatText and r.items == items
		return r
	end

	redef fun iterator do return new BytesIterator.with_buffer(self)

end
241
# Iterator over the bytes of a `Bytes`
#
# The buffer and the length are those seen when the iterator is created.
private class BytesIterator
	super IndexedIterator[Byte]

	# Buffer the bytes are read from
	var tgt: NativeString

	# Position of the current byte in `tgt`
	redef var index

	# Number of bytes to iterate on
	var max: Int

	# Starts an iteration at the first byte of `b`
	init with_buffer(b: Bytes) do
		init(b.items, 0, b.length)
	end

	redef fun is_ok do
		return max > index
	end

	redef fun next do
		index = index + 1
	end

	redef fun item do
		return tgt[index]
	end
end
259
redef class Text
	# Returns a mutable copy of `self`'s bytes
	#
	# ~~~nit
	# assert "String".to_bytes isa Bytes
	# assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
	# ~~~
	fun to_bytes: Bytes do
		var res = new Bytes.with_capacity(bytelen)
		append_to_bytes(res)
		return res
	end

	# Is `self` a valid hexdigest ?
	#
	# True when every byte of `self` is an ASCII hexadecimal digit.
	#
	#     assert "0B1d3F".is_valid_hexdigest
	#     assert not "5G".is_valid_hexdigest
	fun is_valid_hexdigest: Bool do
		for digit in bytes do
			if not digit.is_valid_hexdigit then return false
		end
		return true
	end

	# Appends `self.bytes` to `b`
	#
	# The bytes are copied substring by substring.
	fun append_to_bytes(b: Bytes) do
		for s in substrings do
			# Only a `FlatString` exposes the offset of its first byte in `items`
			var from = 0
			if s isa FlatString then from = s.first_byte
			b.append_ns_from(s.items, s.bytelen, from)
		end
	end

	# Returns a new `Bytes` instance with the digest as content
	#
	#     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
	#
	# REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0
	fun hexdigest_to_bytes: Bytes do
		var src = bytes
		var n = bytelen
		var res = new Bytes.with_capacity(n / 2)
		var pos = 0
		while pos < n do
			# Two consecutive digits make one byte: high nibble first
			var high = src[pos].hexdigit_to_byteval << 4
			var low = src[pos + 1].hexdigit_to_byteval
			res.add(high | low)
			pos += 2
		end
		return res
	end

	# Gets the hexdigest of the bytes of `self`
	#
	#     assert "&lt;STRING&#47;&rt;".hexdigest == "266C743B535452494E47262334373B2672743B"
	fun hexdigest: String do
		var ln = bytelen
		var outlen = ln * 2
		var outns = new NativeString(outlen)
		var i = 0
		while i < ln do
			# Byte `i` fills the output slots `2 * i` and `2 * i + 1`
			bytes[i].add_digest_at(outns, i * 2)
			i += 1
		end
		return new FlatString.with_infos(outns, outlen, 0, outlen - 1)
	end
end
322
redef class FlatText
	# Copies the `bytelen` bytes of `self` to `b` in a single operation
	redef fun append_to_bytes(b) do
		# The content of a `FlatString` begins at `first_byte` in `items`;
		# any other flat text is read from index 0.
		var from = 0
		if self isa FlatString then from = first_byte
		b.append_ns_from(items, bytelen, from)
	end
end
329
redef class NativeString
	# Creates a new `Bytes` object from `self` with `len` as length
	#
	# The returned object uses `self` as its buffer: nothing is copied here.
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes(len: nullable Int): Bytes do
		var size: Int
		if len != null then
			size = len
		else
			size = cstring_length
		end
		return new Bytes(self, size, size)
	end

	# Creates a new `Bytes` object from a copy of `self` with `len` as length
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes_with_copy(len: nullable Int): Bytes do
		var size: Int
		if len != null then
			size = len
		else
			size = cstring_length
		end
		# Duplicate the `size` first bytes in a buffer owned by the result
		var dup = new NativeString(size)
		copy_to(dup, size, 0, 0)
		return new Bytes(dup, size, size)
	end
end