227759a52d41e55bb22215fa52258883d68cc1da
[nit.git] / lib / core / bytes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Services for byte streams and arrays
16 module bytes
17
18 import kernel
19 import collection::array
20 intrude import text::flat
21
redef class Byte
	# Stores the two uppercase hexadecimal ASCII digits of `self` in `ns`
	#
	# The digit for the high nibble goes at `pos`, the one for the low
	# nibble at `pos + 1`.
	private fun add_digest_at(ns: NativeString, pos: Int) do
		# 0x30 is '0'; 0x37 is 'A' - 10, used for the values 10 to 15.
		var high = (self & 0xF0u8) >> 4
		if high < 0x0Au8 then
			ns[pos] = high + 0x30u8
		else
			ns[pos] = high + 0x37u8
		end
		var low = self & 0x0Fu8
		if low < 0x0Au8 then
			ns[pos + 1] = low + 0x30u8
		else
			ns[pos + 1] = low + 0x37u8
		end
	end

	# Is `self` the ASCII code of a hexadecimal digit (`0-9`, `A-F` or `a-f`)?
	#
	# ~~~nit
	# intrude import core::bytes
	# assert not '/'.ascii.is_valid_hexdigit
	# assert '0'.ascii.is_valid_hexdigit
	# assert '9'.ascii.is_valid_hexdigit
	# assert not ':'.ascii.is_valid_hexdigit
	# assert not '@'.ascii.is_valid_hexdigit
	# assert 'A'.ascii.is_valid_hexdigit
	# assert 'F'.ascii.is_valid_hexdigit
	# assert not 'G'.ascii.is_valid_hexdigit
	# assert not '`'.ascii.is_valid_hexdigit
	# assert 'a'.ascii.is_valid_hexdigit
	# assert 'f'.ascii.is_valid_hexdigit
	# assert not 'g'.ascii.is_valid_hexdigit
	# ~~~
	private fun is_valid_hexdigit: Bool do
		# '0'..'9'
		if self >= 0x30u8 and self <= 0x39u8 then return true
		# 'A'..'F'
		if self >= 0x41u8 and self <= 0x46u8 then return true
		# 'a'..'f'
		return self >= 0x61u8 and self <= 0x66u8
	end

	# Numeric value (0 to 15) of the hexadecimal digit whose ASCII code is `self`
	#
	# ~~~nit
	# intrude import core::bytes
	# assert 0x39u8.hexdigit_to_byteval == 0x09u8
	# assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
	# ~~~
	#
	# REQUIRE: `self.is_valid_hexdigit`
	private fun hexdigit_to_byteval: Byte do
		# '0'..'9'
		if self >= 0x30u8 and self <= 0x39u8 then return self - 0x30u8
		# 'A'..'F'
		if self >= 0x41u8 and self <= 0x46u8 then return self - 0x37u8
		# 'a'..'f'
		if self >= 0x61u8 and self <= 0x66u8 then return self - 0x57u8
		# Only reached when the requirement is violated;
		# the abort also satisfies the compiler's return analysis.
		abort
	end
end
76
# A buffer containing Byte-manipulation facilities
#
# Uses Copy-On-Write when persisted: after `to_s` has shared `items` with a
# string, the next in-place mutation first moves `self` to a private copy.
class Bytes
	super AbstractArray[Byte]

	# A NativeString being a char*, it can be used as underlying representation here.
	var items: NativeString

	# Number of bytes in the array
	redef var length

	# Capacity of the array
	private var capacity: Int

	# Has this buffer been persisted (to_s'd)?
	#
	# Used for Copy-On-Write
	private var persisted = false

	# Creates a buffer with no content and no capacity
	#
	#     var b = new Bytes.empty
	#     assert b.to_s == ""
	init empty do
		var ns = new NativeString(0)
		init(ns, 0, 0)
	end

	# Init a `Bytes` with capacity `cap`
	init with_capacity(cap: Int) do
		var ns = new NativeString(cap)
		init(ns, 0, cap)
	end

	# Is there no byte in `self`?
	#
	#     var b = new Bytes.empty
	#     assert b.is_empty
	#     b.add 101u8
	#     assert not b.is_empty
	redef fun is_empty do return length == 0

	# Byte at index `i`
	#
	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b[0] == 101u8
	#
	# REQUIRE: `i >= 0 and i < length`
	redef fun [](i) do
		assert i >= 0
		assert i < length
		return items[i]
	end

	# Returns self as a hexadecimal digest
	#
	# Each byte yields two uppercase hexadecimal digits.
	fun hexdigest: String do
		var elen = length * 2
		var ns = new NativeString(elen)
		var i = 0
		var oi = 0
		while i < length do
			self[i].add_digest_at(ns, oi)
			i += 1
			oi += 2
		end
		return new FlatString.full(ns, elen, 0, elen - 1, elen)
	end

	# Sets the byte at index `i` to `v`; `i == length` appends `v`
	#
	#     var b = new Bytes.with_capacity(1)
	#     b[0] = 101u8
	#     assert b.to_s == "e"
	#
	# REQUIRE: `i >= 0 and i <= length`
	redef fun []=(i, v) do
		if persisted then regen
		assert i >= 0
		assert i <= length
		if i == length then
			# `add` grows the buffer if needed and stores `v` itself.
			add(v)
			return
		end
		items[i] = v
	end

	# Appends the byte `c` at the end of `self`
	#
	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b.to_s == "e"
	redef fun add(c) do
		if persisted then regen
		# Room is needed for one byte more than the current content:
		# `enlarge(length)` would be a no-op whenever `capacity == length`.
		if length >= capacity then enlarge(length + 1)
		items[length] = c
		length += 1
	end

	# Appends every byte of `arr` to `self`
	#
	#     var b = new Bytes.empty
	#     b.append([104u8, 101u8, 108u8, 108u8, 111u8])
	#     assert b.to_s == "hello"
	redef fun append(arr) do
		if arr isa Bytes then
			# Another `Bytes` can be copied in one native block copy.
			append_ns(arr.items, arr.length)
		else
			for i in arr do add i
		end
	end

	# Removes and returns the last byte
	#
	#     var b = new Bytes.empty
	#     b.append([0x41u8, 0x41u8, 0x18u8])
	#     b.pop
	#     assert b.to_s == "AA"
	#
	# REQUIRE: `length >= 1`
	redef fun pop do
		assert length >= 1
		length -= 1
		return items[length]
	end

	# Drops the content of `self`; the native buffer and capacity are kept
	redef fun clear do length = 0

	# Regenerates the buffer, necessary when it was persisted
	#
	# The current content is copied into a freshly allocated buffer that
	# replaces `items`, so later writes no longer touch the persisted one.
	private fun regen do
		var nns = new NativeString(capacity)
		items.copy_to(nns, length, 0, 0)
		items = nns
		persisted = false
	end

	# Appends the `ln` first bytes of `ns` to self
	fun append_ns(ns: NativeString, ln: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, 0, length)
		length += ln
	end

	# Appends `ln` bytes from `ns` starting at index `from` to self
	fun append_ns_from(ns: NativeString, ln, from: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, from, length)
		length += ln
	end

	# Ensures that `capacity` is at least `sz`
	#
	# Does nothing when the capacity is already sufficient. Otherwise the
	# content is moved to a new, larger buffer.
	redef fun enlarge(sz) do
		if capacity >= sz then return
		# The buffer allocated below is private to `self`.
		persisted = false
		while capacity < sz do capacity = capacity * 2 + 2
		var ns = new NativeString(capacity)
		items.copy_to(ns, length, 0, 0)
		items = ns
	end

	# Returns the content of `self` as a `String`
	#
	# `self` is marked as persisted when the returned string is backed by
	# `items` itself, so that the next mutation works on a private copy.
	redef fun to_s do
		var r = items.to_s_with_length(length)
		# Compare the native buffers: comparing the string itself with
		# `items` never matches and would always clear the flag.
		persisted = r isa FlatString and r.items == items
		return r
	end

	redef fun iterator do return new BytesIterator.with_buffer(self)

end
226
# Iterator over the bytes of a `Bytes`
private class BytesIterator
	super IndexedIterator[Byte]

	# Native buffer the bytes are read from
	var tgt: NativeString

	# Index of the byte returned by `item`
	redef var index

	# Exclusive upper bound of `index`
	var max: Int

	# Iterates from index 0 over the `items` and `length` that `b` has
	# when the iterator is created
	init with_buffer(b: Bytes) do
		init(b.items, 0, b.length)
	end

	redef fun is_ok do return max > index

	redef fun next do index = index + 1

	redef fun item do return tgt[index]
end
244
redef class Text
	# Copies the bytes of `self` into a new mutable `Bytes`
	#
	# ~~~nit
	# assert "String".to_bytes isa Bytes
	# assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
	# ~~~
	fun to_bytes: Bytes do
		var res = new Bytes.with_capacity(bytelen)
		append_to_bytes(res)
		return res
	end

	# Is every byte of `self` a hexadecimal digit?
	#
	#     assert "0B1d3F".is_valid_hexdigest
	#     assert not "5G".is_valid_hexdigest
	fun is_valid_hexdigest: Bool do
		for byte in bytes do
			if not byte.is_valid_hexdigit then return false
		end
		return true
	end

	# Appends `self.bytes` to `b`
	#
	# The bytes are copied one substring at a time.
	fun append_to_bytes(b: Bytes) do
		for chunk in substrings do
			# Only a `FlatString` may start elsewhere than at byte 0 of its buffer.
			var start = 0
			if chunk isa FlatString then start = chunk.first_byte
			b.append_ns_from(chunk.items, chunk.bytelen, start)
		end
	end

	# Decodes `self`, read as a hexadecimal digest, into a new `Bytes`
	#
	#     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
	#
	# REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0
	fun hexdigest_to_bytes: Bytes do
		var src = bytes
		var total = bytelen
		var res = new Bytes.with_capacity(total / 2)
		var at = 0
		while at < total do
			# Two consecutive digits make one byte: high nibble first.
			var high = src[at].hexdigit_to_byteval << 4
			var low = src[at + 1].hexdigit_to_byteval
			res.add(high | low)
			at += 2
		end
		return res
	end
end
293
redef class FlatText
	# Appends the `bytelen` bytes of `self` to `b` with a single copy from `items`
	redef fun append_to_bytes(b) do
		# Only a `FlatString` may start elsewhere than at byte 0 of `items`.
		var start = 0
		if self isa FlatString then start = first_byte
		b.append_ns_from(items, bytelen, start)
	end
end
300
redef class NativeString
	# Wraps `self` in a new `Bytes`, with `cstring_length` as both length and capacity
	#
	# `self` is passed to the `Bytes` constructor as its `items`.
	fun to_bytes: Bytes do
		var size = cstring_length
		return new Bytes(self, size, size)
	end
end