examples: annotate examples
[nit.git] / lib / core / bytes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Services for byte streams and arrays
16 module bytes
17
18 import kernel
19 import collection::array
20 intrude import text::flat
21
# Any kind of entity which can be searched for in a Sequence of Byte
interface BytePattern
	# Index of the first occurrence of `self` in `b`, or -1 if not found
	#
	# The search covers the whole of `b`: it starts at index 0.
	fun first_index_in(b: SequenceRead[Byte]): Int
	do
		return first_index_in_from(b, 0)
	end

	# Index of the first occurrence of `self` in `b` starting at `from`, or -1 if not found
	fun first_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract

	# Index of the last occurrence of `self` in `b`, or -1 if not found
	#
	# The search is started from the last index of `b`.
	fun last_index_in(b: SequenceRead[Byte]): Int
	do
		var last_pos = b.length - 1
		return last_index_in_from(b, last_pos)
	end

	# Index of the last occurrence of `self` in `b` searching from `from`, or -1 if not found
	fun last_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract

	# Indexes of all the occurrences of `self` in `b`
	fun search_all_in(b: SequenceRead[Byte]): SequenceRead[Int] is abstract

	# Length of the pattern, in bytes
	fun pattern_length: Int is abstract

	# Appends `self` to `b`
	fun append_to(b: Sequence[Byte]) is abstract

	# Is `self` a prefix of `b`?
	fun is_prefix(b: SequenceRead[Byte]): Bool is abstract

	# Is `self` a suffix of `b`?
	fun is_suffix(b: SequenceRead[Byte]): Bool is abstract
end
51
redef class Byte
	super BytePattern

	# Write the two uppercase hexadecimal digits of `self` into `ns`
	#
	# The high nibble is written at `pos`, the low nibble at `pos + 1`.
	private fun add_digest_at(ns: CString, pos: Int) do
		var high = (0xF0u8 & self) >> 4
		var low = 0x0Fu8 & self
		# 0x30 is '0'; adding 0x37 to a value in 10..15 yields 'A'..'F'
		if high >= 0x0Au8 then
			ns[pos] = high + 0x37u8
		else
			ns[pos] = high + 0x30u8
		end
		if low >= 0x0Au8 then
			ns[pos + 1] = low + 0x37u8
		else
			ns[pos + 1] = low + 0x30u8
		end
	end

	# Is `self` a valid hexadecimal digit (in ASCII)
	#
	# ~~~nit
	# intrude import core::bytes
	# assert not '/'.ascii.is_valid_hexdigit
	# assert '0'.ascii.is_valid_hexdigit
	# assert '9'.ascii.is_valid_hexdigit
	# assert not ':'.ascii.is_valid_hexdigit
	# assert not '@'.ascii.is_valid_hexdigit
	# assert 'A'.ascii.is_valid_hexdigit
	# assert 'F'.ascii.is_valid_hexdigit
	# assert not 'G'.ascii.is_valid_hexdigit
	# assert not '`'.ascii.is_valid_hexdigit
	# assert 'a'.ascii.is_valid_hexdigit
	# assert 'f'.ascii.is_valid_hexdigit
	# assert not 'g'.ascii.is_valid_hexdigit
	# ~~~
	private fun is_valid_hexdigit: Bool do
		if self >= 0x30u8 and self <= 0x39u8 then return true # '0'..'9'
		if self >= 0x41u8 and self <= 0x46u8 then return true # 'A'..'F'
		return self >= 0x61u8 and self <= 0x66u8 # 'a'..'f'
	end

	# Value of `self` read as an ASCII hexadecimal digit
	#
	# ~~~nit
	# intrude import core::bytes
	# assert 0x39u8.hexdigit_to_byteval == 0x09u8
	# assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
	# ~~~
	#
	# REQUIRE: `self.is_valid_hexdigit`
	private fun hexdigit_to_byteval: Byte do
		if self >= 0x30u8 and self <= 0x39u8 then return self - 0x30u8
		if self >= 0x41u8 and self <= 0x46u8 then return self - 0x37u8
		if self >= 0x61u8 and self <= 0x66u8 then return self - 0x57u8
		# Only reached when the requirement is not met;
		# the abort is also what satisfies the compiler about the return.
		abort
	end

	redef fun first_index_in_from(b, from) do
		var i = from
		var stop = b.length
		while i < stop do
			if b[i] == self then return i
			i += 1
		end
		return -1
	end

	redef fun last_index_in_from(b, from) do
		for i in [0 .. from].step(-1) do
			if b[i] == self then return i
		end
		return -1
	end

	redef fun search_all_in(b) do
		var found = new Array[Int]
		var idx = first_index_in_from(b, 0)
		while idx != -1 do
			found.add idx
			# Resume right after the previous hit
			idx = first_index_in_from(b, idx + 1)
		end
		return found
	end

	# A single byte is a pattern of length 1
	redef fun pattern_length
	do
		return 1
	end

	redef fun append_to(b)
	do
		b.push(self)
	end

	#     assert 'b'.ascii.is_suffix("baqsdb".to_bytes)
	#     assert not 'b'.ascii.is_suffix("baqsd".to_bytes)
	redef fun is_suffix(b) do
		if b.length == 0 then return false
		return b.last == self
	end

	#     assert 'b'.ascii.is_prefix("baqsdb".to_bytes)
	#     assert not 'b'.ascii.is_prefix("aqsdb".to_bytes)
	redef fun is_prefix(b) do
		if b.length == 0 then return false
		return b.first == self
	end
end
141
142 # A buffer containing Byte-manipulation facilities
143 #
144 # Uses Copy-On-Write when persisted
145 class Bytes
146 super AbstractArray[Byte]
147 super BytePattern
148
# Native storage of the bytes
#
# A CString being a char*, it can be used as underlying representation here.
var items: CString

# Number of bytes in the array
redef var length

# Capacity of the array
#
# `enlarge` allocates a new `items` of this size when it has to grow.
private var capacity: Int

# Has this buffer been persisted (to_s'd)?
#
# Used for Copy-On-Write: mutating services call `regen` first when it is set.
private var persisted = false
162
# Create a `Bytes` with no content and no capacity
#
#     var b = new Bytes.empty
#     assert b.to_s == ""
init empty
do
	init(new CString(0), 0, 0)
end
169
# Create an empty `Bytes` whose native buffer can hold `cap` bytes
init with_capacity(cap: Int)
do
	var buffer = new CString(cap)
	init(buffer, 0, cap)
end
175
# As a pattern, `self` is as long as its content
redef fun pattern_length
do
	return length
end

redef fun is_empty
do
	return length == 0
end
179
# Byte at index `i`, which must be in `[0 .. length[`
#
#     var b = new Bytes.empty
#     b.add 101u8
#     assert b[0] == 101u8
redef fun [](i)
do
	assert i >= 0
	assert i < length
	var buffer = items
	return buffer[i]
end
188
# Returns a copy of `self`
#
# The copy gets its own buffer, sized to `length`.
fun clone: Bytes
do
	var copy = new Bytes.with_capacity(length)
	copy.append(self)
	return copy
end
195
# Trims off the whitespaces at the beginning and the end of `self`
#
#     var b = "102041426E6F1020".hexdigest_to_bytes
#     assert b.trim.hexdigest == "41426E6F"
#
# NOTE: A whitespace is defined here as a byte whose value is <= 0x20
fun trim: Bytes do
	# Skip the leading whitespaces
	var first_kept = 0
	while first_kept < length and self[first_kept] <= 0x20u8 do first_kept += 1
	if first_kept >= length then return new Bytes.empty

	# Skip the trailing whitespaces
	var last_kept = length - 1
	while last_kept > 0 and self[last_kept] <= 0x20u8 do last_kept -= 1

	return slice(first_kept, last_kept - first_kept + 1)
end
216
# Copy a subset of `self` starting at `from` and of `count` bytes
#
# The requested window is clipped to the content of `self`.
#
#     var b = "abcd".to_bytes
#     assert b.slice(1, 2).hexdigest == "6263"
#     assert b.slice(-1, 2).hexdigest == "61"
#     assert b.slice(1, 0).hexdigest == ""
#     assert b.slice(2, 5).hexdigest == "6364"
fun slice(from, count: Int): Bytes do
	if count <= 0 then return new Bytes.empty

	var start = from
	var n = count

	# A negative start eats into the requested count
	if start < 0 then
		n += start
		if n < 0 then n = 0
		start = 0
	end

	# Do not read past the end of `self`
	if (n + start) > length then n = length - start
	if n <= 0 then return new Bytes.empty

	var res = new Bytes.with_capacity(n)
	res.append_ns_from(items, n, start)
	return res
end
241
# Copy of `self` starting at `from`
#
# A negative `from` is treated as 0.
#
#     var b = "abcd".to_bytes
#     assert b.slice_from(1).hexdigest == "626364"
#     assert b.slice_from(-1).hexdigest == "61626364"
#     assert b.slice_from(2).hexdigest == "6364"
fun slice_from(from: Int): Bytes do
	if from >= length then return new Bytes.empty
	var start = if from < 0 then 0 else from
	# `slice` clips the count to what is available
	return slice(start, length)
end
253
# Returns self as an hexadecimal digest.
#
# Also known as plain hexdump or postscript hexdump.
# Each byte is rendered as two uppercase hexadecimal digits.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.hexdigest == "61626364"
# assert b.hexdigest.hexdigest_to_bytes == b
# ~~~
fun hexdigest: String do
	var out_len = length * 2
	var out = new CString(out_len)
	for i in [0 .. length[ do
		# Byte `i` lands on the output pair starting at `2 * i`
		self[i].add_digest_at(out, i * 2)
	end
	return new FlatString.full(out, out_len, 0, out_len)
end
275
# Return self as a C hexadecimal digest where bytes are prefixed by `\x`
#
# The output is compatible with literal stream of bytes for most languages
# including C and Nit.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.chexdigest == "\\x61\\x62\\x63\\x64"
# assert b.chexdigest.unescape_to_bytes == b
# ~~~
fun chexdigest: String do
	var out_len = length * 4
	var out = new CString(out_len)
	for i in [0 .. length[ do
		# Each byte takes 4 output bytes: `\`, `x` and two hex digits
		var base = i * 4
		out[base] = 0x5Cu8 # b'\\'
		out[base + 1] = 0x78u8 # b'x'
		self[i].add_digest_at(out, base + 2)
	end
	return new FlatString.full(out, out_len, 0, out_len)
end
300
301
# Returns self as a stream of bits (0 and 1)
#
# Each byte is rendered as 8 characters, most significant bit first.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.binarydigest == "01100001011000100110001101100100"
# assert b.binarydigest.binarydigest_to_bytes == b
# ~~~
fun binarydigest: String do
	var out_len = length * 8
	var out = new CString(out_len)
	var w = 0
	for i in [0 .. length[ do
		var cur = self[i]
		# Walk the bits from the highest (0x80) down to the lowest
		var mask = 0x80u8
		while mask != 0u8 do
			out[w] = if (cur & mask) == 0u8 then 0x30u8 else 0x31u8 # b'0' or b'1'
			w += 1
			mask = mask >> 1
		end
	end
	return new FlatString.full(out, out_len, 0, out_len)
end
330
# Interprets `self` as a big-endian integer (unsigned by default)
#
# ~~~
# var b = "0102".hexdigest_to_bytes
# assert b.to_i == 258
#
# assert "01".hexdigest_to_bytes.to_i == 1
# assert "FF".hexdigest_to_bytes.to_i == 255
# assert "0000".hexdigest_to_bytes.to_i == 0
# ~~~
#
# If `self.is_empty`, 0 is returned.
#
# ~~~
# assert "".hexdigest_to_bytes.to_i == 0
# ~~~
#
# If `signed == true`, the bytes are read as a signed integer.
# As usual, the sign bit is the left most bit, no matter the
# `length` of `self`.
#
# ~~~
# assert "01".hexdigest_to_bytes.to_i(true) == 1
# assert "FF".hexdigest_to_bytes.to_i(true) == -1
# assert "00FF".hexdigest_to_bytes.to_i(true) == 255
# assert "E0".hexdigest_to_bytes.to_i(true) == -32
# assert "80".hexdigest_to_bytes.to_i(true) == -128
# assert "8000".hexdigest_to_bytes.to_i(true) == -32768
# assert "FE00".hexdigest_to_bytes.to_i(true) == -512
# assert "FEFEFE".hexdigest_to_bytes.to_i(true) == -65794
# ~~~
#
# `Int::to_bytes` is a loosely reverse method.
#
# ~~~
# assert b.to_i.to_bytes == b
# assert (b.to_i + 1).to_bytes.hexdigest == "0103"
# assert "0001".hexdigest_to_bytes.to_i.to_bytes.hexdigest == "01"
#
# assert (-32).to_bytes.to_i(true) == -32
# ~~~
#
# Warning: `Int` might overflow for bytes with more than 60 bits.
fun to_i(signed: nullable Bool): Int do
	# Accumulate the bytes, most significant first
	var res = 0
	var i = 0
	while i < length do
		res *= 256
		res += self[i].to_i
		i += 1
	end

	# Two's complement if `signed` and the sign bit (the left most bit) is set.
	# The test is `>=` so that a leading byte of exactly 0x80 is also negative.
	if signed == true and not_empty and first >= 0x80u8 then
		# `ff` is a mask of `length` bytes set to 0xFF
		var ff = 0
		for j in [0..length[ do
			ff *= 0x100
			ff += 0xFF
		end

		res = -((res ^ ff) + 1)
	end

	return res
end
394
# Set the byte at index `i`, or append `v` when `i == length`
#
#     var b = new Bytes.with_capacity(1)
#     b[0] = 101u8
#     assert b.to_s == "e"
redef fun []=(i, v) do
	if persisted then regen
	assert i >= 0
	assert i <= length
	if i == length then
		# One past the end: `add` stores `v` at `i` and grows `length`
		add(v)
		return
	end
	items[i] = v
end
405
# Append the byte `c` at the end of `self`, growing the buffer when it is full
#
#     var b = new Bytes.empty
#     b.add 101u8
#     assert b.to_s == "e"
redef fun add(c) do
	if persisted then regen
	# Room is needed for one more byte: asking `enlarge` for only `length`
	# would be a no-op when `length == capacity` and the write below would
	# land outside of `items`.
	if length >= capacity then
		enlarge(length + 1)
	end
	items[length] = c
	length += 1
end
417
# Adds the UTF-8 representation of `c` to `self`
#
#     var b = new Bytes.empty
#     b.add_char('A')
#     b.add_char('キ')
#     assert b.hexdigest == "41E382AD"
fun add_char(c: Char) do
	if persisted then regen
	# Number of bytes taken by `c`
	var char_len = c.u8char_len
	enlarge(length + char_len)
	items.set_char_at(length, c)
	length += char_len
end
432
# Append all the bytes of `arr` to `self`
#
#     var b = new Bytes.empty
#     b.append([104u8, 101u8, 108u8, 108u8, 111u8])
#     assert b.to_s == "hello"
redef fun append(arr) do
	if arr isa Bytes then
		# Another `Bytes`: copy its native buffer in one go
		append_ns(arr.items, arr.length)
		return
	end
	for byte in arr do add byte
end
443
# Remove and return the last byte; `self` must not be empty
#
#     var b = new Bytes.empty
#     b.append([0x41u8, 0x41u8, 0x18u8])
#     b.pop
#     assert b.to_s == "AA"
redef fun pop do
	assert length >= 1
	var last_idx = length - 1
	length = last_idx
	return items[last_idx]
end

# Forget the content by resetting `length`
redef fun clear
do
	length = 0
end
455
# Regenerates the buffer, necessary when it was persisted
#
# The current content is copied into a fresh native buffer of `capacity`
# bytes which replaces `items`, so the next writes do not touch the old
# buffer. `persisted` is reset.
private fun regen do
	var nns = new CString(capacity)
	items.copy_to(nns, length, 0, 0)
	# Without this assignment the copy would be dropped and the writes
	# would still go to the old buffer.
	items = nns
	persisted = false
end
462
# Appends the `ln` first bytes of `ns` to self
fun append_ns(ns: CString, ln: Int) do
	# Same as appending `ln` bytes of `ns` taken from its index 0
	append_ns_from(ns, ln, 0)
end
471
# Appends `ln` bytes from `ns` starting at index `from` to self
fun append_ns_from(ns: CString, ln, from: Int) do
	if persisted then regen
	var needed = length + ln
	if needed > capacity then
		enlarge(needed)
	end
	# Copy right after the current content
	ns.copy_to(items, ln, from, length)
	length = needed
end
480
# Appends the bytes of `str` to `self`
fun append_text(str: Text)
do
	str.append_to_bytes(self)
end

# As a pattern, `self` is appended byte for byte to `b`
redef fun append_to(b)
do
	b.append(self)
end
485
# Make sure the buffer can hold at least `sz` bytes
#
# Nothing is done when `capacity` is already large enough. Otherwise the
# content is moved to a new, larger buffer and `persisted` is reset.
redef fun enlarge(sz) do
	if capacity >= sz then return
	persisted = false

	# Start from at least 16, then grow geometrically until `sz` fits
	var new_cap = capacity
	if new_cap < 16 then new_cap = 16
	while new_cap < sz do new_cap = new_cap * 2 + 2

	var grown = new CString(new_cap)
	items.copy_to(grown, length, 0, 0)
	capacity = new_cap
	items = grown
end
495
# `self` as a `String`, built on `items` without copying when possible
#
# When the returned string is backed by the very same native buffer,
# `self` is flagged as `persisted` so that the next mutation regenerates
# its buffer first (Copy-On-Write).
redef fun to_s do
	var r = items.to_s_unsafe(length, copy=false)
	# Compare the native buffers: comparing the string itself with `items`
	# is always "different" and would always clear the flag.
	persisted = r isa FlatText and r.items == items
	return r
end
503
# Iterator over the bytes currently in `self`
redef fun iterator
do
	return new BytesIterator.with_buffer(self)
end
505
# Index in `b` of the first occurrence of the whole sequence `self`
# starting at or after `from`, or -1 if not found
#
# A negative `from` is treated as 0. An empty pattern is never found.
#
#     var hay = "abcabcab".to_bytes
#     var pat = "bc".to_bytes
#     assert pat.first_index_in_from(hay, 0) == 1
#     assert pat.first_index_in_from(hay, 2) == 4
#     assert pat.first_index_in_from(hay, 5) == -1
redef fun first_index_in_from(b, from) do
	if is_empty then return -1
	var plen = length
	var start = if from < 0 then 0 else from
	# Last index of `b` at which a full match still fits
	var last_start = b.length - plen
	while start <= last_start do
		var k = 0
		while k < plen and b[start + k] == self[k] do k += 1
		if k == plen then return start
		start += 1
	end
	return -1
end

# Index in `b` of the last occurrence of the whole sequence `self`
# that lies entirely at or before index `from`, or -1 if not found
#
# The returned value is the index of the first byte of the match.
#
#     var hay = "abcabcab".to_bytes
#     var pat = "bc".to_bytes
#     assert pat.last_index_in_from(hay, hay.length - 1) == 4
#     assert pat.last_index_in_from(hay, 4) == 1
#     assert pat.last_index_in_from(hay, 1) == -1
redef fun last_index_in_from(b, from) do
	if is_empty then return -1
	var plen = length
	var last_byte = from
	if last_byte >= b.length then last_byte = b.length - 1
	# Highest start such that the match ends at or before `last_byte`
	var start = last_byte - plen + 1
	while start >= 0 do
		var k = 0
		while k < plen and b[start + k] == self[k] do k += 1
		if k == plen then return start
		start -= 1
	end
	return -1
end

# Start indexes of all the non-overlapping occurrences of `self` in `b`
#
#     assert "bc".to_bytes.search_all_in("abcabcab".to_bytes) == [1, 4]
#     assert "aa".to_bytes.search_all_in("aaaa".to_bytes) == [0, 2]
redef fun search_all_in(b) do
	var ret = new Array[Int]
	var pos = first_index_in_from(b, 0)
	while pos != -1 do
		ret.add pos
		# Resume after the match so that occurrences do not overlap
		pos = first_index_in_from(b, pos + length)
	end
	return ret
end
541
# Splits the content of `self` on each occurrence of the pattern `b`
#
# The occurrences of `b` are not part of the returned pieces.
# Without any occurrence, the result is a single copy of `self`.
#
#     var a = "String is string".to_bytes.split_with('s'.ascii)
#     assert a.length == 3
#     assert a[0].hexdigest == "537472696E672069"
#     assert a[1].hexdigest == "20"
#     assert a[2].hexdigest == "7472696E67"
fun split_with(b: BytePattern): Array[Bytes] do
	var hits = b.search_all_in(self)
	if hits.is_empty then return [clone]

	var parts = new Array[Bytes]
	var sep_len = b.pattern_length
	var start = 0
	for hit in hits do
		parts.add(slice(start, hit - start))
		start = hit + sep_len
	end
	# What remains after the last separator
	parts.add(slice_from(start))
	return parts
end
561
# Splits `self` in two parts at the first occurrence of `b`
#
# The occurrence of `b` belongs to neither part.
# Without any occurrence, the result is a single copy of `self`.
#
#     var a = "String is string".to_bytes.split_once_on('s'.ascii)
#     assert a[0].hexdigest == "537472696E672069"
#     assert a[1].hexdigest == "20737472696E67"
fun split_once_on(b: BytePattern): Array[Bytes] do
	var cut = b.first_index_in(self)
	if cut == -1 then return [clone]

	var head = slice(0, cut)
	var tail = slice_from(cut + b.pattern_length)
	var halves = new Array[Bytes].with_capacity(2)
	halves.add head
	halves.add tail
	return halves
end
575
# Replaces all the occurrences of `pattern` in `self` by `bytes`
#
# A new `Bytes` is returned, `self` is left untouched.
#
#     var b = "String is string".to_bytes.replace(0x20u8, 0x41u8)
#     assert b.hexdigest == "537472696E6741697341737472696E67"
fun replace(pattern: BytePattern, bytes: BytePattern): Bytes do
	if is_empty then return new Bytes.empty
	var hits = pattern.search_all_in(self)
	if hits.is_empty then return clone

	var res = new Bytes.with_capacity(length)
	var pat_len = pattern.pattern_length
	var start = 0
	for hit in hits do
		# Copy what precedes the occurrence, then the replacement
		res.append_ns(items.fast_cstring(start), hit - start)
		bytes.append_to res
		start = hit + pat_len
	end
	# `start` is now right after the last occurrence
	res.append(slice_from(start))
	return res
end
594
# Decode `self` from percent (or URL) encoding to a clear string
#
# Invalid '%' are not decoded: this covers a '%' followed by something
# else than two hexadecimal digits, and a '%' too close to the end of
# `self` to be followed by two bytes.
#
#     assert "aBc09-._~".to_bytes.from_percent_encoding == "aBc09-._~".to_bytes
#     assert "%25%28%29%3c%20%3e".to_bytes.from_percent_encoding == "%()< >".to_bytes
#     assert ".com%2fpost%3fe%3dasdf%26f%3d123".to_bytes.from_percent_encoding == ".com/post?e=asdf&f=123".to_bytes
#     assert "%25%28%29%3C%20%3E".to_bytes.from_percent_encoding == "%()< >".to_bytes
#     assert "incomplete %".to_bytes.from_percent_encoding == "incomplete %".to_bytes
#     assert "truncated %4".to_bytes.from_percent_encoding == "truncated %4".to_bytes
#     assert "invalid % usage".to_bytes.from_percent_encoding == "invalid % usage".to_bytes
#     assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".to_bytes.from_percent_encoding == "éあいう".to_bytes
#     assert "%1 %A %C3%A9A9".to_bytes.from_percent_encoding == "%1 %A éA9".to_bytes
fun from_percent_encoding: Bytes do
	var tmp = new Bytes.with_capacity(length)
	var percent = '%'.ascii
	var pos = 0
	while pos < length do
		var b = self[pos]
		if b != percent then
			tmp.add b
			pos += 1
			continue
		end
		# An escape needs two more bytes after the '%': with fewer than
		# 3 bytes left, the '%' is kept as is.
		if length - pos < 3 then
			tmp.add percent
			pos += 1
			continue
		end
		var bn = self[pos + 1]
		var bnn = self[pos + 2]
		if not bn.is_valid_hexdigit or not bnn.is_valid_hexdigit then
			tmp.add percent
			pos += 1
			continue
		end
		# High digit first, then low digit
		tmp.add((bn.hexdigit_to_byteval << 4) + bnn.hexdigit_to_byteval)
		pos += 3
	end
	return tmp
end
634
# Is `b` a prefix of `self` ?
#
# The check is delegated to `b.is_prefix`.
fun has_prefix(b: BytePattern): Bool
do
	return b.is_prefix(self)
end

# Is `b` a suffix of `self` ?
#
# The check is delegated to `b.is_suffix`.
fun has_suffix(b: BytePattern): Bool
do
	return b.is_suffix(self)
end
640
# Do the last bytes of `b` match the whole of `self`?
#
#     assert "sdb".to_bytes.is_suffix("baqsdb".to_bytes)
#     assert not "xdb".to_bytes.is_suffix("baqsdb".to_bytes)
#     assert not "xb".to_bytes.is_suffix("ab".to_bytes)
redef fun is_suffix(b) do
	if length > b.length then return false
	var j = b.length - 1
	var i = length - 1
	# Compare backward down to index 0 included, otherwise the first
	# byte of `self` would never be checked.
	while i >= 0 do
		if self[i] != b[j] then return false
		i -= 1
		j -= 1
	end
	return true
end
652
# Do the first bytes of `b` match the whole of `self`?
redef fun is_prefix(b) do
	var n = length
	if n > b.length then return false
	var i = 0
	while i < n do
		if self[i] != b[i] then return false
		i += 1
	end
	return true
end
658 end
659
# Iterator over the bytes of a native buffer, from `index` up to `max` (excluded)
private class BytesIterator
	super IndexedIterator[Byte]

	# Native buffer that is read
	var tgt: CString

	# Position of the current byte in `tgt`
	redef var index

	# First position that is no longer iterated on
	var max: Int

	# Iterate over `b`, from its first byte and for its current `length`
	init with_buffer(b: Bytes)
	do
		init(b.items, 0, b.length)
	end

	redef fun is_ok
	do
		return index < max
	end

	redef fun next
	do
		index += 1
	end

	redef fun item
	do
		return tgt[index]
	end
end
677
redef class Int
	# A signed big-endian representation of `self`
	#
	# ~~~
	# assert 1.to_bytes.hexdigest == "01"
	# assert 255.to_bytes.hexdigest == "FF"
	# assert 256.to_bytes.hexdigest == "0100"
	# assert 65535.to_bytes.hexdigest == "FFFF"
	# assert 65536.to_bytes.hexdigest == "010000"
	# ~~~
	#
	# Negative values are converted to their two's complement.
	# Be careful as the result can be ambiguous.
	#
	# ~~~
	# assert (-1).to_bytes.hexdigest == "FF"
	# assert (-32).to_bytes.hexdigest == "E0"
	# assert (-512).to_bytes.hexdigest == "FE00"
	# assert (-65794).to_bytes.hexdigest == "FEFEFE"
	# ~~~
	#
	# Optionally, set `n_bytes` to the desired number of bytes in the output.
	# This setting can disambiguate the result between positive and negative
	# integers. Be careful with this parameter as the result may overflow.
	#
	# ~~~
	# assert 1.to_bytes(2).hexdigest == "0001"
	# assert 65535.to_bytes(2).hexdigest == "FFFF"
	# assert (-1).to_bytes(2).hexdigest == "FFFF"
	# assert (-512).to_bytes(4).hexdigest == "FFFFFE00"
	# assert 0x123456.to_bytes(2).hexdigest == "3456"
	# ~~~
	#
	# For 0, a Bytes object with single nul byte is returned (instead of an empty Bytes object).
	#
	# ~~~
	# assert 0.to_bytes.hexdigest == "00"
	# ~~~
	#
	# For non-negative integers, `Bytes::to_i` can reverse the operation.
	#
	# ~~~
	# assert 1234.to_bytes.to_i == 1234
	# ~~~
	#
	# Large values are supported too.
	#
	# ~~~
	# assert (1 << 60).to_bytes.hexdigest == "1000000000000000"
	# ~~~
	fun to_bytes(n_bytes: nullable Int): Bytes do

		# If 0, force using at least one byte
		if self == 0 and n_bytes == null then n_bytes = 1

		# Compute the len (log256) by repeated division.
		# Growing a power of 256 until it exceeds the value would overflow
		# for large integers and never terminate.
		var len = 1
		var rest = self.abs
		while rest >= 256 do
			len += 1
			rest /= 256
		end

		# Two's complement
		var s = self
		if self < 0 then
			# `ff` is a mask of `len` bytes set to 0xFF
			var ff = 0
			for j in [0..len[ do
				ff *= 0x100
				ff += 0xFF
			end

			s = ((-self) ^ ff) + 1
		end

		# Cut long values
		if n_bytes != null and len > n_bytes then len = n_bytes

		# Allocate the buffer
		var cap = n_bytes or else len
		var res = new Bytes.with_capacity(cap)

		# Pre-fill with the sign extension; writing at `i == length` appends
		var filler = if self < 0 then 0xFFu8 else 0u8
		for i in [0..cap[ do res[i] = filler

		# Fill it starting with the end
		var i = cap
		var sum = s
		while i > cap - len do
			i -= 1
			res[i] = (sum % 256).to_b
			sum /= 256
		end

		return res
	end
end
772
773 redef class Text
# Returns a mutable copy of `self`'s bytes
#
# ~~~nit
# assert "String".to_bytes isa Bytes
# assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
# ~~~
fun to_bytes: Bytes do
	# Size the buffer once for the whole content
	var res = new Bytes.with_capacity(byte_length)
	append_to_bytes(res)
	return res
end
785
# Is `self` a valid hexdigest ?
#
# That is: are all the bytes of `self` ASCII hexadecimal digits?
#
#     assert "0B1d3F".is_valid_hexdigest
#     assert not "5G".is_valid_hexdigest
fun is_valid_hexdigest: Bool do
	for byte in bytes do
		if not byte.is_valid_hexdigit then return false
	end
	return true
end
794
# Appends `self.bytes` to `b`
#
# The content is copied one flat substring at a time.
fun append_to_bytes(b: Bytes) do
	for chunk in substrings do
		# Only a `FlatString` may start inside its native buffer
		var offset = 0
		if chunk isa FlatString then offset = chunk.first_byte
		b.append_ns_from(chunk.items, chunk.byte_length, offset)
	end
end
802
# Returns a new `Bytes` instance with the digest as content
#
#     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
#     assert "0B1F4D".hexdigest_to_bytes.hexdigest == "0B1F4D"
#
# Characters that are not hexadecimal digits are ignored.
#
#     assert "z0B1 F4\nD".hexdigest_to_bytes.hexdigest == "0B1F4D"
#     assert "\\x0b1 \\xf4d".hexdigest_to_bytes.hexdigest == "0B1F4D"
#
# When the number of hexadecimal digit is not even, then a leading 0 is
# implicitly considered to fill the left byte (the most significant one).
#
#     assert "1".hexdigest_to_bytes.hexdigest == "01"
#     assert "FFF".hexdigest_to_bytes.hexdigest == "0FFF"
#
# `Bytes::hexdigest` is a loosely reverse method since its
# results contain only pairs of uppercase hexadecimal digits.
#
#     assert "ABCD".hexdigest_to_bytes.hexdigest == "ABCD"
#     assert "a b c".hexdigest_to_bytes.hexdigest == "0ABC"
fun hexdigest_to_bytes: Bytes do
	var raw = bytes
	var n = byte_length

	# First pass: count the hexadecimal digits
	var ndigits = 0
	for p in [0 .. n[ do
		if raw[p].is_valid_hexdigit then ndigits += 1
	end

	# One byte for each pair of digits, rounded up
	var res = new Bytes.with_capacity((ndigits + 1) / 2)

	# Current hex digit (1=high, 0=low); starts on the low one when the
	# number of digits is odd, which is what gives the implicit leading 0
	var slot = (ndigits + 1) % 2
	var acc = 0u8 # accumulated value of the current byte

	# Second pass: pack the digits
	for p in [0 .. n[ do
		var c = raw[p]
		if not c.is_valid_hexdigit then continue
		acc = acc << 4 | c.hexdigit_to_byteval
		slot -= 1
		if slot < 0 then
			# Low digit read: store the byte and start a new one
			res.add acc
			slot = 1
			acc = 0u8
		end
	end
	return res
end
859
# Gets the hexdigest of the bytes of `self`
#
# Each byte is rendered as two uppercase hexadecimal digits.
#
#     assert "&lt;STRING&#47;&rt;".hexdigest == "266C743B535452494E47262334373B2672743B"
fun hexdigest: String do
	var n = byte_length
	var out_len = n * 2
	var out = new CString(out_len)
	var src = bytes
	for i in [0 .. n[ do
		# Byte `i` lands on the output pair starting at `2 * i`
		src[i].add_digest_at(out, i * 2)
	end
	return new FlatString.with_infos(out, out_len, 0)
end
873
# Return a `Bytes` instance where Nit escape sequences are transformed.
#
#     assert "B\\n\\x41\\u0103D3".unescape_to_bytes.hexdigest == "420A41F0908F93"
#
# `Bytes::chexdigest` is a loosely reverse methods since its result is only made
# of `"\x??"` escape sequences.
#
#     assert "\\x41\\x42\\x43".unescape_to_bytes.chexdigest == "\\x41\\x42\\x43"
#     assert "B\\n\\x41\\u0103D3".unescape_to_bytes.chexdigest == "\\x42\\x0A\\x41\\xF0\\x90\\x8F\\x93"
fun unescape_to_bytes: Bytes do
	var res = new Bytes.with_capacity(self.byte_length)
	var i = 0
	while i < length do
		var c = self[i]
		i += 1

		# Plain character: copied as is
		if c != '\\' then
			res.add_char(c)
			continue
		end

		# A backslash at the very end introduces nothing and is dropped
		if i >= length then break

		# `esc` is the character right after the backslash, at index `i`
		var esc = self[i]
		if esc == 'n' then
			res.add_char('\n')
		else if esc == 'r' then
			res.add_char('\r')
		else if esc == 't' then
			res.add_char('\t')
		else if esc == '0' then
			res.add_char('\0')
		else if esc == 'x' or esc == 'X' then
			# `\x` followed by 2 hexadecimal digits: a single byte
			var hx = substring(i + 1, 2)
			if hx.is_hex then
				res.add(hx.to_hex.to_b)
			else
				res.add_char(esc)
			end
			# The 2 following characters are consumed in both cases
			i += 2
		else if esc == 'u' or esc == 'U' then
			# `\u` followed by 6 hexadecimal digits: a code point
			var hx = substring(i + 1, 6)
			if hx.is_hex then
				res.add_char(hx.to_hex.code_point)
			else
				res.add_char(esc)
			end
			# The 6 following characters are consumed in both cases
			i += 6
		else
			# Unknown escape: keep the character itself
			res.add_char(esc)
		end
		i += 1
	end
	return res
end
930
# Return a `Bytes` by reading 0 and 1.
#
#     assert "1010101100001101".binarydigest_to_bytes.hexdigest == "AB0D"
#
# Note that characters that are neither 0 or 1 are just ignored.
#
#     assert "a1B01 010\n1100あ001101".binarydigest_to_bytes.hexdigest == "AB0D"
#     assert "hello".binarydigest_to_bytes.is_empty
#
# When the number of bits is not divisible by 8, then leading 0 are
# implicitly considered to fill the left byte (the most significant one).
#
#     assert "1".binarydigest_to_bytes.hexdigest == "01"
#     assert "1111111".binarydigest_to_bytes.hexdigest == "7F"
#     assert "1000110100".binarydigest_to_bytes.hexdigest == "0234"
#
# `Bytes::binarydigest` is a loosely reverse method since its
# results contain only 1 and 0 by blocks of 8.
#
#     assert "1010101100001101".binarydigest_to_bytes.binarydigest == "1010101100001101"
#     assert "1".binarydigest_to_bytes.binarydigest == "00000001"
fun binarydigest_to_bytes: Bytes
do
	var raw = bytes
	var n = byte_length

	# First pass: count the bits, i.e. the b'0' and b'1'
	var nbits = 0
	for p in [0 .. n[ do
		var c = raw[p]
		if c == 0x30u8 or c == 0x31u8 then nbits += 1
	end

	# Allocate (and take care of the padding)
	var res = new Bytes.with_capacity((nbits + 7) / 8)

	# Current bit (7th=128, 0th=1); a partial first byte starts lower,
	# which is what gives the implicit leading zeros
	var slot = (nbits + 7) % 8
	var acc = 0u8 # accumulated value of the current byte

	# Second pass: pack the bits
	for p in [0 .. n[ do
		var c = raw[p]
		if c != 0x30u8 and c != 0x31u8 then continue
		acc = acc << 1
		if c == 0x31u8 then acc = acc | 1u8

		slot -= 1
		if slot < 0 then
			# Lowest bit read: store the byte and start a new one
			res.add acc
			slot = 7
			acc = 0u8
		end
	end
	return res
end
994 end
995
redef class FlatText
	# Appends the bytes of `self` to `b` in a single copy
	#
	# Nothing is appended while `items` is not set.
	redef fun append_to_bytes(b) do
		# Only a `FlatString` may start inside its native buffer
		var offset = 0
		if self isa FlatString then offset = first_byte
		if not isset _items then return
		b.append_ns_from(items, byte_length, offset)
	end
end
1002
redef class CString
	# Creates a new `Bytes` object from `self` with `len` as length
	#
	# `self` itself becomes the buffer of the result: nothing is copied.
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes(len: nullable Int): Bytes do
		var n = len or else cstring_length
		return new Bytes(self, n, n)
	end

	# Creates a new `Bytes` object from a copy of `self` with `len` as length
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes_with_copy(len: nullable Int): Bytes do
		var n = len or else cstring_length
		var duplicate = new CString(n)
		copy_to(duplicate, n, 0, 0)
		return new Bytes(duplicate, n, n)
	end
end
1022
# Joins an array of bytes `arr` separated by `sep`
#
# Without `sep`, the elements are simply concatenated.
#
#     assert join_bytes(["String".to_bytes, "is".to_bytes, "string".to_bytes], ' '.ascii).hexdigest == "537472696E6720697320737472696E67"
fun join_bytes(arr: Array[Bytes], sep: nullable BytePattern): Bytes do
	if arr.is_empty then return new Bytes.empty
	var glue: BytePattern = sep or else new Bytes.empty

	# Exact size of the result: all the elements plus the separators between them
	var total = glue.pattern_length * (arr.length - 1)
	for chunk in arr do total += chunk.length

	var joined = new Bytes.with_capacity(total)
	var is_first = true
	for chunk in arr do
		# A separator goes before each element but the first
		if is_first then
			is_first = false
		else
			glue.append_to(joined)
		end
		joined.append chunk
	end
	return joined
end
1037 return ret
1038 end