73802a79b29ec2a4136095cb001f7a46893b58f1
[nit.git] / lib / core / bytes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Services for byte streams and arrays
16 module bytes
17
18 import kernel
19 import collection::array
20 intrude import text::flat
21
# Any kind of entity which can be searched for in a Sequence of Byte
interface BytePattern
	# Return the index of the first occurrence of `self` in `b`, or -1 if not found
	#
	# Delegates to `first_index_in_from` with a search starting at index 0.
	fun first_index_in(b: SequenceRead[Byte]): Int do return first_index_in_from(b, 0)

	# Return the index of the first occurrence of `self` in `b` starting at `from`, or -1 if not found
	fun first_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract

	# Return the index of the last occurrence of `self` in `b`, or -1 if not found
	#
	# Delegates to `last_index_in_from`, passing the last index of `b` as `from`.
	fun last_index_in(b: SequenceRead[Byte]): Int do return last_index_in_from(b, b.length - 1)

	# Return the index of the last occurrence of `self` in `b` when searching from `from`, or -1 if not found
	#
	# `last_index_in` calls this with `from == b.length - 1`.
	fun last_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract

	# Returns the indexes of all the occurrences of `self` in `b`
	fun search_all_in(b: SequenceRead[Byte]): SequenceRead[Int] is abstract

	# Length of the pattern
	fun pattern_length: Int is abstract

	# Appends `self` to `b`
	fun append_to(b: Sequence[Byte]) is abstract

	# Is `self` a prefix for `b` ?
	fun is_prefix(b: SequenceRead[Byte]): Bool is abstract

	# Is `self` a suffix for `b` ?
	fun is_suffix(b: SequenceRead[Byte]): Bool is abstract
end
51
52 redef class Byte
53 super BytePattern
54
# Write `self` as two uppercase hexadecimal digits into `ns`
#
# The digit of the high nibble is written at `pos`,
# the digit of the low nibble at `pos + 1`.
private fun add_digest_at(ns: CString, pos: Int) do
	var hi = (self & 0xF0u8) >> 4
	var lo = self & 0x0Fu8
	# 0x30 is b'0'; 0x37 + 0x0A is b'A'
	if hi >= 0x0Au8 then
		ns[pos] = hi + 0x37u8
	else
		ns[pos] = hi + 0x30u8
	end
	if lo >= 0x0Au8 then
		ns[pos + 1] = lo + 0x37u8
	else
		ns[pos + 1] = lo + 0x30u8
	end
end
62
# Is `self` the ASCII code of an hexadecimal digit?
#
# Both uppercase and lowercase letters are accepted.
#
# ~~~nit
# intrude import core::bytes
# assert not '/'.ascii.is_valid_hexdigit
# assert '0'.ascii.is_valid_hexdigit
# assert '9'.ascii.is_valid_hexdigit
# assert not ':'.ascii.is_valid_hexdigit
# assert not '@'.ascii.is_valid_hexdigit
# assert 'A'.ascii.is_valid_hexdigit
# assert 'F'.ascii.is_valid_hexdigit
# assert not 'G'.ascii.is_valid_hexdigit
# assert not '`'.ascii.is_valid_hexdigit
# assert 'a'.ascii.is_valid_hexdigit
# assert 'f'.ascii.is_valid_hexdigit
# assert not 'g'.ascii.is_valid_hexdigit
# ~~~
private fun is_valid_hexdigit: Bool do
	# b'0' .. b'9'
	if self >= 0x30u8 and self <= 0x39u8 then return true
	# b'A' .. b'F'
	if self >= 0x41u8 and self <= 0x46u8 then return true
	# b'a' .. b'f'
	return self >= 0x61u8 and self <= 0x66u8
end
85
# Value (0 to 15) of the hexadecimal digit whose ASCII code is `self`
#
# ~~~nit
# intrude import core::bytes
# assert 0x39u8.hexdigit_to_byteval == 0x09u8
# assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
# ~~~
#
# REQUIRE: `self.is_valid_hexdigit`
private fun hexdigit_to_byteval: Byte do
	# b'0' .. b'9'
	if self >= 0x30u8 and self <= 0x39u8 then return self - 0x30u8
	# b'A' .. b'F'
	if self >= 0x41u8 and self <= 0x46u8 then return self - 0x37u8
	# b'a' .. b'f'
	if self >= 0x61u8 and self <= 0x66u8 then return self - 0x57u8
	# Reached only when the requirement is not met
	abort
end
107
# Index of the first byte of `b` equal to `self` at or after `from`, or -1
redef fun first_index_in_from(b, from) do
	var i = from
	var n = b.length
	while i < n do
		if b[i] == self then return i
		i += 1
	end
	return -1
end
112
# Index of the last byte of `b` equal to `self` at or before `from`, or -1
#
# `from` is clamped to the last index of `b`; a negative `from` yields -1.
redef fun last_index_in_from(b, from) do
	# Walk backwards with an explicit index so that the scan really goes
	# from `from` down to 0 and never reads outside of `b`.
	var i = from
	if i >= b.length then i = b.length - 1
	while i >= 0 do
		if b[i] == self then return i
		i -= 1
	end
	return -1
end
117
# Indexes of all the bytes of `b` equal to `self`, in increasing order
redef fun search_all_in(b) do
	var found = new Array[Int]
	var idx = first_index_in_from(b, 0)
	while idx != -1 do
		found.add idx
		idx = first_index_in_from(b, idx + 1)
	end
	return found
end
128
# A `Byte` is a pattern of a single byte
redef fun pattern_length do
	return 1
end
130
# Pushes `self` at the end of `b`
redef fun append_to(b) do
	b.push self
end
132
# Is `self` the last byte of `b`?
#
#     assert 'b'.ascii.is_suffix("baqsdb".to_bytes)
#     assert not 'b'.ascii.is_suffix("baqsd".to_bytes)
redef fun is_suffix(b) do
	if b.length == 0 then return false
	return b.last == self
end
136
# Is `self` the first byte of `b`?
#
#     assert 'b'.ascii.is_prefix("baqsdb".to_bytes)
#     assert not 'b'.ascii.is_prefix("aqsdb".to_bytes)
redef fun is_prefix(b) do
	if b.length == 0 then return false
	return b.first == self
end
140 end
141
142 # A buffer containing Byte-manipulation facilities
143 #
144 # Uses Copy-On-Write when persisted
145 class Bytes
146 super AbstractArray[Byte]
147 super BytePattern
148
# A CString being a char*, it can be used as underlying representation here.
#
# The bytes of `self` are read from and written to this buffer.
var items: CString

# Number of bytes in the array
redef var length

# Capacity of the array
#
# `with_capacity` and `enlarge` allocate `items` with this number of bytes.
private var capacity: Int

# Has this buffer been persisted (to_s'd)?
#
# Used for Copy-On-Write: `to_s` sets it, and the methods that write
# into `items` call `regen` first when it is set.
private var persisted = false
162
# Init an empty `Bytes`, backed by a `CString` of capacity 0
#
#     var b = new Bytes.empty
#     assert b.to_s == ""
init empty do init(new CString(0), 0, 0)
169
# Init an empty `Bytes` whose buffer can hold `cap` bytes
init with_capacity(cap: Int) do init(new CString(cap), 0, cap)
175
# As a pattern, `self` is as long as its number of bytes
redef fun pattern_length do
	return length
end
177
# Is there no byte in `self`?
redef fun is_empty do
	return length == 0
end
179
# Byte at index `i`
#
#     var b = new Bytes.empty
#     b.add 101u8
#     assert b[0] == 101u8
#
# REQUIRE: `i >= 0 and i < length`
redef fun [](i) do
	assert i >= 0 and i < length
	return items[i]
end
188
# Returns a new `Bytes` holding a copy of the bytes of `self`
fun clone: Bytes do
	var copy = new Bytes.with_capacity(length)
	copy.append_ns(items, length)
	return copy
end
195
# Returns a copy of `self` without the leading and trailing whitespaces
#
#     var b = "102041426E6F1020".hexdigest_to_bytes
#     assert b.trim.hexdigest == "41426E6F"
#
# NOTE: A whitespace is defined here as a byte whose value is <= 0x20
fun trim: Bytes do
	# Index of the first byte that is not a whitespace
	var head = 0
	while head < length and self[head] <= 0x20u8 do head += 1
	if head >= length then return new Bytes.empty
	# Index of the last byte that is not a whitespace
	var tail = length - 1
	while tail > 0 and self[tail] <= 0x20u8 do tail -= 1
	return slice(head, tail - head + 1)
end
216
# Copy of the `count` bytes of `self` starting at index `from`
#
# The requested window is clipped to the bounds of `self`.
#
#     var b = "abcd".to_bytes
#     assert b.slice(1, 2).hexdigest == "6263"
#     assert b.slice(-1, 2).hexdigest == "61"
#     assert b.slice(1, 0).hexdigest == ""
#     assert b.slice(2, 5).hexdigest == "6364"
fun slice(from, count: Int): Bytes do
	if count <= 0 then return new Bytes.empty

	var start = from
	var n = count
	if start < 0 then
		# The part of the window before index 0 does not exist
		n += start
		start = 0
	end

	# Do not read past the end of `self`
	var avail = length - start
	if n > avail then n = avail
	if n <= 0 then return new Bytes.empty

	var res = new Bytes.with_capacity(n)
	res.append_ns(items.fast_cstring(start), n)
	return res
end
241
# Copy of the bytes of `self` from index `from` to the end
#
# A negative `from` is considered as 0.
#
#     var b = "abcd".to_bytes
#     assert b.slice_from(1).hexdigest == "626364"
#     assert b.slice_from(-1).hexdigest == "61626364"
#     assert b.slice_from(2).hexdigest == "6364"
fun slice_from(from: Int): Bytes do
	if from >= length then return new Bytes.empty
	var start = from
	if start < 0 then start = 0
	# `slice` clips the count to what is available after `start`
	return slice(start, length)
end
253
# Reverse the order of the bytes, in place
#
#     var b = "abcd".to_bytes
#     b.reverse
#     assert b.to_s == "dcba"
fun reverse
do
	# Swap the bytes pairwise, from both ends towards the middle
	var lo = 0
	var hi = length - 1
	while lo < hi do
		var tmp = self[lo]
		self[lo] = self[hi]
		self[hi] = tmp
		lo += 1
		hi -= 1
	end
end
268
# Returns self as an hexadecimal digest.
#
# Also known as plain hexdump or postscript hexdump.
# Each byte gives two uppercase hexadecimal digits.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.hexdigest == "61626364"
# assert b.hexdigest.hexdigest_to_bytes == b
# ~~~
fun hexdigest: String do
	var elen = length * 2
	var ns = new CString(elen)
	for i in [0 .. length[ do self[i].add_digest_at(ns, i * 2)
	return new FlatString.full(ns, elen, 0, elen)
end
290
# Return self as a C hexadecimal digest where bytes are prefixed by `\x`
#
# The output is compatible with literal stream of bytes for most languages
# including C and Nit.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.chexdigest == "\\x61\\x62\\x63\\x64"
# assert b.chexdigest.unescape_to_bytes == b
# ~~~
fun chexdigest: String do
	var elen = length * 4
	var ns = new CString(elen)
	for i in [0 .. length[ do
		# Each byte takes 4 output bytes: `\`, `x` and two hexadecimal digits
		var oi = i * 4
		ns[oi] = 0x5Cu8 # b'\\'
		ns[oi + 1] = 0x78u8 # b'x'
		self[i].add_digest_at(ns, oi + 2)
	end
	return new FlatString.full(ns, elen, 0, elen)
end
315
316
# Returns self as a stream of bits (0 and 1)
#
# Each byte gives 8 characters, most significant bit first.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.binarydigest == "01100001011000100110001101100100"
# assert b.binarydigest.binarydigest_to_bytes == b
# ~~~
fun binarydigest: String do
	var elen = length * 8
	var ns = new CString(elen)
	var oi = 0
	for i in [0 .. length[ do
		var c = self[i]
		# Test each bit, from the most significant one
		var mask = 0x80u8
		while mask != 0u8 do
			ns[oi] = if (c & mask) == 0u8 then 0x30u8 else 0x31u8 # b'0' or b'1'
			oi += 1
			mask = mask >> 1
		end
	end
	return new FlatString.full(ns, elen, 0, elen)
end
345
# Interprets `self` as a big-endian integer (unsigned by default)
#
# ~~~
# var b = "0102".hexdigest_to_bytes
# assert b.to_i == 258
#
# assert "01".hexdigest_to_bytes.to_i == 1
# assert "FF".hexdigest_to_bytes.to_i == 255
# assert "0000".hexdigest_to_bytes.to_i == 0
# ~~~
#
# If `self.is_empty`, 0 is returned.
#
# ~~~
# assert "".hexdigest_to_bytes.to_i == 0
# ~~~
#
# If `signed == true`, the bytes are read as a signed integer.
# As usual, the sign bit is the left most bit, no matter the
# `length` of `self`.
#
# ~~~
# assert "01".hexdigest_to_bytes.to_i(true) == 1
# assert "FF".hexdigest_to_bytes.to_i(true) == -1
# assert "00FF".hexdigest_to_bytes.to_i(true) == 255
# assert "E0".hexdigest_to_bytes.to_i(true) == -32
# assert "80".hexdigest_to_bytes.to_i(true) == -128
# assert "FE00".hexdigest_to_bytes.to_i(true) == -512
# assert "FEFEFE".hexdigest_to_bytes.to_i(true) == -65794
# ~~~
#
# `Int::to_bytes` is a loosely reverse method.
#
# ~~~
# assert b.to_i.to_bytes == b
# assert (b.to_i + 1).to_bytes.hexdigest == "0103"
# assert "0001".hexdigest_to_bytes.to_i.to_bytes.hexdigest == "01"
#
# assert (-32).to_bytes.to_i(true) == -32
# ~~~
#
# Warning: `Int` might overflow for bytes with more than 60 bits.
fun to_i(signed: nullable Bool): Int do
	# Accumulate the bytes, most significant first
	var res = 0
	var i = 0
	while i < length do
		res = res * 256 + self[i].to_i
		i += 1
	end

	# Two's complement if `signed` and the sign bit is set.
	# The sign bit is the bit 0x80 of the first byte, so a first byte
	# of exactly 0x80 is negative too.
	if signed == true and not_empty and first >= 0x80u8 then
		# Mask with all the bits of the `length` bytes set
		var ff = 0
		for j in [0..length[ do
			ff *= 0x100
			ff += 0xFF
		end

		res = -((res ^ ff) + 1)
	end

	return res
end
409
# Set the byte at index `i` to `v`
#
# Writing at index `length` appends `v` to `self`.
#
#     var b = new Bytes.with_capacity(1)
#     b[0] = 101u8
#     assert b.to_s == "e"
#
# REQUIRE: `i >= 0 and i <= length`
redef fun []=(i, v) do
	if persisted then regen
	assert i >= 0 and i <= length
	# Writing just past the last byte grows `self` by one byte
	if i == length then add(v)
	items[i] = v
end
420
# Appends the byte `c` at the end of `self`
#
#     var b = new Bytes.empty
#     b.add 101u8
#     assert b.to_s == "e"
redef fun add(c) do
	if persisted then regen
	# Ask for room for one more byte: `enlarge(length)` does nothing
	# when `length == capacity`, and the write below would then land
	# one byte past the end of `items`.
	if length >= capacity then enlarge(length + 1)
	items[length] = c
	length += 1
end
432
# Appends the UTF-8 encoding of the character `c` to `self`
#
#     var b = new Bytes.empty
#     b.add_char('A')
#     b.add_char('キ')
#     assert b.hexdigest == "41E382AD"
fun add_char(c: Char) do
	if persisted then regen
	# Number of bytes of `c` once encoded
	var nbytes = c.u8char_len
	enlarge(length + nbytes)
	items.set_char_at(length, c)
	length += nbytes
end
447
# Appends all the bytes of `arr` to `self`
#
# When `arr` is a `Bytes`, its buffer is copied in a single operation.
#
#     var b = new Bytes.empty
#     b.append([104u8, 101u8, 108u8, 108u8, 111u8])
#     assert b.to_s == "hello"
redef fun append(arr) do
	if not arr isa Bytes then
		for byte in arr do add byte
		return
	end
	append_ns(arr.items, arr.length)
end
458
# Removes the last byte of `self` and returns it
#
#     var b = new Bytes.empty
#     b.append([0x41u8, 0x41u8, 0x18u8])
#     b.pop
#     assert b.to_s == "AA"
#
# REQUIRE: `length >= 1`
redef fun pop do
	assert length >= 1
	var last_byte = items[length - 1]
	length -= 1
	return last_byte
end
468
# Removes all the bytes by resetting `length`; the buffer itself is kept
redef fun clear do
	length = 0
end
470
# Regenerates the buffer, necessary when it was persisted
#
# The bytes are copied into a fresh `CString` of the same capacity,
# which becomes the new `items`: the previous buffer is left untouched.
private fun regen do
	var nns = new CString(capacity)
	items.copy_to(nns, length, 0, 0)
	# Switch to the private copy, else later writes would still hit the old buffer
	items = nns
	persisted = false
end
477
# Appends the `ln` first bytes of `ns` to self
#
# The buffer is enlarged when it cannot hold the `ln` additional bytes.
fun append_ns(ns: CString, ln: Int) do
	if persisted then regen
	var needed = length + ln
	if capacity < needed then enlarge(needed)
	ns.copy_to(items, ln, 0, length)
	length = needed
end
486
# Appends `ln` bytes from `ns` starting at index `from` to self
#
# The buffer is enlarged when it cannot hold the `ln` additional bytes.
fun append_ns_from(ns: CString, ln, from: Int) do
	if persisted then regen
	var needed = length + ln
	if capacity < needed then enlarge(needed)
	ns.copy_to(items, ln, from, length)
	length = needed
end
495
# Appends the bytes of `str` to `self`
#
# The work is delegated to `Text::append_to_bytes`.
fun append_text(str: Text) do
	str.append_to_bytes self
end
498
# Appends all the bytes of `self` to `b`
redef fun append_to(b) do
	b.append self
end
500
# Makes sure that the buffer can hold at least `sz` bytes
#
# Nothing is done when the capacity is already sufficient.
# Else the capacity grows (at least 16, then `capacity * 2 + 2` until it
# fits) and the bytes are moved to a newly allocated buffer.
redef fun enlarge(sz) do
	if capacity >= sz then return
	persisted = false
	var ncap = capacity
	if ncap < 16 then ncap = 16
	while ncap < sz do ncap = ncap * 2 + 2
	var ns = new CString(ncap)
	items.copy_to(ns, length, 0, 0)
	capacity = ncap
	items = ns
end
510
# Returns the bytes of `self` as a `String`, without copying them when possible
#
# When the returned string is backed by the buffer of `self`, `self` is
# marked as persisted so that later writes regenerate the buffer first.
redef fun to_s do
	persisted = true
	var r = items.to_s_unsafe(length, copy=false)
	# Compare the buffers, not the string with the buffer: a `String` is
	# never equal to a `CString`, so the flag used to be always reset.
	# NOTE(review): presumably `to_s_unsafe` uses another buffer when it has
	# to clean invalid UTF-8 sequences; confirm against `text::flat`.
	if not r isa FlatString or r.items != items then persisted = false
	return r
end
518
# Iterator over the bytes of `self`, from the first to the last
redef fun iterator do
	return new BytesIterator.with_buffer(self)
end
520
# Index in `b` of the first occurrence of `self` starting at or after `from`, or -1
#
# The returned index is the one of the first byte of the occurrence.
# An empty pattern is never found. A negative `from` is considered as 0.
redef fun first_index_in_from(b, from) do
	if is_empty then return -1
	var plen = length
	# Last index of `b` where a whole occurrence can still start
	var last_start = b.length - plen
	var fst = self[0]
	var start = if from < 0 then 0 else from
	while start <= last_start do
		# Jump to the next candidate: a byte of `b` equal to the first byte of `self`
		start = fst.first_index_in_from(b, start)
		if start == -1 or start > last_start then return -1
		# Compare the remaining bytes of the pattern
		var matched = true
		for i in [1 .. plen[ do
			if self[i] != b[start + i] then
				matched = false
				break
			end
		end
		if matched then return start
		# Mismatch: retry from the byte right after the candidate
		start += 1
	end
	return -1
end
531
# Index in `b` of the last occurrence of `self` that ends at or before `from`, or -1
#
# The returned index is the one of the first byte of the occurrence.
# An empty pattern is never found. `from` is clamped to the last index of `b`.
redef fun last_index_in_from(b, from) do
	if is_empty then return -1
	var plen = length
	# Candidate start of the occurrence: its last byte must not be after `from`
	var start = from - plen + 1
	var max_start = b.length - plen
	if start > max_start then start = max_start
	while start >= 0 do
		var matched = true
		for i in [0 .. plen[ do
			if self[i] != b[start + i] then
				matched = false
				break
			end
		end
		if matched then return start
		start -= 1
	end
	return -1
end
542
# Indexes of all the non-overlapping occurrences of `self` in `b`, in increasing order
#
# Each index is the one of the first byte of an occurrence.
redef fun search_all_in(b) do
	var ret = new Array[Int]
	var pos = first_index_in_from(b, 0)
	while pos != -1 do
		ret.add pos
		# Occurrences do not overlap: resume right after the one just found
		pos = first_index_in_from(b, pos + length)
	end
	return ret
end
556
# Splits `self` on each occurrence of the pattern `b`
#
# The occurrences of `b` are not part of the result.
# Without any occurrence, the result is a single copy of `self`.
#
#     var a = "String is string".to_bytes.split_with('s'.ascii)
#     assert a.length == 3
#     assert a[0].hexdigest == "537472696E672069"
#     assert a[1].hexdigest == "20"
#     assert a[2].hexdigest == "7472696E67"
fun split_with(b: BytePattern): Array[Bytes] do
	var hits = b.search_all_in(self)
	if hits.is_empty then return [clone]
	var parts = new Array[Bytes]
	var plen = b.pattern_length
	# Index of the first byte after the previous occurrence
	var start = 0
	for hit in hits do
		parts.add slice(start, hit - start)
		start = hit + plen
	end
	parts.add slice_from(start)
	return parts
end
576
# Splits `self` in two parts at the first occurrence of `b`
#
# The occurrence of `b` is in none of the parts.
# Without any occurrence, the result is a single copy of `self`.
#
#     var a = "String is string".to_bytes.split_once_on('s'.ascii)
#     assert a[0].hexdigest == "537472696E672069"
#     assert a[1].hexdigest == "20737472696E67"
fun split_once_on(b: BytePattern): Array[Bytes] do
	var cut = b.first_index_in(self)
	if cut == -1 then return [clone]
	var before = slice(0, cut)
	var after = slice_from(cut + b.pattern_length)
	return [before, after]
end
590
# Returns a copy of `self` where all the occurrences of `pattern` are replaced by `bytes`
#
#     var b = "String is string".to_bytes.replace(0x20u8, 0x41u8)
#     assert b.hexdigest == "537472696E6741697341737472696E67"
fun replace(pattern: BytePattern, bytes: BytePattern): Bytes do
	if is_empty then return new Bytes.empty
	var hits = pattern.search_all_in(self)
	if hits.is_empty then return clone
	var res = new Bytes.with_capacity(length)
	var plen = pattern.pattern_length
	# Index of the first byte after the previous occurrence
	var start = 0
	for hit in hits do
		# Copy what is between two occurrences, then the replacement
		res.append_ns(items.fast_cstring(start), hit - start)
		bytes.append_to res
		start = hit + plen
	end
	# Copy what follows the last occurrence
	res.append(slice_from(start))
	return res
end
609
# Decode `self` from percent (or URL) encoding to a clear string
#
# Invalid '%' are not decoded: a '%' that is not followed by two
# hexadecimal digits is copied as is.
#
#     assert "aBc09-._~".to_bytes.from_percent_encoding == "aBc09-._~".to_bytes
#     assert "%25%28%29%3c%20%3e".to_bytes.from_percent_encoding == "%()< >".to_bytes
#     assert ".com%2fpost%3fe%3dasdf%26f%3d123".to_bytes.from_percent_encoding == ".com/post?e=asdf&f=123".to_bytes
#     assert "%25%28%29%3C%20%3E".to_bytes.from_percent_encoding == "%()< >".to_bytes
#     assert "incomplete %".to_bytes.from_percent_encoding == "incomplete %".to_bytes
#     assert "incomplete %4".to_bytes.from_percent_encoding == "incomplete %4".to_bytes
#     assert "invalid % usage".to_bytes.from_percent_encoding == "invalid % usage".to_bytes
#     assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".to_bytes.from_percent_encoding == "éあいう".to_bytes
#     assert "%1 %A %C3%A9A9".to_bytes.from_percent_encoding == "%1 %A éA9".to_bytes
fun from_percent_encoding: Bytes do
	var tmp = new Bytes.with_capacity(length)
	var percent = '%'.ascii
	var pos = 0
	while pos < length do
		var b = self[pos]
		if b != percent then
			tmp.add b
			pos += 1
			continue
		end
		# A complete escape needs two more bytes after the '%':
		# with less than 3 bytes left, `self[pos + 2]` does not exist.
		if length - pos < 3 then
			tmp.add percent
			pos += 1
			continue
		end
		var bn = self[pos + 1]
		var bnn = self[pos + 2]
		if not bn.is_valid_hexdigit or not bnn.is_valid_hexdigit then
			tmp.add percent
			pos += 1
			continue
		end
		# High nibble first
		tmp.add((bn.hexdigit_to_byteval << 4) + bnn.hexdigit_to_byteval)
		pos += 3
	end
	return tmp
end
649
# Does `self` start with the pattern `b`?
fun has_prefix(b: BytePattern): Bool do
	return b.is_prefix(self)
end
652
# Does `self` end with the pattern `b`?
fun has_suffix(b: BytePattern): Bool do
	return b.is_suffix(self)
end
655
# Are the bytes of `self` the last bytes of `b`?
#
# An empty `self` is a suffix of any `b`.
redef fun is_suffix(b) do
	if length > b.length then return false
	# Index in `b` of the byte facing the first byte of `self`
	var off = b.length - length
	# Every byte is compared, including the one at index 0
	for i in [0 .. length[ do if self[i] != b[i + off] then return false
	return true
end
667
# Are the bytes of `self` the first bytes of `b`?
#
# An empty `self` is a prefix of any `b`.
redef fun is_prefix(b) do
	var n = length
	if n > b.length then return false
	var i = 0
	while i < n do
		if self[i] != b[i] then return false
		i += 1
	end
	return true
end
673 end
674
# Iterator over the bytes of a `Bytes`
private class BytesIterator
	super IndexedIterator[Byte]

	# Buffer the bytes are read from
	var tgt: CString

	# Index in `tgt` of the current byte
	redef var index

	# Exclusive upper bound of `index`: the iteration is over once it is reached
	var max: Int

	# Iterates over `b`, from index 0 up to its `length`
	#
	# `b.items` and `b.length` are read once, when the iterator is created.
	init with_buffer(b: Bytes) do init(b.items, 0, b.length)

	redef fun is_ok do return index < max

	redef fun next do index += 1

	redef fun item do return tgt[index]
end
692
693 redef class Int
# A signed big-endian representation of `self`
#
# ~~~
# assert 1.to_bytes.hexdigest == "01"
# assert 255.to_bytes.hexdigest == "FF"
# assert 256.to_bytes.hexdigest == "0100"
# assert 65535.to_bytes.hexdigest == "FFFF"
# assert 65536.to_bytes.hexdigest == "010000"
# ~~~
#
# Negative values are converted to their two's complement.
# Be careful as the result can be ambiguous.
#
# ~~~
# assert (-1).to_bytes.hexdigest == "FF"
# assert (-32).to_bytes.hexdigest == "E0"
# assert (-512).to_bytes.hexdigest == "FE00"
# assert (-65794).to_bytes.hexdigest == "FEFEFE"
# ~~~
#
# Optionally, set `n_bytes` to the desired number of bytes in the output.
# This setting can disambiguate the result between positive and negative
# integers. Be careful with this parameter as the result may overflow.
#
# ~~~
# assert 1.to_bytes(2).hexdigest == "0001"
# assert 65535.to_bytes(2).hexdigest == "FFFF"
# assert (-1).to_bytes(2).hexdigest == "FFFF"
# assert (-512).to_bytes(4).hexdigest == "FFFFFE00"
# assert 0x123456.to_bytes(2).hexdigest == "3456"
# ~~~
#
# For 0, a Bytes object with single nul byte is returned (instead of an empty Bytes object).
#
# ~~~
# assert 0.to_bytes.hexdigest == "00"
# ~~~
#
# For positive integers, `Bytes::to_i` can reverse the operation.
#
# ~~~
# assert 1234.to_bytes.to_i == 1234
# ~~~
fun to_bytes(n_bytes: nullable Int): Bytes do

	# If 0, force using at least one byte
	if self == 0 and n_bytes == null then n_bytes = 1

	# Number of bytes needed by the absolute value (log256)
	var len = 1
	var bound = 256
	var magnitude = self.abs
	while magnitude >= bound do
		len += 1
		bound *= 256
	end

	# Negative values: two's complement on `len` bytes
	var value = self
	if self < 0 then
		# Mask with all the bits of the `len` bytes set
		var ff = 0
		for j in [0..len[ do ff = ff * 0x100 + 0xFF

		value = ((-self) ^ ff) + 1
	end

	# Cut long values
	if n_bytes != null and len > n_bytes then len = n_bytes

	# Allocate the buffer and pad it according to the sign
	var cap = n_bytes or else len
	var res = new Bytes.with_capacity(cap)

	var filler = 0u8
	if self < 0 then filler = 0xFFu8
	for i in [0..cap[ do res[i] = filler

	# Fill the `len` last bytes, least significant byte first
	var idx = cap - 1
	var stop = cap - len
	while idx >= stop do
		res[idx] = (value % 256).to_b
		value /= 256
		idx -= 1
	end

	return res
end
786 end
787
788 redef class Text
# Returns a new `Bytes` holding a copy of the bytes of `self`
#
# ~~~nit
# assert "String".to_bytes isa Bytes
# assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
# ~~~
fun to_bytes: Bytes do
	var res = new Bytes.with_capacity(byte_length)
	append_to_bytes(res)
	return res
end
800
# Are all the bytes of `self` hexadecimal digits?
#
#     assert "0B1d3F".is_valid_hexdigest
#     assert not "5G".is_valid_hexdigest
fun is_valid_hexdigest: Bool do
	for byte in bytes do
		if not byte.is_valid_hexdigit then return false
	end
	return true
end
809
# Appends `self.bytes` to `b`
#
# The bytes are copied one flat substring at a time.
fun append_to_bytes(b: Bytes) do
	for s in substrings do
		# Offset in `s.items` of the first byte of `s`
		var from = 0
		if s isa FlatString then from = s.first_byte
		b.append_ns_from(s.items, s.byte_length, from)
	end
end
817
# Returns a new `Bytes` instance with the digest as content
#
#     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
#     assert "0B1F4D".hexdigest_to_bytes.hexdigest == "0B1F4D"
#
# Characters that are not hexadecimal digits are ignored.
#
#     assert "z0B1 F4\nD".hexdigest_to_bytes.hexdigest == "0B1F4D"
#     assert "\\x0b1 \\xf4d".hexdigest_to_bytes.hexdigest == "0B1F4D"
#
# When the number of hexadecimal digit is not even, then a leading 0 is
# implicitly considered to fill the left byte (the most significant one).
#
#     assert "1".hexdigest_to_bytes.hexdigest == "01"
#     assert "FFF".hexdigest_to_bytes.hexdigest == "0FFF"
#
# `Bytes::hexdigest` is a loosely reverse method since its
# results contain only pairs of uppercase hexadecimal digits.
#
#     assert "ABCD".hexdigest_to_bytes.hexdigest == "ABCD"
#     assert "a b c".hexdigest_to_bytes.hexdigest == "0ABC"
fun hexdigest_to_bytes: Bytes do
	var src = bytes
	var n = byte_length

	# First pass: count the hexadecimal digits
	var ndigits = 0
	for p in [0 .. n[ do
		if src[p].is_valid_hexdigit then ndigits += 1
	end

	# Allocate the result buffer
	var res = new Bytes.with_capacity((ndigits + 1) / 2)

	# Digits still missing to complete the current byte.
	# With an odd number of digits, the first byte is made of a single digit.
	var missing = 2 - ndigits % 2
	var acc = 0u8 # current accumulated byte value

	# Second pass: pack the digits
	for p in [0 .. n[ do
		var c = src[p]
		if not c.is_valid_hexdigit then continue
		acc = (acc << 4) | c.hexdigit_to_byteval
		missing -= 1
		if missing == 0 then
			# Last digit known: store and restart
			res.add acc
			missing = 2
			acc = 0u8
		end
	end
	return res
end
874
# Gets the hexdigest of the bytes of `self`
#
# Each byte gives two uppercase hexadecimal digits.
#
#     assert "&lt;STRING&#47;&rt;".hexdigest == "266C743B535452494E47262334373B2672743B"
fun hexdigest: String do
	var ln = byte_length
	var olen = ln * 2
	var outns = new CString(olen)
	for i in [0 .. ln[ do bytes[i].add_digest_at(outns, i * 2)
	return new FlatString.with_infos(outns, olen, 0)
end
888
# Return a `Bytes` instance where Nit escape sequences are transformed.
#
#     assert "B\\n\\x41\\u0103D3".unescape_to_bytes.hexdigest == "420A41F0908F93"
#
# `Bytes::chexdigest` is a loosely reverse methods since its result is only made
# of `"\x??"` escape sequences.
#
#     assert "\\x41\\x42\\x43".unescape_to_bytes.chexdigest == "\\x41\\x42\\x43"
#     assert "B\\n\\x41\\u0103D3".unescape_to_bytes.chexdigest == "\\x42\\x0A\\x41\\xF0\\x90\\x8F\\x93"
fun unescape_to_bytes: Bytes do
	var out = new Bytes.with_capacity(self.byte_length)
	# Was the previous character an unprocessed `\`?
	var escaped = false
	var idx = 0
	while idx < length do
		var ch = self[idx]
		if escaped then
			escaped = false
			if ch == 'n' then
				out.add_char('\n')
			else if ch == 'r' then
				out.add_char('\r')
			else if ch == 't' then
				out.add_char('\t')
			else if ch == '0' then
				out.add_char('\0')
			else if ch == 'x' or ch == 'X' then
				# `\x` followed by 2 hexadecimal digits: a raw byte
				var hx = substring(idx + 1, 2)
				if hx.is_hex then
					out.add(hx.to_hex.to_b)
				else
					out.add_char(ch)
				end
				idx += 2
			else if ch == 'u' or ch == 'U' then
				# `\u` followed by 6 hexadecimal digits: a code point
				var hx = substring(idx + 1, 6)
				if hx.is_hex then
					out.add_char(hx.to_hex.code_point)
				else
					out.add_char(ch)
				end
				idx += 6
			else
				# Any other escaped character stands for itself
				out.add_char(ch)
			end
		else if ch == '\\' then
			escaped = true
		else
			out.add_char(ch)
		end
		idx += 1
	end
	return out
end
945
# Return a `Bytes` by reading 0 and 1.
#
#     assert "1010101100001101".binarydigest_to_bytes.hexdigest == "AB0D"
#
# Note that characters that are neither 0 or 1 are just ignored.
#
#     assert "a1B01 010\n1100あ001101".binarydigest_to_bytes.hexdigest == "AB0D"
#     assert "hello".binarydigest_to_bytes.is_empty
#
# When the number of bits is not divisible by 8, then leading 0 are
# implicitly considered to fill the left byte (the most significant one).
#
#     assert "1".binarydigest_to_bytes.hexdigest == "01"
#     assert "1111111".binarydigest_to_bytes.hexdigest == "7F"
#     assert "1000110100".binarydigest_to_bytes.hexdigest == "0234"
#
# `Bytes::binarydigest` is a loosely reverse method since its
# results contain only 1 and 0 by blocks of 8.
#
#     assert "1010101100001101".binarydigest_to_bytes.binarydigest == "1010101100001101"
#     assert "1".binarydigest_to_bytes.binarydigest == "00000001"
fun binarydigest_to_bytes: Bytes
do
	var src = bytes
	var n = byte_length

	# First pass: count the bits
	var bitlen = 0
	for p in [0 .. n[ do
		var c = src[p]
		if c == 0x30u8 or c == 0x31u8 then bitlen += 1 # b'0' or b'1'
	end

	# Allocate (and take care of the padding)
	var res = new Bytes.with_capacity((bitlen + 7) / 8)

	# Bits still missing to complete the current byte.
	# The first byte may be made of less than 8 bits.
	var missing = (bitlen + 7) % 8 + 1
	var acc = 0u8 # current accumulated byte value

	# Second pass: pack the bits
	for p in [0 .. n[ do
		var c = src[p]
		if c != 0x30u8 and c != 0x31u8 then continue
		acc = acc << 1
		if c == 0x31u8 then acc = acc | 1u8 # b'1'

		missing -= 1
		if missing == 0 then
			# Last bit known: store and restart
			res.add acc
			missing = 8
			acc = 0u8
		end
	end
	return res
end
1009 end
1010
redef class FlatText
	# Appends the bytes of `self` to `b` in a single copy
	#
	# Nothing is appended when `_items` is not set.
	redef fun append_to_bytes(b) do
		if not isset _items then return
		# Offset in `items` of the first byte of `self`
		var from = 0
		if self isa FlatString then from = first_byte
		b.append_ns_from(items, byte_length, from)
	end
end
1017
redef class CString
	# Creates a new `Bytes` object from `self` with `len` as length
	#
	# `self` is used directly as the buffer of the result, without copy.
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes(len: nullable Int): Bytes do
		var n = len or else cstring_length
		return new Bytes(self, n, n)
	end

	# Creates a new `Bytes` object from a copy of `self` with `len` as length
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes_with_copy(len: nullable Int): Bytes do
		var n = len or else cstring_length
		var dup = new CString(n)
		copy_to(dup, n, 0, 0)
		return new Bytes(dup, n, n)
	end
end
1037
# Joins the elements of `arr` in a new `Bytes`, separated by `sep`
#
# Without `sep`, the elements are just concatenated.
#
#     assert join_bytes(["String".to_bytes, "is".to_bytes, "string".to_bytes], ' '.ascii).hexdigest == "537472696E6720697320737472696E67"
fun join_bytes(arr: Array[Bytes], sep: nullable BytePattern): Bytes do
	if arr.is_empty then return new Bytes.empty
	var glue = sep or else new Bytes.empty
	# Final length: one separator between each pair of elements, plus the elements
	var total = glue.pattern_length * (arr.length - 1)
	for chunk in arr do total += chunk.length
	var res = new Bytes.with_capacity(total)
	var is_first = true
	for chunk in arr do
		if is_first then
			is_first = false
		else
			glue.append_to(res)
		end
		res.append chunk
	end
	return res
end