1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Services for byte streams and arrays
19 import collection
::array
20 intrude import text
::flat
22 # Any kind of entity which can be searched for in a Sequence of Byte
# Index of the first occurrence of `self` in `b`, or -1 if not found
fun first_index_in(b: SequenceRead[Byte]): Int
do
	# A search over the whole sequence is a search starting at index 0.
	return first_index_in_from(b, 0)
end
27 # Return the index of the first occurrence of `self` in `b` at or after index `from`, or -1 if not found
28 fun first_index_in_from
(b
: SequenceRead[Byte], from
: Int): Int is abstract
# Index of the last occurrence of `self` in `b`, or -1 if not found
fun last_index_in(b: SequenceRead[Byte]): Int
do
	# The backward search starts on the final index of `b`.
	var last_pos = b.length - 1
	return last_index_in_from(b, last_pos)
end
33 # Return the index of the last occurrence of `self` in `b` when searching backwards from index `from`, or -1 if not found
34 fun last_index_in_from
(b
: SequenceRead[Byte], from
: Int): Int is abstract
36 # Returns the indexes of all the occurrences of `self` in `b`
37 fun search_all_in
(b
: SequenceRead[Byte]): SequenceRead[Int] is abstract
39 # Length of the pattern, counted in bytes
40 fun pattern_length
: Int is abstract
42 # Appends `self` at the end of `b`
43 fun append_to
(b
: Sequence[Byte]) is abstract
45 # Is `self` a prefix of `b`, i.e. does `b` start with `self`?
46 fun is_prefix
(b
: SequenceRead[Byte]): Bool is abstract
48 # Is `self` a suffix of `b`, i.e. does `b` end with `self`?
49 fun is_suffix
(b
: SequenceRead[Byte]): Bool is abstract
# Write `self` as two uppercase hexadecimal ASCII digits into `ns`
#
# The digit of the high nibble is stored at `ns[pos]`, the digit of the
# low nibble at `ns[pos + 1]`.
private fun add_digest_at(ns: NativeString, pos: Int) do
	# High nibble: 0..9 become '0'..'9' (+0x30), 10..15 become 'A'..'F' (+0x37).
	var tmp = (0xF0u8 & self) >> 4
	ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
	# Low nibble: it must be recomputed, otherwise the first digit is written twice.
	tmp = 0x0Fu8 & self
	ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
end
63 # Is `self` a valid hexadecimal digit (in ASCII)
66 # intrude import core::bytes
67 # assert not '/'.ascii.is_valid_hexdigit
68 # assert '0'.ascii.is_valid_hexdigit
69 # assert '9'.ascii.is_valid_hexdigit
70 # assert not ':'.ascii.is_valid_hexdigit
71 # assert not '@'.ascii.is_valid_hexdigit
72 # assert 'A'.ascii.is_valid_hexdigit
73 # assert 'F'.ascii.is_valid_hexdigit
74 # assert not 'G'.ascii.is_valid_hexdigit
75 # assert not '`'.ascii.is_valid_hexdigit
76 # assert 'a'.ascii.is_valid_hexdigit
77 # assert 'f'.ascii.is_valid_hexdigit
78 # assert not 'g'.ascii.is_valid_hexdigit
private fun is_valid_hexdigit: Bool
do
	# ASCII '0' .. '9'
	if self >= 0x30u8 and self <= 0x39u8 then return true
	# ASCII 'A' .. 'F'
	if self >= 0x41u8 and self <= 0x46u8 then return true
	# ASCII 'a' .. 'f'
	return self >= 0x61u8 and self <= 0x66u8
end
86 # `self` as a hexdigit to its byte value
89 # intrude import core::bytes
90 # assert 0x39u8.hexdigit_to_byteval == 0x09u8
91 # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
94 # REQUIRE: `self.is_valid_hexdigit`
private fun hexdigit_to_byteval: Byte do
	if self >= 0x30u8 and self <= 0x39u8 then
		# '0' .. '9' map to 0 .. 9
		return self - 0x30u8
	else if self >= 0x41u8 and self <= 0x46u8 then
		# 'A' .. 'F' map to 10 .. 15
		return self - 0x37u8
	else if self >= 0x61u8 and self <= 0x66u8 then
		# 'a' .. 'f' map to 10 .. 15
		return self - 0x57u8
	end
	# Happens only if the requirement is not met.
	# i.e. this abort is here to please the compiler
	abort
end
redef fun first_index_in_from(b, from) do
	# Scan forward from `from`; the first equal byte is the answer.
	for i in [from .. b.length[ do if b[i] == self then return i
	# Nothing matched: report it with -1 as the method documentation promises.
	return -1
end
redef fun last_index_in_from(b, from) do
	# Walk from `from` down to 0; the first equal byte met is the last occurrence.
	for i in [0 .. from].step(-1) do if b[i] == self then return i
	# Nothing matched: report it with -1 as the method documentation promises.
	return -1
end
redef fun search_all_in(b) do
	var ret = new Array[Int]
	var pos = 0
	loop
		pos = first_index_in_from(b, pos)
		if pos == -1 then return ret
		ret.add pos
		# Resume the search right after the occurrence just recorded.
		pos += 1
	end
end
redef fun pattern_length
do
	# A lone byte is a pattern of exactly one byte.
	return 1
end
redef fun append_to(b)
do
	# Appending a single byte is a plain push at the end of `b`.
	b.push(self)
end
133 # assert 'b'.ascii.is_suffix("baqsdb".to_bytes)
134 # assert not 'b'.ascii.is_suffix("baqsd".to_bytes)
redef fun is_suffix(b)
do
	# An empty sequence has no last byte to compare with.
	if b.length == 0 then return false
	return b.last == self
end
137 # assert 'b'.ascii.is_prefix("baqsdb".to_bytes)
138 # assert not 'b'.ascii.is_prefix("aqsdb".to_bytes)
redef fun is_prefix(b)
do
	# An empty sequence has no first byte to compare with.
	if b.length == 0 then return false
	return b.first == self
end
142 # A buffer containing Byte-manipulation facilities
144 # Uses Copy-On-Write when persisted
146 super AbstractArray[Byte]
149 # A NativeString being a char*, it can be used as underlying representation here.
150 var items
: NativeString
152 # Number of bytes in the array
155 # Capacity of the array, i.e. the size requested for the `NativeString` held in `items`
156 private var capacity
: Int
158 # Has this buffer been persisted (to_s'd)?
160 # Used for Copy-On-Write: the mutating services call `regen` first when this is set
161 private var persisted
= false
163 # var b = new Bytes.empty
164 # assert b.to_s == ""
166 var ns
= new NativeString(0)
# Init an empty `Bytes` whose buffer can hold `cap` bytes before growing
init with_capacity(cap: Int) do
	var ns = new NativeString(cap)
	# Same argument order as `new Bytes(items, length, capacity)`:
	# no content yet (length 0) over a buffer of `cap` bytes.
	init(ns, 0, cap)
end
redef fun pattern_length
do
	# Used as a pattern, a `Bytes` is as long as its content.
	return length
end
redef fun is_empty
do
	# Emptiness depends on the number of stored bytes only.
	var nothing_stored = length == 0
	return nothing_stored
end
180 # var b = new Bytes.empty
182 # assert b[0] == 101u8
189 # Returns a copy of `self`
191 var b
= new Bytes.with_capacity
(length
)
196 # Trims off the whitespaces at the beginning and the end of `self`
198 # var b = "102041426E6F1020" .hexdigest_to_bytes
199 # assert b.trim.hexdigest == "41426E6F"
201 # NOTE: A whitespace is defined here as a byte whose value is <= 0x20
205 if self[st
] > 0x20u
8 then break
208 if st
>= length
then return new Bytes.empty
211 if self[ed
] > 0x20u
8 then break
214 return slice
(st
, ed
- st
+ 1)
217 # Returns a subset of the content of `self` starting at `from` and of length `count`
219 # var b = "abcd".to_bytes
220 # assert b.slice(1, 2).hexdigest == "6263"
221 # assert b.slice(-1, 2).hexdigest == "61"
222 # assert b.slice(1, 0).hexdigest == ""
223 # assert b.slice(2, 5).hexdigest == "6364"
fun slice(from, count: Int): Bytes do
	if count <= 0 then return new Bytes.empty

	# A negative `from` consumes part of `count`: nothing exists before index 0.
	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end

	# Clamp the request to the bytes actually available after `from`.
	if (count + from) > length then count = length - from
	if count <= 0 then return new Bytes.empty

	var ret = new Bytes.with_capacity(count)
	ret.append_ns(items.fast_cstring(from), count)
	return ret
end
242 # Returns a copy of `self` starting at `from`
244 # var b = "abcd".to_bytes
245 # assert b.slice_from(1).hexdigest == "626364"
246 # assert b.slice_from(-1).hexdigest == "61626364"
247 # assert b.slice_from(2).hexdigest == "6364"
fun slice_from(from: Int): Bytes
do
	# Starting at or after the end leaves nothing to copy.
	if from >= length then return new Bytes.empty
	# A negative start is treated as the beginning of `self`.
	var start = if from < 0 then 0 else from
	return slice(start, length)
end
254 # Returns self as an hexadecimal digest.
256 # Also known as plain hexdump or postscript hexdump.
259 # var b = "abcd".to_bytes
260 # assert b.hexdigest == "61626364"
261 # assert b.hexdigest.hexdigest_to_bytes == b
fun hexdigest: String do
	# Each byte is rendered as two hexadecimal characters.
	var elen = length * 2
	var ns = new NativeString(elen)
	# `i` walks the bytes of `self`, `oi` the output positions (two per byte).
	var i = 0
	var oi = 0
	while i < length do
		self[i].add_digest_at(ns, oi)
		i += 1
		oi += 2
	end
	return new FlatString.full(ns, elen, 0, elen)
end
276 # var b = new Bytes.with_capacity(1)
278 # assert b.to_s == "e"
redef fun []=(i, v) do
	if persisted then regen
	# Only existing positions, or the position right after the last byte, can be written.
	assert i >= 0
	assert i <= length
	# Writing one past the end is an append; `add` is in charge of growing the buffer.
	if i == length then
		add(v)
		return
	end
	items[i] = v
end
287 # var b = new Bytes.empty
289 # assert b.to_s == "e"
291 if persisted
then regen
292 if length
>= capacity
then
299 # Adds the UTF-8 representation of `c` to `self`
301 # var b = new Bytes.empty
304 # assert b.hexdigest == "41E382AD"
fun add_char(c: Char) do
	if persisted then regen
	# `u8char_len` gives the size of `c` to reserve and to add to `length`.
	var cln = c.u8char_len
	var ln = length
	# Make room first, then write the character at the current end.
	enlarge(ln + cln)
	items.set_char_at(ln, c)
	length += cln
end
314 # var b = new Bytes.empty
315 # b.append([104u8, 101u8, 108u8, 108u8, 111u8])
316 # assert b.to_s == "hello"
redef fun append(arr) do
	if arr isa Bytes then
		# Another `Bytes`: hand its underlying buffer to `append_ns` in a single call.
		append_ns(arr.items, arr.length)
	else
		# Any other collection: add its bytes one by one.
		for i in arr do add i
	end
end
325 # var b = new Bytes.empty
326 # b.append([0x41u8, 0x41u8, 0x18u8])
328 # assert b.to_s == "AA"
redef fun clear
do
	# Only `length` is reset.
	length = 0
end
337 # Regenerates the buffer, necessary when it was persisted
339 var nns
= new NativeString(capacity
)
340 items
.copy_to
(nns
, length
, 0, 0)
# Appends the `ln` first bytes of `ns` to self
fun append_ns(ns: NativeString, ln: Int) do
	if persisted then regen
	var nlen = length + ln
	if nlen > capacity then enlarge(nlen)
	ns.copy_to(items, ln, 0, length)
	# Account for the copied bytes, otherwise the next write lands on them.
	length += ln
end
# Appends `ln` bytes from `ns` starting at index `from` to self
fun append_ns_from(ns: NativeString, ln, from: Int) do
	if persisted then regen
	var nlen = length + ln
	if nlen > capacity then enlarge(nlen)
	ns.copy_to(items, ln, from, length)
	# Account for the copied bytes, otherwise the next write lands on them.
	length += ln
end
# Appends the bytes of `s` to `self`
fun append_text(s: Text)
do
	# Copy the substrings of `s` one after the other, in iteration order.
	for chunk in s.substrings do
		append_ns(chunk.fast_cstring, chunk.bytelen)
	end
end
redef fun append_to(b)
do
	# All the bytes of `self` go at the end of `b`.
	b.append(self)
end
redef fun enlarge(sz) do
	if capacity >= sz then return
	# NOTE(review): a fresh buffer replaces `items` below, so the copy-on-write
	# flag is cleared here; confirm against the contract of `regen`.
	persisted = false
	# Grow geometrically until `sz` bytes fit.
	while capacity < sz do capacity = capacity * 2 + 2
	var ns = new NativeString(capacity)
	items.copy_to(ns, length, 0, 0)
	# Switch to the enlarged buffer; without this the copy above is lost.
	items = ns
end
383 var r
= b
.items
.to_s_with_length
(length
)
384 if r
!= items
then persisted
= false
redef fun iterator
do
	# The iterator is built from `self` through `BytesIterator.with_buffer`.
	var it = new BytesIterator.with_buffer(self)
	return it
end
redef fun first_index_in_from(b, from) do
	# An empty pattern is reported as not found.
	if is_empty then return -1
	if from < 0 then from = 0
	# Last index of `b` at which a full copy of `self` still fits.
	var last_start = b.length - length
	var start = from
	while start <= last_start do
		# Compare `self` against the window of `b` beginning at `start`.
		var i = 0
		while i < length and b[start + i] == self[i] do i += 1
		# Every byte matched: `start` is the index of the first occurrence.
		if i == length then return start
		start += 1
	end
	return -1
end
redef fun last_index_in_from(b, from) do
	# An empty pattern is reported as not found.
	if is_empty then return -1
	# `from` is the highest index of `b` an occurrence may cover.
	if from >= b.length then from = b.length - 1
	# Start index of the window of `b` that ends exactly on `from`.
	var start = from - length + 1
	while start >= 0 do
		var i = 0
		while i < length and b[start + i] == self[i] do i += 1
		# Every byte matched: `start` is where the last occurrence begins.
		if i == length then return start
		start -= 1
	end
	return -1
end
redef fun search_all_in(b) do
	var ret = new Array[Int]
	var pos = first_index_in_from(b, 0)
	while pos != -1 do
		ret.add pos
		# Occurrences are reported without overlap: resume after the whole match.
		pos = first_index_in_from(b, pos + length)
	end
	return ret
end
426 # Splits the content on self when encountering `b`
428 # var a = "String is string".to_bytes.split_with('s'.ascii)
429 # assert a.length == 3
430 # assert a[0].hexdigest == "537472696E672069"
431 # assert a[1].hexdigest == "20"
432 # assert a[2].hexdigest == "7472696E67"
fun split_with(b: BytePattern): Array[Bytes] do
	var fst = b.search_all_in(self)
	# No separator at all: the result is a single copy of `self`.
	if fst.is_empty then return [clone]
	var retarr = new Array[Bytes]
	# `prev` is the index right after the previous separator.
	var prev = 0
	for i in fst do
		retarr.add(slice(prev, i - prev))
		prev = i + b.pattern_length
	end
	# What follows the last separator is the final part.
	retarr.add slice_from(prev)
	return retarr
end
446 # Splits `self` in two parts at the first occurrence of `b`
448 # var a = "String is string".to_bytes.split_once_on('s'.ascii)
449 # assert a[0].hexdigest == "537472696E672069"
450 # assert a[1].hexdigest == "20737472696E67"
fun split_once_on(b: BytePattern): Array[Bytes] do
	var spl = b.first_index_in(self)
	# `b` is absent: the only part is a copy of `self`.
	if spl == -1 then return [clone]
	var ret = new Array[Bytes].with_capacity(2)
	# Part before the separator, then part after it (separator excluded).
	ret.add(slice(0, spl))
	ret.add(slice_from(spl + b.pattern_length))
	return ret
end
460 # Replaces all the occurrences of `pattern` in `self` by `bytes`
462 # var b = "String is string".to_bytes.replace(0x20u8, 0x41u8)
463 # assert b.hexdigest == "537472696E6741697341737472696E67"
fun replace(pattern: BytePattern, bytes: BytePattern): Bytes do
	if is_empty then return new Bytes.empty
	var pos = pattern.search_all_in(self)
	# Nothing to replace: return an independent copy.
	if pos.is_empty then return clone
	var ret = new Bytes.with_capacity(length)
	# `prev` is the index right after the previous occurrence of `pattern`.
	var prev = 0
	for i in pos do
		# Keep what lies between two occurrences, then write the replacement.
		ret.append_ns(items.fast_cstring(prev), i - prev)
		bytes.append_to ret
		prev = i + pattern.pattern_length
	end
	# Whatever follows the last occurrence is kept untouched.
	ret.append(slice_from(pos.last + pattern.pattern_length))
	return ret
end
479 # Decode `self` from percent (or URL) encoding to a clear string
481 # Replace invalid use of '%' with '?'.
483 # assert "aBc09-._~".to_bytes.from_percent_encoding == "aBc09-._~".to_bytes
484 # assert "%25%28%29%3c%20%3e".to_bytes.from_percent_encoding == "%()< >".to_bytes
485 # assert ".com%2fpost%3fe%3dasdf%26f%3d123".to_bytes.from_percent_encoding == ".com/post?e=asdf&f=123".to_bytes
486 # assert "%25%28%29%3C%20%3E".to_bytes.from_percent_encoding == "%()< >".to_bytes
487 # assert "incomplete %".to_bytes.from_percent_encoding == "incomplete ?".to_bytes
488 # assert "invalid % usage".to_bytes.from_percent_encoding == "invalid ? usage".to_bytes
489 # assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".to_bytes.from_percent_encoding == "éあいう".to_bytes
fun from_percent_encoding: Bytes do
	var tmp = new Bytes.with_capacity(length)
	var pos = 0
	while pos < length do
		var b = self[pos]
		# Any byte other than '%' is copied as is.
		if b != '%'.ascii then
			tmp.add b
			pos += 1
			continue
		end
		# An escape is '%' plus two digits: 3 bytes must remain from `pos`.
		# With fewer, reading `self[pos + 2]` would go past the end.
		if length - pos < 3 then
			tmp.add '?'.ascii
			pos += 1
			continue
		end
		var bn = self[pos + 1]
		var bnn = self[pos + 2]
		# An invalid '%' is replaced by '?'; the bytes after it are then decoded normally.
		if not bn.is_valid_hexdigit or not bnn.is_valid_hexdigit then
			tmp.add '?'.ascii
			pos += 1
			continue
		end
		tmp.add((bn.hexdigit_to_byteval << 4) + bnn.hexdigit_to_byteval)
		pos += 3
	end
	return tmp
end
# Does `self` start with `b` ?
fun has_prefix(b: BytePattern): Bool
do
	# The check is delegated to the pattern's own `is_prefix`.
	return b.is_prefix(self)
end
# Does `self` end with `b` ?
fun has_suffix(b: BytePattern): Bool
do
	# The check is delegated to the pattern's own `is_suffix`.
	return b.is_suffix(self)
end
redef fun is_suffix(b) do
	# A pattern longer than `b` cannot be one of its suffixes.
	if length > b.length then return false
	# Index of `b` where its last `length` bytes begin.
	var offset = b.length - length
	# Every byte is compared, the one at index 0 included.
	for i in [0 .. length[ do
		if self[i] != b[offset + i] then return false
	end
	return true
end
redef fun is_prefix(b) do
	# A pattern longer than `b` cannot be one of its prefixes.
	if length > b.length then return false
	for i in [0 .. length[ do if self[i] != b[i] then return false
	# No mismatch over the whole pattern.
	return true
end
# Iterator over the bytes of a `Bytes`, in increasing index order
private class BytesIterator
	super IndexedIterator[Byte]

	# Buffer the bytes are read from
	var tgt: NativeString

	# Index of the current byte in `tgt`
	redef var index

	# Index at which the iteration stops (exclusive)
	var max: Int

	# Iterate over the bytes of `b`, from index 0 up to `b.length` (exclusive)
	init with_buffer(b: Bytes) do init(b.items, 0, b.length)

	redef fun is_ok do return index < max

	redef fun next do index += 1

	redef fun item do return tgt[index]
end
562 # Returns a mutable copy of `self`'s bytes
565 # assert "String".to_bytes isa Bytes
566 # assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
fun to_bytes: Bytes do
	# The buffer is sized once for the whole text, then filled.
	var b = new Bytes.with_capacity(bytelen)
	append_to_bytes b
	return b
end
574 # Is `self` a valid hexdigest ?
576 # assert "0B1d3F".is_valid_hexdigest
577 # assert not "5G".is_valid_hexdigest
fun is_valid_hexdigest: Bool do
	# A single byte that is not an hexadecimal digit invalidates the digest.
	for i in bytes do if not i.is_valid_hexdigit then return false
	return true
end
# Appends `self.bytes` to `b`
fun append_to_bytes(b: Bytes)
do
	for chunk in substrings do
		# A `FlatString` is read from its `first_byte`, any other chunk from offset 0.
		var start = 0
		if chunk isa FlatString then start = chunk.first_byte
		b.append_ns_from(chunk.items, chunk.bytelen, start)
	end
end
591 # Returns a new `Bytes` instance with the digest as content
593 # assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
594 # assert "0B1F4D".hexdigest_to_bytes.hexdigest == "0B1F4D"
596 # REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0
fun hexdigest_to_bytes: Bytes do
	var b = bytes
	var pos = 0
	var max = bytelen
	# Two hexadecimal digits produce one byte.
	var ret = new Bytes.with_capacity(max / 2)
	while pos < max do
		# First digit is the high nibble, second digit the low nibble.
		ret.add((b[pos].hexdigit_to_byteval << 4) |
			b[pos + 1].hexdigit_to_byteval)
		pos += 2
	end
	return ret
end
610 # Gets the hexdigest of the bytes of `self`
612 # assert "&lt;STRING&#47;&rt;".hexdigest == "266C743B535452494E47262334373B2672743B"
fun hexdigest: String do
	var ln = bytelen
	# Each byte is rendered as two hexadecimal characters.
	var outns = new NativeString(ln * 2)
	# `oi` is the output position, advancing by two for every input byte.
	var oi = 0
	for i in [0 .. ln[ do
		bytes[i].add_digest_at(outns, oi)
		oi += 2
	end
	return new FlatString.with_infos(outns, ln * 2, 0)
end
624 # Return a `Bytes` instance where Nit escape sequences are transformed.
626 # assert "B\\n\\x41\\u0103D3".unescape_to_bytes.hexdigest == "420A41F0908F93"
627 fun unescape_to_bytes
: Bytes do
628 var res
= new Bytes.with_capacity
(self.bytelen
)
629 var was_slash
= false
633 if not was_slash
then
645 else if c
== 'r' then
647 else if c
== 't' then
649 else if c
== '0' then
651 else if c
== 'x' or c
== 'X' then
652 var hx
= substring
(i
+ 1, 2)
654 res
.add
(hx
.to_hex
.to_b
)
659 else if c
== 'u' or c
== 'U' then
660 var hx
= substring
(i
+ 1, 6)
662 res
.add_char
(hx
.to_hex
.code_point
)
redef fun append_to_bytes(b)
do
	# A `FlatString` is read from its `first_byte`, anything else from offset 0.
	var start = 0
	if self isa FlatString then start = first_byte
	b.append_ns_from(items, bytelen, start)
end
redef class NativeString
	# Creates a new `Bytes` object over `self` (no copy is made) with `len` as length
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes(len: nullable Int): Bytes
	do
		var size = len or else cstring_length
		# `size` serves both as the length and as the capacity.
		return new Bytes(self, size, size)
	end

	# Creates a new `Bytes` object from a copy of `self` with `len` as length
	#
	# If `len` is null, strlen will determine the length of the Bytes
	fun to_bytes_with_copy(len: nullable Int): Bytes
	do
		var size = len or else cstring_length
		# Duplicate the first `size` bytes into a buffer owned by the result.
		var duplicate = new NativeString(size)
		copy_to(duplicate, size, 0, 0)
		return new Bytes(duplicate, size, size)
	end
end
703 # Joins an array of bytes `arr` separated by `sep`
705 # assert join_bytes(["String".to_bytes, "is".to_bytes, "string".to_bytes], ' '.ascii).hexdigest == "537472696E6720697320737472696E67"
706 fun join_bytes
(arr
: Array[Bytes], sep
: nullable BytePattern): Bytes do
707 if arr
.is_empty
then return new Bytes.empty
708 sep
= sep
or else new Bytes.empty
709 var endln
= sep
.pattern_length
* (arr
.length
- 1)
710 for i
in arr
do endln
+= i
.length
711 var ret
= new Bytes.with_capacity
(endln
)
712 ret
.append
(arr
.first
)
713 for i
in [1 .. arr
.length
[ do