1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Services for byte streams and arrays
19 import collection
::array
20 intrude import text
::flat
23 # Is `self` a valid hexadecimal digit (in ASCII)
26 # intrude import core::bytes
27 # assert not '/'.ascii.to_b.is_valid_hexdigit
28 # assert '0'.ascii.to_b.is_valid_hexdigit
29 # assert '9'.ascii.to_b.is_valid_hexdigit
30 # assert not ':'.ascii.to_b.is_valid_hexdigit
31 # assert not '@'.ascii.to_b.is_valid_hexdigit
32 # assert 'A'.ascii.to_b.is_valid_hexdigit
33 # assert 'F'.ascii.to_b.is_valid_hexdigit
34 # assert not 'G'.ascii.to_b.is_valid_hexdigit
35 # assert not '`'.ascii.to_b.is_valid_hexdigit
36 # assert 'a'.ascii.to_b.is_valid_hexdigit
37 # assert 'f'.ascii.to_b.is_valid_hexdigit
38 # assert not 'g'.ascii.to_b.is_valid_hexdigit
private fun is_valid_hexdigit: Bool do
	# 0x30..0x39 covers the ASCII digits '0'..'9'
	if self >= 0x30u8 and self <= 0x39u8 then return true
	# 0x41..0x46 covers the ASCII uppercase letters 'A'..'F'
	if self >= 0x41u8 and self <= 0x46u8 then return true
	# 0x61..0x66 covers the ASCII lowercase letters 'a'..'f'
	return self >= 0x61u8 and self <= 0x66u8
end
46 # `self` as a hexdigit to its byte value
49 # intrude import core::bytes
50 # assert 0x39u8.hexdigit_to_byteval == 0x09u8
51 # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
54 # REQUIRE: `self.is_valid_hexdigit`
55 private fun hexdigit_to_byteval
: Byte do
56 if self >= 0x30u
8 and self <= 0x39u
8 then
58 else if self >= 0x41u
8 and self <= 0x46u
8 then
60 else if self >= 0x61u
8 and self <= 0x66u
8 then
63 # Happens only if the requirement is not met.
64 # i.e. this abort is here to please the compiler
69 # A buffer containing Byte-manipulation facilities
71 # Uses Copy-On-Write when persisted
73 super AbstractArray[Byte]
75 # A NativeString being a char*, it can be used as underlying representation here.
76 private var items
: NativeString
78 # Number of bytes in the array
81 # Capacity of the array
82 private var capacity
: Int
84 # Has this buffer been persisted (to_s'd)?
86 # Used for Copy-On-Write
87 private var persisted
= false
89 # var b = new Bytes.empty
92 var ns
= new NativeString(0)
96 # Init a `Bytes` with capacity `cap`
97 init with_capacity
(cap
: Int) do
98 var ns
= new NativeString(cap
)
# Is `self` empty, i.e. does it hold no byte at all?
#
#     var b = new Bytes.empty
#     assert b.is_empty
#     b.add 101u8
#     assert not b.is_empty
redef fun is_empty do return length == 0
104 # var b = new Bytes.empty
106 # assert b[0] == 101u8
113 # var b = new Bytes.with_capacity(1)
115 # assert b.to_s == "e"
116 redef fun []=(i
, v
) do
117 if persisted
then regen
120 if i
== length
then add
(v
)
124 # var b = new Bytes.empty
126 # assert b.to_s == "e"
128 if persisted
then regen
129 if length
>= capacity
then
136 # var b = new Bytes.empty
137 # b.append([104u8, 101u8, 108u8, 108u8, 111u8])
138 # assert b.to_s == "hello"
redef fun append(arr) do
	# Another `Bytes` exposes its native buffer: hand it over in one call
	if arr isa Bytes then
		append_ns(arr.items, arr.length)
		return
	end
	# Any other collection is added one element at a time
	for elt in arr do add elt
end
147 # var b = new Bytes.empty
148 # b.append([0x41u8, 0x41u8, 0x18u8])
150 # assert b.to_s == "AA"
redef fun clear do
	# Only the byte count is reset here; no other field is touched
	length = 0
end
159 # Regenerates the buffer, necessary when it was persisted
161 var nns
= new NativeString(capacity
)
162 items
.copy_to
(nns
, length
, 0, 0)
166 # Appends the `ln` first bytes of `ns` to self
167 fun append_ns
(ns
: NativeString, ln
: Int) do
168 if persisted
then regen
169 var nlen
= length
+ ln
170 if nlen
> capacity
then enlarge
(nlen
)
171 ns
.copy_to
(items
, ln
, 0, length
)
175 # Appends `ln` bytes from `ns` starting at index `from` to self
176 fun append_ns_from
(ns
: NativeString, ln
, from
: Int) do
177 if persisted
then regen
178 var nlen
= length
+ ln
179 if nlen
> capacity
then enlarge
(nlen
)
180 ns
.copy_to
(items
, ln
, from
, length
)
184 redef fun enlarge
(sz
) do
185 if capacity
>= sz
then return
187 while capacity
< sz
do capacity
= capacity
* 2 + 2
188 var ns
= new NativeString(capacity
)
189 items
.copy_to
(ns
, length
, 0, 0)
200 return new FlatString.with_infos
(b
.items
, b
.length
, 0, b
.length
-1)
redef fun iterator do
	# The iterator is built from `self` through `BytesIterator.with_buffer`
	var it = new BytesIterator.with_buffer(self)
	return it
end
205 # Is the byte collection valid UTF-8 ?
207 var charst
= once
[0x80u
8, 0u8
, 0xE0u
8, 0xC0u
8, 0xF0u
8, 0xE0u
8, 0xF8u
8, 0xF0u
8]
208 var lobounds
= once
[0, 0x80, 0x800, 0x10000]
209 var hibounds
= once
[0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
214 var nxst
= mits
.length_of_char_at
(pos
)
215 var charst_index
= (nxst
- 1) * 2
216 if mits
[pos
] & charst
[charst_index
] == charst
[charst_index
+ 1] then
217 var c
= mits
.char_at
(pos
)
219 if cp
<= hibounds
[nxst
- 1] and cp
>= lobounds
[nxst
- 1] then
220 if cp
>= 0xD800 and cp
<= 0xDFFF or
221 cp
== 0xFFFE or cp
== 0xFFFF then return false
233 # Cleans the bytes of `self` to be UTF-8 compliant
234 private fun clean_utf8
: Bytes do
235 var charst
= once
[0x80u
8, 0u8
, 0xE0u
8, 0xC0u
8, 0xF0u
8, 0xE0u
8, 0xF8u
8, 0xF0u
8]
236 var badchar
= once
[0xEFu
8, 0xBFu
8, 0xBDu
8]
237 var lobounds
= once
[0, 0x80, 0x800, 0x10000]
238 var hibounds
= once
[0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
241 var ret
= new Bytes.with_capacity
(len
)
244 var nxst
= mits
.length_of_char_at
(pos
)
245 var charst_index
= (nxst
- 1) * 2
246 if mits
[pos
] & charst
[charst_index
] == charst
[charst_index
+ 1] then
247 var c
= mits
.char_at
(pos
)
249 if cp
<= hibounds
[nxst
- 1] and cp
>= lobounds
[nxst
- 1] then
250 if cp
>= 0xD800 and cp
<= 0xDFFF or
251 cp
== 0xFFFE or cp
== 0xFFFF then
255 var pend
= pos
+ nxst
256 for i
in [pos
.. pend
[ do ret
.add mits
[i
]
272 private class BytesIterator
273 super IndexedIterator[Byte]
275 var tgt
: NativeString
# Builds the iterator from `b`: its native items, a start index of 0
# and `b.length` as the third constructor argument
init with_buffer(b: Bytes) do
	var src = b.items
	var limit = b.length
	init(src, 0, limit)
end
redef fun is_ok do
	# Still valid while `index` is strictly below `max`
	return index < max
end
redef fun next do
	# Step forward by one position
	index += 1
end
redef fun item do
	# Read the byte of `tgt` found at the current `index`
	return tgt[index]
end
291 # Returns a mutable copy of `self`'s bytes
294 # assert "String".to_bytes isa Bytes
295 # assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
297 fun to_bytes
: Bytes do
298 var b
= new Bytes.with_capacity
(bytelen
)
303 # Is `self` a valid hexdigest ?
305 # assert "0B1d3F".is_valid_hexdigest
306 # assert not "5G".is_valid_hexdigest
307 fun is_valid_hexdigest
: Bool do
308 for i
in bytes
do if not i
.is_valid_hexdigit
then return false
# Appends the bytes of `self` to `b`, one substring at a time
fun append_to_bytes(b: Bytes) do
	for chunk in substrings do
		# A `FlatString` starts at its `first_byte`; anything else starts at 0
		var offset = 0
		if chunk isa FlatString then offset = chunk.first_byte
		b.append_ns_from(chunk.items, chunk.bytelen, offset)
	end
end
320 # Returns a new `Bytes` instance with the digest as content
322 # assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
324 # REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0
325 fun hexdigest_to_bytes
: Bytes do
329 var ret
= new Bytes.with_capacity
(max
/ 2)
331 ret
.add
((b
[pos
].hexdigit_to_byteval
<< 4) |
332 b
[pos
+ 1].hexdigit_to_byteval
)
redef fun append_to_bytes(b) do
	# A `FlatString` starts at its `first_byte`; anything else starts at 0
	var offset = 0
	if self isa FlatString then offset = first_byte
	b.append_ns_from(items, bytelen, offset)
end
346 redef class NativeString
# Wraps `self` in a new `Bytes` whose length is `cstring_length`
#
# `self` is passed as-is to the `Bytes` constructor, with the measured
# length supplied as both the second and third arguments.
fun to_bytes: Bytes do
	var n = cstring_length
	var res = new Bytes(self, n, n)
	return res
end