ad1f6eff5417d57a80d822fdf67f026af92cf57c
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Services for byte streams and arrays
19 import collection
::array
20 intrude import text
::flat
23 # Is `self` a valid hexadecimal digit (in ASCII)
26 # intrude import core::bytes
27 # assert not '/'.ascii.to_b.is_valid_hexdigit
28 # assert '0'.ascii.to_b.is_valid_hexdigit
29 # assert '9'.ascii.to_b.is_valid_hexdigit
30 # assert not ':'.ascii.to_b.is_valid_hexdigit
31 # assert not '@'.ascii.to_b.is_valid_hexdigit
32 # assert 'A'.ascii.to_b.is_valid_hexdigit
33 # assert 'F'.ascii.to_b.is_valid_hexdigit
34 # assert not 'G'.ascii.to_b.is_valid_hexdigit
35 # assert not '`'.ascii.to_b.is_valid_hexdigit
36 # assert 'a'.ascii.to_b.is_valid_hexdigit
37 # assert 'f'.ascii.to_b.is_valid_hexdigit
38 # assert not 'g'.ascii.to_b.is_valid_hexdigit
40 private fun is_valid_hexdigit
: Bool do
41 return (self >= 0x30u
8 and self <= 0x39u
8) or
42 (self >= 0x41u
8 and self <= 0x46u
8) or
43 (self >= 0x61u
8 and self <= 0x66u
8)
47 # A buffer containing Byte-manipulation facilities
49 # Uses Copy-On-Write when persisted
51 super AbstractArray[Byte]
53 # A NativeString being a char*, it can be used as underlying representation here.
54 private var items
: NativeString
56 # Number of bytes in the array
59 # Capacity of the array
60 private var capacity
: Int
62 # Has this buffer been persisted (to_s'd)?
64 # Used for Copy-On-Write
65 private var persisted
= false
67 # var b = new Bytes.empty
70 var ns
= new NativeString(0)
74 # Init a `Bytes` with capacity `cap`
75 init with_capacity
(cap
: Int) do
76 var ns
= new NativeString(cap
)
# Is `self` empty, i.e. does it contain no byte at all?
#
#     var b = new Bytes.empty
#     assert b.is_empty
#
# The comparison used to be `length != 0`, which reported the exact
# opposite of the emptiness of the buffer.
redef fun is_empty do return length == 0
82 # var b = new Bytes.empty
84 # assert b[0] == 101u8
91 # var b = new Bytes.with_capacity(1)
93 # assert b.to_s == "e"
94 redef fun []=(i
, v
) do
95 if persisted
then regen
98 if i
== length
then add
(v
)
102 # var b = new Bytes.empty
104 # assert b.to_s == "e"
106 if persisted
then regen
107 if length
>= capacity
then
114 # var b = new Bytes.empty
115 # b.append([104u8, 101u8, 108u8, 108u8, 111u8])
116 # assert b.to_s == "hello"
117 redef fun append
(arr
) do
118 if arr
isa Bytes then
119 append_ns
(arr
.items
, arr
.length
)
121 for i
in arr
do add i
125 # var b = new Bytes.empty
126 # b.append([0x41u8, 0x41u8, 0x18u8])
128 # assert b.to_s == "AA"
# Empties `self` by resetting `length` to 0
#
# Only the length is touched here: the underlying storage is kept as is.
redef fun clear do
	length = 0
end
137 # Regenerates the buffer, necessary when it was persisted
139 var nns
= new NativeString(capacity
)
140 items
.copy_to
(nns
, length
, 0, 0)
144 # Appends the `ln` first bytes of `ns` to self
145 fun append_ns
(ns
: NativeString, ln
: Int) do
146 if persisted
then regen
147 var nlen
= length
+ ln
148 if nlen
> capacity
then enlarge
(nlen
)
149 ns
.copy_to
(items
, ln
, 0, length
)
153 # Appends `ln` bytes from `ns` starting at index `from` to self
154 fun append_ns_from
(ns
: NativeString, ln
, from
: Int) do
155 if persisted
then regen
156 var nlen
= length
+ ln
157 if nlen
> capacity
then enlarge
(nlen
)
158 ns
.copy_to
(items
, ln
, from
, length
)
162 redef fun enlarge
(sz
) do
163 if capacity
>= sz
then return
165 while capacity
< sz
do capacity
= capacity
* 2 + 2
166 var ns
= new NativeString(capacity
)
167 items
.copy_to
(ns
, length
, 0, 0)
178 return new FlatString.with_infos
(b
.items
, b
.length
, 0, b
.length
-1)
# Gets an iterator on the bytes of `self`
#
# The iterator is a `BytesIterator` built from `self` through `with_buffer`.
redef fun iterator do
	var iter = new BytesIterator.with_buffer(self)
	return iter
end
183 # Is the byte collection valid UTF-8 ?
185 var charst
= once
[0x80u
8, 0u8
, 0xE0u
8, 0xC0u
8, 0xF0u
8, 0xE0u
8, 0xF8u
8, 0xF0u
8]
186 var lobounds
= once
[0, 0x80, 0x800, 0x10000]
187 var hibounds
= once
[0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
192 var nxst
= mits
.length_of_char_at
(pos
)
193 var charst_index
= (nxst
- 1) * 2
194 if mits
[pos
] & charst
[charst_index
] == charst
[charst_index
+ 1] then
195 var c
= mits
.char_at
(pos
)
197 if cp
<= hibounds
[nxst
- 1] and cp
>= lobounds
[nxst
- 1] then
198 if cp
>= 0xD800 and cp
<= 0xDFFF or
199 cp
== 0xFFFE or cp
== 0xFFFF then return false
211 # Cleans the bytes of `self` to be UTF-8 compliant
212 private fun clean_utf8
: Bytes do
213 var charst
= once
[0x80u
8, 0u8
, 0xE0u
8, 0xC0u
8, 0xF0u
8, 0xE0u
8, 0xF8u
8, 0xF0u
8]
214 var badchar
= once
[0xEFu
8, 0xBFu
8, 0xBDu
8]
215 var lobounds
= once
[0, 0x80, 0x800, 0x10000]
216 var hibounds
= once
[0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
219 var ret
= new Bytes.with_capacity
(len
)
222 var nxst
= mits
.length_of_char_at
(pos
)
223 var charst_index
= (nxst
- 1) * 2
224 if mits
[pos
] & charst
[charst_index
] == charst
[charst_index
+ 1] then
225 var c
= mits
.char_at
(pos
)
227 if cp
<= hibounds
[nxst
- 1] and cp
>= lobounds
[nxst
- 1] then
228 if cp
>= 0xD800 and cp
<= 0xDFFF or
229 cp
== 0xFFFE or cp
== 0xFFFF then
233 var pend
= pos
+ nxst
234 for i
in [pos
.. pend
[ do ret
.add mits
[i
]
250 private class BytesIterator
251 super IndexedIterator[Byte]
253 var tgt
: NativeString
# Init an iterator on the bytes of `b`, starting at index 0
#
# The third argument is the bound `index` is checked against in `is_ok`
# (assumed to initialize `max`, whose declaration is not visible here).
# Since `is_ok` is the strict test `index < max`, that bound is exclusive and
# must be `b.length`: passing `b.length - 1` made the iteration stop one
# byte too early (and yield nothing at all for a single-byte buffer).
init with_buffer(b: Bytes) do init(b.items, 0, b.length)
# Is there still a byte to read?
#
# True as long as `index` is strictly below `max`.
redef fun is_ok do
	return index < max
end
# Moves to the next position by incrementing `index` by one
redef fun next do
	index += 1
end
# The byte of `tgt` located at the current `index`
#
# No bound check is done here; callers are expected to test `is_ok` first.
redef fun item do
	var current = tgt[index]
	return current
end
269 # Returns a mutable copy of `self`'s bytes
272 # assert "String".to_bytes isa Bytes
273 # assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
275 fun to_bytes
: Bytes do
276 var b
= new Bytes.with_capacity
(bytelen
)
281 # Is `self` a valid hexdigest ?
283 # assert "0B1d3F".is_valid_hexdigest
284 # assert not "5G".is_valid_hexdigest
285 fun is_valid_hexdigest
: Bool do
286 for i
in bytes
do if not i
.is_valid_hexdigit
then return false
290 # Appends `self.bytes` to `b`
291 fun append_to_bytes
(b
: Bytes) do
292 for s
in substrings
do
293 var from
= if s
isa FlatString then s
.first_byte
else 0
294 b
.append_ns_from
(s
.items
, s
.bytelen
, from
)
300 redef fun append_to_bytes
(b
) do
301 var from
= if self isa FlatString then first_byte
else 0
302 b
.append_ns_from
(items
, bytelen
, from
)
306 redef class NativeString
307 # Creates a new `Bytes` object from `self` with `strlen` as length
308 fun to_bytes
: Bytes do
309 var len
= cstring_length
310 return new Bytes(self, len
, len
)