1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Services for byte streams and arrays
19 import collection
::array
20 intrude import text
::flat
22 # A buffer containing Byte-manipulation facilities
24 # Uses Copy-On-Write when persisted
26 super AbstractArray[Byte]
28 # A NativeString being a char*, it can be used as underlying representation here.
29 private var items
: NativeString
31 # Number of bytes in the array
34 # Capacity of the array
35 private var capacity
: Int
37 # Has this buffer been persisted (to_s'd)?
39 # Used for Copy-On-Write
40 private var persisted
= false
42 # var b = new Bytes.empty
45 var ns
= new NativeString(0)
49 # Init a `Bytes` with capacity `cap`
50 init with_capacity
(cap
: Int) do
51 var ns
= new NativeString(cap
)
# Is this buffer empty, i.e. does it currently hold zero bytes?
#
# Only `length` is inspected.
#
#     var b = new Bytes.empty
#     assert b.is_empty
#     b.append([104u8])
#     assert not b.is_empty
redef fun is_empty do return length == 0
57 # var b = new Bytes.empty
59 # assert b[0] == 101u8
66 # var b = new Bytes.with_capacity(1)
68 # assert b.to_s == "e"
69 redef fun []=(i
, v
) do
70 if persisted
then regen
73 if i
== length
then add
(v
)
77 # var b = new Bytes.empty
79 # assert b.to_s == "e"
81 if persisted
then regen
82 if length
>= capacity
then
89 # var b = new Bytes.empty
90 # b.append([104u8, 101u8, 108u8, 108u8, 111u8])
91 # assert b.to_s == "hello"
92 redef fun append
(arr
) do
94 append_ns
(arr
.items
, arr
.length
)
100 # var b = new Bytes.empty
101 # b.append([0x41u8, 0x41u8, 0x18u8])
103 # assert b.to_s == "AA"
# Empties the buffer by resetting `length` to zero.
#
# Nothing else is modified by this method.
redef fun clear
do
	length = 0
end
112 # Regenerates the buffer, necessary when it was persisted
114 var nns
= new NativeString(capacity
)
115 items
.copy_to
(nns
, length
, 0, 0)
119 # Appends the `ln` first bytes of `ns` to self
120 fun append_ns
(ns
: NativeString, ln
: Int) do
121 if persisted
then regen
122 var nlen
= length
+ ln
123 if nlen
> capacity
then enlarge
(nlen
)
124 ns
.copy_to
(items
, ln
, 0, length
)
128 # Appends `ln` bytes from `ns` starting at index `from` to self
129 fun append_ns_from
(ns
: NativeString, ln
, from
: Int) do
130 if persisted
then regen
131 var nlen
= length
+ ln
132 if nlen
> capacity
then enlarge
(nlen
)
133 ns
.copy_to
(items
, ln
, from
, length
)
137 redef fun enlarge
(sz
) do
138 if capacity
>= sz
then return
140 while capacity
< sz
do capacity
= capacity
* 2 + 2
141 var ns
= new NativeString(capacity
)
142 items
.copy_to
(ns
, length
, 0, 0)
153 return new FlatString.with_infos
(b
.items
, b
.length
, 0, b
.length
-1)
# Returns a fresh `BytesIterator` created with `with_buffer` on `self`.
redef fun iterator
do
	var it = new BytesIterator.with_buffer(self)
	return it
end
158 # Is the byte collection valid UTF-8 ?
160 var charst
= once
[0x80u
8, 0u8
, 0xE0u
8, 0xC0u
8, 0xF0u
8, 0xE0u
8, 0xF8u
8, 0xF0u
8]
161 var lobounds
= once
[0, 0x80, 0x800, 0x10000]
162 var hibounds
= once
[0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
167 var nxst
= mits
.length_of_char_at
(pos
)
168 var charst_index
= (nxst
- 1) * 2
169 if mits
[pos
] & charst
[charst_index
] == charst
[charst_index
+ 1] then
170 var c
= mits
.char_at
(pos
)
172 if cp
<= hibounds
[nxst
- 1] and cp
>= lobounds
[nxst
- 1] then
173 if cp
>= 0xD800 and cp
<= 0xDFFF or
174 cp
== 0xFFFE or cp
== 0xFFFF then return false
186 # Cleans the bytes of `self` to be UTF-8 compliant
187 private fun clean_utf8
: Bytes do
188 var charst
= once
[0x80u
8, 0u8
, 0xE0u
8, 0xC0u
8, 0xF0u
8, 0xE0u
8, 0xF8u
8, 0xF0u
8]
189 var badchar
= once
[0xEFu
8, 0xBFu
8, 0xBDu
8]
190 var lobounds
= once
[0, 0x80, 0x800, 0x10000]
191 var hibounds
= once
[0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
194 var ret
= new Bytes.with_capacity
(len
)
197 var nxst
= mits
.length_of_char_at
(pos
)
198 var charst_index
= (nxst
- 1) * 2
199 if mits
[pos
] & charst
[charst_index
] == charst
[charst_index
+ 1] then
200 var c
= mits
.char_at
(pos
)
202 if cp
<= hibounds
[nxst
- 1] and cp
>= lobounds
[nxst
- 1] then
203 if cp
>= 0xD800 and cp
<= 0xDFFF or
204 cp
== 0xFFFE or cp
== 0xFFFF then
208 var pend
= pos
+ nxst
209 for i
in [pos
.. pend
[ do ret
.add mits
[i
]
225 private class BytesIterator
226 super IndexedIterator[Byte]
228 var tgt
: NativeString
# Builds an iterator over the bytes currently stored in `b`.
#
# Iteration starts at index 0. The bound passed last is `b.length`, not
# `b.length - 1`: `is_ok` tests `index < max` (strictly), so an inclusive
# bound would skip the final byte and yield nothing at all for a
# one-byte buffer.
#
# NOTE(review): assumes the three positional arguments initialise `tgt`,
# `index` and `max` in that order; the declarations of the last two are
# not visible here, please confirm.
init with_buffer(b: Bytes) do init(b.items, 0, b.length)
# True while `index` is still strictly below `max`.
redef fun is_ok
do
	return max > index
end
# Moves the iterator forward by one position.
redef fun next
do
	index = index + 1
end
# The byte of `tgt` located at the current `index`.
redef fun item
do
	var pos = index
	return tgt[pos]
end
244 # Returns a mutable copy of `self`'s bytes
247 # assert "String".to_bytes isa Bytes
248 # assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
250 fun to_bytes
: Bytes do
251 var b
= new Bytes.with_capacity
(bytelen
)
256 # Appends `self.bytes` to `b`
257 fun append_to_bytes
(b
: Bytes) do
258 for s
in substrings
do
259 var from
= if s
isa FlatString then s
.first_byte
else 0
260 b
.append_ns_from
(s
.items
, s
.bytelen
, from
)
266 redef fun append_to_bytes
(b
) do
267 var from
= if self isa FlatString then first_byte
else 0
268 b
.append_ns_from
(items
, bytelen
, from
)
272 redef class NativeString
273 # Creates a new `Bytes` object from `self` with `strlen` as length
274 fun to_bytes
: Bytes do
275 var len
= cstring_length
276 return new Bytes(self, len
, len
)