Merge: Clean UTF-8 string update
[nit.git] / lib / core / bytes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Services for byte streams and arrays
16 module bytes
17
18 import kernel
19 import collection::array
20 intrude import text::flat
21
redef class Byte
	# Writes the two uppercase hexadecimal digits of `self` into `ns`
	#
	# The digit of the high nibble is stored at `pos` and the digit of the
	# low nibble at `pos + 1`, both as ASCII codes (`0`-`9`, `A`-`F`).
	private fun add_digest_at(ns: NativeString, pos: Int) do
		var high = (self & 0xF0u8) >> 4
		var low = self & 0x0Fu8
		# 0x30 is ASCII '0'; 0x37 + 10 is ASCII 'A'
		if high < 0x0Au8 then
			ns[pos] = high + 0x30u8
		else
			ns[pos] = high + 0x37u8
		end
		if low < 0x0Au8 then
			ns[pos + 1] = low + 0x30u8
		else
			ns[pos + 1] = low + 0x37u8
		end
	end
end
31
# A buffer containing Byte-manipulation facilities
#
# Uses Copy-On-Write when persisted
class Bytes
	super AbstractArray[Byte]

	# A NativeString being a char*, it can be used as underlying representation here.
	var items: NativeString

	# Number of bytes in the array
	redef var length

	# Capacity of the array
	private var capacity: Int

	# Has this buffer been persisted (to_s'd)?
	#
	# Used for Copy-On-Write
	private var persisted = false

	# Creates an empty buffer with no capacity
	#
	#     var b = new Bytes.empty
	#     assert b.to_s == ""
	init empty do
		var ns = new NativeString(0)
		init(ns, 0, 0)
	end

	# Init a `Bytes` with capacity `cap`
	init with_capacity(cap: Int) do
		var ns = new NativeString(cap)
		init(ns, 0, cap)
	end

	# Is the buffer free of any byte?
	#
	#     var b = new Bytes.empty
	#     assert b.is_empty
	#     b.add 101u8
	#     assert not b.is_empty
	redef fun is_empty do return length == 0

	# Byte at index `i`, which must be in `[0, length[`
	#
	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b[0] == 101u8
	redef fun [](i) do
		assert i >= 0
		assert i < length
		return items[i]
	end

	# Returns self as a hexadecimal digest
	fun hexdigest: String do
		# Each byte is rendered as two hexadecimal digits
		var elen = length * 2
		var ns = new NativeString(elen)
		var i = 0
		var oi = 0
		while i < length do
			self[i].add_digest_at(ns, oi)
			i += 1
			oi += 2
		end
		return new FlatString.full(ns, elen, 0, elen - 1, elen)
	end

	# Sets the byte at index `i` to `v`
	#
	# `i` must be in `[0, length]`; writing at `length` appends `v`.
	#
	#     var b = new Bytes.with_capacity(1)
	#     b[0] = 101u8
	#     assert b.to_s == "e"
	redef fun []=(i, v) do
		if persisted then regen
		assert i >= 0
		assert i <= length
		if i == length then
			# `add` takes care of growing the buffer and storing `v`
			add(v)
			return
		end
		items[i] = v
	end

	# Appends the byte `c` at the end of the buffer
	#
	#     var b = new Bytes.empty
	#     b.add 101u8
	#     assert b.to_s == "e"
	redef fun add(c) do
		if persisted then regen
		if length >= capacity then
			# Room is needed for one more byte: asking `enlarge` for only
			# `length` would be a no-op when `capacity == length`.
			enlarge(length + 1)
		end
		items[length] = c
		length += 1
	end

	# Appends every byte of `arr` at the end of the buffer
	#
	#     var b = new Bytes.empty
	#     b.append([104u8, 101u8, 108u8, 108u8, 111u8])
	#     assert b.to_s == "hello"
	redef fun append(arr) do
		if arr isa Bytes then
			# Another `Bytes` can be copied in one go from its native buffer
			append_ns(arr.items, arr.length)
		else
			for i in arr do add i
		end
	end

	# Removes and returns the last byte; the buffer must not be empty
	#
	#     var b = new Bytes.empty
	#     b.append([0x41u8, 0x41u8, 0x18u8])
	#     b.pop
	#     assert b.to_s == "AA"
	redef fun pop do
		assert length >= 1
		length -= 1
		return items[length]
	end

	# Drops all the bytes by resetting `length`; `capacity` is left untouched
	redef fun clear do length = 0

	# Regenerates the buffer, necessary when it was persisted
	#
	# The content is copied into a fresh native buffer that replaces
	# `items`, so later writes no longer reach the previous buffer.
	private fun regen do
		var nns = new NativeString(capacity)
		items.copy_to(nns, length, 0, 0)
		items = nns
		persisted = false
	end

	# Appends the `ln` first bytes of `ns` to self
	fun append_ns(ns: NativeString, ln: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, 0, length)
		length += ln
	end

	# Appends `ln` bytes from `ns` starting at index `from` to self
	fun append_ns_from(ns: NativeString, ln, from: Int) do
		if persisted then regen
		var nlen = length + ln
		if nlen > capacity then enlarge(nlen)
		ns.copy_to(items, ln, from, length)
		length += ln
	end

	# Grows the buffer so that `capacity` is at least `sz`
	#
	# Does nothing when `capacity` is already large enough; otherwise the
	# content is moved to a new, larger native buffer.
	redef fun enlarge(sz) do
		if capacity >= sz then return
		# `items` is replaced by a fresh buffer below
		persisted = false
		while capacity < sz do capacity = capacity * 2 + 2
		var ns = new NativeString(capacity)
		items.copy_to(ns, length, 0, 0)
		items = ns
	end

	# Returns the `length` first bytes of the buffer as a string
	redef fun to_s do
		persisted = true
		var r = items.to_s_with_length(length)
		# NOTE(review): `r` is a string while `items` is a `NativeString`,
		# so this comparison looks like it can never be an equality and
		# `persisted` would always be reset here. Presumably the intent was
		# to compare the native buffer backing `r` with `items` -- confirm.
		if r != items then persisted = false
		return r
	end

	redef fun iterator do return new BytesIterator.with_buffer(self)

end
181
# Iterator over the bytes held by a native buffer
private class BytesIterator
	super IndexedIterator[Byte]

	# Native buffer the bytes are read from
	var tgt: NativeString

	# Position of the current byte in `tgt`
	redef var index

	# Index of the last byte to visit (inclusive)
	var max: Int

	# Iterates over the `b.length` first bytes of `b.items`
	init with_buffer(b: Bytes) do init(b.items, 0, b.length - 1)

	# `max` is the last valid index, so the bound is inclusive;
	# a strict comparison would skip the final byte.
	redef fun is_ok do return index <= max

	redef fun next do index += 1

	redef fun item do return tgt[index]
end
199
redef class Text
	# Builds a new `Bytes` holding a copy of the bytes of `self`
	#
	# ~~~nit
	# assert "String".to_bytes isa Bytes
	# assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
	# ~~~
	fun to_bytes: Bytes do
		var res = new Bytes.with_capacity(bytelen)
		append_to_bytes(res)
		return res
	end

	# Appends the bytes of `self` at the end of `b`
	#
	# Each element of `substrings` is copied in turn from its own `items`.
	fun append_to_bytes(b: Bytes) do
		for sub in substrings do
			# Only a `FlatString` is read from its `first_byte`
			var start = 0
			if sub isa FlatString then start = sub.first_byte
			b.append_ns_from(sub.items, sub.bytelen, start)
		end
	end
end
221
redef class FlatText
	# Appends `bytelen` bytes of `items` at the end of `b`
	#
	# The copy starts at `first_byte` for a `FlatString` and at 0 otherwise.
	redef fun append_to_bytes(b) do
		var start = 0
		if self isa FlatString then start = first_byte
		b.append_ns_from(items, bytelen, start)
	end
end
228
redef class NativeString
	# Wraps `self` in a new `Bytes` object
	#
	# `cstring_length` is used both as the length and as the capacity of
	# the result, and `self` is handed over as its `items` as is.
	fun to_bytes: Bytes do
		var size = cstring_length
		var res = new Bytes(self, size, size)
		return res
	end
end
235 end