ddb87ad54753b332271511dff71cfd681a0f06e5
[nit.git] / lib / binary / serialization.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Serialize and deserialize Nit objects to binary streams
16 #
17 # The serialized data format uses a dictionary structure similar to BSON:
18 #
19 # ~~~raw
20 # object = 0x01 # null
21 # | 0x02 id cstring attributes # New object (cstring is the class name)
22 # | 0x03 id # Ref to object
23 # | 0x04 int64 # Int
24 # | 0x05 int8 # Bool (int8 != 0)
25 # | 0x06 utf8 byte sequence # Char
26 # | 0x07 double(64 bits) # Float
27 # | 0x08 block # String
28 # | 0x09 block # CString
29 # | 0x0A flat_array; # Array[nullable Object]
30 #
31 # block = int64 int8*;
32 # cstring = int8* 0x00;
33 # id = int64;
34 #
35 # attributes = attribute* 0x00;
36 # attribute = cstring object;
37 # ~~~
38 module serialization
39
40 import ::serialization::caching
41 private import ::serialization::engine_tools
42 import binary
43 import more_collections
44
45 # ---
46 # Special bytes, marking the kind of objects in the stream and the end of an object
47
# Marker introducing a `null` value
private fun kind_null: Byte
do
	return 0x01u8
end

# Marker introducing an object serialized in full for the first time
private fun kind_object_new: Byte
do
	return 0x02u8
end

# Marker introducing a reference to an already serialized object
private fun kind_object_ref: Byte
do
	return 0x03u8
end

# Marker introducing an `Int`
private fun kind_int: Byte
do
	return 0x04u8
end

# Marker introducing a `Bool`
private fun kind_bool: Byte
do
	return 0x05u8
end

# Marker introducing a `Char`
private fun kind_char: Byte
do
	return 0x06u8
end

# Marker introducing a `Float`
private fun kind_float: Byte
do
	return 0x07u8
end

# Marker introducing a `String`
private fun kind_string: Byte
do
	return 0x08u8
end

# Marker introducing a `CString`
private fun kind_c_string: Byte
do
	return 0x09u8
end

# Marker introducing a flat array of objects
private fun kind_flat_array: Byte
do
	return 0x0Au8
end

# Marker closing the attribute list of a new object
private fun new_object_end: Byte
do
	return 0x00u8
end
60
61 #---
62 # Engines
63
# Serialization engine writing Nit objects as binary data on `stream`
#
# The data produced can be read back with `BinaryDeserializer`.
class BinarySerializer
	super CachingSerializer

	# Stream receiving the serialized bytes
	var stream: Writer is writable

	redef var current_object = null

	# Write `object` on `stream`, `null` is written as the single byte `kind_null`
	redef fun serialize(object)
	do
		if object != null then
			serialize_reference(object)
		else
			stream.write_byte kind_null
		end
	end

	# Write the attribute `name` before delegating the writing of `value` to `super`
	redef fun serialize_attribute(name, value)
	do
		stream.write_string name
		super
	end

	# Write `object` in full on its first occurrence, or as a reference to its id afterwards
	redef fun serialize_reference(object)
	do
		if not cache.has_object(object) then
			# First occurrence: serialize here, tracking `object` as `current_object`
			var previous_object = current_object
			current_object = object
			object.serialize_to_binary self
			current_object = previous_object
			return
		end

		# Already serialized: only write a local reference
		var id = cache.id_for(object)
		stream.write_byte kind_object_ref
		stream.write_int64 id
	end

	# Write `collection` as its length followed by each of its elements
	#
	# An element refused by `try_to_serialize` is reported with `warn`
	# and replaced by `null` in the output.
	private fun serialize_flat_array(collection: Collection[nullable Object])
	do
		stream.write_byte kind_flat_array
		stream.write_int64 collection.length
		for item in collection do
			if try_to_serialize(item) then continue

			assert item != null
			warn "Element of {collection} is not serializable, it is a {item}"
			serialize null
		end
	end
end
118
# Deserialize Nit objects from a binary `stream`
#
# Used with `BinarySerializer`.
class BinaryDeserializer
	super CachingDeserializer

	# Source `Reader` stream
	var stream: Reader

	# Id of the object being opened by `deserialize_next_object`, `null` if none
	#
	# Read by `notify_of_creation` to register the new instance in `cache`.
	private var just_opened_id: nullable Int = null

	# Stack of attributes deserialized but not yet claimed
	#
	# A map is pushed when a new object is opened and popped when it is closed.
	private var unclaimed_attributes = new UnrolledList[HashMap[String, nullable Object]]

	# Buffer for one char
	private var char_buf: CString is lazy do return new CString(4)

	# Read and deserialize the next attribute name and value
	#
	# A `peeked_char`, already consumed from `stream` while looking for an
	# object end marker, is prepended to the attribute name.
	#
	# Returns `null` if `stream` reports an error.
	private fun deserialize_next_attribute(peeked_char: nullable Byte):
		nullable Couple[String, nullable Object]
	do
		# Try the next attribute
		var next_attribute_name = stream.read_string
		var next_object = deserialize_next_object

		if stream.last_error != null then return null

		if peeked_char != null then
			# Put back the char peeked to find an object end
			# NOTE(review): relies on the string form of a `Byte` being the
			# character it encodes -- confirm against `Byte::to_s`.
			next_attribute_name = "{peeked_char}{next_attribute_name}"
		end

		return new Couple[String, nullable Object](next_attribute_name, next_object)
	end

	# Get the value of the attribute `name` of the object being deserialized
	#
	# Attributes read from the stream before finding `name` are kept in
	# `unclaimed_attributes` for later requests.
	# Returns `null` on error, after logging it in `errors` when it is not
	# a stream error.
	redef fun deserialize_attribute(name, static_type)
	do
		if unclaimed_attributes.last.keys.has(name) then
			# Pick in already deserialized attributes
			var value = unclaimed_attributes.last[name]
			unclaimed_attributes.last.keys.remove(name)
			return value
		end

		# Read attributes until we find the wanted one named `name`
		loop
			var next = deserialize_next_attribute
			if next == null then
				# Stream error, it is reported by `deserialize`
				return null
			end

			var next_attribute_name = next.first
			var next_object = next.second

			# Got the wanted object
			if next_attribute_name == name then return next_object

			# An invalid attribute name is an heuristic for invalid data.
			# Hitting an object end marker will result in an empty string.
			if not next_attribute_name.is_valid_id then

				var error
				if next_attribute_name.is_empty then
					# Reached the end of the object
					error = new Error("Deserialization Error: Attributes '{name}' not in stream.")
				else
					error = new Error("Deserialization Error: Got an invalid attribute name '{next_attribute_name}', expected '{name}'")
					# TODO this is invalid data, break even on keep_going
				end
				errors.add error
				return null
			end

			# It's not the next attribute, put it aside
			unclaimed_attributes.last[next_attribute_name] = next_object
		end
	end

	# Register `new_object` in `cache` under `just_opened_id`, if an object is being opened
	redef fun notify_of_creation(new_object)
	do
		var id = just_opened_id
		if id == null then return
		cache[id] = new_object
	end

	# Read the next object from the binary `stream`
	#
	# Returns `null` for a serialized `null` and also on error,
	# errors not coming from the stream are logged in `errors`.
	private fun deserialize_next_object: nullable Object
	do
		var kind = stream.read_byte
		assert kind isa Byte else
			# TODO break even on keep_going
			return null
		end

		# After this point, all stream reading errors are caught later

		if kind == kind_null then return null
		if kind == kind_int then return stream.read_int64
		if kind == kind_bool then return stream.read_bool
		if kind == kind_float then return stream.read_double
		if kind == kind_char then
			# Read the first byte to know the length of the UTF-8 sequence,
			# then the remaining bytes of the char
			var bf = char_buf
			var b = stream.read_byte
			if b == null then return '�'
			var ln = b.u8len
			bf[0] = b
			for i in [1 .. ln[ do
				b = stream.read_byte
				if b == null then return '�'
				bf[i] = b
			end
			return bf.to_s_unsafe(ln, copy=false)[0]
		end
		if kind == kind_string then return stream.read_block
		if kind == kind_c_string then return stream.read_block.to_cstring

		if kind == kind_flat_array then
			# An array
			var length = stream.read_int64
			var array = new Array[nullable Object]
			for i in length.times do
				array.add deserialize_next_object
			end
			return array
		end

		if kind == kind_object_ref then
			# A reference
			var id = stream.read_int64
			if stream.last_error != null then return null

			if not cache.has_id(id) then
				errors.add new Error("Deserialization Error: Unknown reference to id #{id}")
				return null
			end
			return cache.object_for(id)
		end

		if kind == kind_object_new then
			# A new object
			var id = stream.read_int64
			if stream.last_error != null then return null

			if cache.has_id(id) then
				errors.add new Error("Deserialization Error: Duplicated use of reference #{id}")
				return null
			end

			var class_name = stream.read_string

			if stream.last_error != null then return null

			# Use the validity of the `class_name` as heuristic to detect invalid data
			if not class_name.is_valid_id then
				errors.add new Error("Deserialization Error: got an invalid class name '{class_name}'")
				return null
			end

			# Prepare opening a new object
			just_opened_id = id
			unclaimed_attributes.push new HashMap[String, nullable Object]

			var value = deserialize_class(class_name)

			# Check for the attributes end marker
			loop
				var next_byte = stream.read_byte
				if next_byte == new_object_end then break

				if next_byte == null then
					# No more data before the end marker, stop here
					# instead of looping on an exhausted stream
					errors.add new Error("Deserialization Error: Unexpected end of stream in object '{class_name}', missing end marker")
					break
				end

				# Fetch an additional attribute, even if it isn't expected
				deserialize_next_attribute(next_byte)
			end

			# Close object
			unclaimed_attributes.pop
			just_opened_id = null

			return value
		end

		errors.add new Error("Deserialization Error: Unknown binary object kind `{kind}`")
		# TODO fatal error and break even on keep_going
		return null
	end

	# Deserialize the next object from `stream`
	#
	# Clears `errors` first. If `stream` reports an error, it is added
	# to `errors` and `null` is returned.
	redef fun deserialize(static_type)
	do
		errors.clear

		var value = deserialize_next_object

		var error = stream.last_error
		if error != null then
			errors.add error
			return null
		end

		return value
	end
end
327
328 # ---
329 # Services
330
redef class Text
	# Can `self` be used as the name of a Nit class or property?
	#
	# Only letters, digits and the characters `[`, `]`, ` `, `,` and `_`
	# are accepted. Text that is empty once trimmed is refused.
	private fun is_valid_id: Bool
	do
		if trim.is_empty then return false

		for c in chars do
			if c.is_letter or c.is_numeric then continue
			if c == '[' or c == ']' or c == ' ' or c == ',' or c == '_' then continue
			return false
		end

		return true
	end

	# Write `self` as a string: the `kind_string` marker followed by a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_string
		out.write_block to_s
	end
end
351
352 # ---
353 # Per class serialization behavior
354
redef class Serializable
	# Write the header introducing `self` as a new object on the stream of `v`
	#
	# The header is composed of, in order:
	# 1. The kind of object on 8 bits, `0x02` (`kind_object_new`) for a new object.
	# 2. The id of this object, so it is not serialized more than once.
	# 3. The name of the object type as a null terminated string.
	private fun serialize_header_to_binary(v: BinarySerializer)
	do
		var id = v.cache.new_id_for(self)
		var out = v.stream
		out.write_byte kind_object_new
		out.write_int64 id
		out.write_string class_name
	end

	# Write `self` as a normal object
	#
	# The header comes first, then what `v.serialize_core` writes for `self`,
	# and finally the `new_object_end` marker.
	private fun serialize_to_binary(v: BinarySerializer)
	do
		serialize_header_to_binary v
		v.serialize_core self

		var out = v.stream
		out.write_byte new_object_end
	end
end
378
redef class Int
	# Write `self` as the `kind_int` marker followed by a 64 bits integer
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_int
		out.write_int64 self
	end
end
386
redef class Float
	# Write `self` as the `kind_float` marker followed by a double
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_float
		out.write_double self
	end
end
394
redef class Bool
	# Write `self` as the `kind_bool` marker followed by the boolean value
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_bool
		out.write_bool self
	end
end
402
redef class Char
	# Write `self` as the `kind_char` marker followed by each byte of `bytes`
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_char
		for b in bytes do
			out.write_byte b
		end
	end
end
410
redef class CString
	# Write `self` as the `kind_c_string` marker followed by a block holding `to_s`
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_c_string
		out.write_block to_s
	end
end
418
redef class SimpleCollection[E]

	# Write `self` as a new object with a single attribute `items` holding a flat array
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		var out = v.stream
		out.write_string "items"
		v.serialize_flat_array self

		v.stream.write_byte new_object_end
	end

	# Fill a new collection from the `items` attribute read by a `BinaryDeserializer`
	#
	# Items that are not of type `E` are skipped and reported in `v.errors`.
	redef init from_deserializer(v)
	do
		# Let other engines and definitions act first
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self
			init

			var items = v.deserialize_attribute("items")
			assert items isa Array[nullable Object]
			for item in items do
				if not item isa E then
					var item_type = if item != null then item.class_name else "null"

					v.errors.add new Error("Deserialization Error: invalid type '{item_type}' for the collection '{class_name}'")
					continue
				end

				add item
			end
		end
	end
end
456
redef class Map[K, V]
	# Write `self` as a new object with two flat array attributes: `keys` and `values`
	#
	# What `v.serialize_core` writes for `self` comes before these two attributes.
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		v.serialize_core self

		v.stream.write_string "keys"
		v.serialize_flat_array keys

		v.stream.write_string "values"
		v.serialize_flat_array values

		v.stream.write_byte new_object_end
	end

	# Instantiate a new `Map` from its serialized representation.
	#
	# Entries with a key not of type `K` or a value not of type `V`
	# are skipped and reported in `v.errors`.
	# A different number of keys and values is also reported in `v.errors`,
	# only the entries with both a key and a value are then considered.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self

			init

			var keys = v.deserialize_attribute("keys")
			var values = v.deserialize_attribute("values")
			assert keys isa Array[nullable Object]
			assert values isa Array[nullable Object]

			# Do not index `values` past its end on inconsistent data
			var count = keys.length
			if values.length != count then
				v.errors.add new Error("Deserialization Error: Mismatched number of keys ({keys.length}) and values ({values.length}) for '{class_name}'")
				count = count.min(values.length)
			end

			for i in count.times do
				var key = keys[i]
				var value = values[i]

				if not key isa K then
					var item_type = "null"
					if key != null then item_type = key.class_name

					v.errors.add new Error("Deserialization Error: Invalid key type '{item_type}' for '{class_name}'")
					continue
				end

				if not value isa V then
					var item_type = "null"
					if value != null then item_type = value.class_name

					v.errors.add new Error("Deserialization Error: Invalid value type '{item_type}' for '{class_name}'")
					continue
				end

				self[key] = value
			end
		end
	end
end