e0693045c4bd6be510256b5c6fc9139688c8c272
[nit.git] / lib / binary / serialization.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
# Serialize and deserialize Nit objects to binary streams
#
# The serialized data format uses a dictionary structure similar to BSON:
#
# ~~~raw
# object = 0x01                        # null
#        | 0x02 id cstring attributes  # New object, `cstring` is the class name
#        | 0x03 id                     # Ref to object
#        | 0x04 int64                  # Int
#        | 0x05 int8                   # Bool (int8 != 0)
#        | 0x06 utf8 byte sequence     # Char
#        | 0x07 double(64 bits)        # Float
#        | 0x08 block                  # String
#        | 0x09 block                  # NativeString
#        | 0x0A flat_array;            # Array[nullable Object]
#
# block = int64 int8*;
# cstring = int8* 0x00;
# id = int64;
# flat_array = int64 object*;
#
# attributes = attribute* 0x00;
# attribute = cstring object;
# ~~~
38 module serialization
39
40 import ::serialization::caching
41 private import ::serialization::engine_tools
42 import binary
43 import more_collections
44
45 # ---
# Special bytes, marking the kind of objects in the stream and the end of an object
47
# Marker byte announcing a `null` value
private fun kind_null: Byte
do
	return 0x01u8
end

# Marker byte announcing a new object, written in full
private fun kind_object_new: Byte
do
	return 0x02u8
end

# Marker byte announcing a reference to an already written object
private fun kind_object_ref: Byte
do
	return 0x03u8
end

# Marker byte announcing an `Int`
private fun kind_int: Byte
do
	return 0x04u8
end

# Marker byte announcing a `Bool`
private fun kind_bool: Byte
do
	return 0x05u8
end

# Marker byte announcing a `Char`
private fun kind_char: Byte
do
	return 0x06u8
end

# Marker byte announcing a `Float`
private fun kind_float: Byte
do
	return 0x07u8
end

# Marker byte announcing a `String`
private fun kind_string: Byte
do
	return 0x08u8
end

# Marker byte announcing a `NativeString`
private fun kind_native_string: Byte
do
	return 0x09u8
end

# Marker byte announcing a flat array of objects
private fun kind_flat_array: Byte
do
	return 0x0Au8
end

# Marker byte closing the attributes of a new object
private fun new_object_end: Byte
do
	return 0x00u8
end
60
61 #---
62 # Engines
63
# Serialization engine writing Nit objects to the binary `stream`
#
# The produced data can be read back with `BinaryDeserializer`.
class BinarySerializer
	super CachingSerializer

	# Stream receiving the serialized bytes
	var stream: Writer is writable

	# Write `object` to `stream`, a `null` is written as the single byte `kind_null`
	redef fun serialize(object)
	do
		if object != null then
			serialize_reference(object)
		else
			stream.write_byte kind_null
		end
	end

	# Write the attribute `name` to `stream`, then call the inherited implementation
	redef fun serialize_attribute(name, value)
	do
		stream.write_string name
		super
	end

	# Write `object` in full on its first encounter, or as a reference to its id afterwards
	redef fun serialize_reference(object)
	do
		if not cache.has_object(object) then
			# First encounter, the object writes itself
			object.serialize_to_binary self
			return
		end

		# Already known by the cache, write only a local reference
		var known_id = cache.id_for(object)
		stream.write_byte kind_object_ref
		stream.write_int64 known_id
	end

	# Write `collection` as a simple list of objects
	#
	# The list is `kind_flat_array`, the number of elements, then each element.
	# An element refused by `try_to_serialize` raises a warning and is written as `null`.
	private fun serialize_flat_array(collection: Collection[nullable Object])
	do
		stream.write_byte kind_flat_array
		stream.write_int64 collection.length

		for item in collection do
			if try_to_serialize(item) then continue

			# Keep the announced length valid by writing a placeholder
			assert item != null
			warn "Element of {collection} is not serializable, it is a {item}"
			serialize null
		end
	end
end
113
# Deserialize Nit objects from a binary `stream`
#
# Used with `BinarySerializer`.
class BinaryDeserializer
	super CachingDeserializer

	# Source `Reader` stream
	var stream: Reader

	# Last encountered object reference id.
	#
	# Set by `deserialize_next_object` when a new object is opened,
	# read by `notify_of_creation` and reset to `null` when the object is closed.
	#
	# See `cache.received`.
	private var just_opened_id: nullable Int = null

	# Tree of attributes, deserialized but not yet claimed
	#
	# Used as a stack: one map is pushed for each object being opened
	# and popped when the object is closed.
	private var unclaimed_attributes = new UnrolledList[HashMap[String, nullable Object]]

	# Buffer for one char
	private var char_buf: NativeString is lazy do return new NativeString(4)

	# Read and deserialize the next attribute name and value
	#
	# A `peeked_char`, already consumed from the stream, is put back as
	# a prefix of the next attribute name.
	#
	# Returns `null` on error.
	private fun deserialize_next_attribute(peeked_char: nullable Byte):
		nullable Couple[String, nullable Object]
	do
		# Try the next attribute
		var next_attribute_name = stream.read_string
		var next_object = deserialize_next_object

		if stream.last_error != null then return null

		if peeked_char != null then
			# Replace a char peeked to find an object end
			# NOTE(review): `peeked_char` is interpolated through its `to_s`,
			# confirm that this restores the original character.
			next_attribute_name = "{peeked_char}{next_attribute_name}"
		end

		return new Couple[String, nullable Object](next_attribute_name, next_object)
	end

	# Get the value of the attribute `name` of the object being deserialized
	#
	# Attributes read from the stream while looking for `name` are kept
	# aside in `unclaimed_attributes` and served by later calls.
	#
	# Returns `null` on error; errors detected here are added to `errors`.
	redef fun deserialize_attribute(name)
	do
		if unclaimed_attributes.last.keys.has(name) then
			# Pick in already deserialized attributes
			var value = unclaimed_attributes.last[name]
			unclaimed_attributes.last.keys.remove(name)
			return value
		end

		# Read attributes until we find the wanted one named `name`
		loop
			var next = deserialize_next_attribute
			if next == null then
				# Error was already logged
				return null
			end

			var next_attribute_name = next.first
			var next_object = next.second

			# Got the wanted object
			if next_attribute_name == name then return next_object

			# An invalid attribute name is an heuristic for invalid data.
			# Hitting an object end marker will result in an empty string.
			assert next_attribute_name.is_valid_id else

				var error
				if next_attribute_name.is_empty then
					# Reached the end of the object
					error = new Error("Deserialization Error: Attributes '{name}' not in stream.")
				else
					error = new Error("Deserialization Error: Got an invalid attribute name '{next_attribute_name}', expected '{name}'")
					# TODO this is invalid data, break even on keep_going
				end
				errors.add error
				return null
			end

			# It's not the next attribute, put it aside
			unclaimed_attributes.last[next_attribute_name] = next_object
		end
	end

	# Register `new_object` in `cache` under `just_opened_id`, if any
	redef fun notify_of_creation(new_object)
	do
		var id = just_opened_id
		if id == null then return
		cache[id] = new_object
	end

	# Read the next object from `stream` and convert it to a Nit object
	#
	# Returns `null` for a serialized `null`, and also on error.
	# Errors detected here are added to `errors`.
	private fun deserialize_next_object: nullable Object
	do
		var kind = stream.read_byte
		assert kind isa Byte else
			# TODO break even on keep_going
			return null
		end

		# After this point, all stream reading errors are caught later

		if kind == kind_null then return null
		if kind == kind_int then return stream.read_int64
		if kind == kind_bool then return stream.read_bool
		if kind == kind_float then return stream.read_double
		if kind == kind_char then
			# The first byte gives the length of the UTF-8 sequence
			var bf = char_buf
			var b = stream.read_byte
			if b == null then return '�'
			var ln = b.u8len
			bf[0] = b
			for i in [1 .. ln[ do
				b = stream.read_byte
				if b == null then return '�'
				bf[i] = b
			end
			return bf.to_s_with_length(ln)[0]
		end
		if kind == kind_string then return stream.read_block
		if kind == kind_native_string then return stream.read_block.to_cstring

		if kind == kind_flat_array then
			# An array
			var length = stream.read_int64
			var array = new Array[nullable Object]
			for i in length.times do
				array.add deserialize_next_object
			end
			return array
		end

		if kind == kind_object_ref then
			# A reference
			var id = stream.read_int64
			if stream.last_error != null then return null

			if not cache.has_id(id) then
				errors.add new Error("Deserialization Error: Unknown reference to id #{id}")
				return null
			end
			return cache.object_for(id)
		end

		if kind == kind_object_new then
			# A new object
			var id = stream.read_int64
			if stream.last_error != null then return null

			if cache.has_id(id) then
				errors.add new Error("Deserialization Error: Duplicated use of reference #{id}")
				return null
			end

			var class_name = stream.read_string

			if stream.last_error != null then return null

			# Use the validity of the `class_name` as heuristic to detect invalid data
			if not class_name.is_valid_id then
				errors.add new Error("Deserialization Error: got an invalid class name '{class_name}'")
				return null
			end

			# Prepare opening a new object
			just_opened_id = id
			unclaimed_attributes.push new HashMap[String, nullable Object]

			var value = deserialize_class(class_name)

			# Check for the attributes end marker
			loop
				var next_byte = stream.read_byte
				if next_byte == new_object_end then break

				if next_byte == null then
					# No more data: without this exit, a truncated stream
					# would keep this loop running forever.
					errors.add new Error("Deserialization Error: unexpected end of stream in object #{id}")
					break
				end

				# Fetch an additional attribute, even if it isn't expected
				deserialize_next_attribute(next_byte)
			end

			# Close object
			unclaimed_attributes.pop
			just_opened_id = null

			return value
		end

		errors.add new Error("Deserialization Error: Unknown binary object kind `{kind}`")
		# TODO fatal error and break even on keep_going
		return null
	end

	# Deserialize the next object of `stream`
	#
	# Clears `errors` first. If the stream reports an error, it is added
	# to `errors` and `null` is returned.
	redef fun deserialize
	do
		errors.clear

		var value = deserialize_next_object

		var error = stream.last_error
		if error != null then
			errors.add error
			return null
		end

		return value
	end
end
322
323 # ---
324 # Services
325
redef class Text
	# Does `self` look like the name of a Nit class or property?
	#
	# Blank text is refused. Otherwise only letters, numeric characters
	# and the characters `[`, `]`, ` `, `,` and `_` are accepted.
	private fun is_valid_id: Bool
	do
		if trim.is_empty then return false

		for c in chars do
			if c.is_letter or c.is_numeric then continue
			if c == '[' or c == ']' or c == ' ' or c == ',' or c == '_' then continue

			# Any other character disqualifies the whole text
			return false
		end

		return true
	end
end
340
341 # ---
342 # Per class serialization behavior
343
redef class Serializable
	# Write the binary serialization header
	#
	# The header for a normal object is:
	# 1. The kind of object on 8 bits, `0x02` (`kind_object_new`) for a new object.
	# 2. The id of this object so it is not serialized more than once.
	# 3. The name of the object type as a null terminated string.
	private fun serialize_header_to_binary(v: BinarySerializer)
	do
		var id = v.cache.new_id_for(self)
		v.stream.write_byte kind_object_new # is object intro
		v.stream.write_int64 id
		v.stream.write_string class_name
	end

	# Write a normal object to binary
	#
	# Writes the header, then the attributes through `core_serialize_to`,
	# then the `new_object_end` marker.
	private fun serialize_to_binary(v: BinarySerializer)
	do
		serialize_header_to_binary v
		core_serialize_to v
		v.stream.write_byte new_object_end
	end
end
367
redef class Int
	# Write `kind_int` followed by `self` as an int64
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_int
		out.write_int64 self
	end
end
375
redef class Float
	# Write `kind_float` followed by `self` as a double
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_float
		out.write_double self
	end
end
383
redef class Bool
	# Write `kind_bool` followed by `self`
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_bool
		out.write_bool self
	end
end
391
redef class Char
	# Write `kind_char` followed by each byte of `self`
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_char
		for b in bytes do
			out.write_byte b
		end
	end
end
399
redef class String
	# Write `kind_string` followed by `self` as a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_string
		out.write_block self
	end
end
407
redef class NativeString
	# Write `kind_native_string` followed by the `to_s` of `self` as a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_native_string
		out.write_block to_s
	end
end
415
redef class SimpleCollection[E]

	# Write `self` as a new object with a single attribute `items`
	# holding all the elements as a flat array
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		v.stream.write_string "items"
		v.serialize_flat_array self

		v.stream.write_byte new_object_end
	end

	# Fill `self` from the attribute `items` read by a `BinaryDeserializer`
	#
	# Invalid data is reported in `v.errors`: a missing or malformed `items`
	# leaves the collection empty, an element of the wrong type is skipped.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self
			init

			var items = v.deserialize_attribute("items")
			if not items isa Array[nullable Object] then
				# `deserialize_attribute` returns `null` on error,
				# report it instead of aborting the program.
				v.errors.add new Error("Deserialization Error: invalid 'items' for the collection '{class_name}'")
				return
			end

			for item in items do
				assert item isa E else
					var item_type = "null"
					if item != null then item_type = item.class_name

					v.errors.add new Error("Deserialization Error: invalid type '{item_type}' for the collection '{class_name}'")
					continue
				end

				add item
			end
		end
	end
end
453
redef class Map[K, V]
	# Write `self` as a new object holding the attributes written by
	# `core_serialize_to`, then `keys` and `values` as two flat arrays
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		core_serialize_to v

		v.stream.write_string "keys"
		v.serialize_flat_array keys

		v.stream.write_string "values"
		v.serialize_flat_array values

		v.stream.write_byte new_object_end
	end

	# Instantiate a new `Map` from its serialized representation.
	#
	# Invalid data is reported in `v.errors`: missing or malformed `keys` or
	# `values` leave the map empty, a pair with a key or a value of the wrong
	# type is skipped, and pairs beyond the shortest of both arrays are ignored.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self

			init

			var keys = v.deserialize_attribute("keys")
			var values = v.deserialize_attribute("values")

			# `deserialize_attribute` returns `null` on error,
			# report it instead of aborting the program.
			if not keys isa Array[nullable Object] then
				v.errors.add new Error("Deserialization Error: invalid 'keys' for '{class_name}'")
				return
			end
			if not values isa Array[nullable Object] then
				v.errors.add new Error("Deserialization Error: invalid 'values' for '{class_name}'")
				return
			end

			# Never read past the end of the shortest array
			var count = keys.length
			if values.length != count then
				v.errors.add new Error("Deserialization Error: {keys.length} keys for {values.length} values in '{class_name}'")
				if values.length < count then count = values.length
			end

			for i in count.times do
				var key = keys[i]
				var value = values[i]

				if not key isa K then
					var item_type = "null"
					if key != null then item_type = key.class_name

					v.errors.add new Error("Deserialization Error: Invalid key type '{item_type}' for '{class_name}'")
					continue
				end

				if not value isa V then
					var item_type = "null"
					if value != null then item_type = value.class_name

					v.errors.add new Error("Deserialization Error: Invalid value type '{item_type}' for '{class_name}'")
					continue
				end

				self[key] = value
			end
		end
	end
end