af1b1b4a13929329d66b30a2c7a31976c02b54ec
[nit.git] / lib / binary / serialization.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Serialize and deserialize Nit objects to binary streams
16 #
17 # The serialized data format uses a dictionary structure similar to BSON:
18 #
19 # ~~~raw
20 # object = 0x01 # null
21 # | 0x02 id attributes # New object
22 # | 0x03 id # Ref to object
23 # | 0x04 int64 # Int
24 # | 0x05 int8 # Bool (int8 != 0)
25 # | 0x06 utf8 byte sequence # Char
26 # | 0x07 double(64 bits) # Float
27 # | 0x08 block # String
28 # | 0x09 block # CString
29 # | 0x0A flat_array; # Array[nullable Object]
30 #
31 # block = int64 int8*;
32 # cstring = int8* 0x00;
33 # id = int64;
34 #
35 # attributes = attribute* 0x00;
36 # attribute = cstring object;
37 # ~~~
38 module serialization
39
40 import ::serialization::caching
41 private import ::serialization::engine_tools
42 import binary
43 import more_collections
44
45 # ---
46 # Special bytes, marking the kind of objects in the stream and the end of an object
47
# Marker byte announcing a `null` value
private fun kind_null: Byte do return 0x01u8

# Marker byte announcing a new object: followed by its id and attributes
private fun kind_object_new: Byte do return 0x02u8

# Marker byte announcing a reference to an already written object: followed by its id
private fun kind_object_ref: Byte do return 0x03u8

# Marker byte announcing an `Int`, written on 64 bits
private fun kind_int: Byte do return 0x04u8

# Marker byte announcing a `Bool`
private fun kind_bool: Byte do return 0x05u8

# Marker byte announcing a `Char`, written as its UTF-8 byte sequence
private fun kind_char: Byte do return 0x06u8

# Marker byte announcing a `Float`, written as a 64 bits double
private fun kind_float: Byte do return 0x07u8

# Marker byte announcing a `String`, written as a block
private fun kind_string: Byte do return 0x08u8

# Marker byte announcing a `CString`, written as a block
private fun kind_c_string: Byte do return 0x09u8

# Marker byte announcing a flat array of objects
private fun kind_flat_array: Byte do return 0x0Au8

# Byte closing the list of attributes of a new object
private fun new_object_end: Byte do return 0x00u8
60
61 #---
62 # Engines
63
# Serialization engine writing Nit objects to the binary `stream`
#
# Data written by this engine can be read back with `BinaryDeserializer`.
class BinarySerializer
	super CachingSerializer

	# Target writing stream
	var stream: Writer is writable

	redef var current_object = null

	# Write `object` to `stream`: a `null` marker, or delegate to `serialize_reference`
	redef fun serialize(object)
	do
		if object != null then
			serialize_reference(object)
			return
		end

		stream.write_byte kind_null
	end

	# Write the attribute `name` before letting the default behavior write `value`
	redef fun serialize_attribute(name, value)
	do
		stream.write_string name
		super
	end

	# Write `object` in full on its first occurrence, or as a reference to its id afterwards
	redef fun serialize_reference(object)
	do
		if not cache.has_object(object) then
			# First occurrence: serialize in place, tracking it as the current object
			var enclosing_object = current_object
			current_object = object
			object.serialize_to_binary self
			current_object = enclosing_object
			return
		end

		# Already serialized: only write a local reference
		var ref_id = cache.id_for(object)
		stream.write_byte kind_object_ref
		stream.write_int64 ref_id
	end

	# Write `collection` as a simple list of objects
	#
	# The list is prefixed by its marker byte and its length on 64 bits.
	# An element that can't be serialized is reported with `warn` and
	# replaced by `null` in the stream.
	private fun serialize_flat_array(collection: Collection[nullable Object])
	do
		stream.write_byte kind_flat_array
		stream.write_int64 collection.length

		for item in collection do
			if try_to_serialize(item) then continue

			assert item != null
			warn "Element of {collection} is not serializable, it is a {item}"
			serialize null
		end
	end
end
118
# Deserialize Nit objects from a binary `stream`
#
# Used with `BinarySerializer`.
class BinaryDeserializer
	super CachingDeserializer

	# Source `Reader` stream
	var stream: Reader

	# Last encountered object reference id.
	#
	# See `cache.received`.
	private var just_opened_id: nullable Int = null

	# Stack of attributes deserialized but not yet claimed
	#
	# One map is pushed for each object being opened and popped when it is closed.
	private var unclaimed_attributes = new UnrolledList[HashMap[String, nullable Object]]

	# Buffer for one char
	private var char_buf: CString is lazy do return new CString(4)

	# Read and deserialize the next attribute name and value
	#
	# A `peeked_char` can prefix the next attribute name.
	#
	# Returns `null` on error.
	private fun deserialize_next_attribute(peeked_char: nullable Byte):
			nullable Couple[String, nullable Object]
	do
		# Try the next attribute
		var next_attribute_name = stream.read_string
		var next_object = deserialize_next_object

		if stream.last_error != null then return null

		if peeked_char != null then
			# Replace a char peeked to find an object end
			next_attribute_name = "{peeked_char}{next_attribute_name}"
		end

		return new Couple[String, nullable Object](next_attribute_name, next_object)
	end

	# Get the value of the attribute `name` of the object being deserialized
	#
	# Looks first in the attributes already read and put aside, then reads
	# the stream until `name` is found. Attributes met along the way are
	# put aside in `unclaimed_attributes`.
	#
	# Returns `null` and adds to `errors` if the attribute is not found
	# or if an invalid attribute name is read.
	redef fun deserialize_attribute(name, static_type)
	do
		if unclaimed_attributes.last.keys.has(name) then
			# Pick in already deserialized attributes
			var value = unclaimed_attributes.last[name]
			unclaimed_attributes.last.keys.remove(name)
			return value
		end

		# Read attributes until we find the wanted one named `name`
		loop
			var next = deserialize_next_attribute
			if next == null then
				# Error was already logged
				return null
			end

			var next_attribute_name = next.first
			var next_object = next.second

			# Got the wanted object
			if next_attribute_name == name then return next_object

			# An invalid attribute name is an heuristic for invalid data.
			# Hitting an object end marker will result in an empty string.
			if not next_attribute_name.is_valid_id then

				var error
				if next_attribute_name.is_empty then
					# Reached the end of the object
					error = new Error("Deserialization Error: Attributes '{name}' not in stream.")
				else
					error = new Error("Deserialization Error: Got an invalid attribute name '{next_attribute_name}', expected '{name}'")
					# TODO this is invalid data, break even on keep_going
				end
				errors.add error
				return null
			end

			# It's not the next attribute, put it aside
			unclaimed_attributes.last[next_attribute_name] = next_object
		end
	end

	# Register `new_object` in the cache under the id of the object just opened, if any
	redef fun notify_of_creation(new_object)
	do
		var id = just_opened_id
		if id == null then return
		cache[id] = new_object
	end

	# Read the next object from the binary stream and convert it to a Nit object
	#
	# Returns `null` for a serialized `null` and also on error,
	# see `errors` and `stream.last_error` to tell them apart.
	private fun deserialize_next_object: nullable Object
	do
		var kindi = stream.read_byte
		assert kindi >= 0 else
			# TODO break even on keep_going
			return null
		end
		var kind = kindi.to_b

		# After this point, all stream reading errors are caught later

		if kind == kind_null then return null
		if kind == kind_int then return stream.read_int64
		if kind == kind_bool then return stream.read_bool
		if kind == kind_float then return stream.read_double
		if kind == kind_char then
			# The first byte gives the length of the UTF-8 sequence to read
			var bf = char_buf
			var b = stream.read_byte
			if b < 0 then return '�'
			var ln = b.to_b.u8len
			bf[0] = b.to_b
			for i in [1 .. ln[ do
				b = stream.read_byte
				if b < 0 then return '�'
				bf[i] = b.to_b
			end
			return bf.to_s_unsafe(ln, copy=false)[0]
		end
		if kind == kind_string then return stream.read_block
		if kind == kind_c_string then return stream.read_block.to_cstring

		if kind == kind_flat_array then
			# An array
			var length = stream.read_int64
			var array = new Array[nullable Object]
			for i in length.times do
				array.add deserialize_next_object
			end
			return array
		end

		if kind == kind_object_ref then
			# A reference
			var id = stream.read_int64
			if stream.last_error != null then return null

			if not cache.has_id(id) then
				errors.add new Error("Deserialization Error: Unknown reference to id #{id}")
				return null
			end
			return cache.object_for(id)
		end

		if kind == kind_object_new then
			# A new object
			var id = stream.read_int64
			if stream.last_error != null then return null

			if cache.has_id(id) then
				errors.add new Error("Deserialization Error: Duplicated use of reference #{id}")
				return null
			end

			var class_name = stream.read_string

			if stream.last_error != null then return null

			# Use the validity of the `class_name` as heuristic to detect invalid data
			if not class_name.is_valid_id then
				errors.add new Error("Deserialization Error: got an invalid class name '{class_name}'")
				return null
			end

			# Prepare opening a new object
			just_opened_id = id
			unclaimed_attributes.push new HashMap[String, nullable Object]

			var value = deserialize_class(class_name)

			# Check for the attributes end marker
			loop
				var next_int = stream.read_byte
				if next_int < 0 then
					# Nothing more to read before reaching the end marker.
					# Stop here, the marker would never be found and
					# this loop would not terminate.
					errors.add new Error("Deserialization Error: unexpected end of stream in object of class '{class_name}'")
					break
				end

				var next_byte = next_int.to_b
				if next_byte == new_object_end then break

				# Fetch an additional attribute, even if it isn't expected.
				# A `null` result reports a stream error, reading further is pointless.
				if deserialize_next_attribute(next_byte) == null then break
			end

			# Close object
			unclaimed_attributes.pop
			just_opened_id = null

			return value
		end

		errors.add new Error("Deserialization Error: Unknown binary object kind `{kind}`")
		# TODO fatal error and break even on keep_going
		return null
	end

	# Deserialize the next object from `stream`
	#
	# Clears `errors` first. On a stream error, the error is added to
	# `errors` and `null` is returned.
	redef fun deserialize(static_type)
	do
		errors.clear

		var value = deserialize_next_object

		var error = stream.last_error
		if error != null then
			errors.add error
			return null
		end

		return value
	end
end
328
329 # ---
330 # Services
331
redef class Text
	# Can `self` be the name of a Nit class or property?
	#
	# Accepts letters, digits, `[`, `]`, `,`, `_` and spaces,
	# as long as there is more than whitespace.
	private fun is_valid_id: Bool
	do
		if trim.is_empty then return false

		for c in chars do
			if c.is_letter or c.is_numeric then continue
			if c == '[' or c == ']' or c == ' ' or c == ',' or c == '_' then continue

			# Any other character disqualifies the whole text
			return false
		end

		return true
	end

	# Write `self` as a string: the string marker followed by the content as a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_string
		out.write_block to_s
	end
end
352
353 # ---
354 # Per class serialization behavior
355
redef class Serializable
	# Write the binary serialization header
	#
	# The header for a normal object is:
	# 1. The kind of object on 8 bits, `0x02` (`kind_object_new`) for a new object.
	# 2. The id of this object so it is not serialized more than once.
	# 3. The name of the object type as a null terminated string.
	private fun serialize_header_to_binary(v: BinarySerializer)
	do
		# Register `self` in the cache so later occurrences are written as references
		var id = v.cache.new_id_for(self)
		v.stream.write_byte kind_object_new # is object intro
		v.stream.write_int64 id
		v.stream.write_string class_name
	end

	# Write a normal object to binary
	#
	# Writes the header, the content of the object through `v.serialize_core`,
	# then the `new_object_end` marker closing the attributes.
	private fun serialize_to_binary(v: BinarySerializer)
	do
		serialize_header_to_binary v
		v.serialize_core self
		v.stream.write_byte new_object_end
	end
end
379
redef class Int
	# Write the int marker followed by `self` on 64 bits
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_int
		out.write_int64 self
	end
end
387
redef class Float
	# Write the float marker followed by `self` as a double
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_float
		out.write_double self
	end
end
395
redef class Bool
	# Write the bool marker followed by `self`
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_bool
		out.write_bool self
	end
end
403
redef class Char
	# Write the char marker followed by each byte of `self`
	redef fun serialize_to_binary(v)
	do
		v.stream.write_byte kind_char
		for char_byte in bytes do
			v.stream.write_byte char_byte
		end
	end
end
411
redef class CString
	# Write the C string marker followed by the content of `self` as a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_c_string
		out.write_block to_s
	end
end
419
redef class SimpleCollection[E]

	# Write the object header, then the elements as a flat array in the attribute `items`
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		v.stream.write_string "items"
		v.serialize_flat_array self

		v.stream.write_byte new_object_end
	end

	# Fill this collection from the attribute `items` read by a `BinaryDeserializer`
	#
	# Elements that are not of type `E` are skipped and reported in `v.errors`.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self
			init

			var raw_items = v.deserialize_attribute("items")
			assert raw_items isa Array[nullable Object]

			for element in raw_items do
				if not element isa E then
					# Report the unexpected element and skip it
					var type_name = "null"
					if element != null then type_name = element.class_name

					v.errors.add new Error("Deserialization Error: invalid type '{type_name}' for the collection '{class_name}'")
					continue
				end

				add element
			end
		end
	end
end
457
redef class Map[K, V]
	# Write the object header, the core attributes, then the keys and the
	# values as two flat arrays in the attributes `keys` and `values`
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		v.serialize_core self

		v.stream.write_string "keys"
		v.serialize_flat_array keys

		v.stream.write_string "values"
		v.serialize_flat_array values

		v.stream.write_byte new_object_end
	end

	# Instantiate a new `Map` from its serialized representation.
	#
	# Pairs with a key not of type `K` or a value not of type `V` are
	# skipped and reported in `v.errors`. If the number of keys and values
	# differ, it is reported in `v.errors` and only complete pairs are used.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self

			init

			var keys = v.deserialize_attribute("keys")
			var values = v.deserialize_attribute("values")
			assert keys isa Array[nullable Object]
			assert values isa Array[nullable Object]

			# Never index `values` past its end on inconsistent data
			var count = keys.length
			if values.length != count then
				v.errors.add new Error("Deserialization Error: Got {keys.length} keys but {values.length} values for '{class_name}'")
				if values.length < count then count = values.length
			end

			for i in count.times do
				var key = keys[i]
				var value = values[i]

				if not key isa K then
					var item_type = "null"
					if key != null then item_type = key.class_name

					v.errors.add new Error("Deserialization Error: Invalid key type '{item_type}' for '{class_name}'")
					continue
				end

				if not value isa V then
					var item_type = "null"
					if value != null then item_type = value.class_name

					v.errors.add new Error("Deserialization Error: Invalid value type '{item_type}' for '{class_name}'")
					continue
				end

				self[key] = value
			end
		end
	end
end