d36f9c0ddc79c4d40f37493e1f1f62838c42d43d
[nit.git] / lib / binary / serialization.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Serialize and deserialize Nit objects to binary streams
16 #
17 # The serialized data format uses a dictionary structure similar to BSON:
18 #
19 # ~~~raw
20 # object = 0x01 # null
21 # | 0x02 id cstring attributes # New object (id, class name, attributes)
22 # | 0x03 id # Ref to object
23 # | 0x04 int64 # Int
24 # | 0x05 int8 # Bool (int8 != 0)
25 # | 0x06 int8 # Char
26 # | 0x07 double(64 bits) # Float
27 # | 0x08 block # String
28 # | 0x09 block # NativeString
29 # | 0x0A flat_array; # Array[nullable Object], with flat_array = int64 object* (length, then elements)
30 #
31 # block = int64 int8*;
32 # cstring = int8* 0x00;
33 # id = int64;
34 #
35 # attributes = attribute* 0x00;
36 # attribute = cstring object;
37 # ~~~
38 module serialization
39
40 import ::serialization::caching
41 private import ::serialization::engine_tools
42 import binary
43 import more_collections
44
45 # ---
46 # Special bytes, marking the kind of objects in the stream and the end of an object
47
# Marker of a `null` value
private fun kind_null: Byte do return 0x01u8

# Marker opening a new object
private fun kind_object_new: Byte do return 0x02u8

# Marker of a reference to an object already present in the stream
private fun kind_object_ref: Byte do return 0x03u8

# Marker of an `Int`
private fun kind_int: Byte do return 0x04u8

# Marker of a `Bool`
private fun kind_bool: Byte do return 0x05u8

# Marker of a `Char`
private fun kind_char: Byte do return 0x06u8

# Marker of a `Float`
private fun kind_float: Byte do return 0x07u8

# Marker of a `String`
private fun kind_string: Byte do return 0x08u8

# Marker of a `NativeString`
private fun kind_native_string: Byte do return 0x09u8

# Marker of a flat array of objects
private fun kind_flat_array: Byte do return 0x0Au8

# Marker closing the attributes of a new object
private fun new_object_end: Byte do return 0x00u8
60
61 #---
62 # Engines
63
64 # Writes Nit objects to the binary `stream`
65 #
66 # The output can be deserialized with `BinaryDeserializer`.
class BinarySerializer
	super CachingSerializer

	# Stream receiving the serialized bytes
	var stream: Writer is writable

	# Write `object` to `stream`, a `null` is written as the single byte `kind_null`
	redef fun serialize(object)
	do
		if object != null then
			serialize_reference(object)
		else
			stream.write_byte kind_null
		end
	end

	# Write the attribute `name` to `stream`, then let `super` handle `value`
	redef fun serialize_attribute(name, value)
	do
		stream.write_string name
		super
	end

	# Write `object` in full on its first occurrence, or as a reference to its id afterwards
	redef fun serialize_reference(object)
	do
		if not cache.has_object(object) then
			# First occurrence, the object writes itself
			object.serialize_to_binary self
			return
		end

		# Already in the stream, only refer to it by id
		var known_id = cache.id_for(object)
		stream.write_byte kind_object_ref
		stream.write_int64 known_id
	end

	# Write `collection` as `kind_flat_array`, its length and then each of its elements
	#
	# An element refused by `try_to_serialize` raises a warning and is replaced by `null`.
	private fun serialize_flat_array(collection: Collection[nullable Object])
	do
		stream.write_byte kind_flat_array
		stream.write_int64 collection.length

		for item in collection do
			if try_to_serialize(item) then continue

			assert item != null
			warn "Element of {collection} is not serializable, it is a {item}"
			serialize null
		end
	end
end
113
114 # Deserialize Nit objects from a binary `stream`
115 #
116 # Used with `BinarySerializer`.
class BinaryDeserializer
	super CachingDeserializer

	# Source `Reader` stream
	var stream: Reader

	# Last encountered object reference id.
	#
	# Set when the header of a new object is read, and used by
	# `notify_of_creation` to register the created object in `cache`.
	#
	# See `cache.received`.
	private var just_opened_id: nullable Int = null

	# Stack of attributes deserialized but not yet claimed, one map per opened object
	private var unclaimed_attributes = new UnrolledList[HashMap[String, nullable Object]]

	# Read and deserialize the next attribute name and value
	#
	# When given, `peeked_char` is the first byte of the attribute name,
	# already consumed from `stream` while looking for an object end marker.
	# It is prefixed to the name read from `stream`.
	#
	# Returns `null` if `stream` is in error.
	private fun deserialize_next_attribute(peeked_char: nullable Byte):
			nullable Couple[String, nullable Object]
	do
		# Try the next attribute
		var next_attribute_name = stream.read_string
		var next_object = deserialize_next_object

		if stream.last_error != null then return null

		if peeked_char != null then
			# Put back the byte peeked to find an object end.
			# It is converted to a `Char` as done for `kind_char`,
			# so the name gets the character and not the text form of the `Byte`.
			next_attribute_name = "{peeked_char.to_i.ascii}{next_attribute_name}"
		end

		return new Couple[String, nullable Object](next_attribute_name, next_object)
	end

	# Get the value of the attribute `name` of the object being deserialized
	#
	# Attributes read before finding `name` are kept aside in `unclaimed_attributes`.
	# Returns `null` on error, either a stream error or an error added to `errors`.
	redef fun deserialize_attribute(name)
	do
		if unclaimed_attributes.last.keys.has(name) then
			# Pick in already deserialized attributes
			var value = unclaimed_attributes.last[name]
			unclaimed_attributes.last.keys.remove(name)
			return value
		end

		# Read attributes until we find the wanted one named `name`
		loop
			var next = deserialize_next_attribute
			# The stream is in error, `deserialize` reports it
			if next == null then return null

			var next_attribute_name = next.first
			var next_object = next.second

			# Got the wanted object
			if next_attribute_name == name then return next_object

			# An invalid attribute name is an heuristic for invalid data.
			# Hitting an object end marker will result in an empty string.
			if not next_attribute_name.is_valid_id then
				var error
				if next_attribute_name.is_empty then
					# Reached the end of the object
					error = new Error("Deserialization Error: Attributes '{name}' not in stream.")
				else
					error = new Error("Deserialization Error: Got an invalid attribute name '{next_attribute_name}', expected '{name}'")
					# TODO this is invalid data, break even on keep_going
				end
				errors.add error
				return null
			end

			# It's not the next attribute, put it aside
			unclaimed_attributes.last[next_attribute_name] = next_object
		end
	end

	# Register `new_object` in `cache` under `just_opened_id`, if an object is being opened
	redef fun notify_of_creation(new_object)
	do
		var id = just_opened_id
		if id == null then return
		cache[id] = new_object
	end

	# Read the next object from `stream` and convert it to a Nit object
	#
	# Returns `null` for a serialized `null`, at the end of the stream,
	# and on errors added to `errors`.
	private fun deserialize_next_object: nullable Object
	do
		var kind = stream.read_byte
		# Nothing left to read
		# TODO break even on keep_going
		if kind == null then return null

		# After this point, all stream reading errors are caught later

		if kind == kind_null then return null
		if kind == kind_int then return stream.read_int64
		if kind == kind_bool then return stream.read_bool
		if kind == kind_float then return stream.read_double
		if kind == kind_char then
			var b = stream.read_byte
			# Truncated stream, there is no `Char` to return
			if b == null then return null
			return b.to_i.ascii
		end
		if kind == kind_string then return stream.read_block
		if kind == kind_native_string then return stream.read_block.to_cstring

		if kind == kind_flat_array then
			# An array
			var length = stream.read_int64
			var array = new Array[nullable Object]
			for i in length.times do
				# Do not trust `length` once the stream is in error,
				# the result is discarded by `deserialize` anyway
				if stream.last_error != null then break
				array.add deserialize_next_object
			end
			return array
		end

		if kind == kind_object_ref then
			# A reference
			var id = stream.read_int64
			if stream.last_error != null then return null

			if not cache.has_id(id) then
				errors.add new Error("Deserialization Error: Unknown reference to id #{id}")
				return null
			end
			return cache.object_for(id)
		end

		if kind == kind_object_new then
			# A new object
			var id = stream.read_int64
			if stream.last_error != null then return null

			if cache.has_id(id) then
				errors.add new Error("Deserialization Error: Duplicated use of reference #{id}")
				return null
			end

			var class_name = stream.read_string

			if stream.last_error != null then return null

			# Use the validity of the `class_name` as heuristic to detect invalid data
			if not class_name.is_valid_id then
				errors.add new Error("Deserialization Error: got an invalid class name '{class_name}'")
				return null
			end

			# Prepare opening a new object
			just_opened_id = id
			unclaimed_attributes.push new HashMap[String, nullable Object]

			var value = deserialize_class(class_name)

			# Check for the attributes end marker
			loop
				var next_byte = stream.read_byte
				if next_byte == null then
					# Truncated stream, looping further would never end
					errors.add new Error("Deserialization Error: Unexpected end of stream in object of class '{class_name}'")
					break
				end
				if next_byte == new_object_end then break

				# Fetch an additional attribute, even if it isn't expected.
				# Stop if the stream is in error, nothing more can be read.
				if deserialize_next_attribute(next_byte) == null then break
			end

			# Close object
			unclaimed_attributes.pop
			just_opened_id = null

			return value
		end

		errors.add new Error("Deserialization Error: Unknown binary object kind `{kind}`")
		# TODO fatal error and break even on keep_going
		return null
	end

	# Deserialize the next object from `stream`
	#
	# Clears `errors` first. Returns `null` and adds the error to `errors`
	# if `stream` is in error after reading.
	redef fun deserialize
	do
		errors.clear

		var value = deserialize_next_object

		var error = stream.last_error
		if error != null then
			errors.add error
			# The value read from a stream in error is not reliable
			return null
		end

		return value
	end
end
311
312 # ---
313 # Services
314
redef class Text
	# Can `self` be the name of a Nit class or property?
	#
	# Blank text is refused. Otherwise, only letters, digits and the characters
	# `_`, `[`, `]`, `,` and space are accepted.
	private fun is_valid_id: Bool
	do
		if trim.is_empty then return false

		for c in chars do
			if c.is_letter or c.is_numeric then continue
			if c == '_' or c == '[' or c == ']' or c == ' ' or c == ',' then continue
			return false
		end

		return true
	end
end
329
330 # ---
331 # Per class serialization behavior
332
redef class Serializable
	# Write the header introducing `self` as a new object in the stream of `v`
	#
	# The header for a normal object is:
	# 1. The kind of object on 8 bits, `kind_object_new` (`0x02`).
	# 2. The id of this object so it is not serialized more than once.
	# 3. The name of the object type as a null terminated string.
	private fun serialize_header_to_binary(v: BinarySerializer)
	do
		var new_id = v.cache.new_id_for(self)
		var out = v.stream
		out.write_byte kind_object_new
		out.write_int64 new_id
		out.write_string class_name
	end

	# Write `self` as a normal object: the header, the attributes, then the end marker
	private fun serialize_to_binary(v: BinarySerializer)
	do
		serialize_header_to_binary v
		core_serialize_to v

		v.stream.write_byte new_object_end
	end
end
356
redef class Int
	# Write `kind_int` followed by `self` as an int64
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_int
		out.write_int64 self
	end
end
364
redef class Float
	# Write `kind_float` followed by `self` as a double
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_float
		out.write_double self
	end
end
372
redef class Bool
	# Write `kind_bool` followed by `self`
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_bool
		out.write_bool self
	end
end
380
redef class Char
	# Write `kind_char` followed by the `ascii` code of `self` on a single byte
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_char
		# Fix when UTF-8
		out.write_byte ascii.to_b
	end
end
389
redef class String
	# Write `kind_string` followed by `self` as a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_string
		out.write_block self
	end
end
397
redef class NativeString
	# Write `kind_native_string` followed by the `to_s` of `self` as a block
	redef fun serialize_to_binary(v)
	do
		var out = v.stream
		out.write_byte kind_native_string
		out.write_block to_s
	end
end
405
redef class SimpleCollection[E]

	# Write `self` as a new object with a single attribute `items`, a flat array of the elements
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		v.stream.write_string "items"
		v.serialize_flat_array self

		v.stream.write_byte new_object_end
	end

	# Fill `self` with the elements of the attribute `items` read from `v`
	#
	# Only acts when `v` is a `BinaryDeserializer`.
	# Invalid data is reported in `v.errors`: a missing or invalid `items`
	# leaves `self` empty, and elements which are not of type `E` are skipped.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self
			init

			var items = v.deserialize_attribute("items")
			if not items isa Array[nullable Object] then
				# Report malformed data as the rest of the engine does, instead of aborting
				var items_type = "null"
				if items != null then items_type = items.class_name

				v.errors.add new Error("Deserialization Error: invalid 'items' of type '{items_type}' for the collection '{class_name}'")
				return
			end

			for item in items do
				if not item isa E then
					var item_type = "null"
					if item != null then item_type = item.class_name

					v.errors.add new Error("Deserialization Error: invalid type '{item_type}' for the collection '{class_name}'")
					continue
				end

				add item
			end
		end
	end
end
443
redef class Map[K, V]
	# Write `self` as a new object with two flat arrays as attributes: `keys` and `values`
	redef fun serialize_to_binary(v)
	do
		serialize_header_to_binary v

		core_serialize_to v

		v.stream.write_string "keys"
		v.serialize_flat_array keys

		v.stream.write_string "values"
		v.serialize_flat_array values

		v.stream.write_byte new_object_end
	end

	# Fill `self` with the pairs of the attributes `keys` and `values` read from `v`
	#
	# Only acts when `v` is a `BinaryDeserializer`.
	# Invalid data is reported in `v.errors`: missing, invalid or mismatched
	# `keys` and `values` leave `self` empty, and pairs with a key which is not
	# of type `K` or a value which is not of type `V` are skipped.
	redef init from_deserializer(v)
	do
		# Give a chance to other engines, and defs
		super

		if v isa BinaryDeserializer then
			v.notify_of_creation self

			init

			var keys = v.deserialize_attribute("keys")
			var values = v.deserialize_attribute("values")

			# Report malformed data as the rest of the engine does, instead of aborting
			if not keys isa Array[nullable Object] then
				v.errors.add new Error("Deserialization Error: Missing or invalid 'keys' for '{class_name}'")
				return
			end
			if not values isa Array[nullable Object] then
				v.errors.add new Error("Deserialization Error: Missing or invalid 'values' for '{class_name}'")
				return
			end

			# Each key needs its value, indexing `values` beyond its length would abort
			if keys.length != values.length then
				v.errors.add new Error("Deserialization Error: Got {keys.length} keys for {values.length} values in '{class_name}'")
				return
			end

			for i in keys.length.times do
				var key = keys[i]
				var value = values[i]

				if not key isa K then
					var item_type = "null"
					if key != null then item_type = key.class_name

					v.errors.add new Error("Deserialization Error: Invalid key type '{item_type}' for '{class_name}'")
					continue
				end

				if not value isa V then
					var item_type = "null"
					if value != null then item_type = value.class_name

					v.errors.add new Error("Deserialization Error: Invalid value type '{item_type}' for '{class_name}'")
					continue
				end

				self[key] = value
			end
		end
	end
end