Merge: parse_examples: minor fixes
[nit.git] / lib / binary / serialization.nit
index d36f9c0..af1b1b4 100644 (file)
 # The serialized data format uses a dictionary structure similar to BSON:
 #
 # ~~~raw
-# object = 0x01                   # null
-#        | 0x02 id attributes     # New object
-#        | 0x03 id                # Ref to object
-#        | 0x04 int64             # Int
-#        | 0x05 int8              # Bool (int8 != 0)
-#        | 0x06 int8              # Char
-#        | 0x07 double(64 bits)   # Float
-#        | 0x08 block             # String
-#        | 0x09 block             # NativeString
-#        | 0x0A flat_array;       # Array[nullable Object]
+# object = 0x01                    # null
+#        | 0x02 id attributes      # New object
+#        | 0x03 id                 # Ref to object
+#        | 0x04 int64              # Int
+#        | 0x05 int8               # Bool (int8 != 0)
+#        | 0x06 utf8 byte sequence # Char
+#        | 0x07 double(64 bits)    # Float
+#        | 0x08 block              # String
+#        | 0x09 block              # CString
+#        | 0x0A flat_array;        # Array[nullable Object]
 #
 # block = int64 int8*;
 # cstring = int8* 0x00;
@@ -53,7 +53,7 @@ private fun kind_bool: Byte do return 0x05u8
 private fun kind_char: Byte do return 0x06u8
 private fun kind_float: Byte do return 0x07u8
 private fun kind_string: Byte do return 0x08u8
-private fun kind_native_string: Byte do return 0x09u8
+private fun kind_c_string: Byte do return 0x09u8
 private fun kind_flat_array: Byte do return 0x0Au8
 
 private fun new_object_end: Byte do return 0x00u8
@@ -70,6 +70,8 @@ class BinarySerializer
        # Target writing stream
        var stream: Writer is writable
 
+       redef var current_object = null
+
        redef fun serialize(object)
        do
                if object == null then
@@ -92,7 +94,10 @@ class BinarySerializer
                        stream.write_int64 id
                else
                        # serialize here
+                       var last_object = current_object
+                       current_object = object
                        object.serialize_to_binary self
+                       current_object = last_object
                end
        end
 
@@ -128,6 +133,9 @@ class BinaryDeserializer
        # Tree of attributes, deserialized but not yet claimed
        private var unclaimed_attributes = new UnrolledList[HashMap[String, nullable Object]]
 
+       # Reusable 4-byte buffer holding the UTF-8 bytes of a single char
+       private var char_buf: CString is lazy do return new CString(4)
+
        # Read and deserialize the next attribute name and value
        #
        # A `peeked_char` can suffix the next attribute name.
@@ -150,7 +158,7 @@ class BinaryDeserializer
                return new Couple[String, nullable Object](next_attribute_name, next_object)
        end
 
-       redef fun deserialize_attribute(name)
+       redef fun deserialize_attribute(name, static_type)
        do
                if unclaimed_attributes.last.keys.has(name) then
                        # Pick in already deserialized attributes
@@ -175,7 +183,7 @@ class BinaryDeserializer
 
                        # An invalid attribute name is a heuristic for invalid data.
                        # Hitting an object end marker will result in an empty string.
-                       assert next_attribute_name.is_valid_id else
+                       if not next_attribute_name.is_valid_id then
 
                                var error
                                if next_attribute_name.is_empty then
@@ -204,11 +212,12 @@ class BinaryDeserializer
        # Read the next object from the binary stream and convert it to a Nit object
        private fun deserialize_next_object: nullable Object
        do
-               var kind = stream.read_byte
-               assert kind isa Byte else
+               var kindi = stream.read_byte
+               assert kindi >= 0 else
                        # TODO break even on keep_going
                        return null
                end
+               var kind = kindi.to_b
 
                # After this point, all stream reading errors are caught later
 
@@ -217,12 +226,20 @@ class BinaryDeserializer
                if kind == kind_bool then return stream.read_bool
                if kind == kind_float then return stream.read_double
                if kind == kind_char then
+                       var bf = char_buf
                        var b = stream.read_byte
-                       if b == null then return 0
-                       return b.to_i.ascii
+                       if b < 0 then return '�'
+                       var ln = b.to_b.u8len
+                       bf[0] = b.to_b
+                       for i in [1 .. ln[ do
+                               b = stream.read_byte
+                               if b < 0 then return '�'
+                               bf[i] = b.to_b
+                       end
+                       return bf.to_s_unsafe(ln, copy=false)[0]
                end
                if kind == kind_string then return stream.read_block
-               if kind == kind_native_string then return stream.read_block.to_cstring
+               if kind == kind_c_string then return stream.read_block.to_cstring
 
                if kind == kind_flat_array then
                        # An array
@@ -274,7 +291,7 @@ class BinaryDeserializer
 
                        # Check for the attributes end marker
                        loop
-                               var next_byte = stream.read_byte
+                               var next_byte = stream.read_byte.to_b
                                if next_byte == new_object_end then break
 
                                # Fetch an additional attribute, even if it isn't expected
@@ -293,7 +310,7 @@ class BinaryDeserializer
                return null
        end
 
-       redef fun deserialize
+       redef fun deserialize(static_type)
        do
                errors.clear
 
@@ -325,6 +342,12 @@ redef class Text
 
                return true
        end
+
+       redef fun serialize_to_binary(v)
+       do
+               v.stream.write_byte kind_string
+               v.stream.write_block to_s
+       end
 end
 
 # ---
@@ -349,7 +372,7 @@ redef class Serializable
        private fun serialize_to_binary(v: BinarySerializer)
        do
                serialize_header_to_binary v
-               core_serialize_to v
+               v.serialize_core self
                v.stream.write_byte new_object_end
        end
 end
@@ -382,23 +405,14 @@ redef class Char
        redef fun serialize_to_binary(v)
        do
                v.stream.write_byte kind_char
-               # Fix when UTF-8
-               v.stream.write_byte self.ascii.to_b
-       end
-end
-
-redef class String
-       redef fun serialize_to_binary(v)
-       do
-               v.stream.write_byte kind_string
-               v.stream.write_block self
+               for i in bytes do v.stream.write_byte i
        end
 end
 
-redef class NativeString
+redef class CString
        redef fun serialize_to_binary(v)
        do
-               v.stream.write_byte kind_native_string
+               v.stream.write_byte kind_c_string
                v.stream.write_block to_s
        end
 end
@@ -446,7 +460,7 @@ redef class Map[K, V]
        do
                serialize_header_to_binary v
 
-               core_serialize_to v
+               v.serialize_core self
 
                v.stream.write_string "keys"
                v.serialize_flat_array keys