rename `NativeString` to `CString`
[nit.git] / lib / binary / serialization.nit
index ba39c3b..34de766 100644 (file)
 # The serialized data format uses a dictionary structure similar to BSON:
 #
 # ~~~raw
-# object = 0x01                   # null
-#        | 0x02 id attributes     # New object
-#        | 0x03 id                # Ref to object
-#        | 0x04 int64             # Int
-#        | 0x05 int8              # Bool (int8 != 0)
-#        | 0x06 int8              # Char
-#        | 0x07 double(64 bits)   # Float
-#        | 0x08 block             # String
-#        | 0x09 block             # NativeString
-#        | 0x0A flat_array;       # Array[nullable Object]
+# object = 0x01                    # null
+#        | 0x02 id attributes      # New object
+#        | 0x03 id                 # Ref to object
+#        | 0x04 int64              # Int
+#        | 0x05 int8               # Bool (int8 != 0)
+#        | 0x06 utf8 byte sequence # Char
+#        | 0x07 double(64 bits)    # Float
+#        | 0x08 block              # String
+#        | 0x09 block              # CString
+#        | 0x0A flat_array;        # Array[nullable Object]
 #
 # block = int64 int8*;
 # cstring = int8* 0x00;
@@ -45,18 +45,18 @@ import more_collections
 # ---
 # Special bytes, marking the kind of objects in the stream and the end of an object
 
-private fun kind_null: Int do return 0x01
-private fun kind_object_new: Int do return 0x02
-private fun kind_object_ref: Int do return 0x03
-private fun kind_int: Int do return 0x04
-private fun kind_bool: Int do return 0x05
-private fun kind_char: Int do return 0x06
-private fun kind_float: Int do return 0x07
-private fun kind_string: Int do return 0x08
-private fun kind_native_string: Int do return 0x09
-private fun kind_flat_array: Int do return 0x0A
+private fun kind_null: Byte do return 0x01u8
+private fun kind_object_new: Byte do return 0x02u8
+private fun kind_object_ref: Byte do return 0x03u8
+private fun kind_int: Byte do return 0x04u8
+private fun kind_bool: Byte do return 0x05u8
+private fun kind_char: Byte do return 0x06u8
+private fun kind_float: Byte do return 0x07u8
+private fun kind_string: Byte do return 0x08u8
+private fun kind_native_string: Byte do return 0x09u8
+private fun kind_flat_array: Byte do return 0x0Au8
 
-private fun new_object_end: Int do return 0x00
+private fun new_object_end: Byte do return 0x00u8
 
 #---
 # Engines
@@ -128,12 +128,15 @@ class BinaryDeserializer
        # Tree of attributes, deserialized but not yet claimed
        private var unclaimed_attributes = new UnrolledList[HashMap[String, nullable Object]]
 
+       # Buffer for one char
+       private var char_buf: CString is lazy do return new CString(4)
+
        # Read and deserialize the next attribute name and value
        #
        # A `peeked_char` can prefix the next attribute name.
        #
        # Returns `null` on error.
-       private fun deserialize_next_attribute(peeked_char: nullable Char):
+       private fun deserialize_next_attribute(peeked_char: nullable Byte):
                nullable Couple[String, nullable Object]
        do
                # Try the next attribute
@@ -150,7 +153,7 @@ class BinaryDeserializer
                return new Couple[String, nullable Object](next_attribute_name, next_object)
        end
 
-       redef fun deserialize_attribute(name)
+       redef fun deserialize_attribute(name, static_type)
        do
                if unclaimed_attributes.last.keys.has(name) then
                        # Pick in already deserialized attributes
@@ -175,7 +178,7 @@ class BinaryDeserializer
 
                        # An invalid attribute name is a heuristic for invalid data.
                        # Hitting an object end marker will result in an empty string.
-                       assert next_attribute_name.is_valid_id else
+                       if not next_attribute_name.is_valid_id then
 
                                var error
                                if next_attribute_name.is_empty then
@@ -205,7 +208,7 @@ class BinaryDeserializer
        private fun deserialize_next_object: nullable Object
        do
                var kind = stream.read_byte
-               assert kind isa Int else
+               assert kind isa Byte else
                        # TODO break even on keep_going
                        return null
                end
@@ -216,7 +219,19 @@ class BinaryDeserializer
                if kind == kind_int then return stream.read_int64
                if kind == kind_bool then return stream.read_bool
                if kind == kind_float then return stream.read_double
-               if kind == kind_char then return (stream.read_byte or else 0).ascii
+               if kind == kind_char then
+                       var bf = char_buf
+                       var b = stream.read_byte
+                       if b == null then return '�'
+                       var ln = b.u8len
+                       bf[0] = b
+                       for i in [1 .. ln[ do
+                               b = stream.read_byte
+                               if b == null then return '�'
+                               bf[i] = b
+                       end
+                       return bf.to_s_with_length(ln)[0]
+               end
                if kind == kind_string then return stream.read_block
                if kind == kind_native_string then return stream.read_block.to_cstring
 
@@ -274,7 +289,7 @@ class BinaryDeserializer
                                if next_byte == new_object_end then break
 
                                # Fetch an additional attribute, even if it isn't expected
-                               deserialize_next_attribute((next_byte or else 0).ascii)
+                               deserialize_next_attribute(next_byte)
                        end
 
                        # Close object
@@ -289,7 +304,7 @@ class BinaryDeserializer
                return null
        end
 
-       redef fun deserialize
+       redef fun deserialize(static_type)
        do
                errors.clear
 
@@ -321,6 +336,12 @@ redef class Text
 
                return true
        end
+
+       redef fun serialize_to_binary(v)
+       do
+               v.stream.write_byte kind_string
+               v.stream.write_block to_s
+       end
 end
 
 # ---
@@ -378,19 +399,11 @@ redef class Char
        redef fun serialize_to_binary(v)
        do
                v.stream.write_byte kind_char
-               v.stream.write_byte self.ascii
-       end
-end
-
-redef class String
-       redef fun serialize_to_binary(v)
-       do
-               v.stream.write_byte kind_string
-               v.stream.write_block self
+               for i in bytes do v.stream.write_byte i
        end
 end
 
-redef class NativeString
+redef class CString
        redef fun serialize_to_binary(v)
        do
                v.stream.write_byte kind_native_string