Merge: Faster ASCII-only strings
author Jean Privat <jean@pryen.org>
Thu, 28 Apr 2016 23:04:49 +0000 (19:04 -0400)
committer Jean Privat <jean@pryen.org>
Thu, 28 Apr 2016 23:04:49 +0000 (19:04 -0400)
This simple PR re-introduces the concept of ASCII strings for faster, overhead-free random access to the characters of a string.

The class is private as it is to be used only internally; no client should care about the specific type of a String, and all performance improvements will be automatic whenever the optimization is possible.

Since we are dealing with UTF-8, the only case where O(1) access can be determined efficiently and without much overhead (one `if` statement at creation) is when a string contains only ASCII characters.
Note that since the Buffer can change access semantics depending on what is appended during its lifetime, no ASCII-only FlatBuffer has been added.

This is implemented here by introducing two subclasses of FlatString, one for ASCII-only strings, and one for regular Unicode strings (the old FlatString).

In terms of performance, there is a ~0.5% improvement in the compiler, but in specific cases like the `big_gov_data.json` JSON parsing bench (where not a single Unicode character is present), the difference amounts to ~35%.

Pull-Request: #2040
Reviewed-by: Jean Privat <jean@pryen.org>

1  2 
lib/json/serialization.nit

@@@ -99,7 -99,7 +99,7 @@@ class JsonSerialize
        # Target writing stream
        var stream: Writer
  
 -      # Write plain JSON? easier to read but does not support Nit deserialization
 +      # Write plain JSON? Standard JSON without metadata for deserialization
        #
        # If `false`, the default, serialize to support deserialization:
        #
        # * Does not support cycles, will replace the problematic references by `null`.
        # * Does not serialize the meta-data needed to deserialize the objects
        #   back to regular Nit objects.
 -      # * Keys of Nit `HashMap` are converted to their string reprensentation using `to_s`.
 +      # * Keys of Nit `HashMap` are converted to their string representation using `to_s`.
        var plain_json = false is writable
  
 +      # Write pretty JSON for human eyes?
 +      #
 +      # Toggles skipping lines between attributes of an object and
 +      # properly indent the written JSON.
 +      var pretty_json = false is writable
 +
 +      # Current indentation level used for writing `pretty_json`
 +      private var indent_level = 0
 +
        # List of the current open objects, the first is the main target of the serialization
        #
        # Used only when `plain_json == true` to detect cycles in serialization.
                        if plain_json then
                                for o in open_objects do
                                        if object.is_same_serialized(o) then
 -                                              # Cycle detected
 +                                              # Cycle, can't be managed in plain json
 +                                              warn "Cycle detected in serialized object, replacing reference with 'null'."
                                                stream.write "null"
                                                return
                                        end
        redef fun serialize_attribute(name, value)
        do
                if not plain_json or not first_attribute then
 -                      stream.write ", "
 +                      stream.write ","
                        first_attribute = false
                end
  
 +              new_line_and_indent
                stream.write "\""
                stream.write name
                stream.write "\": "
                if not plain_json and cache.has_object(object) then
                        # if already serialized, add local reference
                        var id = cache.id_for(object)
 -                      stream.write "\{\"__kind\": \"ref\", \"__id\": "
 +                      stream.write "\{"
 +                      indent_level += 1
 +                      new_line_and_indent
 +                      stream.write "\"__kind\": \"ref\", \"__id\": "
                        stream.write id.to_s
 +                      indent_level -= 1
 +                      new_line_and_indent
                        stream.write "\}"
                else
                        # serialize here
                        serialize object
                end
        end
 +
 +      # Write a new line and indent it, only if `pretty_json`
 +      private fun new_line_and_indent
 +      do
 +              if pretty_json then
 +                      stream.write "\n"
 +                      for i in indent_level.times do stream.write "\t"
 +              end
 +      end
  end
  
  # Deserializer from a Json string.
@@@ -380,7 -355,7 +380,7 @@@ class JsonDeserialize
                                var array_type = types.first
  
                                var typed_array
-                               if array_type == "FlatString" then
+                               if array_type == "ASCIIFlatString" or array_type == "UnicodeFlatString" then
                                        if has_nullable then
                                                typed_array = new Array[nullable FlatString]
                                        else typed_array = new Array[FlatString]
@@@ -500,9 -475,7 +500,9 @@@ redef class Serializabl
        do
                var id = v.cache.new_id_for(self)
                v.stream.write "\{"
 +              v.indent_level += 1
                if not v.plain_json then
 +                      v.new_line_and_indent
                        v.stream.write "\"__kind\": \"obj\", \"__id\": "
                        v.stream.write id.to_s
                        v.stream.write ", \"__class\": \""
                        v.stream.write "\""
                end
                core_serialize_to(v)
 +
 +              v.indent_level -= 1
 +              v.new_line_and_indent
                v.stream.write "\}"
        end
  
@@@ -575,21 -545,16 +575,21 @@@ redef class Collection[E
        private fun serialize_to_pure_json(v: JsonSerializer)
        do
                        v.stream.write "["
 +                      v.indent_level += 1
                        var is_first = true
                        for e in self do
                                if is_first then
                                        is_first = false
 -                              else v.stream.write ", "
 +                              else v.stream.write ","
 +                              v.new_line_and_indent
  
                                if not v.try_to_serialize(e) then
 +                                      assert e != null # null would have been serialized
                                        v.warn("element of type {e.class_name} is not serializable.")
                                end
                        end
 +                      v.indent_level -= 1
 +                      v.new_line_and_indent
                        v.stream.write "]"
        end
  end
@@@ -600,23 -565,16 +600,23 @@@ redef class SimpleCollection[E
                # Register as pseudo object
                if not v.plain_json then
                        var id = v.cache.new_id_for(self)
 -                      v.stream.write """{"__kind": "obj", "__id": """
 +                      v.stream.write """{"""
 +                      v.indent_level += 1
 +                      v.new_line_and_indent
 +                      v.stream.write """"__kind": "obj", "__id": """
                        v.stream.write id.to_s
                        v.stream.write """, "__class": """"
                        v.stream.write class_name
 -                      v.stream.write """", "__items": """
 +                      v.stream.write """","""
 +                      v.new_line_and_indent
 +                      v.stream.write """"__items": """
                end
  
                serialize_to_pure_json v
  
                if not v.plain_json then
 +                      v.indent_level -= 1
 +                      v.new_line_and_indent
                        v.stream.write "\}"
                end
        end
@@@ -645,49 -603,35 +645,49 @@@ redef class Map[K, V
  
                if v.plain_json then
                        v.stream.write "\{"
 +                      v.indent_level += 1
                        var first = true
                        for key, val in self do
                                if not first then
 -                                      v.stream.write ", "
 +                                      v.stream.write ","
                                else first = false
 +                              v.new_line_and_indent
  
                                var k = key or else "null"
                                v.stream.write k.to_s.to_json
                                v.stream.write ": "
                                if not v.try_to_serialize(val) then
 +                                      assert val != null # null would have been serialized
                                        v.warn("element of type {val.class_name} is not serializable.")
                                        v.stream.write "null"
                                end
                        end
 +                      v.indent_level -= 1
 +                      v.new_line_and_indent
                        v.stream.write "\}"
                else
 -                      v.stream.write """{"__kind": "obj", "__id": """
 +                      v.stream.write "\{"
 +                      v.indent_level += 1
 +                      v.new_line_and_indent
 +                      v.stream.write """"__kind": "obj", "__id": """
                        v.stream.write id.to_s
                        v.stream.write """, "__class": """"
                        v.stream.write class_name
                        v.stream.write """", "__length": """
                        v.stream.write length.to_s
  
 -                      v.stream.write """, "__keys": """
 +                      v.stream.write ","
 +                      v.new_line_and_indent
 +                      v.stream.write """"__keys": """
                        keys.serialize_to_pure_json v
  
 -                      v.stream.write """, "__values": """
 +                      v.stream.write ","
 +                      v.new_line_and_indent
 +                      v.stream.write """"__values": """
                        values.serialize_to_pure_json v
  
 +                      v.indent_level -= 1
 +                      v.new_line_and_indent
                        v.stream.write "\}"
                end
        end