From: Jean Privat Date: Sat, 28 Mar 2015 01:34:45 +0000 (+0700) Subject: Merge: Fast super strings X-Git-Tag: v0.7.3~9 X-Git-Url: http://nitlanguage.org?hp=4860c1e2ba88b281a1ea8de89968598da467d93d Merge: Fast super strings Superstrings, like "a{b}c", are managed in the AST as a special group of sub-expression nodes that are either literal string parts or standard expressions. The previous example is basically `["a", b, "c"]` Previously, the compilation of super-strings was direct: the values are grouped in an array and `to_s` is called on it. So in fact `"a{b}c"` was compiled as `["a", b, "c"].to_s`. This basic implementation is simple and correct. But it has some drawbacks: * a new Array[Object] (and a NativeArray[Object]) is allocated each time the super-string is evaluated. * all elements are to_s-ized in `Array::to_s`, even the literal parts. * an additional NativeArray[String] is allocated in `Array:to_s` to do the fast concatenation. Because of the numerous allocations, superstrings caused a lot of work to the GC. This PR provides a better, but more complex implementation: * instead of an Array[Object], a NativeArray[String] is directly build and a fast concatenation `native_to_s` is invoked. * the allocated NativeArray is cached in a static variable so it can be reused in next evaluation. * the literal string parts are stored in the native array as is, and only once just after the allocation of the native array. Results for nitc/nitc/nitc: before: 0m6.076s after: 0m5.512s (-9% not bad!) Pull-Request: #1219 Reviewed-by: Lucas Bajolet --- diff --git a/lib/standard/string.nit b/lib/standard/string.nit index 63c8226..4892f95 100644 --- a/lib/standard/string.nit +++ b/lib/standard/string.nit @@ -2184,6 +2184,48 @@ redef class Array[E] end end +redef class NativeArray[E] + # Join all the elements using `to_s` + # + # REQUIRE: `self isa NativeArray[String]` + # REQUIRE: all elements are initialized + fun native_to_s: String + do + assert self isa NativeArray[String] + var l = length + var na = self + var i = 0 + var sl = 0 + var mypos = 0 + while i < l do + sl += na[i].length + i += 1 + mypos += 1 + end + var ns = new NativeString(sl + 1) + ns[sl] = '\0' + i = 0 + var off = 0 + while i < mypos do + var tmp = na[i] + var tpl = tmp.length + if tmp isa FlatString then + tmp.items.copy_to(ns, tpl, tmp.index_from, off) + off += tpl + else + for j in tmp.substrings do + var s = j.as(FlatString) + var slen = s.length + s.items.copy_to(ns, slen, s.index_from, off) + off += slen + end + end + i += 1 + end + return ns.to_s_with_length(sl) + end +end + redef class Map[K,V] # Concatenate couple of 'key value'. # key and value are separated by `couple_sep`. diff --git a/src/compiler/abstract_compiler.nit b/src/compiler/abstract_compiler.nit index c3952e3..658f0e7 100644 --- a/src/compiler/abstract_compiler.nit +++ b/src/compiler/abstract_compiler.nit @@ -1126,6 +1126,14 @@ abstract class AbstractCompilerVisitor fun native_array_def(pname: String, ret_type: nullable MType, arguments: Array[RuntimeVariable]) is abstract + # Return an element of a native array. + # The method is unsafe and is just a direct wrapper for the specific implementation of native arrays + fun native_array_get(native_array: RuntimeVariable, index: Int): RuntimeVariable is abstract + + # Store an element in a native array. + # The method is unsafe and is just a direct wrapper for the specific implementation of native arrays + fun native_array_set(native_array: RuntimeVariable, index: Int, value: RuntimeVariable) is abstract + # Evaluate `args` as expressions in the call of `mpropdef` on `recv`. # This method is used to manage varargs in signatures and returns the real array # of runtime variables to use in the call. @@ -2767,14 +2775,64 @@ end redef class ASuperstringExpr redef fun expr(v) do - var array = new Array[RuntimeVariable] + var type_string = mtype.as(not null) + + # Collect elements of the superstring + var array = new Array[AExpr] for ne in self.n_exprs do + # Drop literal empty string. + # They appears in things like "{a}" that is ["", a, ""] if ne isa AStringFormExpr and ne.value == "" then continue # skip empty sub-strings - var i = v.expr(ne, null) - array.add(i) + array.add(ne) + end + + # Store the allocated native array in a static variable + # For reusing later + var varonce = v.get_name("varonce") + v.add("if (unlikely({varonce}==NULL)) \{") + + # The native array that will contains the elements to_s-ized. + # For fast concatenation. + var a = v.native_array_instance(type_string, v.int_instance(array.length)) + + v.add_decl("static {a.mtype.ctype} {varonce};") + + # Pre-fill the array with the literal string parts. + # So they do not need to be filled again when reused + for i in [0..array.length[ do + var ne = array[i] + if not ne isa AStringFormExpr then continue + var e = v.expr(ne, null) + v.native_array_set(a, i, e) end - var a = v.array_instance(array, v.object_type) - var res = v.send(v.get_property("to_s", a.mtype), [a]) + + v.add("\} else \{") + # Take the native-array from the store. + # The point is to prevent that some recursive execution use (and corrupt) the same native array + # WARNING: not thread safe! (FIXME?) + v.add("{a} = {varonce};") + v.add("{varonce} = NULL;") + v.add("\}") + + # Stringify the elements and put them in the native array + var to_s_method = v.get_property("to_s", v.object_type) + for i in [0..array.length[ do + var ne = array[i] + if ne isa AStringFormExpr then continue + var e = v.expr(ne, null) + # Skip the `to_s` if the element is already a String + if not e.mcasttype.is_subtype(v.compiler.mainmodule, null, type_string) then + e = v.send(to_s_method, [e]).as(not null) + end + v.native_array_set(a, i, e) + end + + # Fast join the native string to get the result + var res = v.send(v.get_property("native_to_s", a.mtype), [a]) + + # We finish to work with the native array, + # so store it so that it can be reused + v.add("{varonce} = {a};") return res end end diff --git a/src/compiler/global_compiler.nit b/src/compiler/global_compiler.nit index 9a89064..1aff84e 100644 --- a/src/compiler/global_compiler.nit +++ b/src/compiler/global_compiler.nit @@ -407,6 +407,19 @@ class GlobalCompilerVisitor return self.new_expr("NEW_{ret_type.c_name}({length})", ret_type) end + redef fun native_array_get(nat, i) + do + var recv = "((struct {nat.mcasttype.c_name}*){nat})->values" + var ret_type = nat.mcasttype.as(MClassType).arguments.first + return self.new_expr("{recv}[{i}]", ret_type) + end + + redef fun native_array_set(nat, i, val) + do + var recv = "((struct {nat.mcasttype.c_name}*){nat})->values" + self.add("{recv}[{i}]={val};") + end + redef fun calloc_array(ret_type, arguments) do self.ret(self.new_expr("NEW_{ret_type.c_name}({arguments[1]})", ret_type)) diff --git a/src/compiler/separate_compiler.nit b/src/compiler/separate_compiler.nit index 00125ea..ecfcc8e 100644 --- a/src/compiler/separate_compiler.nit +++ b/src/compiler/separate_compiler.nit @@ -2047,6 +2047,22 @@ class SeparateCompilerVisitor end end + redef fun native_array_get(nat, i) + do + var nclass = mmodule.native_array_class + var recv = "((struct instance_{nclass.c_name}*){nat})->values" + # Because the objects are boxed, return the box to avoid unnecessary (or broken) unboxing/reboxing + var res = self.new_expr("{recv}[{i}]", compiler.mainmodule.object_type) + return res + end + + redef fun native_array_set(nat, i, val) + do + var nclass = mmodule.native_array_class + var recv = "((struct instance_{nclass.c_name}*){nat})->values" + self.add("{recv}[{i}]={val};") + end + fun link_unresolved_type(mclassdef: MClassDef, mtype: MType) do assert mtype.need_anchor var compiler = self.compiler diff --git a/src/rapid_type_analysis.nit b/src/rapid_type_analysis.nit index bcae37f..69e9023 100644 --- a/src/rapid_type_analysis.nit +++ b/src/rapid_type_analysis.nit @@ -560,11 +560,13 @@ redef class ASuperstringExpr var object_type = mmodule.object_type var arraytype = mmodule.array_type(object_type) v.add_type(arraytype) - v.add_type(mmodule.native_array_type(object_type)) + var nattype = mmodule.native_array_type(object_type) + v.add_type(nattype) var prop = v.get_method(arraytype, "join") v.add_monomorphic_send(arraytype, prop) var prop2 = v.get_method(arraytype, "with_native") v.add_monomorphic_send(arraytype, prop2) + v.add_monomorphic_send(nattype, v.get_method(nattype, "native_to_s")) end end diff --git a/tests/sav/nitg-e/error_needed_method_alt4.res b/tests/sav/nitg-e/error_needed_method_alt4.res deleted file mode 100644 index d1e2d76..0000000 --- a/tests/sav/nitg-e/error_needed_method_alt4.res +++ /dev/null @@ -1 +0,0 @@ -alt/error_needed_method_alt4.nit:49,10--14: Fatal Error: NativeString must have a property named to_s_with_length.