From 9bac2ba673adcb42aa193467e1a7daf8ae42a779 Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Tue, 22 Mar 2016 11:42:30 -0400 Subject: [PATCH] compiler: Added prefixed and suffixed `String` support Signed-off-by: Lucas Bajolet --- src/compiler/abstract_compiler.nit | 81 ++++++++++++++++- src/frontend/glsl_validation.nit | 1 - src/frontend/i18n_phase.nit | 3 +- src/frontend/regex_phase.nit | 1 - src/literal.nit | 171 ++++++++++++++++++++++++++++++++++-- src/modelize/modelize_property.nit | 11 ++- src/rapid_type_analysis.nit | 5 ++ src/semantize/typing.nit | 52 +++++++++-- 8 files changed, 303 insertions(+), 22 deletions(-) diff --git a/src/compiler/abstract_compiler.nit b/src/compiler/abstract_compiler.nit index 17e44c7..0e40831 100644 --- a/src/compiler/abstract_compiler.nit +++ b/src/compiler/abstract_compiler.nit @@ -1602,6 +1602,18 @@ abstract class AbstractCompilerVisitor return res end + # Generates a NativeString instance fully escaped in C-style \xHH fashion + fun native_string_instance(ns: NativeString, len: Int): RuntimeVariable do + var mtype = mmodule.native_string_type + var nat = new_var(mtype) + var byte_esc = new Buffer.with_cap(len * 4) + for i in [0 .. len[ do + byte_esc.append("\\x{ns[i].to_s.substring_from(2)}") + end + self.add("{nat} = \"{byte_esc}\";") + return nat + end + # Generate a string value fun string_instance(string: String): RuntimeVariable do @@ -3574,14 +3586,75 @@ redef class AArrayExpr end end +redef class AugmentedStringFormExpr + # Factorize the making of a `Regex` object from a literal prefixed string + protected fun make_re(v: AbstractCompilerVisitor, rs: RuntimeVariable): nullable RuntimeVariable do + var re = to_re + assert re != null + var res = v.compile_callsite(re, [rs]) + if res == null then + print "Cannot call property `to_re` on {self}" + abort + end + for i in suffix.chars do + if i == 'i' then + var ign = ignore_case + assert ign != null + v.compile_callsite(ign, [res, v.bool_instance(true)]) + continue + end + if i == 'm' then + var nl = newline + assert nl != null + v.compile_callsite(nl, [res, v.bool_instance(true)]) + continue + end + if i == 'b' then + var ext = extended + assert ext != null + v.compile_callsite(ext, [res, v.bool_instance(false)]) + continue + end + # Should not happen, this needs to be updated + # along with the addition of new suffixes + abort + end + return res + end +end + redef class AStringFormExpr - redef fun expr(v) do return v.string_instance(self.value.as(not null)) + redef fun expr(v) do return v.string_instance(value) +end + +redef class AStringExpr + redef fun expr(v) do + var s = v.string_instance(value) + if is_string then return s + if is_bytestring then + var ns = v.native_string_instance(bytes.items, bytes.length) + var ln = v.int_instance(bytes.length) + var cs = to_bytes_with_copy + assert cs != null + var res = v.compile_callsite(cs, [ns, ln]) + assert res != null + s = res + else if is_re then + var res = make_re(v, s) + assert res != null + s = res + else + print "Unimplemented prefix or suffix for {self}" + abort + end + return s + end end redef class ASuperstringExpr redef fun expr(v) do - var type_string = mtype.as(not null) + var type_string = v.mmodule.string_type # Collect elements of the superstring var array = new Array[AExpr] @@ -3635,10 +3708,14 @@ redef class ASuperstringExpr # Fast join the native string to get the result var res = v.send(v.get_property("native_to_s", a.mtype), [a]) + assert res != null + + if is_re then res = make_re(v, res) # We finish to work with the native array, # so store it so that it can be reused v.add("{varonce} = {a};") + return res end end diff --git a/src/frontend/glsl_validation.nit b/src/frontend/glsl_validation.nit index 6d67d2c..2d61ce0 100644 --- a/src/frontend/glsl_validation.nit +++ b/src/frontend/glsl_validation.nit @@ -77,7 +77,6 @@ private class GLSLValidationPhase # Get the shader source var shader = nstring.value - assert shader != null # Copy the shader to a file # TODO make it more portable diff --git a/src/frontend/i18n_phase.nit b/src/frontend/i18n_phase.nit index 22010f6..14bf436 100644 --- a/src/frontend/i18n_phase.nit +++ b/src/frontend/i18n_phase.nit @@ -130,7 +130,7 @@ end redef class AStringExpr redef fun accept_string_finder(v) do - var str = value.as(not null).escape_to_gettext + var str = value.escape_to_gettext var code = "\"{str}\".get_translation(\"{v.domain}\", \"{v.languages_location}\")" var parse = v.toolcontext.parse_expr(code) replace_with(parse) @@ -147,7 +147,6 @@ redef class ASuperstringExpr if i isa AStartStringExpr or i isa AEndStringExpr or i isa AMidStringExpr then assert i isa AStringFormExpr var str = i.value - assert str != null fmt += str.replace("%", "%%") else fmt += "%" diff --git a/src/frontend/regex_phase.nit b/src/frontend/regex_phase.nit index 57e8822..7e711ff 100644 --- a/src/frontend/regex_phase.nit +++ b/src/frontend/regex_phase.nit @@ -59,7 +59,6 @@ redef class ACallExpr # Retrieve regex source var re_src = receiver.value - if re_src == null then return # Check for errors by compiling it right now var re = re_src.to_re diff --git a/src/literal.nit b/src/literal.nit index 72df12b..344bd4d 100644 --- a/src/literal.nit +++ b/src/literal.nit @@ -62,7 +62,7 @@ redef class AExpr fun as_string: nullable String do if not self isa AStringFormExpr then return null - return self.value.as(not null) + return self.value end # Get `self` as an `Int`. @@ -114,7 +114,10 @@ class AAugmentedLiteral protected var suffix: String is lazy do return text.substring_from(text.last_index_of(delimiter_end) + 1) # Content of the entity, without prefix nor suffix - protected var content: String is lazy do return text.substring_from(text.index_of(delimiter_start)).substring(0, text.last_index_of(delimiter_end) + 1) + protected var content: String is lazy do + var npr = text.substring_from(prefix.length) + return npr.substring(0, npr.length - suffix.length) + end end redef class ACharExpr @@ -158,19 +161,171 @@ redef class ACharExpr end end +# Any kind of string form with augmentations from prefixes or suffixes +class AugmentedStringFormExpr + super AAugmentedLiteral + + redef var delimiter_start = '"' + redef var delimiter_end = '"' + + # Is `self` a regular String object ? + fun is_string: Bool do return prefix == "" or prefix == "raw" + + # Is `self` a Regular Expression ? + fun is_re: Bool do return prefix == "re" + + # Is `self` a Byte String ? + fun is_bytestring: Bool do return prefix == "b" + + redef fun is_valid_augmentation do + if is_string and suffix == "" then return true + if is_bytestring and suffix == "" then return true + if is_re then + var suf = suffix + for i in suf.chars do + if i == 'i' then continue + if i == 'm' then continue + if i == 'b' then continue + return false + end + return true + end + if prefix != "" or suffix != "" then return false + return true + end +end + redef class AStringFormExpr + super AugmentedStringFormExpr + # The value of the literal string once computed. - var value: nullable String - redef fun accept_literal(v) - do - var txt = self.n_string.text + var value: String is noinit + + # The underlying bytes of the String, non-cleaned for UTF-8 + var bytes: Bytes is noinit + + redef fun text do return n_string.text + + # Returns the raw text read by the lexer + var raw_text: String is lazy do + var txt = content var behead = 1 var betail = 1 if txt.chars[0] == txt.chars[1] and txt.length >= 6 then behead = 3 betail = 3 - if txt.chars[0] == '"' and txt.chars[3] == '\n' then behead = 4 # ignore first \n in """ + if txt.chars[0] == delimiter_start and txt.chars[3] == '\n' then behead = 4 # ignore first \n in """ + end + return txt.substring(behead, txt.length - behead - betail) + end + + redef fun accept_literal(v) do + value = raw_text + bytes = raw_text.to_bytes + end +end + +redef class AEndStringExpr + redef var delimiter_end is lazy do return '"' + redef fun prefix do return "" +end + +redef class AStartStringExpr + redef var delimiter_start is lazy do + var str = n_string.text + for i in [0 .. str.length[ do + var c = str[i] + if c == '"' or c == '\'' then + return c + end + end + # Cannot happen, unless the parser is bugged + abort + end + + redef fun suffix do return "" +end + +redef class AMidStringExpr + redef fun prefix do return "" + redef fun suffix do return "" +end + +redef class AStringExpr + redef var delimiter_start is lazy do + var str = text + for i in [0 .. str.length[ do + var c = str[i] + if c == '"' or c == '\'' then + delimiter_end = c + return c + end + end + # Cannot happen, unless the parser is bugged + abort + end + + redef var delimiter_end is lazy do return delimiter_start + + redef fun accept_literal(v) + do + super + if not is_valid_augmentation then + v.toolcontext.error(hot_location, "Error: invalid prefix/suffix combination {prefix}/{suffix}") + return + end + if prefix != "raw" then + bytes = raw_text.unescape_to_bytes + value = bytes.to_s + end + end +end + +redef class ASuperstringExpr + super AugmentedStringFormExpr + + redef var prefix is lazy do + var fst = n_exprs.first + if fst isa AugmentedStringFormExpr then + var prf = fst.prefix + delimiter_start = fst.delimiter_start + delimiter_end = delimiter_start + return prf + end + return "" + end + + redef var suffix is lazy do + var lst = n_exprs.last + # Forces the system to update the delimiter's value + prefix + if lst isa AugmentedStringFormExpr then + lst.delimiter_end = delimiter_start + return lst.suffix + end + return "" + end + + redef fun accept_literal(v) + do + if is_bytestring then + v.toolcontext.error(hot_location, "Error: cannot produce a ByteString on a Superstring") + return + end + if not is_valid_augmentation then + v.toolcontext.error(hot_location, "Error: invalid prefix/suffix combination {prefix}/{suffix}") + return + end + end + + redef fun visit_all(v) do + super + if prefix != "raw" then + for i in n_exprs do + if not i isa AStringFormExpr then continue + i.bytes = i.raw_text.unescape_to_bytes + i.value = i.bytes.to_s + end end - self.value = txt.substring(behead, txt.length - behead - betail).unescape_nit end end diff --git a/src/modelize/modelize_property.nit b/src/modelize/modelize_property.nit index 23ac65f..df8f8bd 100644 --- a/src/modelize/modelize_property.nit +++ b/src/modelize/modelize_property.nit @@ -1406,7 +1406,16 @@ redef class AAttrPropdef var cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "String") if cla != null then mtype = cla.mclass_type else if nexpr isa AStringFormExpr then - var cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "String") + var cla: nullable MClass + if nexpr.is_bytestring then + cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "Bytes") + else if nexpr.is_re then + cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "Regex") + else if nexpr.is_string then + cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "String") + else + abort + end if cla != null then mtype = cla.mclass_type else modelbuilder.error(self, "Error: untyped attribute `{mreadpropdef}`. Implicit typing allowed only for literals and new.") diff --git a/src/rapid_type_analysis.nit b/src/rapid_type_analysis.nit index f763a56..42a757b 100644 --- a/src/rapid_type_analysis.nit +++ b/src/rapid_type_analysis.nit @@ -585,6 +585,11 @@ redef class AStringFormExpr v.add_type(native) var prop = v.get_method(native, "to_s_full") v.add_monomorphic_send(native, prop) + v.add_callsite(to_re) + v.add_callsite(ignore_case) + v.add_callsite(newline) + v.add_callsite(extended) + v.add_callsite(to_bytes_with_copy) end end diff --git a/src/semantize/typing.nit b/src/semantize/typing.nit index 08f3d65..ef8054a 100644 --- a/src/semantize/typing.nit +++ b/src/semantize/typing.nit @@ -1469,21 +1469,59 @@ redef class ACharExpr end end -redef class AStringFormExpr - redef fun accept_typing(v) - do +redef class AugmentedStringFormExpr + super AExpr + + # Text::to_re, used for prefix `re` + var to_re: nullable CallSite = null + # Regex::ignore_case, used for suffix `i` on `re` + var ignore_case: nullable CallSite = null + # Regex::newline, used for suffix `m` on `re` + var newline: nullable CallSite = null + # Regex::extended, used for suffix `b` on `re` + var extended: nullable CallSite = null + # NativeString::to_bytes_with_copy, used for prefix `b` + var to_bytes_with_copy: nullable CallSite = null + + redef fun accept_typing(v) do var mclass = v.get_mclass(self, "String") if mclass == null then return # Forward error - self.mtype = mclass.mclass_type + if is_bytestring then + to_bytes_with_copy = v.get_method(self, v.mmodule.native_string_type, "to_bytes_with_copy", false) + mclass = v.get_mclass(self, "Bytes") + else if is_re then + to_re = v.get_method(self, mclass.mclass_type, "to_re", false) + for i in suffix.chars do + mclass = v.get_mclass(self, "Regex") + if mclass == null then + v.error(self, "Error: `Regex` class unknown") + return + end + var service = "" + if i == 'i' then + service = "ignore_case=" + ignore_case = v.get_method(self, mclass.mclass_type, service, false) + else if i == 'm' then + service = "newline=" + newline = v.get_method(self, mclass.mclass_type, service, false) + else if i == 'b' then + service = "extended=" + extended = v.get_method(self, mclass.mclass_type, service, false) + else + v.error(self, "Type Error: Unrecognized suffix {i} in prefixed Regex") + abort + end + end + end + if mclass == null then return # Forward error + mtype = mclass.mclass_type end end redef class ASuperstringExpr redef fun accept_typing(v) do - var mclass = v.get_mclass(self, "String") - if mclass == null then return # Forward error - self.mtype = mclass.mclass_type + super var objclass = v.get_mclass(self, "Object") if objclass == null then return # Forward error var objtype = objclass.mclass_type -- 1.7.9.5