As a follow-up to #1991, here's the support mentioned in #1734 for bytestrings, regex and raw strings
NOTE: Depends on #1991 for integration; you may review the last 3 commits while #1991 is being reviewed and eventually merged
TODO: Support degraded mode (Bytes and Byte only) for byte SuperStrings instead of refusing at compile-time
Pull-Request: #1992
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
return res
end
+ # Emit a `NativeString` variable in the generated C code whose literal
+ # spells each of the first `len` bytes of `ns` as a `\x..` escape
+ fun native_string_instance(ns: NativeString, len: Int): RuntimeVariable do
+ var res = new_var(mmodule.native_string_type)
+ # Reserve four characters per byte for the escaped form
+ var escaped = new Buffer.with_cap(len * 4)
+ var i = 0
+ while i < len do
+ # Skip the first two characters of the byte's textual form (presumably a `0x` prefix)
+ var hex = ns[i].to_s.substring_from(2)
+ escaped.append("\\x{hex}")
+ i += 1
+ end
+ add("{res} = \"{escaped}\";")
+ return res
+ end
+
# Generate a string value
fun string_instance(string: String): RuntimeVariable
do
end
end
+redef class AugmentedStringFormExpr
+ # Compile the call to the `to_re` callsite on `rs`, then compile
+ # one setter call on the result for each character of `suffix`
+ protected fun make_re(v: AbstractCompilerVisitor, rs: RuntimeVariable): nullable RuntimeVariable do
+ var to_re_site = to_re
+ assert to_re_site != null
+ var regex = v.compile_callsite(to_re_site, [rs])
+ if regex == null then
+ print "Cannot call property `to_re` on {self}"
+ abort
+ end
+ for flag in suffix.chars do
+ if flag == 'i' then
+ var site = ignore_case
+ assert site != null
+ v.compile_callsite(site, [regex, v.bool_instance(true)])
+ else if flag == 'm' then
+ var site = newline
+ assert site != null
+ v.compile_callsite(site, [regex, v.bool_instance(true)])
+ else if flag == 'b' then
+ var site = extended
+ assert site != null
+ v.compile_callsite(site, [regex, v.bool_instance(false)])
+ else
+ # Should not happen, this needs to be updated
+ # along with the addition of new suffixes
+ abort
+ end
+ end
+ return regex
+ end
+end
+
redef class AStringFormExpr
- redef fun expr(v) do return v.string_instance(self.value.as(not null))
+ # Compile the literal as a string instance built from `value`
+ redef fun expr(v) do return v.string_instance(value)
+end
+
+redef class AStringExpr
+ # Compile the literal according to its prefix: plain and raw strings give
+ # the string instance itself, bytestrings go through `to_bytes_with_copy`,
+ # and regexes go through `make_re`
+ redef fun expr(v) do
+ var str = v.string_instance(value)
+ if is_string then return str
+ if is_bytestring then
+ var native = v.native_string_instance(bytes.items, bytes.length)
+ var length = v.int_instance(bytes.length)
+ var site = to_bytes_with_copy
+ assert site != null
+ var res = v.compile_callsite(site, [native, length])
+ assert res != null
+ return res
+ end
+ if is_re then
+ var res = make_re(v, str)
+ assert res != null
+ return res
+ end
+ print "Unimplemented prefix or suffix for {self}"
+ abort
+ end
end
redef class ASuperstringExpr
redef fun expr(v)
do
- var type_string = mtype.as(not null)
+ var type_string = v.mmodule.string_type
# Collect elements of the superstring
var array = new Array[AExpr]
# Fast join the native string to get the result
var res = v.send(v.get_property("native_to_s", a.mtype), [a])
+ assert res != null
+
+ if is_re then res = make_re(v, res)
# We finish to work with the native array,
# so store it so that it can be reused
v.add("{varonce} = {a};")
+
return res
end
end
# Get the shader source
var shader = nstring.value
- assert shader != null
# Copy the shader to a file
# TODO make it more portable
redef class AStringExpr
redef fun accept_string_finder(v) do
- var str = value.as(not null).escape_to_gettext
+ var str = value.escape_to_gettext
var code = "\"{str}\".get_translation(\"{v.domain}\", \"{v.languages_location}\")"
var parse = v.toolcontext.parse_expr(code)
replace_with(parse)
if i isa AStartStringExpr or i isa AEndStringExpr or i isa AMidStringExpr then
assert i isa AStringFormExpr
var str = i.value
- assert str != null
fmt += str.replace("%", "%%")
else
fmt += "%"
# Retrieve regex source
var re_src = receiver.value
- if re_src == null then return
# Check for errors by compiling it right now
var re = re_src.to_re
return instance
end
+ # Build a native string instance of `len` bytes and fill it by copying from `txt`
+ fun native_string_instance_from_ns(txt: NativeString, len: Int): Instance
+ do
+ var res = native_string_instance_len(len)
+ txt.copy_to(res.val, len, 0, 0)
+ return res
+ end
+
# Return a new native string initialized of `length`
fun native_string_instance_len(length: Int): PrimitiveInstance[NativeString]
do
end
end
+redef class AugmentedStringFormExpr
+ # Call the `to_re` callsite on `rs`, then call one setter
+ # on the result for each character of `suffix`
+ fun make_re(v: NaiveInterpreter, rs: Instance): nullable Instance do
+ var to_re_site = to_re
+ assert to_re_site != null
+ var regex = v.callsite(to_re_site, [rs])
+ if regex == null then
+ print "Cannot call property `to_re` on {self}"
+ abort
+ end
+ for flag in suffix.chars do
+ if flag == 'i' then
+ var site = ignore_case
+ assert site != null
+ v.callsite(site, [regex, v.bool_instance(true)])
+ else if flag == 'm' then
+ var site = newline
+ assert site != null
+ v.callsite(site, [regex, v.bool_instance(true)])
+ else if flag == 'b' then
+ var site = extended
+ assert site != null
+ v.callsite(site, [regex, v.bool_instance(false)])
+ else
+ # Should not happen, this needs to be updated
+ # along with the addition of new suffixes
+ abort
+ end
+ end
+ return regex
+ end
+end
+
redef class AStringFormExpr
- redef fun expr(v)
- do
- var txt = self.value.as(not null)
- return v.string_instance(txt)
+ # Evaluate the literal as a string instance built from `value`
+ redef fun expr(v) do return v.string_instance(value)
+end
+
+redef class AStringExpr
+ # Evaluate the literal according to its prefix:
+ # a plain or raw string yields the string instance of `value`,
+ # a bytestring calls `to_bytes_with_copy` on a native copy of `bytes`,
+ # and a regex goes through `make_re`.
+ # Aborts on any other prefix.
+ redef fun expr(v) do
+ var s = v.string_instance(value)
+ if is_string then return s
+ if is_bytestring then
+ var ns = v.native_string_instance_from_ns(bytes.items, bytes.length)
+ var ln = v.int_instance(bytes.length)
+ var prop = to_bytes_with_copy
+ assert prop != null
+ var res = v.callsite(prop, [ns, ln])
+ if res == null then
+ # Report the property that is actually invoked
+ print "Cannot call property `to_bytes_with_copy` on {self}"
+ abort
+ end
+ s = res
+ else if is_re then
+ var res = make_re(v, s)
+ assert res != null
+ s = res
+ else
+ print "Unimplemented prefix or suffix for {self}"
+ abort
+ end
+ return s
end
end
var i = v.array_instance(array, v.mainmodule.object_type)
var res = v.send(v.force_get_primitive_method("plain_to_s", i.mtype), [i])
assert res != null
+ if is_re then res = make_re(v, res)
return res
end
end
fun as_string: nullable String
do
if not self isa AStringFormExpr then return null
- return self.value.as(not null)
+ return self.value
end
# Get `self` as an `Int`.
protected var suffix: String is lazy do return text.substring_from(text.last_index_of(delimiter_end) + 1)
# Content of the entity, without prefix nor suffix
- protected var content: String is lazy do return text.substring_from(text.index_of(delimiter_start)).substring(0, text.last_index_of(delimiter_end) + 1)
+ protected var content: String is lazy do
+ # Drop the prefix first, then cut `suffix.length` characters off the end of what remains
+ var npr = text.substring_from(prefix.length)
+ return npr.substring(0, npr.length - suffix.length)
+ end
end
redef class ACharExpr
end
end
+# String forms whose meaning can be altered by a prefix or a suffix
+class AugmentedStringFormExpr
+ super AAugmentedLiteral
+
+ redef var delimiter_start = '"'
+ redef var delimiter_end = '"'
+
+ # Does `self` denote a regular String (no prefix, or the `raw` prefix)?
+ fun is_string: Bool do return prefix == "" or prefix == "raw"
+
+ # Does `self` denote a Regular Expression (prefix `re`)?
+ fun is_re: Bool do return prefix == "re"
+
+ # Does `self` denote a Byte String (prefix `b`)?
+ fun is_bytestring: Bool do return prefix == "b"
+
+ # A regex accepts any sequence of the suffix characters `i`, `m` and `b`;
+ # plain, raw and byte strings accept no suffix; anything else is invalid
+ redef fun is_valid_augmentation do
+ if is_re then
+ for c in suffix.chars do
+ if c != 'i' and c != 'm' and c != 'b' then return false
+ end
+ return true
+ end
+ return (is_string or is_bytestring) and suffix == ""
+ end
+end
+
redef class AStringFormExpr
+ super AugmentedStringFormExpr
+
# The value of the literal string once computed.
- var value: nullable String
- redef fun accept_literal(v)
- do
- var txt = self.n_string.text
+ var value: String is noinit
+
+ # The underlying bytes of the String, non-cleaned for UTF-8
+ var bytes: Bytes is noinit
+
+ # The text of the `n_string` token
+ redef fun text do return n_string.text
+
+ # Returns the raw text read by the lexer
+ var raw_text: String is lazy do
+ var txt = content
+ # Number of characters stripped at the start (`behead`) and at the end (`betail`) of `txt`
var behead = 1
var betail = 1
+ # Two identical leading characters in a text of at least 6 characters:
+ # strip three characters on each side instead of one
if txt.chars[0] == txt.chars[1] and txt.length >= 6 then
behead = 3
betail = 3
- if txt.chars[0] == '"' and txt.chars[3] == '\n' then behead = 4 # ignore first \n in """
+ if txt.chars[0] == delimiter_start and txt.chars[3] == '\n' then behead = 4 # ignore first \n in """
+ end
+ return txt.substring(behead, txt.length - behead - betail)
+ end
+
+ # Initialize `value` and `bytes` from `raw_text`, without any unescaping
+ redef fun accept_literal(v) do
+ value = raw_text
+ bytes = raw_text.to_bytes
+ end
+end
+
+redef class AEndStringExpr
+ # Lazily defaults to `"`
+ redef var delimiter_end is lazy do return '"'
+ # This token never carries a prefix
+ redef fun prefix do return ""
+end
+
+redef class AStartStringExpr
+ # The first `"` or `'` character found in the text of the `n_string` token
+ redef var delimiter_start is lazy do
+ for c in n_string.text.chars do
+ if c == '"' or c == '\'' then return c
+ end
+ # Cannot happen, unless the parser is bugged
+ abort
+ end
+
+ # This token never carries a suffix
+ redef fun suffix do return ""
+end
+
+redef class AMidStringExpr
+ # This token carries neither a prefix nor a suffix
+ redef fun prefix do return ""
+ redef fun suffix do return ""
+end
+
+redef class AStringExpr
+ # The first `"` or `'` character found in `text`;
+ # computing it also sets `delimiter_end` to that same character
+ redef var delimiter_start is lazy do
+ for c in text.chars do
+ if c != '"' and c != '\'' then continue
+ delimiter_end = c
+ return c
+ end
+ # Cannot happen, unless the parser is bugged
+ abort
+ end
+
+ # Same character as `delimiter_start`
+ redef var delimiter_end is lazy do return delimiter_start
+
+ # Report invalid prefix/suffix combinations, then
+ # unescape `raw_text` into `bytes` and `value` unless the prefix is `raw`
+ redef fun accept_literal(v)
+ do
+ super
+ if not is_valid_augmentation then
+ v.toolcontext.error(hot_location, "Error: invalid prefix/suffix combination {prefix}/{suffix}")
+ return
+ end
+ if prefix == "raw" then return
+ var unescaped = raw_text.unescape_to_bytes
+ bytes = unescaped
+ value = unescaped.to_s
+ end
+end
+
+redef class ASuperstringExpr
+ super AugmentedStringFormExpr
+
+ # Prefix of the first sub-expression when it is an augmented string form, "" otherwise.
+ # Computing it also copies the start delimiter of that sub-expression
+ # into both `delimiter_start` and `delimiter_end` of `self`.
+ redef var prefix is lazy do
+ var fst = n_exprs.first
+ if fst isa AugmentedStringFormExpr then
+ var prf = fst.prefix
+ delimiter_start = fst.delimiter_start
+ delimiter_end = delimiter_start
+ return prf
+ end
+ return ""
+ end
+
+ # Suffix of the last sub-expression when it is an augmented string form, "" otherwise.
+ # The end delimiter of that sub-expression is set to `delimiter_start` before its suffix is read.
+ redef var suffix is lazy do
+ var lst = n_exprs.last
+ # Forces the system to update the delimiter's value
+ prefix
+ if lst isa AugmentedStringFormExpr then
+ lst.delimiter_end = delimiter_start
+ return lst.suffix
+ end
+ return ""
+ end
+
+ # Report an error for a `b` prefix, or for an invalid prefix/suffix combination
+ redef fun accept_literal(v)
+ do
+ if is_bytestring then
+ v.toolcontext.error(hot_location, "Error: cannot produce a ByteString on a Superstring")
+ return
+ end
+ if not is_valid_augmentation then
+ v.toolcontext.error(hot_location, "Error: invalid prefix/suffix combination {prefix}/{suffix}")
+ return
+ end
+ end
+
+ # After the regular visit, unless the prefix is `raw`,
+ # replace `bytes` and `value` of every string-form sub-expression by their unescaped form
+ redef fun visit_all(v) do
+ super
+ if prefix != "raw" then
+ for i in n_exprs do
+ if not i isa AStringFormExpr then continue
+ i.bytes = i.raw_text.unescape_to_bytes
+ i.value = i.bytes.to_s
+ end
end
- self.value = txt.substring(behead, txt.length - behead - betail).unescape_nit
end
end
var cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "String")
if cla != null then mtype = cla.mclass_type
else if nexpr isa AStringFormExpr then
- var cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "String")
+ var cla: nullable MClass
+ if nexpr.is_bytestring then
+ cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "Bytes")
+ else if nexpr.is_re then
+ cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "Regex")
+ else if nexpr.is_string then
+ cla = modelbuilder.try_get_mclass_by_name(nexpr, mmodule, "String")
+ else
+ abort
+ end
if cla != null then mtype = cla.mclass_type
else
modelbuilder.error(self, "Error: untyped attribute `{mreadpropdef}`. Implicit typing allowed only for literals and new.")
v.add_type(native)
var prop = v.get_method(native, "to_s_full")
v.add_monomorphic_send(native, prop)
+ v.add_callsite(to_re)
+ v.add_callsite(ignore_case)
+ v.add_callsite(newline)
+ v.add_callsite(extended)
+ v.add_callsite(to_bytes_with_copy)
end
end
end
end
-redef class AStringFormExpr
- redef fun accept_typing(v)
- do
+redef class AugmentedStringFormExpr
+ super AExpr
+
+ # Text::to_re, used for prefix `re`
+ var to_re: nullable CallSite = null
+ # Regex::ignore_case, used for suffix `i` on `re`
+ var ignore_case: nullable CallSite = null
+ # Regex::newline, used for suffix `m` on `re`
+ var newline: nullable CallSite = null
+ # Regex::extended, used for suffix `b` on `re`
+ var extended: nullable CallSite = null
+ # NativeString::to_bytes_with_copy, used for prefix `b`
+ var to_bytes_with_copy: nullable CallSite = null
+
+ # Type the literal according to its prefix (`Bytes` for `b`, `Regex` for `re`,
+ # `String` otherwise) and resolve the callsites needed to build it.
+ # Returns without setting `mtype` when a required class is missing.
+ redef fun accept_typing(v) do
var mclass = v.get_mclass(self, "String")
if mclass == null then return # Forward error
- self.mtype = mclass.mclass_type
+ if is_bytestring then
+ to_bytes_with_copy = v.get_method(self, v.mmodule.native_string_type, "to_bytes_with_copy", false)
+ mclass = v.get_mclass(self, "Bytes")
+ else if is_re then
+ to_re = v.get_method(self, mclass.mclass_type, "to_re", false)
+ # Look `Regex` up before the loop: a regex without any suffix
+ # must still be typed as `Regex`, not as `String`
+ mclass = v.get_mclass(self, "Regex")
+ if mclass == null then
+ v.error(self, "Error: `Regex` class unknown")
+ return
+ end
+ var re_type = mclass.mclass_type
+ for i in suffix.chars do
+ if i == 'i' then
+ ignore_case = v.get_method(self, re_type, "ignore_case=", false)
+ else if i == 'm' then
+ newline = v.get_method(self, re_type, "newline=", false)
+ else if i == 'b' then
+ extended = v.get_method(self, re_type, "extended=", false)
+ else
+ v.error(self, "Type Error: Unrecognized suffix {i} in prefixed Regex")
+ abort
+ end
+ end
+ end
+ if mclass == null then return # Forward error
+ mtype = mclass.mclass_type
end
end
redef class ASuperstringExpr
redef fun accept_typing(v)
do
- var mclass = v.get_mclass(self, "String")
- if mclass == null then return # Forward error
- self.mtype = mclass.mclass_type
+ super
var objclass = v.get_mclass(self, "Object")
if objclass == null then return # Forward error
var objtype = objclass.mclass_type
--- /dev/null
+s isa Bytes
+StringAB
+537472696E674142
+s2 isa FlatString
+String𐏓
+s3 isa Bytes
+StringA�
+537472696E6741FD
+s4 isa Regex
+/String/
+true
+true
+false
+s5 isa FlatString
+String�
+s6 isa FlatString
+\nStr\x00
+s7 isa FlatString
+\nString66515\x41
+s8 isa FlatString
+
+String66515A
+s9 isa Regex
+/
+0x47String/
+true
+false
+true
+s10 isa Regex
+/String/
+false
+false
+false
+s11 isa Regex
+/String/
+false
+true
+true
--- /dev/null
+alt/test_augmented_alt1.nit:22,6--18: Error: cannot produce a ByteString on a Superstring
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+var c1 = b'G'
+var c2 = u'𐏓'
+
+var s = b"String\x41\x42"
+var s2 = "String\u0103D3"
+var s3 = b"String\x41\xFD"
+var s4 = re"String"i
+#alt1 s4 = b"String{c1}"
+var s5 = "String\xFD"
+var s6 = raw"\nStr\x00"
+var s7 = raw"\nString{c2}\x41"
+var s8 = "\nString{c2}\x41"
+var s9 = re"\n{c1}String"imb
+var s10 = re"String"b
+var s11 = re"String"m
+
+# For each literal, print its dynamic class name and its value;
+# byte strings also print their hex digest, regexes their three flags
+print "s isa {s.class_name}"
+print s
+print s.hexdigest
+print "s2 isa {s2.class_name}"
+print s2
+print "s3 isa {s3.class_name}"
+print s3
+print s3.hexdigest
+print "s4 isa {s4.class_name}"
+print s4
+print s4.ignore_case
+print s4.extended
+print s4.newline
+print "s5 isa {s5.class_name}"
+print s5
+print "s6 isa {s6.class_name}"
+print s6
+print "s7 isa {s7.class_name}"
+print s7
+print "s8 isa {s8.class_name}"
+print s8
+print "s9 isa {s9.class_name}"
+print s9
+print s9.ignore_case
+print s9.extended
+print s9.newline
+print "s10 isa {s10.class_name}"
+print s10
+print s10.ignore_case
+print s10.extended
+print s10.newline
+print "s11 isa {s11.class_name}"
+print s11
+print s11.ignore_case
+print s11.extended
+print s11.newline