From: Jean Privat Date: Tue, 7 Jun 2016 16:07:37 +0000 (-0400) Subject: Merge: escape_to_c: Escape trigraphs X-Git-Url: http://nitlanguage.org?hp=acdd4fa93a6c6e298de6d44b795bedd5d3211dfe Merge: escape_to_c: Escape trigraphs Produce more standard-compliant code instead of relying on the default behaviour of gcc (or on its `-Wno-trigraphs` flag). See also: https://github.com/nitlang/nit/pull/1949/commits/000ac8de3c64790b0fbb0e868e406a6a280f3fa9 Signed-off-by: Jean-Christophe Beaupré Pull-Request: #2151 Reviewed-by: Jean Privat Reviewed-by: Lucas Bajolet Reviewed-by: Alexis Laferrière --- diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit index 8fefb24..e07e0d4 100644 --- a/lib/core/text/abstract_text.nit +++ b/lib/core/text/abstract_text.nit @@ -590,10 +590,13 @@ abstract class Text return res.to_s end - # Escape " \ ' and non printable characters using the rules of literal C strings and characters + # Escape `"` `\` `'`, trigraphs and non printable characters using the rules of literal C strings and characters # - # assert "abAB12<>&".escape_to_c == "abAB12<>&" + # assert "abAB12<>&".escape_to_c == "abAB12<>&" # assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\" + # assert "allo???!".escape_to_c == "allo??\\?!" + # assert "??=??/??'??(??)".escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)" + # assert "??!????-".escape_to_c == "?\\?!?\\??\\?-" # # Most non-printable characters (bellow ASCII 32) are escaped to an octal form `\nnn`. # Three digits are always used to avoid following digits to be interpreted as an element @@ -617,6 +620,24 @@ abstract class Text b.append("\\\'") else if c == '\\' then b.append("\\\\") + else if c == '?' then + # Escape if it is the last question mark of an ANSI C trigraph. + var j = i + 1 + if j < length then + var next = chars[j] + # We ignore `??'` because it will be escaped as `??\'`. + if + next == '!'
or + next == '(' or + next == ')' or + next == '-' or + next == '/' or + next == '<' or + next == '=' or + next == '>' + then b.add('\\') + end + b.add('?') else if c.code_point < 32 then b.add('\\') var oct = c.code_point.to_base(8) @@ -640,6 +661,7 @@ abstract class Text # The result might no be legal in C but be used in other languages # # assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}" + # assert "allo???!".escape_more_to_c("") == "allo??\\?!" fun escape_more_to_c(chars: String): String do var b = new Buffer diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit index 9e70321..6273609 100644 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@ -225,6 +225,22 @@ redef class FlatText req_esc += 1 else if c == 0x5Cu8 then req_esc += 1 + else if c == 0x3Fu8 then + var j = pos + 1 + if j < length then + var next = its[j] + # We ignore `??'` because it will be escaped as `??\'`. + if + next == 0x21u8 or + next == 0x28u8 or + next == 0x29u8 or + next == 0x2Du8 or + next == 0x2Fu8 or + next == 0x3Cu8 or + next == 0x3Du8 or + next == 0x3Eu8 + then req_esc += 1 + end else if c < 32u8 then req_esc += 3 end @@ -280,6 +296,27 @@ redef class FlatText nns[opos] = 0x5Cu8 nns[opos + 1] = 0x5Cu8 opos += 2 + else if c == 0x3Fu8 then + var j = pos + 1 + if j < length then + var next = its[j] + # We ignore `??'` because it will be escaped as `??\'`. + if + next == 0x21u8 or + next == 0x28u8 or + next == 0x29u8 or + next == 0x2Du8 or + next == 0x2Fu8 or + next == 0x3Cu8 or + next == 0x3Du8 or + next == 0x3Eu8 + then + nns[opos] = 0x5Cu8 + opos += 1 + end + end + nns[opos] = 0x3Fu8 + opos += 1 else if c < 32u8 then nns[opos] = 0x5Cu8 nns[opos + 1] = 0x30u8 diff --git a/lib/core/text/test_abstract_text.nit b/lib/core/text/test_abstract_text.nit new file mode 100644 index 0000000..c67a991 --- /dev/null +++ b/lib/core/text/test_abstract_text.nit @@ -0,0 +1,61 @@ +# This file is part of NIT ( http://www.nitlanguage.org ). 
+# +# This file is free software, which comes along with NIT. This software is +# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. You can modify it is you want, provided this header +# is kept unaltered, and a notification of the changes is added. +# You are allowed to redistribute it and sell it, alone or is a part of +# another product. + +module test_abstract_text is test_suite + +import test_suite +import text +intrude import ropes + +class TestText + super TestSuite + + private var factories: Collection[TextFactory] = [ + new ConcatFactory, + new RopeBufferFactory, + new FlatBufferFactory + : TextFactory] + + fun test_escape_to_c do + for f in factories do + assert f.create("abAB12<>&").escape_to_c == "abAB12<>&" + assert f.create("\n\"'\\").escape_to_c == "\\n\\\"\\'\\\\" + assert f.create("allo???!").escape_to_c == "allo??\\?!" + assert f.create("??=??/??'??(??)").escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)" + assert f.create("??!????-").escape_to_c == "?\\?!?\\??\\?-" + end + end +end + +# A factory that creates instances of a particular implementation of `Text` +interface TextFactory + + # Create a `Text` instance from the specified string + fun create(s: String): Text is abstract +end + + +class ConcatFactory + super TextFactory + + redef fun create(s) do return new Concat("", s) +end + +class RopeBufferFactory + super TextFactory + + redef fun create(s) do return new RopeBuffer.from(s) +end + +class FlatBufferFactory + super TextFactory + + redef fun create(s) do return new FlatBuffer.from(s) +end