Merge: escape_to_c: Escape trigraphs
authorJean Privat <jean@pryen.org>
Tue, 7 Jun 2016 16:07:37 +0000 (12:07 -0400)
committerJean Privat <jean@pryen.org>
Tue, 7 Jun 2016 16:07:37 +0000 (12:07 -0400)
Produce more standard-compliant code instead of relying on the default behaviour of gcc (or on its `-Wno-trigraphs` flag).

See also: https://github.com/nitlang/nit/pull/1949/commits/000ac8de3c64790b0fbb0e868e406a6a280f3fa9

Signed-off-by: Jean-Christophe Beaupré <jcbrinfo@users.noreply.github.com>

Pull-Request: #2151
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>

lib/core/text/abstract_text.nit
lib/core/text/flat.nit
lib/core/text/test_abstract_text.nit [new file with mode: 0644]

index 8fefb24..e07e0d4 100644 (file)
@@ -590,10 +590,13 @@ abstract class Text
                return res.to_s
        end
 
-       # Escape " \ ' and non printable characters using the rules of literal C strings and characters
+       # Escape `"` `\` `'`, trigraphs and non printable characters using the rules of literal C strings and characters
        #
-       #     assert "abAB12<>&".escape_to_c         == "abAB12<>&"
+       #     assert "abAB12<>&".escape_to_c       == "abAB12<>&"
        #     assert "\n\"'\\".escape_to_c         == "\\n\\\"\\'\\\\"
+       #     assert "allo???!".escape_to_c        == "allo??\\?!"
+       #     assert "??=??/??'??(??)".escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)"
+       #     assert "??!??<??>??-".escape_to_c    == "?\\?!?\\?<?\\?>?\\?-"
        #
        # Most non-printable characters (bellow ASCII 32) are escaped to an octal form `\nnn`.
        # Three digits are always used to avoid following digits to be interpreted as an element
@@ -617,6 +620,24 @@ abstract class Text
                                b.append("\\\'")
                        else if c == '\\' then
                                b.append("\\\\")
+                       else if c == '?' then
+                               # Escape if it is the last question mark of a ANSI C trigraph.
+                               var j = i + 1
+                               if j < length then
+                                       var next = chars[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == '!' or
+                                               next == '(' or
+                                               next == ')' or
+                                               next == '-' or
+                                               next == '/' or
+                                               next == '<' or
+                                               next == '=' or
+                                               next == '>'
+                                       then b.add('\\')
+                               end
+                               b.add('?')
                        else if c.code_point < 32 then
                                b.add('\\')
                                var oct = c.code_point.to_base(8)
@@ -640,6 +661,7 @@ abstract class Text
        # The result might no be legal in C but be used in other languages
        #
        #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
+       #     assert "allo???!".escape_more_to_c("")     == "allo??\\?!"
        fun escape_more_to_c(chars: String): String
        do
                var b = new Buffer
index 9e70321..6273609 100644 (file)
@@ -225,6 +225,22 @@ redef class FlatText
                                req_esc += 1
                        else if c == 0x5Cu8 then
                                req_esc += 1
+                       else if c == 0x3Fu8 then
+                               var j = pos + 1
+                               if j < length then
+                                       var next = its[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == 0x21u8 or
+                                               next == 0x28u8 or
+                                               next == 0x29u8 or
+                                               next == 0x2Du8 or
+                                               next == 0x2Fu8 or
+                                               next == 0x3Cu8 or
+                                               next == 0x3Du8 or
+                                               next == 0x3Eu8
+                                       then req_esc += 1
+                               end
                        else if c < 32u8 then
                                req_esc += 3
                        end
@@ -280,6 +296,27 @@ redef class FlatText
                                nns[opos] = 0x5Cu8
                                nns[opos + 1] = 0x5Cu8
                                opos += 2
+                       else if c == 0x3Fu8 then
+                               var j = pos + 1
+                               if j < length then
+                                       var next = its[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == 0x21u8 or
+                                               next == 0x28u8 or
+                                               next == 0x29u8 or
+                                               next == 0x2Du8 or
+                                               next == 0x2Fu8 or
+                                               next == 0x3Cu8 or
+                                               next == 0x3Du8 or
+                                               next == 0x3Eu8
+                                       then
+                                               nns[opos] = 0x5Cu8
+                                               opos += 1
+                                       end
+                               end
+                               nns[opos] = 0x3Fu8
+                               opos += 1
                        else if c < 32u8 then
                                nns[opos] = 0x5Cu8
                                nns[opos + 1] = 0x30u8
diff --git a/lib/core/text/test_abstract_text.nit b/lib/core/text/test_abstract_text.nit
new file mode 100644 (file)
index 0000000..c67a991
--- /dev/null
@@ -0,0 +1,61 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT.  This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
+# PARTICULAR PURPOSE.  You can modify it is you want,  provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You  are  allowed  to  redistribute it and sell it, alone or is a part of
+# another product.
+
+module test_abstract_text is test_suite
+
+import test_suite
+import text
+intrude import ropes
+
+class TestText
+       super TestSuite
+
+       private var factories: Collection[TextFactory] = [
+               new ConcatFactory,
+               new RopeBufferFactory,
+               new FlatBufferFactory
+       : TextFactory]
+
+       fun test_escape_to_c do
+               for f in factories do
+                       assert f.create("abAB12<>&").escape_to_c       == "abAB12<>&"
+                       assert f.create("\n\"'\\").escape_to_c         == "\\n\\\"\\'\\\\"
+                       assert f.create("allo???!").escape_to_c        == "allo??\\?!"
+                       assert f.create("??=??/??'??(??)").escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)"
+                       assert f.create("??!??<??>??-").escape_to_c    == "?\\?!?\\?<?\\?>?\\?-"
+               end
+       end
+end
+
+# A factory that creates instances of a particular implementation of `Text`
+interface TextFactory
+
+       # Create a `Text` instance from the specified string
+       fun create(s: String): Text is abstract
+end
+
+
+class ConcatFactory
+       super TextFactory
+
+       redef fun create(s) do return new Concat("", s)
+end
+
+class RopeBufferFactory
+       super TextFactory
+
+       redef fun create(s) do return new RopeBuffer.from(s)
+end
+
+class FlatBufferFactory
+       super TextFactory
+
+       redef fun create(s) do return new FlatBuffer.from(s)
+end