Merge: escape_to_c: Escape trigraphs
author Jean Privat <jean@pryen.org>
Tue, 7 Jun 2016 16:07:37 +0000 (12:07 -0400)
committer Jean Privat <jean@pryen.org>
Tue, 7 Jun 2016 16:07:37 +0000 (12:07 -0400)
Produce more standard-compliant code instead of relying on the default behaviour of gcc (or on its `-Wno-trigraphs` flag).

See also: https://github.com/nitlang/nit/pull/1949/commits/000ac8de3c64790b0fbb0e868e406a6a280f3fa9

Signed-off-by: Jean-Christophe Beaupré <jcbrinfo@users.noreply.github.com>

Pull-Request: #2151
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>

1  2 
lib/core/text/abstract_text.nit

@@@ -590,10 -590,13 +590,13 @@@ abstract class Tex
                return res.to_s
        end
  
-       # Escape " \ ' and non printable characters using the rules of literal C strings and characters
+       # Escape `"` `\` `'`, trigraphs and non printable characters using the rules of literal C strings and characters
        #
-       #     assert "abAB12<>&".escape_to_c         == "abAB12<>&"
+       #     assert "abAB12<>&".escape_to_c       == "abAB12<>&"
        #     assert "\n\"'\\".escape_to_c         == "\\n\\\"\\'\\\\"
+       #     assert "allo???!".escape_to_c        == "allo??\\?!"
+       #     assert "??=??/??'??(??)".escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)"
+       #     assert "??!??<??>??-".escape_to_c    == "?\\?!?\\?<?\\?>?\\?-"
        #
        # Most non-printable characters (below ASCII 32) are escaped to an octal form `\nnn`.
        # Three digits are always used to avoid following digits to be interpreted as an element
                                b.append("\\\'")
                        else if c == '\\' then
                                b.append("\\\\")
+                       else if c == '?' then
+                               # Escape if it is the last question mark of an ANSI C trigraph.
+                               var j = i + 1
+                               if j < length then
+                                       var next = chars[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == '!' or
+                                               next == '(' or
+                                               next == ')' or
+                                               next == '-' or
+                                               next == '/' or
+                                               next == '<' or
+                                               next == '=' or
+                                               next == '>'
+                                       then b.add('\\')
+                               end
+                               b.add('?')
                        else if c.code_point < 32 then
                                b.add('\\')
                                var oct = c.code_point.to_base(8)
        # The result might not be legal in C but may be used in other languages
        #
        #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
+       #     assert "allo???!".escape_more_to_c("")     == "allo??\\?!"
        fun escape_more_to_c(chars: String): String
        do
                var b = new Buffer
@@@ -1360,19 -1382,30 +1382,19 @@@ abstract class Strin
        # Letters that follow a letter are lowercased
        # Letters that follow a non-letter are upcased.
        #
 +      # If `keep_upper = true`, already uppercase letters are not lowercased.
 +      #
        # SEE : `Char::is_letter` for the definition of letter.
        #
        #     assert "jAVASCRIPT".capitalized == "Javascript"
        #     assert "i am root".capitalized == "I Am Root"
        #     assert "ab_c -ab0c ab\nc".capitalized == "Ab_C -Ab0C Ab\nC"
 -      fun capitalized: SELFTYPE do
 +      #     assert "preserve my ACRONYMS".capitalized(keep_upper=true) == "Preserve My ACRONYMS"
 +      fun capitalized(keep_upper: nullable Bool): SELFTYPE do
                if length == 0 then return self
  
                var buf = new Buffer.with_cap(length)
 -
 -              var curr = chars[0].to_upper
 -              var prev = curr
 -              buf[0] = curr
 -
 -              for i in [1 .. length[ do
 -                      prev = curr
 -                      curr = self[i]
 -                      if prev.is_letter then
 -                              buf[i] = curr.to_lower
 -                      else
 -                              buf[i] = curr.to_upper
 -                      end
 -              end
 -
 +              buf.capitalize(keep_upper=keep_upper, src=self)
                return buf.to_s
        end
  end
@@@ -1467,13 -1500,6 +1489,13 @@@ abstract class Buffe
        # Letters that follow a letter are lowercased
        # Letters that follow a non-letter are upcased.
        #
 +      # If `keep_upper = true`, uppercase letters are not lowercased.
 +      #
 +      # When `src` is specified, this method reads from `src` instead of `self`
 +      # but it still writes the result to the beginning of `self`.
 +      # This requires `self` to have the capacity to receive all of the
 +      # capitalized content of `src`.
 +      #
        # SEE: `Char::is_letter` for the definition of a letter.
        #
        #     var b = new FlatBuffer.from("jAVAsCriPt")
        #     b = new FlatBuffer.from("ab_c -ab0c ab\nc")
        #     b.capitalize
        #     assert b == "Ab_C -Ab0C Ab\nC"
 -      fun capitalize do
 +      #
 +      #     b = new FlatBuffer.from("12345")
 +      #     b.capitalize(src="foo")
 +      #     assert b == "Foo45"
 +      #
 +      #     b = new FlatBuffer.from("preserve my ACRONYMS")
 +      #     b.capitalize(keep_upper=true)
 +      #     assert b == "Preserve My ACRONYMS"
 +      fun capitalize(keep_upper: nullable Bool, src: nullable Text) do
 +              src = src or else self
 +              var length = src.length
                if length == 0 then return
 -              var c = self[0].to_upper
 +              keep_upper = keep_upper or else false
 +
 +              var c = src[0].to_upper
                self[0] = c
                var prev = c
                for i in [1 .. length[ do
                        prev = c
 -                      c = self[i]
 +                      c = src[i]
                        if prev.is_letter then
 -                              self[i] = c.to_lower
 +                              if keep_upper then
 +                                      self[i] = c
 +                              else
 +                                      self[i] = c.to_lower
 +                              end
                        else
                                self[i] = c.to_upper
                        end
  # see `alpha_comparator`
  private class AlphaComparator
        super Comparator
 -      redef fun compare(a, b) do return a.to_s <=> b.to_s
 +      redef fun compare(a, b) do
 +              if a == b then return 0
 +              if a == null then return -1
 +              if b == null then return 1
 +              return a.to_s <=> b.to_s
 +      end
  end
  
  # Stateless comparator that naively uses `to_s` to compare things.