Merge: Added contributing guidelines and link from readme
[nit.git] / lib / core / text / abstract_text.nit
index c542631..e07e0d4 100644 (file)
@@ -590,10 +590,13 @@ abstract class Text
                return res.to_s
        end
 
-       # Escape " \ ' and non printable characters using the rules of literal C strings and characters
+       # Escape `"` `\` `'`, trigraphs and non-printable characters using the rules of literal C strings and characters
        #
-       #     assert "abAB12<>&".escape_to_c         == "abAB12<>&"
+       #     assert "abAB12<>&".escape_to_c       == "abAB12<>&"
        #     assert "\n\"'\\".escape_to_c         == "\\n\\\"\\'\\\\"
+       #     assert "allo???!".escape_to_c        == "allo??\\?!"
+       #     assert "??=??/??'??(??)".escape_to_c == "?\\?=?\\?/??\\'?\\?(?\\?)"
+       #     assert "??!??<??>??-".escape_to_c    == "?\\?!?\\?<?\\?>?\\?-"
        #
        # Most non-printable characters (below ASCII 32) are escaped to an octal form `\nnn`.
        # Three digits are always used to avoid following digits to be interpreted as an element
@@ -617,6 +620,24 @@ abstract class Text
                                b.append("\\\'")
                        else if c == '\\' then
                                b.append("\\\\")
+                       else if c == '?' then
+                               # Escape if it is the last question mark of an ANSI C trigraph.
+                               var j = i + 1
+                               if j < length then
+                                       var next = chars[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == '!' or
+                                               next == '(' or
+                                               next == ')' or
+                                               next == '-' or
+                                               next == '/' or
+                                               next == '<' or
+                                               next == '=' or
+                                               next == '>'
+                                       then b.add('\\')
+                               end
+                               b.add('?')
                        else if c.code_point < 32 then
                                b.add('\\')
                                var oct = c.code_point.to_base(8)
@@ -640,6 +661,7 @@ abstract class Text
        # The result might not be legal in C but can be used in other languages
        #
        #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
+       #     assert "allo???!".escape_more_to_c("")     == "allo??\\?!"
        fun escape_more_to_c(chars: String): String
        do
                var b = new Buffer
@@ -1360,16 +1382,19 @@ abstract class String
        # Letters that follow a letter are lowercased
        # Letters that follow a non-letter are upcased.
        #
+       # If `keep_upper = true`, already uppercase letters are not lowercased.
+       #
        # SEE : `Char::is_letter` for the definition of letter.
        #
        #     assert "jAVASCRIPT".capitalized == "Javascript"
        #     assert "i am root".capitalized == "I Am Root"
        #     assert "ab_c -ab0c ab\nc".capitalized == "Ab_C -Ab0C Ab\nC"
-       fun capitalized: SELFTYPE do
+       #     assert "preserve my ACRONYMS".capitalized(keep_upper=true) == "Preserve My ACRONYMS"
+       fun capitalized(keep_upper: nullable Bool): SELFTYPE do
                if length == 0 then return self
 
                var buf = new Buffer.with_cap(length)
-               buf.capitalize(src=self)
+               buf.capitalize(keep_upper=keep_upper, src=self)
                return buf.to_s
        end
 end
@@ -1464,8 +1489,10 @@ abstract class Buffer
        # Letters that follow a letter are lowercased
        # Letters that follow a non-letter are upcased.
        #
-       # When `src` is specified, this method reads from `src`
-       # instead of `self` but still writes the result to the beginning of `self`.
+       # If `keep_upper = true`, uppercase letters are not lowercased.
+       #
+       # When `src` is specified, this method reads from `src` instead of `self`
+       # but it still writes the result to the beginning of `self`.
        # This requires `self` to have the capacity to receive all of the
        # capitalized content of `src`.
        #
@@ -1484,10 +1511,15 @@ abstract class Buffer
        #     b = new FlatBuffer.from("12345")
        #     b.capitalize(src="foo")
        #     assert b == "Foo45"
-       fun capitalize(src: nullable Text) do
+       #
+       #     b = new FlatBuffer.from("preserve my ACRONYMS")
+       #     b.capitalize(keep_upper=true)
+       #     assert b == "Preserve My ACRONYMS"
+       fun capitalize(keep_upper: nullable Bool, src: nullable Text) do
                src = src or else self
                var length = src.length
                if length == 0 then return
+               keep_upper = keep_upper or else false
 
                var c = src[0].to_upper
                self[0] = c
@@ -1496,7 +1528,11 @@ abstract class Buffer
                        prev = c
                        c = src[i]
                        if prev.is_letter then
-                               self[i] = c.to_lower
+                               if keep_upper then
+                                       self[i] = c
+                               else
+                                       self[i] = c.to_lower
+                               end
                        else
                                self[i] = c.to_upper
                        end
@@ -2100,7 +2136,12 @@ end
 # see `alpha_comparator`
 private class AlphaComparator
        super Comparator
-       redef fun compare(a, b) do return a.to_s <=> b.to_s
+       redef fun compare(a, b) do
+               if a == b then return 0
+               if a == null then return -1
+               if b == null then return 1
+               return a.to_s <=> b.to_s
+       end
 end
 
 # Stateless comparator that naively uses `to_s` to compare things.
@@ -2158,6 +2199,11 @@ redef class NativeString
        # SEE: `abstract_text::Text` for more info on the difference
        # between `Text::bytelen` and `Text::length`.
        fun to_s_full(bytelen, unilen: Int): String is abstract
+
+       # Copies the content of `src` to `self`
+       #
+       # NOTE: `self` must be large enough to hold `src.bytelen` bytes
+       fun fill_from(src: Text) do src.copy_to_native(self, src.bytelen, 0, 0)
 end
 
 redef class NativeArray[E]