csv: Allow output customization.
author Jean-Christophe Beaupré <jcbrinfo@users.noreply.github.com>
Tue, 9 Dec 2014 17:31:59 +0000 (12:31 -0500)
committer Jean-Christophe Beaupré <jcbrinfo@users.noreply.github.com>
Tue, 9 Dec 2014 17:39:31 +0000 (12:39 -0500)
Also, escape cells when needed.

Signed-off-by: Jean-Christophe Beaupré <jcbrinfo@users.noreply.github.com>

lib/csv/csv.nit
lib/csv/test_csv.nit [new file with mode: 0644]
src/metrics/mendel_metrics.nit
src/metrics/metrics_base.nit
src/rapid_type_analysis.nit

index e3d0c0b..257d565 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# CSV output facilities
+# CSV document handling.
 module csv
 
+# Specifies a CSV format.
+class CsvFormat
+       # The character that delimits an escaped value.
+       #
+       # The delimiter is escaped by doubling it.
+       var delimiter: Char
+
+       # The character that separates the cells of a row.
+       var separator: Char
+
+       # The character sequence that ends a row (end of line).
+       var eol: String
+
+       # Escape sequence for the delimiter.
+       private var escaping = "{delimiter}{delimiter}" is lazy
+
+       # Escape the specified cell.
+       private fun escape_cell(cell: String): Text do
+               var result = new RopeBuffer
+               result.add delimiter
+               result.append cell.replace(delimiter, escaping)
+               result.add delimiter
+               return result
+       end
+
+       # Can the specified value be inserted without any escaping?
+       private fun is_value_clean(value: String): Bool do
+               for c in value.chars do
+                       if c == delimiter then return false
+                       if c == separator then return false
+                       if eol.chars.has(c) then return false
+               end
+               return true
+       end
+end
+
 # A CSV document representation.
 class CsvDocument
        super Streamable
 
+       # The format to use.
+       #
+       # Defaults to `rfc4180`.
+       var format: CsvFormat = rfc4180 is writable
+
        # The header.
        #
        # Contains the name of all fields in this table.
@@ -45,26 +86,96 @@ class CsvDocument
                records.add(record)
        end
 
-       private fun write_line_to(line: Collection[String], stream: OStream)
-       do
-               var i = line.iterator
-               if i.is_ok then
-                       stream.write(i.item)
+       redef fun write_to(stream) do
+               var writer = new CsvWriter.with_format(stream, format)
+               writer.write_sequence(header)
+               for record in records do writer.write_sequence(record)
+       end
+
+       # Deprecated alias for `write_to_file`.
+       fun save(file: String) do write_to_file(file)
+end
+
+# Appends CSV rows to an output stream.
+#
+# By default, uses the format recommended by RFC 4180 (see `rfc4180`).
+#
+# Note: If a row contains only an empty cell, its representation is
+# indistinguishable from an empty line. This is because empty values are
+# always written unescaped, so that some parsers do not misinterpret them as
+# escaped delimiters.
+#
+# ~~~nit
+# var out = new StringOStream
+# var writer = new CsvWriter(out)
+# writer.write_row(1, 2.0, "foo\nbar")
+# writer.write_sequence([""])
+# assert out.to_s == """1,2.0,"foo\nbar"\r\n\r\n"""
+# ~~~
+class CsvWriter
+
+       # The output stream.
+       var ostream: OStream
+
+       # The format to use.
+       #
+       # Defaults to `rfc4180`.
+       var format: CsvFormat = rfc4180
+
+       # Do we escape all cells (except empty ones)?
+       #
+       # If `false` (the default), escape only cells that contain a metacharacter
+       # of the format. In all cases, empty cells are not escaped. This option
+       # lets the caller choose between optimizing for performance (when
+       # `true`) and optimizing for the size of the output (when `false`).
+       #
+       # Note: Escaping may not be correctly recognized by some parsers.
+       var always_escape = false is writable
+
+       # Create a new writer with the specified format.
+       init with_format(ostream:OStream, format: CsvFormat) do
+               self.ostream = ostream
+               self.format = format
+       end
+
+       # Append the specified sequence as a row.
+       #
+       # The representation of each cell is determined by `to_s`.
+       fun write_sequence(row: SequenceRead[Object]) do
+               if not row.is_empty then
+                       var i = row.iterator
+                       var separator = format.separator.to_s
+                       write_cell i.item.to_s
                        i.next
-                       while i.is_ok do
-                               stream.write(";")
-                               stream.write(i.item)
-                               i.next
+                       for cell in i do
+                               ostream.write separator
+                               write_cell cell.to_s
                        end
                end
-               stream.write("\n")
+               ostream.write format.eol
        end
 
-       redef fun write_to(stream) do
-               write_line_to(header, stream)
-               for record in records do write_line_to(record, stream)
-       end
+       # Append the specified row.
+       #
+       # The representation of each cell is determined by `to_s`.
+       fun write_row(row: Object...) do write_sequence(row)
 
-       # Deprecated alias for `write_to_file`.
-       fun save(file: String) do write_to_file(file)
+       # Close the output stream.
+       fun close do ostream.close
+
+       private fun write_cell(cell: String) do
+               if cell.is_empty then return
+               if not always_escape and format.is_value_clean(cell) then
+                       ostream.write cell
+               else
+                       ostream.write format.escape_cell(cell)
+               end
+       end
 end
+
+# The CSV format recommended by RFC 4180.
+#
+# * `delimiter`: `'"'`
+# * `separator`: `','`
+# * `eol`: `"\r\n"`
+fun rfc4180: CsvFormat do return once new CsvFormat('"', ',', "\r\n")
diff --git a/lib/csv/test_csv.nit b/lib/csv/test_csv.nit
new file mode 100644 (file)
index 0000000..c176a67
--- /dev/null
@@ -0,0 +1,85 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT. This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. You can modify it is you want, provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You are allowed to redistribute it and sell it, alone or is a part of
+# another product.
+
+# Tests for `csv`.
+module test_csv is test_suite
+
+import test_suite
+import csv
+
+class TestCsvWriter
+       super TestSuite
+
+       # The custom CSV format used in the tests.
+       private var custom_format = new CsvFormat('/', ':', "#")
+
+       # Expect to write `row` as `expected_rfc4180` and as `expected_custom`.
+       #
+       # Parameters:
+       #
+       # * `always_escape`: value of the `always_escape` option.
+       # * `row`: row to write.
+       # * `expected_rfc4180`: expected result in RFC 4180.
+       # * `expected_custom`: expected result in the custom CSV format.
+       private fun expect(always_escape: Bool, row: SequenceRead[String],
+                       expected_rfc4180: String,
+                       expected_custom: String) do
+               var out = new StringOStream
+               var writer = new CsvWriter(out)
+
+               writer.always_escape = always_escape
+               writer.write_sequence(row)
+               assert out.to_s == expected_rfc4180 else
+                       sys.stderr.write "\nFormat: RFC 4180\n"
+                       sys.stderr.write "Expecting: \"{expected_rfc4180.escape_to_nit}\"\n"
+                       sys.stderr.write "Got: \"{out.to_s.escape_to_nit}\"\n"
+               end
+               writer.close
+
+               out = new StringOStream
+               writer = new CsvWriter.with_format(out, custom_format)
+               writer.always_escape = always_escape
+               writer.write_sequence(row)
+               assert out.to_s == expected_custom else
+                       sys.stderr.write "\nFormat: {custom_format.delimiter}"
+                       sys.stderr.write " {custom_format.separator}"
+                       sys.stderr.write " {custom_format.eol.escape_to_nit}\n"
+                       sys.stderr.write "Expecting: \"{expected_custom.escape_to_nit}\"\n"
+                       sys.stderr.write "Got: \"{out.to_s.escape_to_nit}\"\n"
+               end
+               writer.close
+       end
+
+       fun test_empty do expect(true, new Array[String], "\r\n", "#")
+
+       fun test_one_cell do expect(true, ["foo/\"\r\n,"],
+                       "\"foo/\"\"\r\n,\"\r\n",
+                       "/foo//\"\r\n,/#")
+
+       fun test_optimize_size_escaped do expect(false, ["foo/\"\r\n,"],
+                       "\"foo/\"\"\r\n,\"\r\n",
+                       "/foo//\"\r\n,/#")
+
+       fun test_optimize_size_eol do expect(false, ["foo\r#\n"],
+                       "\"foo\r#\n\"\r\n",
+                       "/foo\r#\n/#")
+
+       fun test_optimize_size_unescaped do expect(false, ["foo"],
+                       "foo\r\n",
+                       "foo#")
+
+       fun test_multiple_cells do expect(true, ["1", "", "/"],
+                       "\"1\",,\"/\"\r\n",
+                       "/1/::////#")
+
+       fun test_multiple_cells_optimize_size do expect(false, ["1", "", "/"],
+                       "1,,/\r\n",
+                       "1::////#")
+end
index 80c4ed8..1ee1db2 100644 (file)
@@ -103,6 +103,7 @@ private class MendelMetricsPhase
 
                if csv then
                        var csvh = new CsvDocument
+                       csvh.format = new CsvFormat('"', ';', "\n")
                        csvh.header = ["povr", "ovr", "pext", "ext", "pspe", "spe", "prep", "rep", "eq"]
                        for mclass in mclasses do
                                var povr = mclass.is_pure_overrider(vis).object_id
index c99ec62..c51e656 100644 (file)
@@ -370,6 +370,8 @@ class MetricSet
        fun to_csv: CsvDocument do
                var csv = new CsvDocument
 
+               csv.format = new CsvFormat('"', ';', "\n")
+
                # set csv headers
                csv.header.add("entry")
                for metric in metrics do csv.header.add(metric.name)
index b4cf4ab..7881db0 100644 (file)
@@ -122,6 +122,7 @@ class RapidTypeAnalysis
                var types = typeset.to_a
                (new CachedAlphaComparator).sort(types)
                var res = new CsvDocument
+               res.format = new CsvFormat('"', ';', "\n")
                res.header = ["Type", "Resolution", "Liveness", "Cast-liveness"]
                for t in types do
                        var reso