# See the License for the specific language governing permissions and
# limitations under the License.
-# CSV output facilities
+# CSV document handling.
module csv
+# Specifies a CSV format.
+class CsvFormat
+ # The character that delimits escaped values.
+ #
+ # Inside an escaped value, the delimiter is itself escaped by doubling it.
+ var delimiter: Char
+
+ # The character that separates the cells of a row.
+ var separator: Char
+
+ # The character sequence that ends a row (end of line).
+ var eol: String
+
+ # Escape sequence for the delimiter (the delimiter written twice).
+ private var escaping = "{delimiter}{delimiter}" is lazy
+
+ # Escape the specified cell.
+ #
+ # Returns `cell` surrounded by `delimiter`, with every inner occurrence of
+ # `delimiter` replaced by `escaping`.
+ private fun escape_cell(cell: String): Text do
+ var result = new RopeBuffer
+ result.add delimiter
+ result.append cell.replace(delimiter, escaping)
+ result.add delimiter
+ return result
+ end
+
+ # Can the specified value be inserted without any escaping?
+ #
+ # Returns `false` as soon as `value` contains `delimiter`, `separator` or
+ # any character of `eol`; returns `true` otherwise.
+ private fun is_value_clean(value: String): Bool do
+ for c in value.chars do
+ if c == delimiter then return false
+ if c == separator then return false
+ if eol.chars.has(c) then return false
+ end
+ return true
+ end
+end
+
# A CSV document representation.
class CsvDocument
super Streamable
+ # The format to use.
+ #
+ # Defaults to `rfc4180`.
+ var format: CsvFormat = rfc4180 is writable
+
# The header.
#
# Contains the name of all fields in this table.
records.add(record)
end
- private fun write_line_to(line: Collection[String], stream: OStream)
- do
- var i = line.iterator
- if i.is_ok then
- stream.write(i.item)
+ # Write the header, then every record, as CSV rows using `format`.
+ redef fun write_to(stream) do
+ var writer = new CsvWriter.with_format(stream, format)
+ writer.write_sequence(header)
+ for record in records do writer.write_sequence(record)
+ end
+
+ # Deprecated alias for `write_to_file`.
+ #
+ # Simply forwards `file` to `write_to_file`; new code should call that
+ # method directly.
+ fun save(file: String) do write_to_file(file)
+end
+
+# Appends CSV rows to a file.
+#
+# By default, uses the format recommended by RFC 4180 (see `rfc4180`).
+#
+# Note: If a row contains only an empty cell, its representation is
+# indistinguishable from an empty line. This is because empty values are
+# always written unescaped, to prevent some parsers from interpreting them
+# as escaped delimiters.
+#
+# ~~~nit
+# var out = new StringOStream
+# var writer = new CsvWriter(out)
+# writer.write_row(1, 2.0, "foo\nbar")
+# writer.write_sequence([""])
+# assert out.to_s == """1,2.0,"foo\nbar"\r\n\r\n"""
+# ~~~
+class CsvWriter
+
+ # The output stream.
+ var ostream: OStream
+
+ # The format to use.
+ #
+ # Defaults to `rfc4180`.
+ var format: CsvFormat = rfc4180
+
+ # Do we escape all cells (except empty ones)?
+ #
+ # If `false` (the default), escape only the cells that contain a
+ # metacharacter of the format (delimiter, separator or end-of-line
+ # character). In all cases, empty cells are not escaped. This option lets
+ # the caller choose between optimizing performance (when `true`, cells are
+ # not scanned for metacharacters) and optimizing the size of the output
+ # (when `false`).
+ #
+ # Note: Escaping may not be correctly recognized by some parsers.
+ var always_escape = false is writable
+
+ # Create a new writer with the specified format.
+ init with_format(ostream:OStream, format: CsvFormat) do
+ self.ostream = ostream
+ self.format = format
+ end
+
+ # Append the specified sequence as a row.
+ #
+ # The representation of each cell is determined by `to_s`.
+ #
+ # Cells are separated by the separator of `format`, and the row is
+ # terminated by the `eol` of `format`. An empty sequence writes only `eol`.
+ fun write_sequence(row: SequenceRead[Object]) do
+ if not row.is_empty then
+ var i = row.iterator
+ var separator = format.separator.to_s
+ write_cell i.item.to_s
i.next
- while i.is_ok do
- stream.write(";")
- stream.write(i.item)
- i.next
+ for cell in i do
+ ostream.write separator
+ write_cell cell.to_s
end
end
- stream.write("\n")
+ ostream.write format.eol
end
- redef fun write_to(stream) do
- write_line_to(header, stream)
- for record in records do write_line_to(record, stream)
- end
+ # Append the specified row.
+ #
+ # The representation of each cell is determined by `to_s`.
+ fun write_row(row: Object...) do write_sequence(row)
- # Deprecated alias for `write_to_file`.
- fun save(file: String) do write_to_file(file)
+ # Close the output stream.
+ fun close do ostream.close
+
+ # Write a single cell to `ostream`.
+ #
+ # Empty cells produce no output. Other cells are written verbatim when
+ # `always_escape` is `false` and the format reports them as clean;
+ # otherwise they are written escaped.
+ private fun write_cell(cell: String) do
+ if cell.is_empty then return
+ if not always_escape and format.is_value_clean(cell) then
+ ostream.write cell
+ else
+ ostream.write format.escape_cell(cell)
+ end
+ end
end
+
+# The CSV format recommended by RFC 4180.
+#
+# * `delimiter`: `'"'`
+# * `separator`: `','`
+# * `eol`: `"\r\n"`
+#
+# The instance is created on the first call (`once`) and shared by later calls.
+fun rfc4180: CsvFormat do return once new CsvFormat('"', ',', "\r\n")
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT. This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. You can modify it is you want, provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You are allowed to redistribute it and sell it, alone or is a part of
+# another product.
+
+# Tests for `csv`.
+module test_csv is test_suite
+
+import test_suite
+import csv
+
+# Test suite checking the output of `CsvWriter` in two formats.
+class TestCsvWriter
+ super TestSuite
+
+ # The custom CSV format used in the tests.
+ #
+ # Uses `/` as delimiter, `:` as separator and `#` as end of line.
+ private var custom_format = new CsvFormat('/', ':', "#")
+
+ # Expect to write `row` as `expected_rfc4180` and as `expected_custom`.
+ #
+ # The row is written twice, each time to a fresh `StringOStream`: first
+ # with the default format of `CsvWriter`, then with `custom_format`. On a
+ # mismatch, the expected and actual outputs are printed to `sys.stderr`.
+ #
+ # Parameters:
+ #
+ # * `always_escape`: value of the `always_escape` option.
+ # * `row`: row to write.
+ # * `expected_rfc4180`: expected result in RFC 4180.
+ # * `expected_custom`: expected result in the custom CSV format.
+ private fun expect(always_escape: Bool, row: SequenceRead[String],
+ expected_rfc4180: String,
+ expected_custom: String) do
+ var out = new StringOStream
+ var writer = new CsvWriter(out)
+
+ writer.always_escape = always_escape
+ writer.write_sequence(row)
+ assert out.to_s == expected_rfc4180 else
+ sys.stderr.write "\nFormat: RFC 4180\n"
+ sys.stderr.write "Expecting: \"{expected_rfc4180.escape_to_nit}\"\n"
+ sys.stderr.write "Got: \"{out.to_s.escape_to_nit}\"\n"
+ end
+ writer.close
+
+ out = new StringOStream
+ writer = new CsvWriter.with_format(out, custom_format)
+ writer.always_escape = always_escape
+ writer.write_sequence(row)
+ assert out.to_s == expected_custom else
+ sys.stderr.write "\nFormat: {custom_format.delimiter}"
+ sys.stderr.write " {custom_format.separator}"
+ sys.stderr.write " {custom_format.eol.escape_to_nit}\n"
+ sys.stderr.write "Expecting: \"{expected_custom.escape_to_nit}\"\n"
+ sys.stderr.write "Got: \"{out.to_s.escape_to_nit}\"\n"
+ end
+ writer.close
+ end
+
+ # An empty row produces only the end-of-line sequence.
+ fun test_empty do expect(true, new Array[String], "\r\n", "#")
+
+ # With `always_escape`, a single cell is delimited and its delimiters doubled.
+ fun test_one_cell do expect(true, ["foo/\"\r\n,"],
+ "\"foo/\"\"\r\n,\"\r\n",
+ "/foo//\"\r\n,/#")
+
+ # Without `always_escape`, a cell containing metacharacters is still escaped.
+ fun test_optimize_size_escaped do expect(false, ["foo/\"\r\n,"],
+ "\"foo/\"\"\r\n,\"\r\n",
+ "/foo//\"\r\n,/#")
+
+ # Without `always_escape`, any character of `eol` triggers escaping.
+ fun test_optimize_size_eol do expect(false, ["foo\r#\n"],
+ "\"foo\r#\n\"\r\n",
+ "/foo\r#\n/#")
+
+ # Without `always_escape`, a cell with no metacharacter is written verbatim.
+ fun test_optimize_size_unescaped do expect(false, ["foo"],
+ "foo\r\n",
+ "foo#")
+
+ # With `always_escape`, non-empty cells are escaped and empty ones are not.
+ fun test_multiple_cells do expect(true, ["1", "", "/"],
+ "\"1\",,\"/\"\r\n",
+ "/1/::////#")
+
+ # Without `always_escape`, only the cells holding a metacharacter are escaped.
+ fun test_multiple_cells_optimize_size do expect(false, ["1", "", "/"],
+ "1,,/\r\n",
+ "1::////#")
+end