# See the License for the specific language governing permissions and
# limitations under the License.
-# CSV output facilities
+# CSV document handling.
module csv
+# Describes the metacharacters of a CSV format.
+class CsvFormat
+	# The character that delimits an escaped value.
+	#
+	# Inside an escaped value, the delimiter is escaped by doubling it.
+	var delimiter: Char
+
+	# The character that separates the cells of a row.
+	var separator: Char
+
+	# The character sequence that ends a row (end of line).
+	var eol: String
+
+	# Escape sequence for the delimiter: the delimiter written twice.
+	#
+	# Built lazily from `delimiter`.
+	private var escaping = "{delimiter}{delimiter}" is lazy
+
+	# Escape the specified cell.
+	#
+	# The result is `cell` enclosed between two `delimiter` characters, with
+	# every `delimiter` found in `cell` replaced by `escaping`.
+	private fun escape_cell(cell: String): Text do
+		var escaped = cell.replace(delimiter, escaping)
+		var buffer = new RopeBuffer
+		buffer.add delimiter
+		buffer.append escaped
+		buffer.add delimiter
+		return buffer
+	end
+
+	# Can the specified value be inserted without any escaping?
+	#
+	# A value is clean when none of its characters is the `delimiter`, the
+	# `separator` or one of the characters of `eol`.
+	private fun is_value_clean(value: String): Bool do
+		for ch in value.chars do
+			if ch == delimiter or ch == separator or eol.chars.has(ch) then
+				return false
+			end
+		end
+		return true
+	end
+end
+
# A CSV document representation.
class CsvDocument
super Streamable
+ # The CSV format used by `write_to` when writing this document.
+ #
+ # Defaults to `rfc4180`.
+ var format: CsvFormat = rfc4180 is writable
+
# The header.
#
# Contains the name of all fields in this table.
records.add(record)
end
- private fun write_line_to(line: Collection[String], stream: OStream)
- do
- var i = line.iterator
- if i.is_ok then
- stream.write(i.item)
+	# Write this document to `stream` as CSV, using `format`.
+	#
+	# The header is written first, as one row, followed by one row per record.
+	redef fun write_to(stream) do
+		var csv = new CsvWriter.with_format(stream, format)
+		csv.write_sequence header
+		for row in records do
+			csv.write_sequence row
+		end
+	end
+
+	# Deprecated alias for `write_to_file`.
+	#
+	# Simply forwards `file` to `write_to_file`.
+	fun save(file: String) do
+		write_to_file(file)
+	end
+end
+
+# Appends CSV rows to an output stream.
+#
+# By default, uses the format recommended by RFC 4180 (see `rfc4180`).
+#
+# Note: If a row contains only an empty cell, its representation is
+# indistinguishable from an empty line. This is because empty values are
+# always written unescaped, to prevent them from being interpreted as escaped
+# delimiters by some parsers.
+#
+# ~~~nit
+# var out = new StringOStream
+# var writer = new CsvWriter(out)
+# writer.write_row(1, 2.0, "foo\nbar")
+# writer.write_sequence([""])
+# assert out.to_s == """1,2.0,"foo\nbar"\r\n\r\n"""
+# ~~~
+class CsvWriter
+
+ # The output stream that receives the rows.
+ var ostream: OStream
+
+ # The format to use.
+ #
+ # Defaults to `rfc4180`.
+ var format: CsvFormat = rfc4180
+
+ # Do we escape all cells (except empty ones)?
+ #
+ # If `false` (the default), escape only cells that contain a metacharacter
+ # of the format. In all cases, empty cells are not escaped. This option
+ # lets the caller choose between optimizing performance (when `true`) and
+ # optimizing the size of the output (when `false`).
+ #
+ # Note: Escaping may not be correctly recognized by some parsers.
+ var always_escape = false is writable
+
+ # Create a new writer on `ostream` that uses the specified `format`.
+ init with_format(ostream:OStream, format: CsvFormat) do
+ self.ostream = ostream
+ self.format = format
+ end
+
+ # Append the specified sequence as a row.
+ #
+ # The representation of each cell is determined by `to_s`.
+ # Cells are joined with `format.separator` and the row is terminated by
+ # `format.eol`. An empty `row` writes only `format.eol`.
+ fun write_sequence(row: SequenceRead[Object]) do
+ if not row.is_empty then
+ var i = row.iterator
+ var separator = format.separator.to_s
+ # The first cell is written without a leading separator.
+ write_cell i.item.to_s
i.next
- while i.is_ok do
- stream.write(";")
- stream.write(i.item)
- i.next
+ for cell in i do
+ ostream.write separator
+ write_cell cell.to_s
end
end
- stream.write("\n")
+ ostream.write format.eol
end
- redef fun write_to(stream) do
- write_line_to(header, stream)
- for record in records do write_line_to(record, stream)
- end
+ # Append the specified row.
+ #
+ # The representation of each cell is determined by `to_s`.
+ # Forwards the cells to `write_sequence`.
+ fun write_row(row: Object...) do write_sequence(row)
- # Deprecated alias for `write_to_file`.
- fun save(file: String) do write_to_file(file)
+ # Close the output stream.
+ fun close do ostream.close
+
+ # Write a single cell to `ostream`.
+ #
+ # An empty cell writes nothing. Otherwise the cell is written as is when
+ # `always_escape` is `false` and `format.is_value_clean` accepts it, and
+ # is written through `format.escape_cell` in every other case.
+ private fun write_cell(cell: String) do
+ if cell.is_empty then return
+ if not always_escape and format.is_value_clean(cell) then
+ ostream.write cell
+ else
+ ostream.write format.escape_cell(cell)
+ end
+ end
end
+
+# The CSV format recommended by RFC 4180.
+#
+# * `delimiter`: `'"'`
+# * `separator`: `','`
+# * `eol`: `"\r\n"`
+#
+# The instance is created under `once`, so the same object is returned by
+# every call.
+fun rfc4180: CsvFormat do
+	return once new CsvFormat('"', ',', "\r\n")
+end