X-Git-Url: http://nitlanguage.org diff --git a/lib/csv/csv.nit b/lib/csv/csv.nit index e3d0c0b..257d565 100644 --- a/lib/csv/csv.nit +++ b/lib/csv/csv.nit @@ -12,13 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. -# CSV output facilities +# CSV document handling. module csv +# Specifies a CSV format. +class CsvFormat + # The character that delimits escaped values. + # + # The delimiter is escaped by doubling it. + var delimiter: Char + + # The character that separates the cells in a row. + var separator: Char + + # The string that ends a row (end of line). + var eol: String + + # Escape sequence for the delimiter. + private var escaping = "{delimiter}{delimiter}" is lazy + + # Escape the specified cell. + private fun escape_cell(cell: String): Text do + var result = new RopeBuffer + result.add delimiter + result.append cell.replace(delimiter, escaping) + result.add delimiter + return result + end + + # Can the specified value be inserted without any escaping? + private fun is_value_clean(value: String): Bool do + for c in value.chars do + if c == delimiter then return false + if c == separator then return false + if eol.chars.has(c) then return false + end + return true + end +end + # A CSV document representation. class CsvDocument super Streamable + # The format to use. + # + # Defaults to `rfc4180`. + var format: CsvFormat = rfc4180 is writable + # The header. # # Contains the name of all fields in this table. @@ -45,26 +86,96 @@ class CsvDocument records.add(record) end - private fun write_line_to(line: Collection[String], stream: OStream) - do - var i = line.iterator - if i.is_ok then - stream.write(i.item) + redef fun write_to(stream) do + var writer = new CsvWriter.with_format(stream, format) + writer.write_sequence(header) + for record in records do writer.write_sequence(record) + end + + # Deprecated alias for `write_to_file`. 
+ fun save(file: String) do write_to_file(file) +end + +# Appends CSV rows to an output stream. +# +# By default, uses the format recommended by RFC 4180 (see `rfc4180`). +# +# Note: If a row contains only an empty cell, its representation is +# indistinguishable from an empty line. This is because empty values are +# always written unescaped, to prevent some parsers from interpreting them as +# escaped delimiters. +# +# ~~~nit +# var out = new StringOStream +# var writer = new CsvWriter(out) +# writer.write_row(1, 2.0, "foo\nbar") +# writer.write_sequence([""]) +# assert out.to_s == """1,2.0,"foo\nbar"\r\n\r\n""" +# ~~~ +class CsvWriter + + # The output stream. + var ostream: OStream + + # The format to use. + # + # Defaults to `rfc4180`. + var format: CsvFormat = rfc4180 + + # Do we escape all cells (except empty ones)? + # + # If `false` (the default), escape only cells that contain a metacharacter + # of the format. In all cases, empty cells are not escaped. This option + # lets the caller choose between optimizing for performance (when + # `true`) and optimizing for the size of the output (when `false`). + # + # Note: Escaping may not be correctly recognized by some parsers. + var always_escape = false is writable + + # Create a new writer with the specified format. + init with_format(ostream:OStream, format: CsvFormat) do + self.ostream = ostream + self.format = format + end + + # Append the specified sequence as a row. + # + # The representation of each cell is determined by `to_s`. 
+ fun write_sequence(row: SequenceRead[Object]) do + if not row.is_empty then + var i = row.iterator + var separator = format.separator.to_s + write_cell i.item.to_s i.next - while i.is_ok do - stream.write(";") - stream.write(i.item) - i.next + for cell in i do + ostream.write separator + write_cell cell.to_s end end - stream.write("\n") + ostream.write format.eol end - redef fun write_to(stream) do - write_line_to(header, stream) - for record in records do write_line_to(record, stream) - end + # Append the specified row. + # + # The representation of each cell is determined by `to_s`. + fun write_row(row: Object...) do write_sequence(row) - # Deprecated alias for `write_to_file`. - fun save(file: String) do write_to_file(file) + # Close the output stream. + fun close do ostream.close + + private fun write_cell(cell: String) do + if cell.is_empty then return + if not always_escape and format.is_value_clean(cell) then + ostream.write cell + else + ostream.write format.escape_cell(cell) + end + end end + +# The CSV format recommended by RFC 4180. +# +# * `delimiter`: `'"'` +# * `separator`: `','` +# * `eol`: `"\r\n"` +fun rfc4180: CsvFormat do return once new CsvFormat('"', ',', "\r\n")