CSV document handling.

# CSV document handling.
module csv

redef class Text
	# Escape the content of `self` for inclusion in a CSV document.
	#
	# * `sep_char`: character separating the cells of a record.
	# * `delim_char`: character enclosing an escaped value.
	# * `eol`: string terminating a record.
	#
	# Returns `self` unchanged (as a `String`) when it contains no
	# `sep_char`, no `delim_char` and no complete occurrence of `eol`.
	# Otherwise the value is enclosed in `delim_char` and every `delim_char`
	# it contains is doubled.
	private fun escape_to_csv(sep_char, delim_char: Char, eol: String): String do
		var add_sp = chars_to_escape_csv(sep_char, delim_char, eol)
		if add_sp == 0 then return to_s
		var bf = new Buffer.with_cap(add_sp + byte_length)
		# Enclose with the configured delimiter, not a hard-coded `"`.
		bf.add delim_char
		for i in [0 .. length[ do
			var c = self[i]
			# A delimiter inside the value is escaped by doubling it.
			if c == delim_char then bf.add c
			bf.add c
		end
		bf.add delim_char
		return bf.to_s
	end

	# How many more bytes should be allocated for CSV escaping?
	#
	# Returns `0` if and only if `self` can be written as is.
	# Otherwise returns the size of the enclosing delimiters plus the size of
	# one extra delimiter for each delimiter found in `self`.
	private fun chars_to_escape_csv(sep_char, delim_char: Char, eol: String): Int do
		var more_ln = 0
		var ln = length
		var eol_ln = eol.length
		var need_esc = false
		var i = 0
		while i < ln do
			var c = self[i]
			if c == delim_char then more_ln += 1
			if c == sep_char then need_esc = true
			# Only a complete `eol` requires escaping. The bound check avoids
			# reading past the end when `self` ends with a prefix of `eol`.
			if eol_ln > 0 and c == eol[0] and i + eol_ln <= ln then
				var j = 1
				while j < eol_ln and self[i + j] == eol[j] do j += 1
				# A partial match must not cancel an escape requested earlier.
				if j == eol_ln then need_esc = true
			end
			i += 1
		end
		if more_ln == 0 and not need_esc then return 0
		# Doubled delimiters plus the opening and closing delimiters.
		return (more_ln + 2) * delim_char.u8char_len
	end

	# Unescape the content of `self` from CSV format to Nit String
	#
	# `self` is expected to be the inside of an escaped value (without the
	# enclosing delimiters): the character following each `delim_char` is
	# dropped without being checked.
	private fun unescape_csv(delim_char: Char): String do
		var to_un = chars_to_unescape_csv(delim_char)
		if to_un == 0 then return to_s
		var buf = new Buffer.with_cap(byte_length - to_un)
		var pos = 0
		var ln = length
		while pos < ln do
			var c = self[pos]
			# Skip the second half of a doubled delimiter.
			if c == delim_char then pos += 1
			buf.add c
			pos += 1
		end
		return buf.to_s
	end

	# How many bytes should be removed for CSV unescaping?
	#
	# Returns `0` if and only if `self` contains no `delim_char`.
	private fun chars_to_unescape_csv(delim_char: Char): Int do
		var pos = 0
		var to_un = 0
		var ln = length
		while pos < ln do
			var c = self[pos]
			if c == delim_char then
				pos += 1
				to_un += 1
			end
			pos += 1
		end
		# Convert the number of dropped characters into bytes.
		return to_un * delim_char.u8char_len
	end
end

# Shared properties by all CSV-related classes
#
# This class is basically only here for implementation purposes and should not be used
# by clients for typing.
abstract class CsvStream
	# Character enclosing an escaped value.
	#
	# An occurrence of this character inside a value is escaped by doubling it.
	var delimiter: Char = '"' is writable

	# Character separating the cells of a record.
	var separator: Char = ',' is writable

	# String terminating a record (end of line).
	var eol: String = "\n" is writable
end

# A CSV document representation.
class CsvDocument
	super Writable
	super CsvStream

	# The header.
	#
	# Contains the name of all fields in this table.
	var header = new Array[String] is writable, optional

	# The list of the records.
	#
	# All records must have the same length as `header`.
	var records = new Array[Array[String]] is writable, optional

	# Adds a new record to document containing the values in `objs`
	#
	# Each value is converted with `to_s`.
	fun add_record(objs: Object...) do
		var ln = new Array[String].with_capacity(objs.length)
		for i in objs do ln.add(i.to_s)
		records.add ln
	end

	# Writes the header (when it is not empty) followed by all the records,
	# using the `separator`, `eol` and `delimiter` of `self`.
	redef fun write_to(stream) do
		var s = new CsvWriter(stream)
		s.separator = separator
		s.eol = eol
		s.delimiter = delimiter
		if not header.is_empty then
			s.write_line header
		end
		s.write_lines(records)
	end

	# Load from the specified stream.
	#
	# The current `header` and `records` are replaced by the ones read from
	# `stream`, which is parsed with the `separator`, `eol` and `delimiter`
	# of `self`.
	#
	# Parameters:
	#
	# * `stream`: Input stream.
	# * `has_header`: Is the first record the header? - defaults to true
	# * `skip_empty`: Do we skip the empty lines? - defaults to true
	fun load_from(stream: Reader, has_header: nullable Bool, skip_empty: nullable Bool) do
		if has_header == null then has_header = true
		if skip_empty == null then skip_empty = true
		var reader = new CsvReader(stream)
		reader.separator = separator
		reader.eol = eol
		reader.delimiter = delimiter
		reader.skip_empty = skip_empty
		# Actually read the stream and take over the parsed content.
		var doc = reader.read_all(has_header)
		header = doc.header
		records = doc.records
	end
end

# Appends CSV records to a file.
#
# By default, uses the format recommended by RFC 4180 (see `rfc4180`).
#
# Note: If a record contains only an empty cell, its representation is
# indistinguishable from an empty line. This is because empty values are
# always written unescaped, to prevent some parsers from interpreting them
# as escaped delimiters.
#
# ~~~nit
# var out = new StringWriter
# var writer = new CsvWriter(out)
# writer.write_elements(1, 2.0, "foo\nbar")
# writer.write_line([""])
# assert out.to_s == """1,2.0,"foo\nbar"\n\n"""
# ~~~
class CsvWriter
	super CsvStream

	# The output stream.
	var ostream: Writer

	# Write several lines to a stream
	#
	# Each element of `lines` is written as one record, see `write_line`.
	fun write_lines(lines: Array[Array[Object]]) do for i in lines do write_line i

	# Append the elements in `els` as a record.
	#
	# The representation of each cell is determined by `to_s`.
	# Behaves exactly like `write_line`.
	fun write_elements(els: Object...) do write_line els

	# Append the specified record.
	#
	# The representation of each cell is determined by `to_s`.
	# Cells are escaped when needed, separated by `separator`, and the record
	# is terminated by `eol`. An empty `line` only writes `eol`.
	fun write_line(line: Array[Object]) do
		var os = ostream
		var esc = delimiter
		var sep = separator
		var eol_str = eol
		for i in [0 .. line.length[ do
			# The separator goes between cells, never before the first one.
			if i > 0 then os.write_char(sep)
			os.write(line[i].to_s.escape_to_csv(sep, esc, eol_str))
		end
		os.write(eol_str)
	end
end

# Reads records from a CSV file.
#
# By default, the format recognizes EOLs as `\n`
#
# ~~~nit
# var example = """
# foo,bar
# "Hello, word!",1234.5 + 42
# "Something
# ""else""\", baz
# """
# var reader = new CsvReader.from_string(example)
# var table = reader.read_all
#
# assert table.header  == ["foo","bar"]
# assert table.records == [["Hello, word!","1234.5 + 42"],
# 			["Something\n\"else\""," baz"]]
# ~~~
class CsvReader
	super CsvStream

	# The input stream.
	var istream: Reader

	# Do we skip the empty lines?
	#
	# Note: Even if this attribute is `false`, the presence of a line ending at
	# the end of the last record does not change the number of returned records.
	# This is because the line endings are processed as terminators, not as
	# separators. Therefore, when there is more than one line ending at the end
	# of the file, the additional lines are interpreted as empty records that
	# are skipped only if `skip_empty` is set to `true`.
	#
	# A line is considered empty when it yields a single empty cell.
	#
	# `false` by default.
	var skip_empty: Bool = false is writable

	# Creates a new CSVReader from a `string` data
	init from_string(s: String) do init(new StringReader(s))

	# Reads the content of the Stream and interprets it as a CSV Document
	#
	# Optional parameter `has_header` determines whether the first line
	# of the CSV Document is header data.
	# Defaults to true
	#
	# The last record does not need to be terminated by `eol`.
	# The returned document has an empty header when `has_header` is false
	# or when no record was read.
	fun read_all(has_header: nullable Bool): CsvDocument do
		if has_header == null then has_header = true
		var iss = istream
		var esc = delimiter
		var sep = separator
		var eol_str = eol
		var eol_st = eol_str.first
		var header: nullable Array[String] = null
		var res_data = new Array[Array[String]]
		# Cells of the record being read
		var line = new Array[String]
		# Content of the cell being read
		var el = new Buffer
		# Characters of a potential `eol` that is not yet confirmed
		var eol_buf = new Buffer.with_cap(eol_str.length)
		var is_eol = false
		var c = iss.read_char
		# Is there still a cell to produce?
		# Driven by the character in hand rather than by `iss.eof`, so that a
		# character already read is never dropped at the end of the input.
		var pending = c != null
		while pending do
			# Read one cell. On exit, either `is_eol` is set (a complete `eol`
			# was consumed), or `c` is the separator, or `c` is null (end of input).
			loop
				if c == esc then
					# Escaped section: everything is literal up to the closing
					# delimiter; a doubled delimiter stands for one delimiter.
					c = iss.read_char
					loop
						if c == esc then
							c = iss.read_char
							if c != esc then break
						end
						if c == null then break
						el.add c
						c = iss.read_char
					end
				end
				if c == null or c == sep then break
				if c == eol_st then
					eol_buf.add c.as(not null)
					is_eol = true
					for i in [1 .. eol_str.length[ do
						c = iss.read_char
						if c == null or c != eol_str[i] then
							# Not a line ending after all: the characters
							# consumed so far belong to the cell.
							is_eol = false
							el.append(eol_buf)
							eol_buf.clear
							break
						end
						eol_buf.add c
					end
					# `c` is the mismatching character, process it normally.
					if not is_eol then continue
					eol_buf.clear
					break
				end
				el.add c.as(not null)
				c = iss.read_char
			end
			line.add el.to_s
			el.clear
			if not is_eol and c == sep then
				# A separator is always followed by another cell,
				# possibly an empty one at the end of the input.
				c = iss.read_char
				continue
			end
			# End of the record, either by `eol` or by the end of the input.
			if is_eol then
				c = iss.read_char
				is_eol = false
			end
			pending = c != null
			if skip_empty and line.length == 1 and line.first.is_empty then
				line.clear
				continue
			end
			if has_header and header == null then
				header = line
			else
				res_data.add line
			end
			line = new Array[String]
		end
		if header == null then header = new Array[String]
		var doc = new CsvDocument
		doc.header = header
		doc.records = res_data
		return doc
	end
end

lib/csv/csv.nit:15,1--342,3

module csv

Summary

CSV document handling.

Introduced classes

CsvDocument

CsvReader

CsvStream

CsvWriter

Redefined classes

Text

CSV document handling.

Introduced classes

class CsvDocument

class CsvReader

abstract class CsvStream

class CsvWriter

Redefined classes

redef abstract class Text

Summary

All class definitions

CsvDocument

CsvReader

CsvStream

CsvWriter

Text

All class definitions

class CsvDocument

class CsvReader

abstract class CsvStream

class CsvWriter

redef abstract class Text

Summary

Ancestors

abstract_collection

abstract_text

array

bitset

bytes

circular_array

codec_base

codecs

collection

environ

error

exec

file

fixed_ints

fixed_ints_text

flat

gc

hash_collection

iso8859_1

kernel

list

math

native

numeric

protocol

queue

range

re

ropes

sorter

stream

string_search

text

time

union_find

utf8

Parents

core

Children

pop_handlers

Descendants

a_star-m

example_advanced_logger

example_angular

example_glob_route

example_hello

example_html_error_handler