1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # CSV document handling.
18 # Specifies a CSV format.
# The character that delimits an escaped value.
22 # The delimiter is escaped by doubling it.
# The character that splits each cell in a row.
28 # The character that ends a row (end of line).
# The delimiter written twice.
#
# This is the sequence substituted for each delimiter found inside a cell
# when the cell is escaped (see `escape_cell`). Computed lazily.
private var escaping: String = "{delimiter}{delimiter}" is lazy
34 # Escape the specified cell.
35 private fun escape_cell
(cell
: String): Text do
36 var result
= new RopeBuffer
38 result
.append cell
.replace
(delimiter
, escaping
)
43 # Can the specified value be inserted without any escaping?
44 private fun is_value_clean
(value
: String): Bool do
45 for c
in value
.chars
do
46 if c
== delimiter
then return false
47 if c
== separator
then return false
48 if eol
.chars
.has
(c
) then return false
54 # A CSV document representation.
# The CSV format used by this document.
#
# It is the format handed to the `CsvWriter` built by `write_to` and to the
# `CsvReader` built by `load_from`.
#
# Defaults to `rfc4180`.
var format: CsvFormat = rfc4180 is writable
# The header row.
#
# Holds the name of every field in this table.
var header = new Array[String] is writable
# The records of this table, one array of cells per record.
#
# Every record must have the same length as `header`.
var records = new Array[Array[String]]
73 # Replace the header by the specified row.
74 fun set_header
(values
: Object...) do
76 for value
in values
do header
.add
(value
.to_s
)
# Append the specified record.
80 fun add_record
(values
: Object...) do
81 assert values
.length
== header
.length
else
82 sys
.stderr
.write
"CSV error: Header declares {header.length} columns, record contains {values.length} values.\n"
84 var record
= new Array[String]
85 for value
in values
do record
.add
(value
.to_s
)
89 redef fun write_to
(stream
) do
90 var writer
= new CsvWriter.with_format
(stream
, format
)
91 writer
.write_sequence
(header
)
92 for record
in records
do writer
.write_sequence
(record
)
# Deprecated alias for `write_to_file`.
#
# Simply forwards `file` to `write_to_file`; new code should call that
# method directly.
fun save(file: String)
do
	write_to_file(file)
end
98 # Load from the specified stream.
102 # * `stream`: Input stream.
103 # * `has_header`: Is the first row the header?
104 # * `skip_empty`: Do we skip the empty lines?
105 # For details, see `CsvReader.skip_empty`.
106 fun load_from
(stream
: Reader, has_header
: Bool, skip_empty
: Bool) do
107 var reader
= new CsvReader.with_format
(stream
, format
)
108 reader
.skip_empty
= skip_empty
117 for record
in reader
do records
.add
(record
)
120 # Load from the specified file.
124 # * `path`: Path of the file.
125 # * `has_header`: Is the first row the header?
126 # * `skip_empty`: Do we skip the empty lines?
127 fun load
(path
: String, has_header
: Bool, skip_empty
: Bool) do
128 var istream
= new FileReader.open
(path
)
129 load_from
(istream
, has_header
, skip_empty
)
134 # Appends CSV rows to a file.
136 # By default, uses the format recommended by RFC 4180 (see `rfc4180`).
# Note: If a row contains only an empty cell, its representation is
# indistinguishable from an empty line. This is because empty values are
# always written unescaped, so that some parsers do not misinterpret them as
# escaped delimiters.
144 # var out = new StringWriter
145 # var writer = new CsvWriter(out)
146 # writer.write_row(1, 2.0, "foo\nbar")
147 # writer.write_sequence([""])
148 # assert out.to_s == """1,2.0,"foo\nbar"\r\n\r\n"""
# The CSV format applied when writing.
#
# Supplies the separator and end-of-line sequence used by `write_sequence`,
# and the cleanliness test and escaping used by `write_cell`.
#
# Defaults to `rfc4180`.
var format: CsvFormat = rfc4180
# Do we escape all cells (except empty ones)?
#
# If `false` (the default), only the cells that contain a metacharacter of
# the format are escaped. Empty cells are never escaped, whatever the value
# of this option. The option trades speed (when `true`) against output size
# (when `false`).
#
# Note: Escaping may not be correctly recognized by some parsers.
var always_escape: Bool = false is writable
170 # Create a new writer with the specified format.
171 init with_format
(ostream
:Writer, format
: CsvFormat) do
172 self.ostream
= ostream
176 # Append the specified sequence as a row.
178 # The representation of each cell is determined by `to_s`.
179 fun write_sequence
(row
: SequenceRead[Object]) do
180 if not row
.is_empty
then
182 var separator
= format
.separator
.to_s
183 write_cell i
.item
.to_s
186 ostream
.write separator
190 ostream
.write format
.eol
# Append the specified values as one row.
#
# Variadic convenience form of `write_sequence`: the arguments are forwarded
# to it unchanged, so each cell is rendered with `to_s`.
fun write_row(row: Object...)
do
	write_sequence(row)
end
# Close the underlying output stream (`ostream`).
fun close
do
	ostream.close
end
201 private fun write_cell
(cell
: String) do
202 if cell
.is_empty
then return
203 if not always_escape
and format
.is_value_clean
(cell
) then
206 ostream
.write format
.escape_cell
(cell
)
211 # Reads rows from a CSV file.
213 # By default, uses the format recommended by RFC 4180 (see `rfc4180`).
216 # var example = new StringReader("""
218 # "Hello, word!",1234.5 + 42\r
222 # var reader = new CsvReader(example)
223 # var table = new Array[Array[String]]
225 # for row in reader do table.add row
228 # ["Hello, word!","1234.5 + 42"],
229 # ["Something\r\n\"else\""," baz"]
233 super Iterator[Array[String]]
# The CSV format expected in the input.
#
# `read_row` compares each character with its delimiter, separator and
# end-of-line sequence.
#
# Defaults to `rfc4180`; the default is evaluated lazily.
var format: CsvFormat = rfc4180 is lazy
# Do we skip the empty lines?
#
# Note: Even if this attribute is `false`, the presence of a line ending at
# the end of the last row does not change the number of returned rows.
# This is because the line endings are processed as terminators, not as
# separators. Therefore, when there is more than one line ending at the end
# of the file, the additional lines are interpreted as empty rows that
# are skipped only if `skip_empty` is set to `true`.
#
# `false` by default.
var skip_empty: Bool = false is writable
# The row most recently read, or `null` when there is none.
#
# `finish` resets it to `null`.
private var row: nullable Array[String] = null
# Has anything been read from the input yet?
#
# Starts as `false`.
private var started: Bool = false
261 # Create a new reader with the specified format.
262 init with_format
(istream
:Reader, format
: CsvFormat) do
263 self.istream
= istream
267 # Read the first row, if needed.
278 sys
.stderr
.write
"Already at the end of the stream.\n"
283 # Return the last read row.
286 return row
.as(not null)
# Free some internal resources and set `is_ok` to `false`.
#
# Concretely, the reference to the current row is dropped.
# The input stream is not closed; call `close` for that.
redef fun finish
do
	row = null
end
# Close the underlying input stream (`istream`).
fun close
do
	istream.close
end
302 private fun read_row
: nullable Array[String] do
303 if istream
.eof
then return null
304 var row
= new Array[String]
305 var value
= new RopeBuffer
307 # Number of unescaped characters since the last delimiter or separator.
310 # Do we read the start of a row?
# Did we find a delimited string in the current cell?
314 var got_delimiter
= false
317 var c
= istream
.read_char
328 if c
== format
.delimiter
then
329 if got_delimiter
and unescaped
== 0 then
330 # Got an escaped delimiter.
331 value
.add format
.delimiter
333 # Read all bytes until the delimiter.
335 c
= istream
.read_char
336 assert not_eof
: c
!= null else
337 sys
.stderr
.write
"Unexpected end of file before the end of a delimited value.\n"
339 if c
== format
.delimiter
then break
345 else if c
== format
.separator
then
346 # Flush the value to the row.
350 got_delimiter
= false
354 if unescaped
>= format
.eol
.length
and
355 value
.has_suffix
(format
.eol
) then
356 var value_trimed
= value
.substring
(0,
357 value
.length
- format
.eol
.length
).to_s
358 if skip_empty
and row
.is_empty
and
359 value_trimed
.is_empty
and
360 not got_delimiter
then
361 # Skip the empty line.
# The CSV format recommended by [RFC 4180](https://tools.ietf.org/html/rfc4180).
#
# * `delimiter`: `'"'`
# * `separator`: `','`
# * `eol`: `"\r\n"`
#
# The format object is built once and the same instance is returned by
# every call.
fun rfc4180: CsvFormat
do
	var shared = once new CsvFormat('"', ',', "\r\n")
	return shared
end