1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # CSV document handling.
19 # Escape the content of `self` for inclusion in a CSV document
20 private fun escape_to_csv
(sep_char
, delim_char
: Char, eol
: String): String do
21 var add_sp
= chars_to_escape_csv
(sep_char
, delim_char
, eol
)
22 if add_sp
== 0 then return to_s
23 var bf
= new Buffer.with_cap
(add_sp
+ bytelen
)
25 for i
in [0 .. length
[ do
27 if c
== delim_char
then
36 # How many more bytes should be allocated for CSV escaping ?
37 private fun chars_to_escape_csv
(sep_char
, delim_char
: Char, eol
: String): Int do
41 var fst_eol
= eol
.first
45 if c
== delim_char
then more_ln
+= 1
48 for j
in [1 .. eol
.length
[ do
58 if c
== sep_char
then need_esc
= true
61 var more
= more_ln
* delim_char
.u8char_len
62 if need_esc
then more
+= 2
66 # Unescape the content of `self` from CSV format to Nit String
67 private fun unescape_csv
(delim_char
: Char): String do
68 var to_un
= chars_to_unescape_csv
(delim_char
)
69 if to_un
== 0 then return to_s
70 var buf
= new Buffer.with_cap
(bytelen
- to_un
)
75 if c
== delim_char
then pos
+= 1
82 # How many bytes should be removed for CSV unescaping ?
83 private fun chars_to_unescape_csv
(delim_char
: Char): Int do
89 if c
== delim_char
then
99 # Shared properties by all CSV-related classes
101 # This class is basically only here for implementation purposes and should not be used
102 # by clients for typing.
103 abstract class CsvStream
104 # The character that delimits escaped values.
106 # The delimiter is escaped by doubling it.
107 var delimiter
= '"' is writable
109 # The character that splits each cell in a record.
110 var separator
= ',' is writable
112 # The character that ends a record (end of line).
113 var eol
= "\n" is writable
116 # A CSV document representation.
123 # Contains the name of all fields in this table.
124 var header
= new Array[String] is writable, optional
126 # The list of the records.
128 # All records must have the same length as `header`.
129 var records
= new Array[Array[String]] is writable, optional
131 # Adds a new record to document containing the values in `objs`
132 fun add_record
(objs
: Object...) do
133 var ln
= new Array[String].with_capacity
(objs
.length
)
134 for i
in objs
do ln
.add
(i
.to_s
)
138 redef fun write_to
(stream
) do
139 var s
= new CsvWriter(stream
)
140 s
.separator
= separator
142 s
.delimiter
= delimiter
143 if not header
.is_empty
then
146 s
.write_lines
(records
)
149 # Load from the specified stream.
153 # * `stream`: Input stream.
154 # * `has_header`: Is the first record the header? - defaults to true
155 # * `skip_empty`: Do we skip the empty lines? - defaults to true
156 fun load_from
(stream
: Reader, has_header
: nullable Bool, skip_empty
: nullable Bool) do
157 if has_header
== null then has_header
= true
158 if skip_empty
== null then skip_empty
= true
159 var reader
= new CsvReader(stream
)
160 reader
.separator
= separator
162 reader
.delimiter
= delimiter
163 reader
.skip_empty
= skip_empty
167 # Appends CSV records to a file.
169 # By default, uses the format recommended by RFC 4180 (see `rfc4180`).
171 # Note: If a record contains only an empty cell, its representation is
172 # indistinguishable from an empty line. This is because the empty values are
173 # always written unescaped to prevent them from being interpreted as escaped
174 # delimiters by some parsers.
177 # var out = new StringWriter
178 # var writer = new CsvWriter(out)
179 # writer.write_elements(1, 2.0, "foo\nbar")
180 # writer.write_line([""])
181 # assert out.to_s == """1,2.0,"foo\nbar"\n\n"""
# Write every record of `lines` to the stream, in order.
#
# Each inner array is handed to `write_line` as one record.
fun write_lines(lines: Array[Array[Object]]) do
	for record in lines do
		write_line(record)
	end
end
192 # Append the elements in `els` as a record.
194 # The representation of each cell is determined by `to_s`.
195 fun write_elements
(els
: Object...) do
200 for i
in [0 .. els
.length
- 1[ do
201 os
.write
(els
[i
].to_s
.escape_to_csv
(sep
, esc
, eol
))
204 os
.write
(els
.last
.to_s
.escape_to_csv
(sep
, esc
, eol
))
208 # Append the specified record.
210 # The representation of each cell is determined by `to_s`.
211 fun write_line
(line
: Array[Object]) do
216 for i
in [0 .. line
.length
- 1[ do
217 os
.write
(line
[i
].to_s
.escape_to_csv
(sep
, esc
, eol
))
220 os
.write
(line
.last
.to_s
.escape_to_csv
(sep
, esc
, eol
))
225 # Reads records from a CSV file.
227 # By default, the format recognizes EOLs as `\n`
232 # "Hello, word!",1234.5 + 42
236 # var reader = new CsvReader.from_string(example)
237 # var table = reader.read_all
239 # assert table.header == ["foo","bar"]
240 # assert table.records == [["Hello, word!","1234.5 + 42"],
241 # ["Something\n\"else\""," baz"]]
249 # Do we skip the empty lines?
251 # Note: Even if this attribute is `false`, the presence of a line ending at
252 # the end of the last record does not change the number of returned records.
253 # This is because the line endings are processed as terminators, not as
254 # separators. Therefore, when there is more than one line ending at the end
255 # of the file, the additional lines are interpreted as empty records that
256 # are skipped only if `skip_empty` is set to `true`.
258 # `false` by default.
259 var skip_empty
: Bool = false is writable
# Creates a new `CsvReader` that reads its input from the string `s`.
#
# The string is wrapped in a `StringReader`, which is passed to the
# default constructor.
init from_string(s: String) do
	init(new StringReader(s))
end
264 # Reads the content of the Stream and interprets it as a CSV Document
266 # Optional parameter `has_header` determines whether the first line
267 # of the CSV Document is header data.
269 fun read_all
(has_header
: nullable Bool): CsvDocument do
270 var header
: nullable Array[String] = null
271 if has_header
== null then has_header
= true
273 var res_data
= new Array[Array[String]]
274 var eol_st
= eol
.first
275 var line
= new Array[String]
280 var eol_buf
= new Buffer.with_cap
(eol
.length
)
281 var c
= iss
.read_char
284 if c
== null then continue
291 if c
!= esc
then break
293 if c
== null then break
298 if c
== sep
then break
300 eol_buf
.add c
.as(not null)
302 for i
in [1 .. eol
.length
[ do
304 if c
== null or c
!= eol
[i
] then
312 if not is_eol
then continue
316 if c
== sep
then break
317 el
.add c
.as(not null)
319 if c
== null then break
323 if is_eol
or iss
.eof
then
326 if skip_empty
and line
.is_empty
then
329 if has_header
and header
== null then
331 else res_data
.add line
332 line
= new Array[String]
334 if c
== sep
then c
= iss
.read_char
336 if header
== null then header
= new Array[String]
337 var doc
= new CsvDocument
339 doc
.records
= res_data