1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
12 module saxophonit
::lexer
18 # Except when noted otherwise, `accept` and `expect` functions return `true` on
19 # success and `false` on mismatch and at the end of the file.
20 # They both forward the cursor to the next byte on success, but only `expect`
21 # functions fire a fatal error on mismatch.
25 var reader_model
: XophonReaderModel
27 # The input to read from.
28 var input
: IStream is writable
30 # Alias to `reader_model.locator`.
31 private var locator
: SAXLocatorImpl is noinit
34 locator
= reader_model
.locator
.as(not null)
39 # Equals `-1` on end of file or error.
40 private var last_char
: Int = -1
42 # Before end-of-line handling, was the last read byte a CARRIAGE RETURN?
43 private var was_cr
: Bool = false
46 # Expect a value delimiter (`"` or `'`).
48 # If the last read byte is a delimiter, return the delimiter and
49 # read the next byte. Else, return `-1`.
50 fun expect_delimiter
: Int do
53 else if accept
('\'') then
56 fire_unexpected_char
(". Expecting `\"` or `'`")
61 # Does the last read byte match the `Char` production?
62 fun is_xml_char:Bool do
63 # TODO: Handle code points above 0x7F.
64 return last_char >= 32 or
69 # Push the last read byte in the specified buffer and read the next byte.
71 # If the last read byte is forbidden, fire a fatal error instead.
72 fun expect_xml_char(buffer: Buffer): Bool do
74 buffer.chars.push(last_char.ascii)
78 return fire_fatal_error("Unexpected end of file.")
80 return fire_fatal_error("Forbidden character.")
85 # Like `expect_xml_char`, but normalize white space and forbid `<`.
87 # SEE: The “3.3.3 Attribute-Value Normalization” section of any XML
89 fun expect_att_value_char(buffer: Buffer): Bool do
91 buffer.chars.push(' ')
94 else if last_char == '<'.ascii then
95 return fire_fatal_error("`<` is forbidden in attribute values.")
97 return expect_xml_char(buffer)
101 # Does the last read byte match the `S` production?
103 return last_char == 32 or last_char == 9 or last_char == 10
106 # Skip a `S?` token and return `true`.
108 while is_s do read_char
112 # Accept a `S` token.
113 fun accept_s: Bool do
123 fun expect_s: Bool do
124 return (accept_s and skip_s) or fire_unexpected_char(". Expecting white space")
127 # Does the last read byte match the `NameStartChar` production?
128 fun is_name_start_char: Bool do
129 # TODO: Handle code points above 0x7F.
130 return ['A
'.ascii .. 'Z
'.ascii].has(last_char) or
131 ['a
'.ascii .. 'z
'.ascii].has(last_char) or
132 last_char == '_
'.ascii or
133 last_char == ':'.ascii or
137 # Does the last read byte match the `NameChar` production?
138 fun is_name_char: Bool do
139 # TODO: Handle code points above 0x7F.
140 return is_name_start_char or
141 last_char == '-'.ascii or
142 last_char == '.'.ascii or
146 # Expect a `Name` token.
148 # Append the parsed name to `buffer`.
149 fun expect_name(buffer: Buffer): Bool do
150 if not is_name_start_char then
151 return fire_unexpected_char(" at the beginning of a name")
153 buffer.chars.push(last_char.ascii)
155 while is_name_char do
156 buffer.chars.push(last_char.ascii)
162 # Expect a `PITarget` token.
164 # Append the parsed name to `buffer`.
165 fun expect_pi_target(buffer: Buffer): Bool do
166 return expect_name(buffer) and check_pi_target(buffer)
169 # Ensure the target is not `xml` (case-insensitive).
171 # Also, fire an error if the target contains a colon.
172 fun check_pi_target(target: Text): Bool do
# Reserved-target check: the name is exactly three characters long and spells
# `xml` in any letter case. Each position is compared with its own letter
# (index 0 for `x`, 1 for `m`, 2 for `l`); testing index 0 three times can
# never match, which would let the reserved target through.
var is_invalid = target.length == 3 and
		(target.chars[0] == 'X' or target.chars[0] == 'x') and
		(target.chars[1] == 'M' or target.chars[1] == 'm') and
		(target.chars[2] == 'L' or target.chars[2] == 'l')
179 return fire_fatal_error("Forbidden processing target `{target}`.")
181 if target.has(":") then
182 reader_model.fire_error("The processing target `{target}` contains a colon.", null)
188 # Does the last read byte match the `[0-9]` production?
189 fun is_digit: Bool do
190 return ['0'.ascii .. '9'.ascii].has(last_char)
193 # Accept a `[0-9]+` token.
194 fun accept_digits(buffer: Buffer): Bool do
197 buffer.chars.push(last_char.ascii)
199 if not is_digit then return true
206 # Expect a `[0-9]+` token.
207 fun expect_digits(buffer: Buffer): Bool do
208 return accept_digits(buffer) or fire_unexpected_char(". Expecting a decimal digit")
211 # Does `last_char` match the `[0-9a-fA-F]` production?
# Accept decimal digits and the six hexadecimal letters in either case.
# The letter ranges stop at `F`/`f`: a range up to `Z` would admit non-hex
# letters, and a range from `a` to `Z` is empty (97 > 90), which would
# reject every lower-case hexadecimal digit.
return ['0'.ascii .. '9'.ascii].has(last_char) or
		['A'.ascii .. 'F'.ascii].has(last_char) or
		['a'.ascii .. 'f'.ascii].has(last_char)
218 # Expect a `[0-9a-fA-F]+` token.
219 fun expect_hex(buffer: Buffer): Bool do
222 buffer.chars.push(last_char.ascii)
224 if not is_hex then return true
227 return fire_unexpected_char(". Expecting an hexadecimal digit")
232 fun expect_eq: Bool do
233 return skip_s and expect('=', "") and skip_s
237 ############################################################################
240 # Read a byte and put it in `last_char`.
242 # In case of an end-of-file or an error, put -1 in `last_char`.
243 private fun read_char do
244 if locator.line_number < 0 then
245 locator.line_number = 1
246 locator.column_number = 1
247 else if last_char < 0 then
248 fire_fatal_error("Internal error: Already at the end of the file.")
250 else if last_char == '\n
'.ascii then
251 locator.line_number += 1
252 locator.column_number = 1
254 locator.column_number += 1
257 last_char = input.read_char
258 if last_char < 0 then
262 # XML 1.0 end-of-line handling
263 # Note: Regardless of the XML version, any EOL defined by the
264 # recommendation MUST be reported as a single LINE FEED.
265 if was_cr and last_char == '\n
'.ascii then
266 # EOL already reported. => Skip this byte.
267 last_char = input.read_char
269 was_cr = last_char == '\r
'.ascii
271 # Regardless of the following byte, '\r' always introduces an EOL.
272 last_char = '\n
'.ascii
# Has the end of the stream been reached?
#
# Also answers `true` after a fatal error.
fun eof: Bool do
	return last_char < 0
end
# Is the last read byte equal to the byte value `c`?
fun is_int(c: Int): Bool do
	return last_char == c
end
# Is the last read byte equal to the code of the character `c`?
fun is_char(c: Char): Bool do
	return last_char == c.ascii
end
301 # Accept the specified byte.
302 fun accept_int(expected: Int): Bool do
303 if last_char == expected then
311 # Accept the specified byte.
312 fun accept(expected: Char): Bool do
313 return accept_int(expected.ascii)
316 # Ensure the last read byte is equal to `expected`.
318 # If it is, read the next byte. If not, fire a fatal error using
319 # `context`. `context` is the part of the message that gives the context.
320 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
321 # `context` is `" in y"`.
323 # Return `true` if and only if the last read byte has the expected value.
324 fun expect_int(expected: Int, context: String): Bool do
325 return accept_int(expected) or
326 fire_unexpected_char("{context}. Expecting `{expected.ascii}`.")
329 # Ensure the last read byte is equal to `expected`.
331 # If it is, read the next byte. If not, fire a fatal error using
332 # `context`. `context` is the part of the message that gives the context.
333 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
334 # `context` is `" in y"`.
336 # Return `true` if and only if the last read byte has the expected value.
337 fun expect(expected: Char, context: String): Bool do
338 return accept(expected) or
339 fire_unexpected_char("{context}. Expecting `{expected}`.")
342 # Ensure the last read byte and following bytes match `expected`.
344 # If it is, read one more byte. If not, fire a fatal error using
345 # `context`. `context` is the part of the message that gives the context.
346 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
347 # `context` is `" in y"`.
349 # Return `true` if and only if the last read byte and following bytes
351 fun expect_string(expected: String, context: String): Bool do
352 var chars = expected.chars
355 while i < chars.length do
356 if not accept(chars[i]) then
358 return fire_fatal_error("Unexpected " +
359 "`{expected.substring(0, i)}{last_char.ascii.to_s}`" +
360 "{context}. Expecting `{expected}`.")
362 return fire_fatal_error("Unexpected end of file{context}. " +
363 "Expecting `{expected}`.")
365 return fire_fatal_error("Forbidden character.")
374 ############################################################################
377 # Fire a fatal error about an unexpected character.
380 fun fire_unexpected_char(rest_of_message: String): Bool do
382 return fire_fatal_error("Unexpected character `{last_char.ascii.to_s}`{rest_of_message}.")
384 return fire_fatal_error("Unexpected end of file{rest_of_message}.")
386 return fire_fatal_error("Forbidden character.")
390 # Fire a fatal error with the specified message.
393 private fun fire_fatal_error(message: String): Bool do
394 reader_model.fire_fatal_error(message, null)