1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
12 module saxophonit
::lexer
18 # Except when noted otherwise, `accept` and `expect` functions return `true` on
19 # success and `false` on mismatch and at the end of the file.
# They both forward the cursor to the next byte on success, but only `expect`
21 # functions fire a fatal error on mismatch.
# The reader model this lexer reports to.
#
# Errors are forwarded to it (`fire_error`, `fire_fatal_error`) and its
# `locator` is the one aliased by `locator`.
var reader_model: XophonReaderModel
# The input to read from.
#
# Bytes are pulled from it one at a time with `read_byte`.
var input: Reader is writable
# Alias to `reader_model.locator`.
#
# Holds the `line_number` and `column_number` that `read_char` keeps up to
# date. Declared `noinit`: it is assigned from `reader_model.locator`
# instead of being passed by the caller.
private var locator: SAXLocatorImpl is noinit
34 locator
= reader_model
.locator
.as(not null)
# The byte most recently stored by `read_char`.
#
# Equals `-1` on end of file or error.
private var last_char: Int = -1
# Before end-of-line handling, was the last read byte a CARRIAGE RETURN?
#
# Updated by `read_char`, which uses it to avoid reporting the LINE FEED of
# a CR LF pair a second time.
private var was_cr: Bool = false
46 # Expect a value delimiter (`"` or `'`).
48 # If the last read byte is a delimiter, return the delimiter and
49 # read the next byte. Else, return `-1`.
50 fun expect_delimiter
: Int do
53 else if accept
('\'') then
56 fire_unexpected_char
(". Expecting `\"` or `'`")
# Does the last read byte match the `Char` production?
62 fun is_xml_char:Bool do
63 # TODO: Handle code points above 0x7F.
64 return last_char >= 32 or
69 # Push the last read byte in the specified buffer and read the next byte.
71 # If the last read byte is forbidden, fire a fatal error instead.
72 fun expect_xml_char(buffer: Buffer): Bool do
74 buffer.chars.push(last_char.ascii)
78 return fire_fatal_error("Unexpected end of file.")
80 return fire_fatal_error("Forbidden character.")
85 # Like `expect_xml_char`, but normalize white space and forbid `<`.
87 # SEE: The “3.3.3 Attribute-Value Normalization” section of any XML
89 fun expect_att_value_char(buffer: Buffer): Bool do
91 buffer.chars.push(' ')
94 else if last_char == '<'.ascii then
95 return fire_fatal_error("`<` is forbidden in attribute values.")
97 return expect_xml_char(buffer)
# Does the last read byte match the `S` production?
103 return last_char == 32 or last_char == 9 or last_char == 10
106 # Skip a `S?` token and return `true`.
108 while is_s do read_char
112 # Accept a `S` token.
113 fun accept_s: Bool do
# Expect a `S` token: `accept_s` followed by `skip_s`.
#
# On mismatch, a fatal error about the unexpected character is fired.
fun expect_s: Bool do
	if accept_s and skip_s then return true
	return fire_unexpected_char(". Expecting white space")
# Does the last read byte match the `NameStartChar` production?
128 fun is_name_start_char: Bool do
129 # TODO: Handle code points above 0x7F.
130 return ['A
'.ascii .. 'Z
'.ascii].has(last_char) or
131 ['a
'.ascii .. 'z
'.ascii].has(last_char) or
132 last_char == '_
'.ascii or
133 last_char == ':'.ascii or
# Does the last read byte match the `NameChar` production?
138 fun is_name_char: Bool do
139 # TODO: Handle code points above 0x7F.
140 return is_name_start_char or
141 last_char == '-'.ascii or
142 last_char == '.'.ascii or
# Expect a `Name` token.
148 # Append the parsed name to `buffer`.
149 fun expect_name(buffer: Buffer): Bool do
150 if not is_name_start_char then
151 return fire_unexpected_char(" at the beginning of a name")
153 buffer.chars.push(last_char.ascii)
155 while is_name_char do
156 buffer.chars.push(last_char.ascii)
# Expect a `PITarget` token.
#
# The name is parsed by `expect_name`, which appends it to `buffer`, and is
# then validated by `check_pi_target`.
fun expect_pi_target(buffer: Buffer): Bool do
	if not expect_name(buffer) then return false
	return check_pi_target(buffer)
169 # Ensure the target is not `xml` (case-insensitive).
171 # Also, fire an error if the target contains a colon.
172 fun check_pi_target(target: Text): Bool do
# The target is invalid when it spells `xml` in any letter case.
# Each position is compared with its own letter (indexes 0, 1 and 2);
# testing index 0 three times could never match.
var is_invalid = target.length == 3 and
		(target.chars[0] == 'X' or target.chars[0] == 'x') and
		(target.chars[1] == 'M' or target.chars[1] == 'm') and
		(target.chars[2] == 'L' or target.chars[2] == 'l')
179 return fire_fatal_error("Forbidden processing target `{target}`.")
181 if target.has(":") then
182 reader_model.fire_error("The processing target `{target}` contains a colon.", null)
# Does the last read byte match the `[0-9]` production?
fun is_digit: Bool do
	var c = last_char
	return c >= '0'.ascii and c <= '9'.ascii
193 # Accept a `[0-9]+` token.
194 fun accept_digits(buffer: Buffer): Bool do
197 buffer.chars.push(last_char.ascii)
199 if not is_digit then return true
# Expect a `[0-9]+` token.
#
# Digits are read by `accept_digits` with the same `buffer`. When it
# reports a mismatch, a fatal error about the unexpected character is fired.
fun expect_digits(buffer: Buffer): Bool do
	if accept_digits(buffer) then return true
	return fire_unexpected_char(". Expecting a decimal digit")
# Does `last_char` match the `[0-9a-fA-F]` production?
# Hexadecimal letters stop at `F`/`f`. The upper bounds must not be `Z`:
# `'a' .. 'Z'` is an empty range and `'A' .. 'Z'` accepts non-hex letters.
return ['0'.ascii .. '9'.ascii].has(last_char) or
		['A'.ascii .. 'F'.ascii].has(last_char) or
		['a'.ascii .. 'f'.ascii].has(last_char)
218 # Expect a `[0-9a-fA-F]+` token.
219 fun expect_hex(buffer: Buffer): Bool do
222 buffer.chars.push(last_char.ascii)
224 if not is_hex then return true
227 return fire_unexpected_char(". Expecting an hexadecimal digit")
# Expect an `=` sign, optionally surrounded by white space.
#
# Stops at the first step that fails and returns `false` in that case.
fun expect_eq: Bool do
	if not skip_s then return false
	if not expect('=', "") then return false
	return skip_s
237 ############################################################################
240 # Read a byte and put it in `last_char`.
242 # In case of an end-of-file or an error, put -1 in `last_char`.
243 private fun read_char do
244 if locator.line_number < 0 then
245 locator.line_number = 1
246 locator.column_number = 1
247 else if last_char < 0 then
248 fire_fatal_error("Internal error: Already at the end of the file.")
250 else if last_char == '\n
'.ascii then
251 locator.line_number += 1
252 locator.column_number = 1
254 locator.column_number += 1
257 var s = input.read_byte
264 # XML 1.0 end-of-line handling
# Note: Regardless of the XML version, any EOL defined by the
# recommendation MUST be reported as a single LINE FEED.
267 if was_cr and last_char == '\n
'.ascii then
268 # EOL already reported. => Skip this byte.
270 if s == null then s = -1
273 was_cr = last_char == '\r
'.ascii
# Regardless of the following byte, '\r' always introduces an EOL.
276 last_char = '\n
'.ascii
# Is it the end of the stream?
#
# `true` whenever `last_char` is negative. The original note states that
# this is also the case after a fatal error.
fun eof: Bool do
	return 0 > last_char
end
# Is `c` the value of the last read byte?
#
# Only compares; the cursor is not moved.
fun is_int(c: Int): Bool do
	return c == last_char
end
# Is the code of `c` the value of the last read byte?
#
# Only compares; the cursor is not moved.
fun is_char(c: Char): Bool do
	var code = c.ascii
	return last_char == code
end
# Accept the specified byte.
306 fun accept_int(expected: Int): Bool do
307 if last_char == expected then
# Accept the specified character.
#
# Delegates to `accept_int` with the code of `expected`.
fun accept(expected: Char): Bool do
	var code = expected.ascii
	return accept_int(code)
# Ensure the last read byte is equal to `expected`.
#
# If it is, read the next byte. If not, fire a fatal error using
# `context`. `context` is the part of the message that gives the context.
# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
# `context` is `" in y"`.
#
# Return `true` if and only if the last read byte has the expected value.
fun expect_int(expected: Int, context: String): Bool do
	if accept_int(expected) then return true
	# No trailing period here: `fire_unexpected_char` appends its own, so
	# adding one would end the message with `..`.
	return fire_unexpected_char("{context}. Expecting `{expected.ascii}`")
# Ensure the last read byte is equal to `expected`.
#
# If it is, read the next byte. If not, fire a fatal error using
# `context`. `context` is the part of the message that gives the context.
# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
# `context` is `" in y"`.
#
# Return `true` if and only if the last read byte has the expected value.
fun expect(expected: Char, context: String): Bool do
	if accept(expected) then return true
	# No trailing period here: `fire_unexpected_char` appends its own, so
	# adding one would end the message with `..`.
	return fire_unexpected_char("{context}. Expecting `{expected}`")
346 # Ensure the last read byte and following bytes match `expected`.
348 # If it is, read one more byte. If not, fire a fatal error using
349 # `context`. `context` is the part of the message that gives the context.
350 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
351 # `context` is `" in y"`.
353 # Return `true` if and only if the last read byte and following bytes
355 fun expect_string(expected: String, context: String): Bool do
356 var chars = expected.chars
359 while i < chars.length do
360 if not accept(chars[i]) then
362 return fire_fatal_error("Unexpected " +
363 "`{expected.substring(0, i)}{last_char.ascii.to_s}`" +
364 "{context}. Expecting `{expected}`.")
366 return fire_fatal_error("Unexpected end of file{context}. " +
367 "Expecting `{expected}`.")
369 return fire_fatal_error("Forbidden character.")
378 ############################################################################
381 # Fire a fatal error about an unexpected character.
384 fun fire_unexpected_char(rest_of_message: String): Bool do
386 return fire_fatal_error("Unexpected character `{last_char.ascii.to_s}`{rest_of_message}.")
388 return fire_fatal_error("Unexpected end of file{rest_of_message}.")
390 return fire_fatal_error("Forbidden character.")
394 # Fire a fatal error with the specified message.
397 private fun fire_fatal_error(message: String): Bool do
398 reader_model.fire_fatal_error(message, null)