4f77ab8148577d171b3f844c5718f0916d29402d
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
12 module saxophonit
::lexer
18 # Except when noted otherwise, `accept` and `expect` functions return `true` on
19 # success and `false` on mismatch and at the end of the file.
20 # They both forward the cursor to the next byte on success, but only `expect`
21 # functions fire a fatal error on mismatch.
23 var reader_model
: XophonReaderModel
24 var input
: IStream is writable
25 private var locator
: SAXLocatorImpl is noinit
28 locator
= reader_model
.locator
.as(not null)
33 # Equals `-1` on end of file or error.
34 private var last_char
: Int = -1
36 # Before end-of-line handling, was the last read byte a CARRIAGE RETURN?
37 private var was_cr
: Bool = false
40 # Expect a value delimiter (`"` or `'`).
42 # If the last read byte is a delimiter, return the delimiter and
43 # read the next byte. Else, return `-1`.
44 fun expect_delimiter
: Int do
47 else if accept
('\'') then
50 fire_unexpected_char
(". Expecting `\"` or `'`")
55 # Does the last read byte match the `Char` production?
56 fun is_xml_char:Bool do
57 # TODO: Handle code points above 0x7F.
58 return last_char >= 32 or
63 # Push the last read byte in the specified buffer and read the next byte.
65 # If the last read byte is forbidden, fire a fatal error instead.
66 fun expect_xml_char(buffer: Buffer): Bool do
68 buffer.chars.push(last_char.ascii)
72 return fire_fatal_error("Unexpected end of file.")
74 return fire_fatal_error("Forbidden character.")
79 # Like `expect_xml_char`, but normalize white space and forbid `<`.
81 # SEE: The “3.3.3 Attribute-Value Normalization” section of any XML
83 fun expect_att_value_char(buffer: Buffer): Bool do
85 buffer.chars.push(' ')
88 else if last_char == '<'.ascii then
89 return fire_fatal_error("`<` is forbidden in attribute values.")
91 return expect_xml_char(buffer)
95 # Does the last read byte match the `S` production?
97 return last_char == 32 or last_char == 9 or last_char == 10
100 # Skip a `S?` token and return `true`.
102 while is_s do read_char
106 # Accept a `S` token.
107 fun accept_s: Bool do
117 fun expect_s: Bool do
118 return (accept_s and skip_s) or fire_unexpected_char(". Expecting white space")
121 # Does the last read byte match the `NameStartChar` production?
122 fun is_name_start_char: Bool do
123 # TODO: Handle code points above 0x7F.
124 return ['A
'.ascii .. 'Z
'.ascii].has(last_char) or
125 ['a
'.ascii .. 'z
'.ascii].has(last_char) or
126 last_char == '_
'.ascii or
127 last_char == ':'.ascii or
131 # Does the last read byte match the `NameChar` production?
132 fun is_name_char: Bool do
133 # TODO: Handle code points above 0x7F.
134 return is_name_start_char or
135 last_char == '-'.ascii or
136 last_char == '.'.ascii or
140 # Expect a `Name` token.
142 # Append the parsed name to `buffer`.
143 fun expect_name(buffer: Buffer): Bool do
144 if not is_name_start_char then
145 return fire_unexpected_char(" at the beginning of a name")
147 buffer.chars.push(last_char.ascii)
149 while is_name_char do
150 buffer.chars.push(last_char.ascii)
156 # Expect a `PITarget` token.
158 # Append the parsed name to `buffer`.
159 fun expect_pi_target(buffer: Buffer): Bool do
160 return expect_name(buffer) and check_pi_target(buffer)
163 # Ensure the target is not `xml` (case-insensitive).
165 # Also, fire an error if the target contains a colon.
166 fun check_pi_target(target: Text): Bool do
# A target is forbidden when it is exactly three characters long and spells
# `xml` in any letter case: each letter is compared at its own index
# (0 for `x`, 1 for `m`, 2 for `l`).
167 var is_invalid = target.length == 3 and
168 (target.chars[0] == 'X' or target.chars[0] == 'x') and
169 (target.chars[1] == 'M' or target.chars[1] == 'm') and
170 (target.chars[2] == 'L' or target.chars[2] == 'l')
173 return fire_fatal_error("Forbidden processing target `{target}`.")
175 if target.has(":") then
176 reader_model.fire_error("The processing target `{target}` contains a colon.", null)
182 # Does the last read byte match the `[0-9]` production?
183 fun is_digit: Bool do
184 return ['0'.ascii .. '9'.ascii].has(last_char)
187 # Accept a `[0-9]+` token.
188 fun accept_digits(buffer: Buffer): Bool do
191 buffer.chars.push(last_char.ascii)
193 if not is_digit then return true
200 # Expect a `[0-9]+` token.
201 fun expect_digits(buffer: Buffer): Bool do
202 return accept_digits(buffer) or fire_unexpected_char(". Expecting a decimal digit")
205 # Does `last_char` match the `[0-9a-fA-F]` production?
# Only the letters `A`..`F` and `a`..`f` are hexadecimal digits; each range
# is written low bound first so that it is not empty.
207 return ['0'.ascii .. '9'.ascii].has(last_char) or
208 ['A'.ascii .. 'F'.ascii].has(last_char) or
209 ['a'.ascii .. 'f'.ascii].has(last_char)
212 # Expect a `[0-9a-fA-F]+` token.
213 fun expect_hex(buffer: Buffer): Bool do
216 buffer.chars.push(last_char.ascii)
218 if not is_hex then return true
221 return fire_unexpected_char(". Expecting an hexadecimal digit")
226 fun expect_eq: Bool do
227 return skip_s and expect('=', "") and skip_s
231 ############################################################################
234 # Read a byte and put it in `last_char`.
236 # In case of an end-of-file or an error, put -1 in `last_char`.
237 private fun read_char do
238 if locator.line_number < 0 then
239 locator.line_number = 1
240 locator.column_number = 1
241 else if last_char < 0 then
242 fire_fatal_error("Internal error: Already at the end of the file.")
244 else if last_char == '\n
'.ascii then
245 locator.line_number += 1
246 locator.column_number = 1
248 locator.column_number += 1
251 last_char = input.read_char
252 if last_char < 0 then
256 # XML 1.0 end-of-line handling
257 # Note: Regardless of the XML version, any EOL defined by the
258 # recommendation MUST be reported as a single LINE FEED.
259 if was_cr and last_char == '\n
'.ascii then
260 # EOL already reported. => Skip this byte.
261 last_char = input.read_char
263 was_cr = last_char == '\r
'.ascii
265 # Regardless of the following byte, '\r' always introduces an EOL.
266 last_char = '\n
'.ascii
270 # Has the end of the stream been reached?
272 # Also answers `true` once a fatal error has occurred.
# `last_char` is negative in both situations.
273 fun eof: Bool do return 0 > last_char
289 # Is the byte code `c` the value of the last read byte?
290 fun is_int(c: Int): Bool do return c == last_char
292 # Is the code of the character `c` the value of the last read byte?
293 fun is_char(c: Char): Bool do return c.ascii == last_char
295 # Accept the specified byte.
296 fun accept_int(expected: Int): Bool do
297 if last_char == expected then
305 # Accept the specified byte.
306 fun accept(expected: Char): Bool do
307 return accept_int(expected.ascii)
310 # Ensure the last read byte is equal to `expected`.
312 # If it is, read the next byte. If not, fire a fatal error using
313 # `context`. `context` is the part of the message that gives the context.
314 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
315 # `context` is `" in y"`.
317 # Return `true` if and only if the last read byte has the expected value.
318 fun expect_int(expected: Int, context: String): Bool do
# The message carries no trailing period: `fire_unexpected_char` appends the
# final one itself.
319 return accept_int(expected) or
320 fire_unexpected_char("{context}. Expecting `{expected.ascii}`")
323 # Ensure the last read byte is equal to `expected`.
325 # If it is, read the next byte. If not, fire a fatal error using
326 # `context`. `context` is the part of the message that gives the context.
327 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
328 # `context` is `" in y"`.
330 # Return `true` if and only if the last read byte has the expected value.
331 fun expect(expected: Char, context: String): Bool do
# The message carries no trailing period: `fire_unexpected_char` appends the
# final one itself.
332 return accept(expected) or
333 fire_unexpected_char("{context}. Expecting `{expected}`")
336 # Ensure the last read byte and following bytes match `expected`.
338 # If they do, read one more byte. If not, fire a fatal error using
339 # `context`. `context` is the part of the message that gives the context.
340 # For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
341 # `context` is `" in y"`.
343 # Return `true` if and only if the last read byte and following bytes
345 fun expect_string(expected: String, context: String): Bool do
346 var chars = expected.chars
349 while i < chars.length do
350 if not accept(chars[i]) then
352 return fire_fatal_error("Unexpected " +
353 "`{expected.substring(0, i)}{last_char.ascii.to_s}`" +
354 "{context}. Expecting `{expected}`.")
356 return fire_fatal_error("Unexpected end of file{context}. " +
357 "Expecting `{expected}`.")
359 return fire_fatal_error("Forbidden character.")
368 ############################################################################
371 # Fire a fatal error about an unexpected character.
374 fun fire_unexpected_char(rest_of_message: String): Bool do
376 return fire_fatal_error("Unexpected character `{last_char.ascii.to_s}`{rest_of_message}.")
378 return fire_fatal_error("Unexpected end of file{rest_of_message}.")
380 return fire_fatal_error("Forbidden character.")
384 # Fire a fatal error with the specified message.
387 private fun fire_fatal_error(message: String): Bool do
388 reader_model.fire_fatal_error(message, null)