saxophonit :: XophonLexer :: accept_digits
Accept a[0-9]+
token.
saxophonit :: XophonLexer :: accept_int
Expect the specified byte.saxophonit :: XophonLexer :: check_pi_target
Ensure the target is notxml
(case-insensitive).
saxophonit :: XophonLexer :: defaultinit
saxophonit :: XophonLexer :: expect
Ensure the last read byte is equal toexpected
.
saxophonit :: XophonLexer :: expect_att_value_char
Likeexpect_xml_char
, but normalize white space and forbid <
.
saxophonit :: XophonLexer :: expect_delimiter
Expect a value delimiter ("
or '
).
saxophonit :: XophonLexer :: expect_digits
Expect a[0-9]+
token.
saxophonit :: XophonLexer :: expect_hex
Expect a[0-9a-fA-F]+
token.
saxophonit :: XophonLexer :: expect_int
Ensure the last read byte is equal toexpected
.
saxophonit :: XophonLexer :: expect_pi_target
Expect aPITarget
token.
saxophonit :: XophonLexer :: expect_string
Ensure the last read byte and following bytes matchexpected
.
saxophonit :: XophonLexer :: expect_xml_char
Push the last read byte in the specified buffer and read the next byte.saxophonit :: XophonLexer :: fire_unexpected_char
Fire a fatal error about an unexpected character.saxophonit :: XophonLexer :: is_digit
Is the last read byte matches the[0-9]
production?
saxophonit :: XophonLexer :: is_hex
Islast_char
matches the [0-9a-fA-F]
production?
saxophonit :: XophonLexer :: is_name_char
Is the last read byte matches theNameChar
production?
saxophonit :: XophonLexer :: is_name_start_char
Is the last read byte matches theNameStartChar
production?
saxophonit :: XophonLexer :: is_xml_char
Is the last read byte matches theChar
production?
saxophonit :: XophonLexer :: reader_model=
The model.saxophonit $ XophonLexer :: SELF
Type of this instance, automatically specialized in every classsaxophonit $ XophonLexer :: init
saxophonit :: XophonLexer :: accept_digits
Accept a[0-9]+
token.
saxophonit :: XophonLexer :: accept_int
Expect the specified byte.saxophonit :: XophonLexer :: check_pi_target
Ensure the target is notxml
(case-insensitive).
core :: Object :: class_factory
Implementation used byget_class
to create the specific class.
core :: Object :: defaultinit
saxophonit :: XophonLexer :: defaultinit
saxophonit :: XophonLexer :: expect
Ensure the last read byte is equal toexpected
.
saxophonit :: XophonLexer :: expect_att_value_char
Likeexpect_xml_char
, but normalize white space and forbid <
.
saxophonit :: XophonLexer :: expect_delimiter
Expect a value delimiter ("
or '
).
saxophonit :: XophonLexer :: expect_digits
Expect a[0-9]+
token.
saxophonit :: XophonLexer :: expect_hex
Expect a[0-9a-fA-F]+
token.
saxophonit :: XophonLexer :: expect_int
Ensure the last read byte is equal toexpected
.
saxophonit :: XophonLexer :: expect_pi_target
Expect aPITarget
token.
saxophonit :: XophonLexer :: expect_string
Ensure the last read byte and following bytes matchexpected
.
saxophonit :: XophonLexer :: expect_xml_char
Push the last read byte in the specified buffer and read the next byte.saxophonit :: XophonLexer :: fire_unexpected_char
Fire a fatal error about an unexpected character.saxophonit :: XophonLexer :: is_digit
Is the last read byte matches the[0-9]
production?
saxophonit :: XophonLexer :: is_hex
Islast_char
matches the [0-9a-fA-F]
production?
saxophonit :: XophonLexer :: is_name_char
Is the last read byte matches theNameChar
production?
saxophonit :: XophonLexer :: is_name_start_char
Is the last read byte matches theNameStartChar
production?
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
saxophonit :: XophonLexer :: is_xml_char
Is the last read byte matches theChar
production?
core :: Object :: output_class_name
Display class name on stdout (debug only).saxophonit :: XophonLexer :: reader_model=
The model.
# SAXophoNit’s lexer
#
# Except when noted otherwise, `accept` and `expect` functions return `true` on
# success and `false` on mismatch and at the end of the file.
# They both foward the cursor to the next byte on success, but only `expect`
# functions fire a fatal error on mismatch.
class XophonLexer
# The model.
var reader_model: XophonReaderModel
# The input to read from.
var input: Reader is writable
# Alias to `reader_model.locator`.
private var locator: SAXLocatorImpl is noinit
init do
locator = reader_model.locator.as(not null)
end
# Last read byte.
#
# Equals `-1` on end of file or error.
private var last_char = -1
# Before end-of-line handling, was the last read byte a CARRIAGE RETURN?
private var was_cr: Bool = false
# Expect a value delimiter (`"` or `'`).
#
# If the last read byte is a delimiter, return the delimiter and
# read the next byte. Else, return `-1`.
fun expect_delimiter: Int do
if accept('"') then
return '"'.code_point
else if accept('\'') then
return '\''.code_point
else
fire_unexpected_char(". Expecting `\"` or `'`")
return -1
end
end
# Is the last read byte matches the `Char` production?
fun is_xml_char:Bool do
# TODO: Handle code points above 0x7F.
return last_char >= 32 or
last_char == 9 or
last_char == 10
end
# Push the last read byte in the specified buffer and read the next byte.
#
# If the last read byte is forbidden, fire a fatal error instead.
fun expect_xml_char(buffer: Buffer): Bool do
if is_xml_char then
buffer.chars.push(last_char.code_point)
read_char
return true
else if eof then
return fire_fatal_error("Unexpected end of file.")
else
return fire_fatal_error("Forbidden character.")
end
end
# Like `expect_xml_char`, but normalize white space and forbid `<`.
#
# SEE: The “3.3.3 Attribute-Value Normalization” section of any XML
# recommendation.
fun expect_att_value_char(buffer: Buffer): Bool do
if is_s then
buffer.chars.push(' ')
read_char
return true
else if last_char == '<'.code_point then
return fire_fatal_error("`<` is forbidden in attribute values.")
else
return expect_xml_char(buffer)
end
end
# Is the last read byte matches the `S` production?
fun is_s:Bool do
return last_char == 32 or last_char == 9 or last_char == 10
end
# Skip a `S?` token and return `true`.
fun skip_s: Bool do
while is_s do read_char
return true
end
# Accept a `S` token.
fun accept_s: Bool do
if is_s then
read_char
return skip_s
else
return false
end
end
# Expect `S`.
fun expect_s: Bool do
return (accept_s and skip_s) or fire_unexpected_char(". Expecting white space")
end
# Is the last read byte matches the `NameStartChar` production?
fun is_name_start_char: Bool do
# TODO: Handle code points above 0x7F.
return ['A'.code_point .. 'Z'.code_point].has(last_char) or
['a'.code_point .. 'z'.code_point].has(last_char) or
last_char == '_'.code_point or
last_char == ':'.code_point or
last_char > 127
end
# Is the last read byte matches the `NameChar` production?
fun is_name_char: Bool do
# TODO: Handle code points above 0x7F.
return is_name_start_char or
last_char == '-'.code_point or
last_char == '.'.code_point or
is_digit
end
# Expect a `Name` tokn.
#
# Append the parsed name to `buffer`.
fun expect_name(buffer: Buffer): Bool do
if not is_name_start_char then
return fire_unexpected_char(" at the beginning of a name")
end
buffer.chars.push(last_char.code_point)
read_char
while is_name_char do
buffer.chars.push(last_char.code_point)
read_char
end
return true
end
# Expect a `PITarget` token.
#
# Append the parsed name to `buffer`.
fun expect_pi_target(buffer: Buffer): Bool do
return expect_name(buffer) and check_pi_target(buffer)
end
# Ensure the target is not `xml` (case-insensitive).
#
# Also, fire an error if the target contains a colon.
fun check_pi_target(target: Text): Bool do
var is_invalid = target.length == 3 and
(target.chars[0] == 'X' or target.chars[0] == 'x') and
(target.chars[0] == 'M' or target.chars[0] == 'm') and
(target.chars[0] == 'L' or target.chars[0] == 'l')
if is_invalid then
return fire_fatal_error("Forbidden processing target `{target}`.")
else
if target.has(":") then
reader_model.fire_error("The processing target `{target}` contains a colon.", null)
end
return true
end
end
# Is the last read byte matches the `[0-9]` production?
fun is_digit: Bool do
return ['0'.code_point .. '9'.code_point].has(last_char)
end
# Accept a `[0-9]+` token.
fun accept_digits(buffer: Buffer): Bool do
if is_digit then
loop
buffer.chars.push(last_char.code_point)
read_char
if not is_digit then return true
end
else
return false
end
end
# Expect a `[0-9]+` token.
fun expect_digits(buffer: Buffer): Bool do
return accept_digits(buffer) or fire_unexpected_char(". Expecting a decimal digit")
end
# Is `last_char` matches the `[0-9a-fA-F]` production?
fun is_hex: Bool do
return ['0'.code_point .. '9'.code_point].has(last_char) or
['A'.code_point .. 'Z'.code_point].has(last_char) or
['a'.code_point .. 'Z'.code_point].has(last_char)
end
# Expect a `[0-9a-fA-F]+` token.
fun expect_hex(buffer: Buffer): Bool do
if is_hex then
loop
buffer.chars.push(last_char.code_point)
read_char
if not is_hex then return true
end
else
return fire_unexpected_char(". Expecting an hexadecimal digit")
end
end
# Expect `Eq`.
fun expect_eq: Bool do
return skip_s and expect('=', "") and skip_s
end
############################################################################
# General
# Read a byte and put it in `last_char`.
#
# In case of an end-of-file or an error, put -1 in `last_char`.
private fun read_char do
if locator.line_number < 0 then
locator.line_number = 1
locator.column_number = 1
else if last_char < 0 then
fire_fatal_error("Internal error: Already at the end of the file.")
return
else if last_char == '\n'.code_point then
locator.line_number += 1
locator.column_number = 1
else
locator.column_number += 1
end
var s = input.read_byte
if s < 0 then
last_char = -1
return
end
last_char = s.to_i
# XML 1.0 end-of-line handling
# Note: Regardless the XML version, any EOL defined by the
# recommandation MUST be reported as a single LINE FEED.
if was_cr and last_char == '\n'.code_point then
# EOL already reported. => Skip this byte.
s = input.read_byte
if s < 0 then
last_char = -1
else
last_char = s
end
end
was_cr = last_char == '\r'.code_point
if was_cr then
# Regardless the following byte, '\r' always introduce an EOL.
last_char = '\n'.code_point
end
end
# Is it the end of the stream?
#
# Also return `true` after a fatal error.
fun eof: Bool do return last_char < 0
# Start the lexer.
fun start do
if eof then
last_char = 0
read_char
end
end
# Close the input.
fun close do
last_char = -1
input.close
end
# Does the last read byte equal `c`?
fun is_int(c: Int): Bool do return last_char == c
# Does the last read byte equal `c`?
fun is_char(c: Char): Bool do return last_char == c.code_point
# Expect the specified byte.
fun accept_int(expected: Int): Bool do
if last_char == expected then
read_char
return true
else
return false
end
end
# Accept the specified byte.
fun accept(expected: Char): Bool do
return accept_int(expected.code_point)
end
# Ensure the last read byte is equal to `expected`.
#
# If it is, read the next byte. If not, fire a fatal error using
# `context`. `context` is the part of the message that gives the context.
# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
# `context` is `" in y"`.
#
# Return `true` if and only if the last read byte as the expected value.
fun expect_int(expected: Int, context: String): Bool do
return accept_int(expected) or
fire_unexpected_char("{context}. Expecting `{expected.code_point}`.")
end
# Ensure the last read byte is equal to `expected`.
#
# If it is, read the next byte. If not, fire a fatal error using
# `context`. `context` is the part of the message that gives the context.
# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
# `context` is `" in y"`.
#
# Return `true` if and only if the last read byte as the expected value.
fun expect(expected: Char, context: String): Bool do
return accept(expected) or
fire_unexpected_char("{context}. Expecting `{expected}`.")
end
# Ensure the last read byte and following bytes match `expected`.
#
# If it is, read one more byte. If not, fire a fatal error using
# `context`. `context` is the part of the message that gives the context.
# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
# `context` is `" in y"`.
#
# Return `true` if and only if the last read byte and following bytes
# match `expected`.
fun expect_string(expected: String, context: String): Bool do
var chars = expected.chars
var i = 0
while i < chars.length do
if not accept(chars[i]) then
if is_xml_char then
return fire_fatal_error("Unexpected " +
"`{expected.substring(0, i)}{last_char.code_point.to_s}`" +
"{context}. Expecting `{expected}`.")
else if eof then
return fire_fatal_error("Unexpected end of file{context}. " +
"Expecting `{expected}`.")
else
return fire_fatal_error("Forbidden character.")
end
end
i += 1
end
return true
end
############################################################################
# Dispatching
# Fire a fatal error about an unexpected character.
#
# Return `false`.
fun fire_unexpected_char(rest_of_message: String): Bool do
if is_xml_char then
return fire_fatal_error("Unexpected character `{last_char.code_point.to_s}`{rest_of_message}.")
else if eof then
return fire_fatal_error("Unexpected end of file{rest_of_message}.")
else
return fire_fatal_error("Forbidden character.")
end
end
# Fire a fatal error with the specified message.
#
# Return `false`.
private fun fire_fatal_error(message: String): Bool do
reader_model.fire_fatal_error(message, null)
last_char = -1
return false
end
end
lib/saxophonit/lexer.nit:16,1--405,3