SAXophoNit’s lexer

Except when noted otherwise, accept and expect functions return true on success and false on mismatch and at the end of the file. They both foward the cursor to the next byte on success, but only expect functions fire a fatal error on mismatch.

Introduced properties

fun accept(expected: Char): Bool

saxophonit :: XophonLexer :: accept

Accept the specified byte.
fun accept_digits(buffer: Buffer): Bool

saxophonit :: XophonLexer :: accept_digits

Accept a [0-9]+ token.
fun accept_int(expected: Int): Bool

saxophonit :: XophonLexer :: accept_int

Expect the specified byte.
fun accept_s: Bool

saxophonit :: XophonLexer :: accept_s

Accept a S token.
fun check_pi_target(target: Text): Bool

saxophonit :: XophonLexer :: check_pi_target

Ensure the target is not xml (case-insensitive).
fun close

saxophonit :: XophonLexer :: close

Close the input.
fun eof: Bool

saxophonit :: XophonLexer :: eof

Is it the end of the stream?
fun expect(expected: Char, context: String): Bool

saxophonit :: XophonLexer :: expect

Ensure the last read byte is equal to expected.
fun expect_att_value_char(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_att_value_char

Like expect_xml_char, but normalize white space and forbid <.
fun expect_delimiter: Int

saxophonit :: XophonLexer :: expect_delimiter

Expect a value delimiter (" or ').
fun expect_digits(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_digits

Expect a [0-9]+ token.
fun expect_eq: Bool

saxophonit :: XophonLexer :: expect_eq

Expect Eq.
fun expect_hex(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_hex

Expect a [0-9a-fA-F]+ token.
fun expect_int(expected: Int, context: String): Bool

saxophonit :: XophonLexer :: expect_int

Ensure the last read byte is equal to expected.
fun expect_name(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_name

Expect a Name tokn.
fun expect_pi_target(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_pi_target

Expect a PITarget token.
fun expect_s: Bool

saxophonit :: XophonLexer :: expect_s

Expect S.
fun expect_string(expected: String, context: String): Bool

saxophonit :: XophonLexer :: expect_string

Ensure the last read byte and following bytes match expected.
fun expect_xml_char(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_xml_char

Push the last read byte in the specified buffer and read the next byte.
fun fire_unexpected_char(rest_of_message: String): Bool

saxophonit :: XophonLexer :: fire_unexpected_char

Fire a fatal error about an unexpected character.
fun input: Reader

saxophonit :: XophonLexer :: input

The input to read from.
fun input=(input: Reader)

saxophonit :: XophonLexer :: input=

The input to read from.
fun is_char(c: Char): Bool

saxophonit :: XophonLexer :: is_char

Does the last read byte equal c?
fun is_digit: Bool

saxophonit :: XophonLexer :: is_digit

Is the last read byte matches the [0-9] production?
fun is_hex: Bool

saxophonit :: XophonLexer :: is_hex

Is last_char matches the [0-9a-fA-F] production?
fun is_int(c: Int): Bool

saxophonit :: XophonLexer :: is_int

Does the last read byte equal c?
fun is_name_char: Bool

saxophonit :: XophonLexer :: is_name_char

Is the last read byte matches the NameChar production?
fun is_name_start_char: Bool

saxophonit :: XophonLexer :: is_name_start_char

Is the last read byte matches the NameStartChar production?
fun is_s: Bool

saxophonit :: XophonLexer :: is_s

Is the last read byte matches the S production?
fun is_xml_char: Bool

saxophonit :: XophonLexer :: is_xml_char

Is the last read byte matches the Char production?
protected fun reader_model=(reader_model: XophonReaderModel)

saxophonit :: XophonLexer :: reader_model=

The model.
fun skip_s: Bool

saxophonit :: XophonLexer :: skip_s

Skip a S? token and return true.
fun start

saxophonit :: XophonLexer :: start

Start the lexer.

Redefined properties

redef type SELF: XophonLexer

saxophonit $ XophonLexer :: SELF

Type of this instance, automatically specialized in every class
redef init init

saxophonit $ XophonLexer :: init

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
fun accept(expected: Char): Bool

saxophonit :: XophonLexer :: accept

Accept the specified byte.
fun accept_digits(buffer: Buffer): Bool

saxophonit :: XophonLexer :: accept_digits

Accept a [0-9]+ token.
fun accept_int(expected: Int): Bool

saxophonit :: XophonLexer :: accept_int

Expect the specified byte.
fun accept_s: Bool

saxophonit :: XophonLexer :: accept_s

Accept a S token.
fun check_pi_target(target: Text): Bool

saxophonit :: XophonLexer :: check_pi_target

Ensure the target is not xml (case-insensitive).
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun close

saxophonit :: XophonLexer :: close

Close the input.
fun eof: Bool

saxophonit :: XophonLexer :: eof

Is it the end of the stream?
fun expect(expected: Char, context: String): Bool

saxophonit :: XophonLexer :: expect

Ensure the last read byte is equal to expected.
fun expect_att_value_char(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_att_value_char

Like expect_xml_char, but normalize white space and forbid <.
fun expect_delimiter: Int

saxophonit :: XophonLexer :: expect_delimiter

Expect a value delimiter (" or ').
fun expect_digits(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_digits

Expect a [0-9]+ token.
fun expect_eq: Bool

saxophonit :: XophonLexer :: expect_eq

Expect Eq.
fun expect_hex(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_hex

Expect a [0-9a-fA-F]+ token.
fun expect_int(expected: Int, context: String): Bool

saxophonit :: XophonLexer :: expect_int

Ensure the last read byte is equal to expected.
fun expect_name(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_name

Expect a Name tokn.
fun expect_pi_target(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_pi_target

Expect a PITarget token.
fun expect_s: Bool

saxophonit :: XophonLexer :: expect_s

Expect S.
fun expect_string(expected: String, context: String): Bool

saxophonit :: XophonLexer :: expect_string

Ensure the last read byte and following bytes match expected.
fun expect_xml_char(buffer: Buffer): Bool

saxophonit :: XophonLexer :: expect_xml_char

Push the last read byte in the specified buffer and read the next byte.
fun fire_unexpected_char(rest_of_message: String): Bool

saxophonit :: XophonLexer :: fire_unexpected_char

Fire a fatal error about an unexpected character.
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
init init

core :: Object :: init

fun input: Reader

saxophonit :: XophonLexer :: input

The input to read from.
fun input=(input: Reader)

saxophonit :: XophonLexer :: input=

The input to read from.
fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
fun is_char(c: Char): Bool

saxophonit :: XophonLexer :: is_char

Does the last read byte equal c?
fun is_digit: Bool

saxophonit :: XophonLexer :: is_digit

Is the last read byte matches the [0-9] production?
fun is_hex: Bool

saxophonit :: XophonLexer :: is_hex

Is last_char matches the [0-9a-fA-F] production?
fun is_int(c: Int): Bool

saxophonit :: XophonLexer :: is_int

Does the last read byte equal c?
fun is_name_char: Bool

saxophonit :: XophonLexer :: is_name_char

Is the last read byte matches the NameChar production?
fun is_name_start_char: Bool

saxophonit :: XophonLexer :: is_name_start_char

Is the last read byte matches the NameStartChar production?
fun is_s: Bool

saxophonit :: XophonLexer :: is_s

Is the last read byte matches the S production?
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
fun is_xml_char: Bool

saxophonit :: XophonLexer :: is_xml_char

Is the last read byte matches the Char production?
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
protected fun reader_model=(reader_model: XophonReaderModel)

saxophonit :: XophonLexer :: reader_model=

The model.
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value use for serialization
fun skip_s: Bool

saxophonit :: XophonLexer :: skip_s

Skip a S? token and return true.
fun start

saxophonit :: XophonLexer :: start

Start the lexer.
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram saxophonit::XophonLexer XophonLexer core::Object Object saxophonit::XophonLexer->core::Object

Parents

interface Object

core :: Object

The root of the class hierarchy.

Class definitions

saxophonit $ XophonLexer
# SAXophoNit’s lexer
#
# Except when noted otherwise, `accept` and `expect` functions return `true` on
# success and `false` on mismatch and at the end of the file.
# They both foward the cursor to the next byte on success, but only `expect`
# functions fire a fatal error on mismatch.
class XophonLexer

	# The model.
	var reader_model: XophonReaderModel

	# The input to read from.
	var input: Reader is writable

	# Alias to `reader_model.locator`.
	private var locator: SAXLocatorImpl is noinit

	init do
		locator = reader_model.locator.as(not null)
	end

	# Last read byte.
	#
	# Equals `-1` on end of file or error.
	private var last_char = -1

	# Before end-of-line handling, was the last read byte a CARRIAGE RETURN?
	private var was_cr: Bool = false


	# Expect a value delimiter (`"` or `'`).
	#
	# If the last read byte is a delimiter, return the delimiter and
	# read the next byte. Else, return `-1`.
	fun expect_delimiter: Int do
		if accept('"') then
			return '"'.code_point
		else if accept('\'') then
			return '\''.code_point
		else
			fire_unexpected_char(". Expecting `\"` or `'`")
			return -1
		end
	end

	# Is the last read byte matches the `Char` production?
	fun is_xml_char:Bool do
		# TODO: Handle code points above 0x7F.
		return last_char >= 32 or
				last_char == 9 or
				last_char == 10
	end

	# Push the last read byte in the specified buffer and read the next byte.
	#
	# If the last read byte is forbidden, fire a fatal error instead.
	fun expect_xml_char(buffer: Buffer): Bool do
		if is_xml_char then
			buffer.chars.push(last_char.code_point)
			read_char
			return true
		else if eof then
			return fire_fatal_error("Unexpected end of file.")
		else
			return fire_fatal_error("Forbidden character.")
		end
	end


	# Like `expect_xml_char`, but normalize white space and forbid `<`.
	#
	# SEE: The “3.3.3 Attribute-Value Normalization” section of any XML
	# recommendation.
	fun expect_att_value_char(buffer: Buffer): Bool do
		if is_s then
			buffer.chars.push(' ')
			read_char
			return true
		else if last_char == '<'.code_point then
			return fire_fatal_error("`<` is forbidden in attribute values.")
		else
			return expect_xml_char(buffer)
		end
	end

	# Is the last read byte matches the `S` production?
	fun is_s:Bool do
		return last_char == 32 or last_char == 9 or last_char == 10
	end

	# Skip a `S?` token and return `true`.
	fun skip_s: Bool do
		while is_s do read_char
		return true
	end

	# Accept a `S` token.
	fun accept_s: Bool do
		if is_s then
			read_char
			return skip_s
		else
			return false
		end
	end

	# Expect `S`.
	fun expect_s: Bool do
		return (accept_s and skip_s) or fire_unexpected_char(". Expecting white space")
	end

	# Is the last read byte matches the `NameStartChar` production?
	fun is_name_start_char: Bool do
		# TODO: Handle code points above 0x7F.
		return ['A'.code_point .. 'Z'.code_point].has(last_char) or
				['a'.code_point .. 'z'.code_point].has(last_char) or
				last_char == '_'.code_point or
				last_char == ':'.code_point or
				last_char > 127
	end

	# Is the last read byte matches the `NameChar` production?
	fun is_name_char: Bool do
		# TODO: Handle code points above 0x7F.
		return is_name_start_char or
				last_char == '-'.code_point or
				last_char == '.'.code_point or
				is_digit
	end

	# Expect a `Name` tokn.
	#
	# Append the parsed name to `buffer`.
	fun expect_name(buffer: Buffer): Bool do
		if not is_name_start_char then
			return fire_unexpected_char(" at the beginning of a name")
		end
		buffer.chars.push(last_char.code_point)
		read_char
		while is_name_char do
			buffer.chars.push(last_char.code_point)
			read_char
		end
		return true
	end

	# Expect a `PITarget` token.
	#
	# Append the parsed name to `buffer`.
	fun expect_pi_target(buffer: Buffer): Bool do
		return expect_name(buffer) and check_pi_target(buffer)
	end

	# Ensure the target is not `xml` (case-insensitive).
	#
	# Also, fire an error if the target contains a colon.
	fun check_pi_target(target: Text): Bool do
		var is_invalid = target.length == 3 and
				(target.chars[0] == 'X' or target.chars[0] == 'x') and
				(target.chars[0] == 'M' or target.chars[0] == 'm') and
				(target.chars[0] == 'L' or target.chars[0] == 'l')

		if is_invalid then
			return fire_fatal_error("Forbidden processing target `{target}`.")
		else
			if target.has(":") then
				reader_model.fire_error("The processing target `{target}` contains a colon.", null)
			end
			return true
		end
	end

	# Is the last read byte matches the `[0-9]` production?
	fun is_digit: Bool do
		return ['0'.code_point .. '9'.code_point].has(last_char)
	end

	# Accept a `[0-9]+` token.
	fun accept_digits(buffer: Buffer): Bool do
		if is_digit then
			loop
				buffer.chars.push(last_char.code_point)
				read_char
				if not is_digit then return true
			end
		else
			return false
		end
	end

	# Expect a `[0-9]+` token.
	fun expect_digits(buffer: Buffer): Bool do
		return accept_digits(buffer) or fire_unexpected_char(". Expecting a decimal digit")
	end

	# Is `last_char` matches the `[0-9a-fA-F]` production?
	fun is_hex: Bool do
		return ['0'.code_point .. '9'.code_point].has(last_char) or
				['A'.code_point .. 'Z'.code_point].has(last_char) or
				['a'.code_point .. 'Z'.code_point].has(last_char)
	end

	# Expect a `[0-9a-fA-F]+` token.
	fun expect_hex(buffer: Buffer): Bool do
		if is_hex then
			loop
				buffer.chars.push(last_char.code_point)
				read_char
				if not is_hex then return true
			end
		else
			return fire_unexpected_char(". Expecting an hexadecimal digit")
		end
	end

	# Expect `Eq`.
	fun expect_eq: Bool do
		return skip_s and expect('=', "") and skip_s
	end


	############################################################################
	# General

	# Read a byte and put it in `last_char`.
	#
	# In case of an end-of-file or an error, put -1 in `last_char`.
	private fun read_char do
		if locator.line_number < 0 then
			locator.line_number = 1
			locator.column_number = 1
		else if last_char < 0 then
			fire_fatal_error("Internal error: Already at the end of the file.")
			return
		else if last_char == '\n'.code_point then
			locator.line_number += 1
			locator.column_number = 1
		else
			locator.column_number += 1
		end

		var s = input.read_byte
		if s < 0 then
			last_char = -1
			return
		end
		last_char = s.to_i

		# XML 1.0 end-of-line handling
		# Note: Regardless the XML version, any EOL defined by the
		# recommandation MUST be reported as a single LINE FEED.
		if was_cr and last_char == '\n'.code_point then
			# EOL already reported. => Skip this byte.
			s = input.read_byte
			if s < 0 then
				last_char = -1
			else
				last_char = s
			end
		end
		was_cr = last_char == '\r'.code_point
		if was_cr then
			# Regardless the following byte, '\r' always introduce an EOL.
			last_char = '\n'.code_point
		end
	end

	# Is it the end of the stream?
	#
	# Also return `true` after a fatal error.
	fun eof: Bool do return last_char < 0

	# Start the lexer.
	fun start do
		if eof then
			last_char = 0
			read_char
		end
	end

	# Close the input.
	fun close do
		last_char = -1
		input.close
	end

	# Does the last read byte equal `c`?
	fun is_int(c: Int): Bool do return last_char == c

	# Does the last read byte equal `c`?
	fun is_char(c: Char): Bool do return last_char == c.code_point

	# Expect the specified byte.
	fun accept_int(expected: Int): Bool do
		if last_char == expected then
			read_char
			return true
		else
			return false
		end
	end

	# Accept the specified byte.
	fun accept(expected: Char): Bool do
		return accept_int(expected.code_point)
	end

	# Ensure the last read byte is equal to `expected`.
	#
	# If it is, read the next byte. If not, fire a fatal error using
	# `context`. `context` is the part of the message that gives the context.
	# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
	# `context` is `" in y"`.
	#
	# Return `true` if and only if the last read byte as the expected value.
	fun expect_int(expected: Int, context: String): Bool do
		return accept_int(expected) or
				fire_unexpected_char("{context}. Expecting `{expected.code_point}`.")
	end

	# Ensure the last read byte is equal to `expected`.
	#
	# If it is, read the next byte. If not, fire a fatal error using
	# `context`. `context` is the part of the message that gives the context.
	# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
	# `context` is `" in y"`.
	#
	# Return `true` if and only if the last read byte as the expected value.
	fun expect(expected: Char, context: String): Bool do
		return accept(expected) or
				fire_unexpected_char("{context}. Expecting `{expected}`.")
	end

	# Ensure the last read byte and following bytes match `expected`.
	#
	# If it is, read one more byte. If not, fire a fatal error using
	# `context`. `context` is the part of the message that gives the context.
	# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
	# `context` is `" in y"`.
	#
	# Return `true` if and only if the last read byte and following bytes
	# match `expected`.
	fun expect_string(expected: String, context: String): Bool do
		var chars = expected.chars
		var i = 0

		while i < chars.length do
			if not accept(chars[i]) then
				if is_xml_char then
					return fire_fatal_error("Unexpected " +
							"`{expected.substring(0, i)}{last_char.code_point.to_s}`" +
							"{context}. Expecting `{expected}`.")
				else if eof then
					return fire_fatal_error("Unexpected end of file{context}. " +
							"Expecting `{expected}`.")
				else
					return fire_fatal_error("Forbidden character.")
				end
			end
			i += 1
		end
		return true
	end


	############################################################################
	# Dispatching

	# Fire a fatal error about an unexpected character.
	#
	# Return `false`.
	fun fire_unexpected_char(rest_of_message: String): Bool do
		if is_xml_char then
			return fire_fatal_error("Unexpected character `{last_char.code_point.to_s}`{rest_of_message}.")
		else if eof then
			return fire_fatal_error("Unexpected end of file{rest_of_message}.")
		else
			return fire_fatal_error("Forbidden character.")
		end
	end

	# Fire a fatal error with the specified message.
	#
	# Return `false`.
	private fun fire_fatal_error(message: String): Bool do
		reader_model.fire_fatal_error(message, null)
		last_char = -1
		return false
	end
end
lib/saxophonit/lexer.nit:16,1--405,3