Property definitions

saxophonit $ XophonLexer :: defaultinit
# SAXophoNit’s lexer
#
# Except when noted otherwise, `accept` and `expect` functions return `true` on
# success and `false` on mismatch and at the end of the file.
# They both foward the cursor to the next byte on success, but only `expect`
# functions fire a fatal error on mismatch.
class XophonLexer

	# The model.
	var reader_model: XophonReaderModel

	# The input to read from.
	var input: Reader is writable

	# Alias to `reader_model.locator`.
	private var locator: SAXLocatorImpl is noinit

	init do
		locator = reader_model.locator.as(not null)
	end

	# Last read byte.
	#
	# Equals `-1` on end of file or error.
	private var last_char = -1

	# Before end-of-line handling, was the last read byte a CARRIAGE RETURN?
	private var was_cr: Bool = false


	# Expect a value delimiter (`"` or `'`).
	#
	# If the last read byte is a delimiter, return the delimiter and
	# read the next byte. Else, return `-1`.
	fun expect_delimiter: Int do
		if accept('"') then
			return '"'.code_point
		else if accept('\'') then
			return '\''.code_point
		else
			fire_unexpected_char(". Expecting `\"` or `'`")
			return -1
		end
	end

	# Is the last read byte matches the `Char` production?
	fun is_xml_char:Bool do
		# TODO: Handle code points above 0x7F.
		return last_char >= 32 or
				last_char == 9 or
				last_char == 10
	end

	# Push the last read byte in the specified buffer and read the next byte.
	#
	# If the last read byte is forbidden, fire a fatal error instead.
	fun expect_xml_char(buffer: Buffer): Bool do
		if is_xml_char then
			buffer.chars.push(last_char.code_point)
			read_char
			return true
		else if eof then
			return fire_fatal_error("Unexpected end of file.")
		else
			return fire_fatal_error("Forbidden character.")
		end
	end


	# Like `expect_xml_char`, but normalize white space and forbid `<`.
	#
	# SEE: The “3.3.3 Attribute-Value Normalization” section of any XML
	# recommendation.
	fun expect_att_value_char(buffer: Buffer): Bool do
		if is_s then
			buffer.chars.push(' ')
			read_char
			return true
		else if last_char == '<'.code_point then
			return fire_fatal_error("`<` is forbidden in attribute values.")
		else
			return expect_xml_char(buffer)
		end
	end

	# Is the last read byte matches the `S` production?
	fun is_s:Bool do
		return last_char == 32 or last_char == 9 or last_char == 10
	end

	# Skip a `S?` token and return `true`.
	fun skip_s: Bool do
		while is_s do read_char
		return true
	end

	# Accept a `S` token.
	fun accept_s: Bool do
		if is_s then
			read_char
			return skip_s
		else
			return false
		end
	end

	# Expect `S`.
	fun expect_s: Bool do
		return (accept_s and skip_s) or fire_unexpected_char(". Expecting white space")
	end

	# Is the last read byte matches the `NameStartChar` production?
	fun is_name_start_char: Bool do
		# TODO: Handle code points above 0x7F.
		return ['A'.code_point .. 'Z'.code_point].has(last_char) or
				['a'.code_point .. 'z'.code_point].has(last_char) or
				last_char == '_'.code_point or
				last_char == ':'.code_point or
				last_char > 127
	end

	# Is the last read byte matches the `NameChar` production?
	fun is_name_char: Bool do
		# TODO: Handle code points above 0x7F.
		return is_name_start_char or
				last_char == '-'.code_point or
				last_char == '.'.code_point or
				is_digit
	end

	# Expect a `Name` tokn.
	#
	# Append the parsed name to `buffer`.
	fun expect_name(buffer: Buffer): Bool do
		if not is_name_start_char then
			return fire_unexpected_char(" at the beginning of a name")
		end
		buffer.chars.push(last_char.code_point)
		read_char
		while is_name_char do
			buffer.chars.push(last_char.code_point)
			read_char
		end
		return true
	end

	# Expect a `PITarget` token.
	#
	# Append the parsed name to `buffer`.
	fun expect_pi_target(buffer: Buffer): Bool do
		return expect_name(buffer) and check_pi_target(buffer)
	end

	# Ensure the target is not `xml` (case-insensitive).
	#
	# Also, fire an error if the target contains a colon.
	fun check_pi_target(target: Text): Bool do
		var is_invalid = target.length == 3 and
				(target.chars[0] == 'X' or target.chars[0] == 'x') and
				(target.chars[0] == 'M' or target.chars[0] == 'm') and
				(target.chars[0] == 'L' or target.chars[0] == 'l')

		if is_invalid then
			return fire_fatal_error("Forbidden processing target `{target}`.")
		else
			if target.has(":") then
				reader_model.fire_error("The processing target `{target}` contains a colon.", null)
			end
			return true
		end
	end

	# Is the last read byte matches the `[0-9]` production?
	fun is_digit: Bool do
		return ['0'.code_point .. '9'.code_point].has(last_char)
	end

	# Accept a `[0-9]+` token.
	fun accept_digits(buffer: Buffer): Bool do
		if is_digit then
			loop
				buffer.chars.push(last_char.code_point)
				read_char
				if not is_digit then return true
			end
		else
			return false
		end
	end

	# Expect a `[0-9]+` token.
	fun expect_digits(buffer: Buffer): Bool do
		return accept_digits(buffer) or fire_unexpected_char(". Expecting a decimal digit")
	end

	# Is `last_char` matches the `[0-9a-fA-F]` production?
	fun is_hex: Bool do
		return ['0'.code_point .. '9'.code_point].has(last_char) or
				['A'.code_point .. 'Z'.code_point].has(last_char) or
				['a'.code_point .. 'Z'.code_point].has(last_char)
	end

	# Expect a `[0-9a-fA-F]+` token.
	fun expect_hex(buffer: Buffer): Bool do
		if is_hex then
			loop
				buffer.chars.push(last_char.code_point)
				read_char
				if not is_hex then return true
			end
		else
			return fire_unexpected_char(". Expecting an hexadecimal digit")
		end
	end

	# Expect `Eq`.
	fun expect_eq: Bool do
		return skip_s and expect('=', "") and skip_s
	end


	############################################################################
	# General

	# Read a byte and put it in `last_char`.
	#
	# In case of an end-of-file or an error, put -1 in `last_char`.
	private fun read_char do
		if locator.line_number < 0 then
			locator.line_number = 1
			locator.column_number = 1
		else if last_char < 0 then
			fire_fatal_error("Internal error: Already at the end of the file.")
			return
		else if last_char == '\n'.code_point then
			locator.line_number += 1
			locator.column_number = 1
		else
			locator.column_number += 1
		end

		var s = input.read_byte
		if s < 0 then
			last_char = -1
			return
		end
		last_char = s.to_i

		# XML 1.0 end-of-line handling
		# Note: Regardless the XML version, any EOL defined by the
		# recommandation MUST be reported as a single LINE FEED.
		if was_cr and last_char == '\n'.code_point then
			# EOL already reported. => Skip this byte.
			s = input.read_byte
			if s < 0 then
				last_char = -1
			else
				last_char = s
			end
		end
		was_cr = last_char == '\r'.code_point
		if was_cr then
			# Regardless the following byte, '\r' always introduce an EOL.
			last_char = '\n'.code_point
		end
	end

	# Is it the end of the stream?
	#
	# Also return `true` after a fatal error.
	fun eof: Bool do return last_char < 0

	# Start the lexer.
	fun start do
		if eof then
			last_char = 0
			read_char
		end
	end

	# Close the input.
	fun close do
		last_char = -1
		input.close
	end

	# Does the last read byte equal `c`?
	fun is_int(c: Int): Bool do return last_char == c

	# Does the last read byte equal `c`?
	fun is_char(c: Char): Bool do return last_char == c.code_point

	# Expect the specified byte.
	fun accept_int(expected: Int): Bool do
		if last_char == expected then
			read_char
			return true
		else
			return false
		end
	end

	# Accept the specified byte.
	fun accept(expected: Char): Bool do
		return accept_int(expected.code_point)
	end

	# Ensure the last read byte is equal to `expected`.
	#
	# If it is, read the next byte. If not, fire a fatal error using
	# `context`. `context` is the part of the message that gives the context.
	# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
	# `context` is `" in y"`.
	#
	# Return `true` if and only if the last read byte as the expected value.
	fun expect_int(expected: Int, context: String): Bool do
		return accept_int(expected) or
				fire_unexpected_char("{context}. Expecting `{expected.code_point}`.")
	end

	# Ensure the last read byte is equal to `expected`.
	#
	# If it is, read the next byte. If not, fire a fatal error using
	# `context`. `context` is the part of the message that gives the context.
	# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
	# `context` is `" in y"`.
	#
	# Return `true` if and only if the last read byte as the expected value.
	fun expect(expected: Char, context: String): Bool do
		return accept(expected) or
				fire_unexpected_char("{context}. Expecting `{expected}`.")
	end

	# Ensure the last read byte and following bytes match `expected`.
	#
	# If it is, read one more byte. If not, fire a fatal error using
	# `context`. `context` is the part of the message that gives the context.
	# For example, in `Unexpected ``x`` in y. Expecting ``z``.`, the value of
	# `context` is `" in y"`.
	#
	# Return `true` if and only if the last read byte and following bytes
	# match `expected`.
	fun expect_string(expected: String, context: String): Bool do
		var chars = expected.chars
		var i = 0

		while i < chars.length do
			if not accept(chars[i]) then
				if is_xml_char then
					return fire_fatal_error("Unexpected " +
							"`{expected.substring(0, i)}{last_char.code_point.to_s}`" +
							"{context}. Expecting `{expected}`.")
				else if eof then
					return fire_fatal_error("Unexpected end of file{context}. " +
							"Expecting `{expected}`.")
				else
					return fire_fatal_error("Forbidden character.")
				end
			end
			i += 1
		end
		return true
	end


	############################################################################
	# Dispatching

	# Fire a fatal error about an unexpected character.
	#
	# Return `false`.
	fun fire_unexpected_char(rest_of_message: String): Bool do
		if is_xml_char then
			return fire_fatal_error("Unexpected character `{last_char.code_point.to_s}`{rest_of_message}.")
		else if eof then
			return fire_fatal_error("Unexpected end of file{rest_of_message}.")
		else
			return fire_fatal_error("Forbidden character.")
		end
	end

	# Fire a fatal error with the specified message.
	#
	# Return `false`.
	private fun fire_fatal_error(message: String): Bool do
		reader_model.fire_fatal_error(message, null)
		last_char = -1
		return false
	end
end
lib/saxophonit/lexer.nit:16,1--405,3