Provides XML parsing facilities

Introduced properties

fun parse_document: XMLEntity

dom :: XMLProcessor :: parse_document

Parses a full XML document

Redefined properties

redef type SELF: XMLProcessor

dom $ XMLProcessor :: SELF

Type of this instance, automatically specialized in every class

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun current_location: Location

parser_base :: StringProcessor :: current_location

Gives the current location in the src
protected fun eof: Bool

parser_base :: StringProcessor :: eof

Is pos at the end of the source?
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
protected fun hot_location: Location

parser_base :: StringProcessor :: hot_location

Returns the current location as a Location object
protected fun ignore_until(s: String): Int

parser_base :: StringProcessor :: ignore_until

Reads characters until pattern s is found
protected fun ignore_until_whitespace: Int

parser_base :: StringProcessor :: ignore_until_whitespace

Ignores any printable character until a whitespace is encountered
protected fun ignore_until_whitespace_or_comment: Int

parser_base :: StringProcessor :: ignore_until_whitespace_or_comment

Advance pos until a whitespace or # is encountered
protected fun ignore_whitespaces

parser_base :: StringProcessor :: ignore_whitespaces

Advances in src until a non-whitespace character is encountered
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
protected fun len: Int

parser_base :: StringProcessor :: len

Length of the source document
protected fun len=(len: Int)

parser_base :: StringProcessor :: len=

Length of the source document
protected fun line: Int

parser_base :: StringProcessor :: line

Current line in src
protected fun line=(line: Int)

parser_base :: StringProcessor :: line=

Current line in src
protected fun line_offset: Int

parser_base :: StringProcessor :: line_offset

Offset in the current line
protected fun line_start: Int

parser_base :: StringProcessor :: line_start

Position at which current line started
protected fun line_start=(line_start: Int)

parser_base :: StringProcessor :: line_start=

Position at which current line started
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun parse_document: XMLEntity

dom :: XMLProcessor :: parse_document

Parses a full XML document
protected fun pos: Int

parser_base :: StringProcessor :: pos

Current position in src
protected fun pos=(pos: Int)

parser_base :: StringProcessor :: pos=

Current position in src
protected fun read_number: Float

parser_base :: StringProcessor :: read_number

Read a token and parse it as a Float
protected fun read_token: String

parser_base :: StringProcessor :: read_token

Read a single token after skipping preceding whitespaces
protected fun read_until_eol_or_comment: String

parser_base :: StringProcessor :: read_until_eol_or_comment

Advance pos until the next end of line or a #
protected fun read_vec3: Vec3

parser_base :: StringProcessor :: read_vec3

Read 2 or 3 numbers and return them as a Vec3
protected fun read_vec4: Vec4

parser_base :: StringProcessor :: read_vec4

Read 3 or 4 numbers and return them as a Vec4
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
protected fun skip_eol

parser_base :: StringProcessor :: skip_eol

Advance pos to skip the next end of line
protected fun src: String

parser_base :: StringProcessor :: src

Source document to parse
protected fun src=(src: String)

parser_base :: StringProcessor :: src=

Source document to parse
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram dom::XMLProcessor XMLProcessor parser_base::StringProcessor StringProcessor dom::XMLProcessor->parser_base::StringProcessor core::Object Object parser_base::StringProcessor->core::Object ...core::Object ... ...core::Object->core::Object

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

class StringProcessor

parser_base :: StringProcessor

Basic facilities for common parser operations on String sources

Class definitions

dom $ XMLProcessor
# Provides XML parsing facilities
class XMLProcessor
	super StringProcessor

	# Parses a full XML document
	#
	# Returns the `XMLDocument` to which every top-level entity is attached.
	# An `XMLError` is returned instead as soon as one of these is met:
	#
	# * a tag reader reports an error
	# * an end tag appears while no start tag is open
	# * an end tag does not match the innermost open start tag
	# * a start tag is still open when the end of `src` is reached
	fun parse_document: XMLEntity do
		# Start tags still waiting for their end tag, innermost last
		var stack = new Array[XMLStartTag]
		var doc = new XMLDocument
		loop
			ignore_whitespaces
			if pos >= src.length then break
			if src[pos] == '<' then
				var tag = read_tag
				if tag isa XMLStartTag then
					if stack.is_empty then
						tag.parent = doc
					else
						var st_last = stack.last
						tag.parent = st_last
					end
					stack.push tag
				else if tag isa XMLEndTag then
					if stack.is_empty then
						return new XMLError(tag.location, "Missing matching tag for `{tag.tag_name}`")
					end
					var st_last = stack.last
					if tag.tag_name == st_last.tag_name then
						# Link both halves of the element together
						st_last.matching = tag
						tag.matching = st_last
						stack.pop
					else
						# The innermost open tag is the one left without a match
						var miss = stack.pop
						return new XMLError(miss.location, "Missing matching tag for `{miss.tag_name}`")
					end
				else if tag isa XMLError then
					return tag
				else
					# Any other entity is attached to the current parent
					if stack.is_empty then
						tag.parent = doc
					else
						tag.parent = stack.last
					end
				end
			else
				# Character data: everything up to the next `<`
				var st = pos
				# Record the location before consuming the text so that it
				# designates the start of the data, as is done for tags
				var loc = new Location(line, line_offset)
				var end_pc = ignore_until("<")
				if end_pc == -1 then
					# No tag follows: the data runs up to the end of `src`.
					# `pos` is presumably already there; setting it guarantees
					# that the loop terminates.
					end_pc = src.length
					pos = end_pc
				end
				var pc = new PCDATA(loc, src.substring(st, end_pc - st).trim)
				if stack.is_empty then
					pc.parent = doc
				else
					pc.parent = stack.last
				end
			end
		end
		if not stack.is_empty then
			var miss = stack.pop
			return new XMLError(miss.location, "Missing matching tag for `{miss.tag_name}`")
		end
		return doc
	end

	# Reads the tag that begins at the current position of `src`
	#
	# The character found right after `<` selects the reader:
	# `!` for special tags, `?` for prolog tags, `/` for end tags,
	# and anything else for start tags.
	#
	# In case of error, returns a `XMLError`
	private fun read_tag: XMLEntity do
		var start = new Location(line, line_offset)
		var first = src[pos]
		if first != '<' then return new XMLError(start, "Expected start of tag, got `{first}`")
		pos += 1
		if pos >= src.length then return new XMLError(start, "Malformed tag")
		var kind = src[pos]
		if kind == '!' then return read_special_tag(start)
		if kind == '?' then return read_prolog_tag(start)
		if kind == '/' then return read_end_tag(start)
		return read_start_tag(start)
	end

	# Reads a Special tag (starting with <!)
	#
	# On entry `pos` is on the `!`. Depending on what follows, the result is:
	#
	# * a `XMLCommentTag` for `<!-- ... -->`
	# * a `CDATA` for `<![CDATA[ ... ]]>`
	# * the result of `parse_doctype` for `<!DOCTYPE ...>`
	# * a `XMLSpecialTag` holding everything up to the next `>` otherwise
	#
	# In case of error, returns a `XMLError`
	private fun read_special_tag(st_loc: Location): XMLEntity do
		var srclen = src.length
		pos += 1
		if (pos + 2) >= srclen then return new XMLError(st_loc, "Unexpected EOF on start of Special tag")
		if src[pos] == '-' and src[pos + 1] == '-' then
			pos += 2
			var comst = pos
			var endcom = ignore_until("-->")
			if endcom == -1 then return new XMLError(st_loc, "Malformed comment")
			pos += 3
			# `endcom` is the index of the first `-` of the terminator, so it
			# is excluded from the content (same convention as CDATA below).
			# NOTE(review): confirm that nothing relied on the extra trailing
			# `-` previously kept in the comment content.
			return new XMLCommentTag(st_loc, src.substring(comst, endcom - comst))
		end
		var st = pos
		if srclen - pos >= 7 then
			var spe_type = src.substring(pos, 7)
			if spe_type == "[CDATA[" then
				pos += 7
				var cdst = pos
				var cdend = ignore_until("]]>")
				# Test the search result itself: a block properly closed at the
				# very end of `src` is complete and must not be rejected
				if cdend == -1 then return new XMLError(st_loc, "Unfinished CDATA block")
				pos += 3
				return new CDATA(st_loc, src.substring(cdst, cdend - cdst))
			else if spe_type == "DOCTYPE" then
				pos += 7
				return parse_doctype(st_loc)
			end
		end
		var end_spec = ignore_until(">")
		# Without a closing `>` there is no tag content to extract
		if end_spec == -1 then return new XMLError(st_loc, "Unfinished Special tag")
		pos += 1
		return new XMLSpecialTag(st_loc, src.substring(st, end_spec - st))
	end

	# Parse a Doctype declaration tag
	#
	# On entry `pos` is right after the `DOCTYPE` keyword.
	# The whitespace-separated elements found before the closing `>` are
	# collected, an internal subset (`[...]`) being kept as a single element,
	# and joined with a space to form the content of the `XMLDoctypeTag`.
	#
	# In case of error, returns a `XMLError`
	private fun parse_doctype(st_loc: Location): XMLEntity do
		var elemts = new Array[String]
		var srclen = src.length
		loop
			ignore_whitespaces
			if pos >= srclen then return new XMLError(st_loc, "Malformed doctype")
			var c = src[pos]
			# TODO: Properly support internal DOCTYPE definitions
			if c == '[' then
				var intern_st = pos
				var intern_end = ignore_until("]")
				if intern_end == -1 then return new XMLError(st_loc, "Unfinished internal doctype declaration")
				pos += 1
				# Both brackets are kept in the stored element
				elemts.push src.substring(intern_st, intern_end - intern_st + 1)
				continue
			end
			var elm_st = pos
			while pos < srclen and not src[pos].is_whitespace and src[pos] != '>' do pos += 1
			if pos >= srclen then return new XMLError(st_loc, "Malformed doctype")
			# Keep every non-empty element, including one-character names
			if pos > elm_st then
				var str = src.substring(elm_st, pos - elm_st)
				elemts.push str
			end
			if src[pos] == '>' then
				pos += 1
				return new XMLDoctypeTag(st_loc, "DOCTYPE", elemts.join(" "))
			end
		end
	end

	# Reads a Prolog or Processing Instruction tag (starting with <?)
	#
	# On entry `pos` is on the `?`.
	# A tag named `xml` is read as a `XMLPrologTag` with its attributes;
	# any other name gives a `XMLProcessingInstructionTag` whose content is
	# the text between the name and the closing `?>`.
	#
	# In case of error, returns a `XMLError`
	private fun read_prolog_tag(st_loc: Location): XMLEntity do
		var srclen = src.length
		pos += 1
		if pos >= srclen then return new XMLError(st_loc, "Invalid start of prolog")
		var tag_name = parse_tag_name(['<', '>'])
		# The name may run up to the end of `src`: report it instead of
		# reading past the last character
		if pos >= srclen then return new XMLError(st_loc, "Malformed prolog tag")
		var c = src[pos]
		if c == '<' or c == '>' then return new XMLError(st_loc ,"Unexpected character `{c}` in prolog declaration")
		if tag_name == "xml" then
			var args = parse_args(['?'])
			for i in args do
				if i isa BadXMLAttribute then return new XMLError(i.location, i.name)
			end
			# `parse_args` stopped on a `?`; make sure a `>` can follow it
			if pos + 1 < srclen and src[pos] == '?' and src[pos + 1] == '>' then
				pos += 2
				return new XMLPrologTag(st_loc, tag_name, args)
			end
		else
			var cont_st = pos
			var cont_end = ignore_until("?>")
			if cont_end == -1 then
				pos += 2
				return new XMLError(st_loc, "Malformed Processing Instruction tag")
			end
			pos += 2
			return new XMLProcessingInstructionTag(st_loc, tag_name, src.substring(cont_st, cont_end - cont_st))
		end
		pos += 1
		return new XMLError(st_loc, "Malformed prolog tag")
	end

	# Reads an End tag (starting with </)
	#
	# On entry `pos` is on the `/`. The name is read, then optional
	# whitespaces, then the closing `>` is expected.
	#
	# In case of error, returns a `XMLError`
	private fun read_end_tag(st_loc: Location): XMLEntity do
		pos += 1
		var tag_name = parse_tag_name(['<', '>'])
		ignore_whitespaces
		# `src` may end before the closing `>`: check bounds before reading
		if pos < src.length and src[pos] == '>' then
			pos += 1
			return new XMLEndTag(st_loc, tag_name)
		end
		return new XMLError(st_loc, "Bad end tag `{tag_name}`")
	end

	# Reads a Start tag (starting with <)
	#
	# On entry `pos` is on the first character of the tag name.
	# Returns a `XMLOnelinerTag` when the tag is closed with `/>`,
	# and a `XMLStartTag` when it is closed with `>`.
	#
	# In case of error, returns a `XMLError`
	private fun read_start_tag(st_loc: Location): XMLEntity do
		var tag_name = parse_tag_name(['/', '>'])
		var args = parse_args(['/', '>'])
		for i in args do
			if i isa BadXMLAttribute then return new XMLError(i.location, i.name)
		end
		# Here `parse_args` stopped on one of its end characters: `/` or `>`
		if src[pos] == '/' then
			if pos + 1 < src.length and src[pos + 1] == '>' then
				pos += 2
				return new XMLOnelinerTag(st_loc, tag_name, args)
			end
			# A `/` that is not immediately followed by `>` does not close
			# the tag: report it rather than accepting it as a start tag
			return new XMLError(st_loc, "Malformed tag `{tag_name}`")
		end
		pos += 1
		return new XMLStartTag(st_loc, tag_name, args)
	end

	# Parses an xml tag name
	#
	# Advances `pos` up to the first whitespace, the first character
	# of `delims`, or the end of `src`, and returns the trimmed text skipped.
	private fun parse_tag_name(delims: Array[Char]): String do
		var start = pos
		var limit = src.length
		while pos < limit and not src[pos].is_whitespace and not delims.has(src[pos]) do pos += 1
		return src.substring(start, pos - start).trim
	end

	# Parse the arguments of a tag
	#
	# Attributes are read with `parse_arg` until it signals the end of the
	# attribute list. Reading also stops after the first `BadXMLAttribute`,
	# which is kept as the last element of the returned array.
	private fun parse_args(endtags: Array[Char]): Array[XMLAttribute] do
		var found = new Array[XMLAttribute]
		var cur = parse_arg(endtags)
		while not cur isa XMLAttributeEnd do
			found.add cur
			if cur isa BadXMLAttribute then break
			cur = parse_arg(endtags)
		end
		return found
	end

	# Parses the next argument in `src`
	#
	# Returns, depending on what is found after the leading whitespaces:
	#
	# * a `XMLAttributeEnd` if the current character is one of `endtags`
	# * a `XMLStringAttr` for a well-formed `name = "value"` (or `'value'`)
	# * a `BadXMLAttribute` whose name is the error message otherwise
	private fun parse_arg(endtags: Array[Char]): XMLAttribute do
		var limit = src.length
		ignore_whitespaces
		var here = new Location(line, line_offset)
		if pos >= limit then return new BadXMLAttribute(here, "Unfinished attribute name")
		# FIXME: Ugly, but as long as it remains private, it is OK I guess
		if endtags.has(src[pos]) then return new XMLAttributeEnd(here, "")

		# Attribute name: everything up to the `=`
		var name_start = pos
		loop
			if pos >= limit then return new BadXMLAttribute(here, "Unfinished attribute name")
			var ch = src[pos]
			if ch == '=' then break
			if endtags.has(ch) then return new BadXMLAttribute(here, "Malformed attribute")
			pos += 1
		end
		var name = src.substring(name_start, pos - name_start).trim

		# Skip the `=` and look for the opening delimiter of the value
		pos += 1
		ignore_whitespaces
		var quote_pos = pos
		if pos >= limit then return new BadXMLAttribute(here, "Unfinished attribute `{name}`")
		var quote = src[pos]
		if not (quote == '\'' or quote == '"') then return new BadXMLAttribute(here, "Invalid string delimiter `{quote}` for attribute `{name}`")

		# Attribute value: everything up to the same delimiter
		pos += 1
		while pos < limit and src[pos] != quote do pos += 1
		if pos >= limit then return new BadXMLAttribute(here, "Unfinished attribute `{name}`")
		var value = src.substring(quote_pos + 1, pos - quote_pos - 1)
		pos += 1
		return new XMLStringAttr(here, name, value, quote)
	end
end
lib/dom/parser.nit:17,1--287,3