XML DOM-parsing facilities

Introduced classes

class XMLProcessor

dom :: XMLProcessor

Provides XML parsing facilities

Redefined classes

redef abstract class Text

dom :: parser $ Text

High-level abstraction for all text representations

All class definitions

redef abstract class Text

dom :: parser $ Text

High-level abstraction for all text representations
class XMLProcessor

dom $ XMLProcessor

Provides XML parsing facilities
package_diagram dom::parser parser dom::xml_entities xml_entities dom::parser->dom::xml_entities parser_base parser_base dom::xml_entities->parser_base ...parser_base ... ...parser_base->parser_base dom::dom dom dom::dom->dom::parser dom::checker checker dom::checker->dom::dom gamnit::bmfont bmfont gamnit::bmfont->dom::dom gamnit::texture_atlas_parser texture_atlas_parser gamnit::texture_atlas_parser->dom::dom nlp::stanford stanford nlp::stanford->dom::dom dom::checker... ... dom::checker...->dom::checker gamnit::bmfont... ... gamnit::bmfont...->gamnit::bmfont gamnit::texture_atlas_parser... ... gamnit::texture_atlas_parser...->gamnit::texture_atlas_parser nlp::stanford... ... nlp::stanford...->nlp::stanford

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module caching

serialization :: caching

Services for caching serialization engines
module circular_array

core :: circular_array

Efficient data structure to access both ends of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module defines several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module engine_tools

serialization :: engine_tools

Advanced services for serialization engines
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module inspect

serialization :: inspect

Refine Serializable::inspect to show more useful information
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handles doubly linked lists
module math

core :: math

Mathematical operations
module meta

meta :: meta

Simple user-defined meta-level to manipulate types of instances as object.
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module parser_base

parser_base :: parser_base

Simple base for hand-made parsers of all kinds
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module serialization

serialization :: serialization

General serialization services
module serialization_core

serialization :: serialization_core

Abstract services to serialize Nit objects to different formats
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module xml_entities

dom :: xml_entities

Basic blocks for DOM-XML representation

Children

module dom

dom :: dom

Easy XML DOM parser

Descendants

module a_star-m

a_star-m

module bmfont

gamnit :: bmfont

Parse Angel Code BMFont format and draw text
module cardboard

gamnit :: cardboard

Update the orientation of world_camera at each frame using the head position given by android::cardboard
module checker

dom :: checker

Simple XML validity checker using the dom module
module depth

gamnit :: depth

Framework for 3D games in Nit
module flat

gamnit :: flat

Simple API for 2D games, built around Sprite and App::update
module more_materials

gamnit :: more_materials

Various material implementations
module more_models

gamnit :: more_models

Services to load models from the assets folder
module nlp

nlp :: nlp

Natural Language Processor based on the StanfordNLP core.
module nlp_index

nlp :: nlp_index

Example showing how to use an NLPFileIndex.
module selection

gamnit :: selection

Select Actor from a screen coordinate
module stanford

nlp :: stanford

Natural Language Processor based on the StanfordNLP core.
module stereoscopic_view

gamnit :: stereoscopic_view

Refine EulerCamera and App::frame_core_draw to get a stereoscopic view
module texture_atlas_parser

gamnit :: texture_atlas_parser

Tool to parse XML texture atlas and generate Nit code to access subtextures
module virtual_gamepad

gamnit :: virtual_gamepad

Virtual gamepad mapped to keyboard keys for quick and dirty mobile support
module vr

gamnit :: vr

VR support for gamnit depth, for Android only
# XML DOM-parsing facilities
module parser

intrude import parser_base
intrude import xml_entities

# Provides XML parsing facilities
class XMLProcessor
	super StringProcessor

	# Parses a full XML document
	#
	# Returns the `XMLDocument` to which every parsed entity was attached,
	# or an `XMLError` as soon as a tag cannot be read or a start tag and
	# an end tag do not match.
	fun parse_document: XMLEntity do
		# Start tags still waiting for their end tag, innermost last
		var stack = new Array[XMLStartTag]
		var doc = new XMLDocument
		loop
			ignore_whitespaces
			if pos >= src.length then break
			if src[pos] == '<' then
				var tag = read_tag
				if tag isa XMLStartTag then
					# A start tag is attached to the innermost open tag,
					# or to the document itself when none is open.
					if stack.is_empty then
						tag.parent = doc
					else
						var st_last = stack.last
						tag.parent = st_last
					end
					stack.push tag
				else if tag isa XMLEndTag then
					if stack.is_empty then
						return new XMLError(tag.location, "Missing matching tag for `{tag.tag_name}`")
					end
					var st_last = stack.last
					if tag.tag_name == st_last.tag_name then
						st_last.matching = tag
						tag.matching = st_last
						stack.pop
					else
						# The innermost open tag is the one left unclosed
						var miss = stack.pop
						return new XMLError(miss.location, "Missing matching tag for `{miss.tag_name}`")
					end
				else if tag isa XMLError then
					return tag
				else
					# Any other entity is attached without being stacked
					if stack.is_empty then
						tag.parent = doc
					else
						tag.parent = stack.last
					end
				end
			else
				# Character data running up to the next tag
				var st = pos
				var end_pc = ignore_until("<")
				if end_pc == -1 then
					# No `<` remains: the text runs up to the end of the source.
					# Without this guard the substring length below would be
					# negative. `pos` is forced to the end so the loop stops.
					end_pc = src.length
					pos = end_pc
				end
				var loc = new Location(line, line_offset)
				var pc = new PCDATA(loc, src.substring(st, end_pc - st).trim)
				if stack.is_empty then
					pc.parent = doc
				else
					pc.parent = stack.last
				end
			end
		end
		# Anything left on the stack never got its end tag
		if not stack.is_empty then
			var miss = stack.pop
			return new XMLError(miss.location, "Missing matching tag for `{miss.tag_name}`")
		end
		return doc
	end

	# Reads the tag starting in `src` at current position
	#
	# The character right after `<` selects the reader in charge:
	# `!` for special tags, `?` for prolog tags, `/` for end tags,
	# anything else for start tags.
	#
	# In case of error, returns a `XMLError`
	private fun read_tag: XMLEntity do
		var st_loc = new Location(line, line_offset)
		var opening = src[pos]
		if opening != '<' then return new XMLError(st_loc, "Expected start of tag, got `{opening}`")
		pos += 1
		if pos >= src.length then return new XMLError(st_loc, "Malformed tag")
		var selector = src[pos]
		# Special tag
		if selector == '!' then return read_special_tag(st_loc)
		# Prolog tag
		if selector == '?' then return read_prolog_tag(st_loc)
		# End tag
		if selector == '/' then return read_end_tag(st_loc)
		# Start tag
		return read_start_tag(st_loc)
	end

	# Reads a Special tag (starting with <!)
	#
	# Handles comments (`<!-- ... -->`), CDATA blocks (`<![CDATA[ ... ]]>`),
	# DOCTYPE declarations (delegated to `parse_doctype`) and, as a fallback,
	# any other `<! ... >` construct, read up to the next `>`.
	#
	# In case of error, returns a `XMLError`
	private fun read_special_tag(st_loc: Location): XMLEntity do
		var srclen = src.length
		pos += 1
		if (pos + 2) >= srclen then return new XMLError(st_loc, "Unexpected EOF on start of Special tag")
		if src[pos] == '-' and src[pos + 1] == '-' then
			pos += 2
			var comst = pos
			var endcom = ignore_until("-->")
			if endcom == -1 then return new XMLError(st_loc, "Malformed comment")
			pos += 3
			# `endcom` is the index of the first `-` of the terminator,
			# which is not part of the comment body.
			return new XMLCommentTag(st_loc, src.substring(comst, endcom - comst))
		end
		var st = pos
		if srclen - pos >= 7 then
			var spe_type = src.substring(pos, 7)
			if spe_type == "[CDATA[" then
				pos += 7
				var cdst = pos
				var cdend = ignore_until("]]>")
				# Test the search result itself, so that a block closed
				# exactly at the end of the source is still accepted.
				if cdend == -1 then return new XMLError(st_loc, "Unfinished CDATA block")
				pos += 3
				return new CDATA(st_loc, src.substring(cdst, cdend - cdst))
			else if spe_type == "DOCTYPE" then
				pos += 7
				return parse_doctype(st_loc)
			end
		end
		var end_spec = ignore_until(">")
		# Without a closing `>` the substring length below would be negative
		if end_spec == -1 then return new XMLError(st_loc, "Unfinished Special tag")
		pos += 1
		return new XMLSpecialTag(st_loc, src.substring(st, end_spec - st))
	end

	# Parse a Doctype declaration tag
	#
	# Called with `pos` right after the `DOCTYPE` keyword.
	# The declaration is split on whitespace; a bracketed internal subset
	# (`[ ... ]`) is kept as a single element. The elements are then joined
	# with a single space to form the content of the `XMLDoctypeTag`.
	#
	# In case of error, returns a `XMLError`
	private fun parse_doctype(st_loc: Location): XMLEntity do
		var elemts = new Array[String]
		var srclen = src.length
		loop
			ignore_whitespaces
			if pos >= srclen then return new XMLError(st_loc, "Malformed doctype")
			var c = src[pos]
			# TODO: Properly support intern DOCTYPE definitions
			if c == '[' then
				var intern_st = pos
				var intern_end = ignore_until("]")
				if intern_end == -1 then return new XMLError(st_loc, "Unfinished internal doctype declaration")
				pos += 1
				# Both brackets are kept in the element
				elemts.push src.substring(intern_st, intern_end - intern_st + 1)
				continue
			end
			var elm_st = pos
			while pos < srclen and not src[pos].is_whitespace and src[pos] != '>' do pos += 1
			if pos >= srclen then return new XMLError(st_loc, "Malformed doctype")
			# Keep every non-empty element, including one-character ones
			if pos - elm_st > 0 then
				var str = src.substring(elm_st, pos - elm_st)
				elemts.push str
			end
			if src[pos] == '>' then
				pos += 1
				return new XMLDoctypeTag(st_loc, "DOCTYPE", elemts.join(" "))
			end
		end
	end

	# Reads a Prolog or Processing Instruction tag (starting with <?)
	#
	# A tag named `xml` is read as a prolog with attributes and must end
	# with `?>`. Any other name is read as a processing instruction whose
	# raw content runs up to the next `?>`.
	#
	# In case of error, returns a `XMLError`
	private fun read_prolog_tag(st_loc: Location): XMLEntity do
		var srclen = src.length
		pos += 1
		if pos >= srclen then return new XMLError(st_loc, "Invalid start of prolog")
		var tag_name = parse_tag_name(['<', '>'])
		# The name may run up to the end of the source: do not read past it
		if pos >= srclen then
			if tag_name == "xml" then return new XMLError(st_loc, "Malformed prolog tag")
			return new XMLError(st_loc, "Malformed Processing Instruction tag")
		end
		var c = src[pos]
		if c == '<' or c == '>' then return new XMLError(st_loc ,"Unexpected character `{c}` in prolog declaration")
		if tag_name == "xml" then
			var args = parse_args(['?'])
			for i in args do
				if i isa BadXMLAttribute then return new XMLError(i.location, i.name)
			end
			# With no bad attribute, `parse_args` stopped on an end character,
			# so `pos` is a valid index; the following one may not be.
			if src[pos] == '?' then
				if pos + 1 < srclen and src[pos + 1] == '>' then
					pos += 2
					return new XMLPrologTag(st_loc, tag_name, args)
				end
			end
		else
			var cont_st = pos
			var cont_end = ignore_until("?>")
			if cont_end == -1 then
				pos += 2
				return new XMLError(st_loc, "Malformed Processing Instruction tag")
			end
			pos += 2
			return new XMLProcessingInstructionTag(st_loc, tag_name, src.substring(cont_st, cont_end - cont_st))
		end
		pos += 1
		return new XMLError(st_loc, "Malformed prolog tag")
	end

	# Reads an End tag (starting with </)
	#
	# The tag name may be followed by whitespace, then must be closed by `>`.
	#
	# In case of error, returns a `XMLError`
	private fun read_end_tag(st_loc: Location): XMLEntity do
		pos += 1
		var tag_name = parse_tag_name(['<', '>'])
		ignore_whitespaces
		# The source may end before the closing `>`: do not read past it
		if pos < src.length and src[pos] == '>' then
			pos += 1
			return new XMLEndTag(st_loc, tag_name)
		end
		return new XMLError(st_loc, "Bad end tag `{tag_name}`")
	end

	# Reads a Start tag (starting with <)
	#
	# Returns a `XMLOnelinerTag` when the tag is closed by `/>`,
	# a `XMLStartTag` when it is closed by `>`.
	#
	# In case of error, returns a `XMLError`
	private fun read_start_tag(st_loc: Location): XMLEntity do
		var tag_name = parse_tag_name(['/', '>'])
		var args = parse_args(['/', '>'])
		for i in args do
			if i isa BadXMLAttribute then return new XMLError(i.location, i.name)
		end
		# With no bad attribute, `parse_args` stopped on `/` or `>`,
		# so `pos` is a valid index here.
		if src[pos] == '/' then
			# A `/` is only accepted as the first half of `/>`.
			# The next index is checked before being read.
			if pos + 1 >= src.length or src[pos + 1] != '>' then
				return new XMLError(st_loc, "Malformed tag `{tag_name}`")
			end
			pos += 2
			return new XMLOnelinerTag(st_loc, tag_name, args)
		end
		pos += 1
		return new XMLStartTag(st_loc, tag_name, args)
	end

	# Parses an xml tag name
	#
	# Advances `pos` until a whitespace, one of `delims` or the end of `src`
	# is reached, then returns the trimmed text that was skipped over.
	private fun parse_tag_name(delims: Array[Char]): String do
		var name_start = pos
		var limit = src.length
		loop
			if pos >= limit then break
			var cur = src[pos]
			if delims.has(cur) or cur.is_whitespace then break
			pos += 1
		end
		var raw_name = src.substring(name_start, pos - name_start)
		return raw_name.trim
	end

	# Parse the arguments of a tag
	#
	# Attributes are collected until `parse_arg` signals the end of the
	# attribute list. A `BadXMLAttribute` is stored as the last element and
	# stops the collection.
	private fun parse_args(endtags: Array[Char]): Array[XMLAttribute] do
		var collected = new Array[XMLAttribute]
		var current = parse_arg(endtags)
		while not current isa XMLAttributeEnd do
			collected.add current
			if current isa BadXMLAttribute then break
			current = parse_arg(endtags)
		end
		return collected
	end

	# Parses the next argument in `src`
	#
	# Returns a `XMLStringAttr` for a well-formed `name="value"` pair
	# (single or double quotes), a `XMLAttributeEnd` when the current
	# character is one of `endtags`, or a `BadXMLAttribute` whose name is
	# the error message when the attribute is malformed.
	private fun parse_arg(endtags: Array[Char]): XMLAttribute do
		var limit = src.length
		ignore_whitespaces
		var st_loc = new Location(line, line_offset)
		if pos >= limit then return new BadXMLAttribute(st_loc, "Unfinished attribute name")
		# FIXME: Ugly, but as long as it remains private, it is OK I guess
		if endtags.has(src[pos]) then return new XMLAttributeEnd(st_loc, "")

		# Name: everything up to the `=` sign
		var name_st = pos
		loop
			if pos >= limit then return new BadXMLAttribute(st_loc, "Unfinished attribute name")
			var cur = src[pos]
			if cur == '=' then break
			if endtags.has(cur) then return new BadXMLAttribute(st_loc, "Malformed attribute")
			pos += 1
		end
		var name = src.substring(name_st, pos - name_st).trim

		# Skip the `=` and any whitespace before the opening quote
		pos += 1
		ignore_whitespaces
		var quote_st = pos
		if pos >= limit then return new BadXMLAttribute(st_loc, "Unfinished attribute `{name}`")
		var match = src[pos]
		if not (match == '\'' or match == '"') then
			return new BadXMLAttribute(st_loc, "Invalid string delimiter `{match}` for attribute `{name}`")
		end

		# Value: everything up to the same quote character
		pos += 1
		loop
			if pos >= limit then return new BadXMLAttribute(st_loc, "Unfinished attribute `{name}`")
			if src[pos] == match then break
			pos += 1
		end
		var quote_end = pos
		pos += 1
		# Text strictly between the two quotes
		var inner = src.substring(quote_st + 1, quote_end - quote_st - 1)
		return new XMLStringAttr(st_loc, name, inner, match)
	end
end

redef class Text
	# Tries to parse the current string to XML
	#
	# The text is handed to a fresh `XMLProcessor`.
	# Returns an `XMLDocument` if successful, or an `XMLError` if not
	fun to_xml: XMLEntity do
		var processor = new XMLProcessor(to_s)
		return processor.parse_document
	end
end
lib/dom/parser.nit:11,1--294,3