1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # XML DOM-parsing facilities
14 intrude import parser_base
15 intrude import xml_entities
17 # Provides XML parsing facilities
21 # Parses a full XML document
22 fun parse_document
: XMLEntity do
23 var stack
= new Array[XMLStartTag]
24 var doc
= new XMLDocument
27 if pos
>= src
.length
then break
28 if src
[pos
] == '<' then
30 if tag
isa XMLStartTag then
31 if stack
.is_empty
then
34 var st_last
= stack
.last
38 else if tag
isa XMLEndTag then
39 if stack
.is_empty
then
40 return new XMLError(location
= tag
.location
, "Missing matching tag for `{tag.tag_name}`")
42 var st_last
= stack
.last
43 if tag
.tag_name
== st_last
.tag_name
then
44 st_last
.matching
= tag
45 tag
.matching
= st_last
49 return new XMLError("Missing matching tag for `{miss.tag_name}`", location
=miss
.location
)
51 else if tag
isa XMLError then
54 if stack
.is_empty
then
57 tag
.parent
= stack
.last
62 var end_pc
= ignore_until
("<") - 1
63 var pc
= new PCDATA(src
.substring
(st
, end_pc
- st
+ 1).trim
)
64 if stack
.is_empty
then
67 pc
.parent
= stack
.last
71 if not stack
.is_empty
then
73 return new XMLError("Missing matching tag for `{miss.tag_name}`", location
=miss
.location
)
78 # Reads the tag starting in `src` at the current position
79 private fun read_tag
: XMLEntity do
80 var st_loc
= new Location(line
, line_offset
)
82 if not c
== '<' then return new XMLError(location
=st_loc
, "Expected start of tag, got `{c}`")
88 return read_special_tag
(st_loc
)
91 return read_prolog_tag
(st_loc
)
94 return read_end_tag
(st_loc
)
97 return read_start_tag
(st_loc
)
101 # Reads a Special tag (starting with <!)
103 # In case of error, returns an `XMLError`
104 private fun read_special_tag
(st_loc
: Location): XMLEntity do
105 var srclen
= src
.length
107 if (pos
+ 2) >= srclen
then return new XMLError(location
=st_loc
, "Unexpected EOF on start of Special tag")
108 if src
[pos
] == '-' and src
[pos
+ 1] == '-' then
111 var endcom
= ignore_until
("-->")
112 if endcom
== -1 then return new XMLError(location
=st_loc
, "Malformatted comment")
114 return new XMLCommentTag(location
=st_loc
,src
.substring
(comst
, endcom
- comst
+ 1))
117 if srclen
- pos
>= 7 then
118 var spe_type
= src
.substring
(pos
, 7)
119 if spe_type
== "[CDATA[" then
122 var cdend
= ignore_until
("]]>")
124 if pos
>= srclen
then return new XMLError(location
= st_loc
, "Unfinished CDATA block")
125 return new CDATA(src
.substring
(cdst
, cdend
- cdst
))
126 else if spe_type
== "DOCTYPE" then
128 return parse_doctype
(st_loc
)
131 var end_spec
= ignore_until
(">")
133 return new XMLSpecialTag(location
=st_loc
, src
.substring
(st
, end_spec
- st
))
136 # Parse a Doctype declaration tag
137 private fun parse_doctype
(st_loc
: Location): XMLEntity do
138 var elemts
= new Array[String]
139 var srclen
= src
.length
142 if pos
>= srclen
then return new XMLError(location
= st_loc
, "Malformatted doctype")
144 # TODO: Properly support internal DOCTYPE definitions
147 var intern_end
= ignore_until
("]")
148 if intern_end
== -1 then return new XMLError(location
= st_loc
, "Unfinished internal doctype declaration")
150 elemts
.push src
.substring
(intern_st
, intern_end
- intern_st
+ 1)
154 while pos
< srclen
and not src
[pos
].is_whitespace
and src
[pos
] != '>' do pos
+= 1
155 if pos
>= srclen
then return new XMLError(location
= st_loc
, "Malformatted doctype")
156 if pos
- elm_st
> 1 then
157 var str
= src
.substring
(elm_st
, pos
- elm_st
)
160 if src
[pos
] == '>' then
162 return new XMLDoctypeTag(location
= st_loc
, "DOCTYPE", elemts
.join
(" "))
167 # Reads a Prolog or Processing Instruction tag (starting with <?)
169 # In case of error, returns an `XMLError`
170 private fun read_prolog_tag
(st_loc
: Location): XMLEntity do
171 var srclen
= src
.length
173 if pos
>= srclen
then return new XMLError(location
=st_loc
, "Invalid start of prolog")
175 var tag_name
= parse_tag_name
(['<', '>'])
177 if c
== '<' or c
== '>' then return new XMLError(location
=st_loc
,"Unexpected character `{c}` in prolog declaration")
178 if tag_name
== "xml" then
179 var args
= parse_args
(['?'])
181 if i
isa BadXMLAttribute then return new XMLError(location
= i
.location
, i
.name
)
183 if src
[pos
] == '?' then
184 if src
[pos
+ 1] == '>' then
186 return new XMLPrologTag(location
=st_loc
, tag_name
, args
)
190 if tag_name
.has
("xml") then return new XMLError(location
= st_loc
, "Forbidden keyword xml in Processing Instruction")
192 var cont_end
= ignore_until
("?>")
193 if cont_end
== -1 then
195 return new XMLError(location
= st_loc
, "Malformatted Processing Instruction tag")
198 return new XMLProcessingInstructionTag(location
=st_loc
, tag_name
, src
.substring
(cont_st
, cont_end
- cont_st
))
201 return new XMLError(location
=st_loc
, "Malformatted prolog tag")
204 # Reads an End tag (starting with </)
206 # In case of error, returns an `XMLError`
207 private fun read_end_tag
(st_loc
: Location): XMLEntity do
208 var srclen
= src
.length
210 var tag_name
= parse_tag_name
(['<', '>'])
212 if src
[pos
] == '>' then
214 return new XMLEndTag(location
=st_loc
, tag_name
)
216 return new XMLError(location
= st_loc
, "Bad end tag `{tag_name}`")
219 # Reads a Start tag (starting with <)
221 # In case of error, returns an `XMLError`
222 private fun read_start_tag
(st_loc
: Location): XMLEntity do
223 var srclen
= src
.length
224 var tag_name
= parse_tag_name
(['/', '>'])
225 var args
= parse_args
(['/', '>'])
227 if i
isa BadXMLAttribute then return new XMLError(location
=i
.location
, i
.name
)
229 if src
[pos
] == '/' then
230 if src
[pos
+ 1] == '>' then
232 return new XMLOnelinerTag(location
=st_loc
, tag_name
, args
)
236 return new XMLStartTag(location
=st_loc
, tag_name
, args
)
239 # Parses an xml tag name
240 private fun parse_tag_name
(delims
: Array[Char]): String do
243 var srclen
= src
.length
244 while pos
< srclen
and not c
.is_whitespace
and not delims
.has
(c
) do
248 return src
.substring
(idst
, pos
- idst
).trim
251 # Parse the arguments of a tag
252 private fun parse_args
(endtags
: Array[Char]): Array[XMLAttribute] do
253 var attrs
= new Array[XMLAttribute]
255 var arg
= parse_arg
(endtags
)
256 if arg
isa XMLAttributeEnd then return attrs
258 if arg
isa BadXMLAttribute then return attrs
262 # Parses the next argument in `src`
263 private fun parse_arg
(endtags
: Array[Char]): XMLAttribute do
264 var srclen
= src
.length
265 var attr
: XMLAttribute
267 var st_loc
= new Location(line
, line_offset
)
268 if pos
>= srclen
then return new BadXMLAttribute(location
= st_loc
, "Unfinished attribute name")
269 # FIXME: Ugly, but as long as it remains private, it is OK I guess
270 if endtags
.has
(src
[pos
]) then return new XMLAttributeEnd("")
271 var attrname_st
= pos
272 while pos
< srclen
and src
[pos
] != '=' and not endtags
.has
(src
[pos
]) do pos
+= 1
273 if pos
>= srclen
then return new BadXMLAttribute(location
= st_loc
, "Unfinished attribute name")
274 if src
[pos
] != '=' then return new BadXMLAttribute(location
= st_loc
, "Malformatted attribute")
275 var attrname_end
= pos
- 1
276 var name
= src
.substring
(attrname_st
, attrname_end
- attrname_st
+ 1).trim
280 if pos
>= srclen
then return new BadXMLAttribute(location
=st_loc
, "Unfinished attribute `{name}`")
282 if match
!= '\'' and match != '"' then return new BadXMLAttribute(location=st_loc, "Invalid string delimiter
`{match}` for attribute
`{name}`")
284 while pos < srclen and src[pos] != match do pos += 1
285 if pos >= srclen then return new BadXMLAttribute(location=st_loc, "Unfinished attribute
`{name}`")
286 var attrval_end = pos
287 var val = src.substring(attrval_st, attrval_end - attrval_st + 1).trim
289 return new XMLStringAttr(location=st_loc, name, val.substring(1, val.length - 2), match)
# Attempts to read the textual content of `self` as an XML document
#
# Yields an `XMLDocument` when parsing succeeds, or an `XMLError` otherwise
fun to_xml: XMLEntity do
	var processor = new XMLProcessor(self.to_s)
	return processor.parse_document
end