1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # XML DOM-parsing facilities
14 intrude import parser_base
15 intrude import xml_entities
17 # Provides XML parsing facilities
21 # Parses a full XML document
#
# Returns an `XMLError` when an end tag is met while no start tag is
# waiting on the stack, or when start tags are still on the stack once
# the source has been consumed.
22 fun parse_document
: XMLEntity do
# Stack of `XMLStartTag`; its last element is the one compared with each end tag
23 var stack
= new Array[XMLStartTag]
24 var doc
= new XMLDocument
# Stop once `pos` has reached the end of `src`
27 if pos
>= src
.length
then break
# A `<` at the current position introduces a tag
28 if src
[pos
] == '<' then
30 if tag
isa XMLStartTag then
31 if stack
.is_empty
then
34 var st_last
= stack
.last
38 else if tag
isa XMLEndTag then
# An end tag with no start tag on the stack cannot be matched
39 if stack
.is_empty
then
40 return new XMLError(tag
.location
, "Missing matching tag for `{tag.tag_name}`")
42 var st_last
= stack
.last
# When the names agree, link the start tag and the end tag to each other
43 if tag
.tag_name
== st_last
.tag_name
then
44 st_last
.matching
= tag
45 tag
.matching
= st_last
49 return new XMLError(miss
.location
, "Missing matching tag for `{miss.tag_name}`")
51 else if tag
isa XMLError then
54 if stack
.is_empty
then
# The parent of the tag is the last start tag on the stack
57 tag
.parent
= stack
.last
# Text content: `end_pc` is the index just before the one returned by `ignore_until("<")`
62 var end_pc
= ignore_until
("<") - 1
63 var loc
= new Location(line
, line_offset
)
# The text is trimmed before being stored in the `PCDATA` node
64 var pc
= new PCDATA(loc
, src
.substring
(st
, end_pc
- st
+ 1).trim
)
65 if stack
.is_empty
then
# The parent of the text node is the last start tag on the stack
68 pc
.parent
= stack
.last
# Start tags still on the stack at this point have no matching end tag
72 if not stack
.is_empty
then
74 return new XMLError(miss
.location
, "Missing matching tag for `{miss.tag_name}`")
79 # Reads the tag starting in `src` at current position
#
# Returns an `XMLError` if `c` is not `<`. Otherwise the result comes from
# one of `read_special_tag`, `read_prolog_tag`, `read_end_tag` or
# `read_start_tag`, each of which receives the location recorded on entry.
80 private fun read_tag
: XMLEntity do
# Location of the tag, recorded before anything is consumed
81 var st_loc
= new Location(line
, line_offset
)
83 if not c
== '<' then return new XMLError(st_loc
, "Expected start of tag, got `{c}`")
88 return read_special_tag
(st_loc
)
91 return read_prolog_tag
(st_loc
)
94 return read_end_tag
(st_loc
)
97 return read_start_tag
(st_loc
)
101 # Reads a Special tag (starting with <!)
#
# Handles comments (`--` ... `-->`), `[CDATA[` blocks (up to `]]>`),
# `DOCTYPE` declarations (delegated to `parse_doctype`) and any other
# special tag (up to `>`).
#
# `st_loc` is the location stored in the returned entity.
#
103 # In case of error, returns an `XMLError`
104 private fun read_special_tag
(st_loc
: Location): XMLEntity do
105 var srclen
= src
.length
# The comment check below reads both `src[pos]` and `src[pos + 1]`
107 if (pos
+ 2) >= srclen
then return new XMLError(st_loc
, "Unexpected EOF on start of Special tag")
# `--` opens a comment
108 if src
[pos
] == '-' and src
[pos
+ 1] == '-' then
# -1 from `ignore_until` is reported as a malformed comment
111 var endcom
= ignore_until
("-->")
112 if endcom
== -1 then return new XMLError(st_loc
, "Malformed comment")
114 return new XMLCommentTag(st_loc
,src
.substring
(comst
, endcom
- comst
+ 1))
# `[CDATA[` and `DOCTYPE` are both 7 characters long, hence the fixed-width peek
117 if srclen
- pos
>= 7 then
118 var spe_type
= src
.substring
(pos
, 7)
119 if spe_type
== "[CDATA[" then
122 var cdend
= ignore_until
("]]>")
124 if pos
>= srclen
then return new XMLError(st_loc
, "Unfinished CDATA block")
125 return new CDATA(st_loc
, src
.substring
(cdst
, cdend
- cdst
))
126 else if spe_type
== "DOCTYPE" then
128 return parse_doctype
(st_loc
)
# Any other special tag: its content is taken from `st` up to the position given by `ignore_until(">")`
131 var end_spec
= ignore_until
(">")
133 return new XMLSpecialTag(st_loc
, src
.substring
(st
, end_spec
- st
))
136 # Parse a Doctype declaration tag
#
# `st_loc` is the location stored in the returned entity.
#
# Returns an `XMLDoctypeTag` named `DOCTYPE` whose content is the collected
# elements joined with a single space, or an `XMLError` when the end of
# `src` is reached too early or an internal declaration is not closed.
137 private fun parse_doctype
(st_loc
: Location): XMLEntity do
# Pieces of the declaration, joined with spaces on return
138 var elemts
= new Array[String]
139 var srclen
= src
.length
142 if pos
>= srclen
then return new XMLError(st_loc
, "Malformed doctype")
144 # TODO: Properly support internal DOCTYPE definitions
# -1 from `ignore_until("]")` means the internal declaration is never closed
147 var intern_end
= ignore_until
("]")
148 if intern_end
== -1 then return new XMLError(st_loc
, "Unfinished internal doctype declaration")
150 elemts
.push src
.substring
(intern_st
, intern_end
- intern_st
+ 1)
# Advance to the next whitespace, `>` or the end of `src`
154 while pos
< srclen
and not src
[pos
].is_whitespace
and src
[pos
] != '>' do pos
+= 1
155 if pos
>= srclen
then return new XMLError(st_loc
, "Malformed doctype")
156 if pos
- elm_st
> 1 then
157 var str
= src
.substring
(elm_st
, pos
- elm_st
)
160 if src
[pos
] == '>' then
162 return new XMLDoctypeTag(st_loc
, "DOCTYPE", elemts
.join
(" "))
167 # Reads a Prolog or Processing Instruction tag (starting with <?)
#
# A tag named exactly `xml` is read as a prolog with attributes and yields
# an `XMLPrologTag`. Any other name containing `xml` is rejected. The
# remaining names yield an `XMLProcessingInstructionTag` whose content
# runs up to `?>`.
#
# `st_loc` is the location stored in the returned entity.
#
169 # In case of error, returns an `XMLError`
170 private fun read_prolog_tag
(st_loc
: Location): XMLEntity do
171 var srclen
= src
.length
173 if pos
>= srclen
then return new XMLError(st_loc
, "Invalid start of prolog")
174 var tag_name
= parse_tag_name
(['<', '>'])
# `<` and `>` are the delimiters given to `parse_tag_name`; neither is accepted here
176 if c
== '<' or c
== '>' then return new XMLError(st_loc
,"Unexpected character `{c}` in prolog declaration")
177 if tag_name
== "xml" then
# Attributes of the prolog are read with `?` as the end character
178 var args
= parse_args
(['?'])
# The `name` of a `BadXMLAttribute` is used as the error message
180 if i
isa BadXMLAttribute then return new XMLError(i
.location
, i
.name
)
# The prolog must be closed by `?>`
182 if src
[pos
] == '?' then
183 if src
[pos
+ 1] == '>' then
185 return new XMLPrologTag(st_loc
, tag_name
, args
)
189 if tag_name
.has
("xml") then return new XMLError(st_loc
, "Forbidden keyword xml in Processing Instruction")
# -1 from `ignore_until("?>")` means the instruction is never closed
191 var cont_end
= ignore_until
("?>")
192 if cont_end
== -1 then
194 return new XMLError(st_loc
, "Malformed Processing Instruction tag")
197 return new XMLProcessingInstructionTag(st_loc
, tag_name
, src
.substring
(cont_st
, cont_end
- cont_st
))
200 return new XMLError(st_loc
, "Malformed prolog tag")
203 # Reads an End tag (starting with </)
#
# Returns an `XMLEndTag` carrying `st_loc` and the parsed name when `>` is
# found at the current position once the name has been read.
#
205 # In case of error, returns an `XMLError`
206 private fun read_end_tag
(st_loc
: Location): XMLEntity do
# The name stops at whitespace, `<` or `>`
208 var tag_name
= parse_tag_name
(['<', '>'])
210 if src
[pos
] == '>' then
212 return new XMLEndTag(st_loc
, tag_name
)
214 return new XMLError(st_loc
, "Bad end tag `{tag_name}`")
217 # Reads a Start tag (starting with <)
#
# Returns an `XMLOnelinerTag` when the tag is closed by `/>`, an
# `XMLStartTag` otherwise. `st_loc` is the location stored in the result.
#
219 # In case of error, returns an `XMLError`
220 private fun read_start_tag
(st_loc
: Location): XMLEntity do
# Both the name and the attributes stop at `/` or `>`
221 var tag_name
= parse_tag_name
(['/', '>'])
222 var args
= parse_args
(['/', '>'])
# The `name` of a `BadXMLAttribute` is used as the error message
224 if i
isa BadXMLAttribute then return new XMLError(i
.location
, i
.name
)
# `/` followed by `>` closes the tag on the spot
226 if src
[pos
] == '/' then
227 if src
[pos
+ 1] == '>' then
229 return new XMLOnelinerTag(st_loc
, tag_name
, args
)
233 return new XMLStartTag(st_loc
, tag_name
, args
)
236 # Parses an xml tag name
#
# The name ends at the first whitespace, at the first character found in
# `delims`, or at the end of `src`. The returned string is trimmed.
237 private fun parse_tag_name
(delims
: Array[Char]): String do
240 var srclen
= src
.length
241 while pos
< srclen
and not c
.is_whitespace
and not delims
.has
(c
) do
# The name spans from `idst` up to, but not including, `pos`
245 return src
.substring
(idst
, pos
- idst
).trim
248 # Parse the arguments of a tag
#
# `endtags` is forwarded to `parse_arg`. Parsing stops, and `attrs` is
# returned, as soon as `parse_arg` yields an `XMLAttributeEnd` or a
# `BadXMLAttribute`.
249 private fun parse_args
(endtags
: Array[Char]): Array[XMLAttribute] do
250 var attrs
= new Array[XMLAttribute]
252 var arg
= parse_arg
(endtags
)
# `XMLAttributeEnd` is the marker `parse_arg` returns on an end character
253 if arg
isa XMLAttributeEnd then return attrs
255 if arg
isa BadXMLAttribute then return attrs
259 # Parses the next argument in `src`
#
# `endtags` lists the characters that end the attribute list.
#
# Returns an `XMLStringAttr` on success, an `XMLAttributeEnd` when the
# current character belongs to `endtags`, or a `BadXMLAttribute` whose
# second argument is the error message.
260 private fun parse_arg
(endtags
: Array[Char]): XMLAttribute do
261 var srclen
= src
.length
263 var st_loc
= new Location(line
, line_offset
)
264 if pos
>= srclen
then return new BadXMLAttribute(st_loc
, "Unfinished attribute name")
265 # FIXME: Ugly, but as long as it remains private, it is OK I guess
# An end character here means there is no attribute left to read
266 if endtags
.has
(src
[pos
]) then return new XMLAttributeEnd(st_loc
, "")
267 var attrname_st
= pos
# Advance to `=`, an end character, or the end of `src`
268 while pos
< srclen
and src
[pos
] != '=' and not endtags
.has
(src
[pos
]) do pos
+= 1
269 if pos
>= srclen
then return new BadXMLAttribute(st_loc
, "Unfinished attribute name")
# Stopping on anything other than `=` means the attribute has no value
270 if src
[pos
] != '=' then return new BadXMLAttribute(st_loc
, "Malformed attribute")
# The name ends just before the `=`
271 var attrname_end
= pos
- 1
272 var name
= src
.substring
(attrname_st
, attrname_end
- attrname_st
+ 1).trim
276 if pos
>= srclen
then return new BadXMLAttribute(st_loc
, "Unfinished attribute `{name}`")
# Only `'` and `"` are accepted as value delimiters
278 if match
!= '\'' and match != '"' then return new BadXMLAttribute(st_loc, "Invalid string delimiter
`{match}` for attribute
`{name}`")
# Advance to the next occurrence of the delimiter
280 while pos < srclen and src[pos] != match do pos += 1
281 if pos >= srclen then return new BadXMLAttribute(st_loc, "Unfinished attribute
`{name}`")
282 var attrval_end = pos
283 var val = src.substring(attrval_st, attrval_end - attrval_st + 1).trim
# The first and last characters of `val` are dropped (presumably the delimiters; confirm where `attrval_st` is set)
285 return new XMLStringAttr(st_loc, name, val.substring(1, val.length - 2), match)
# Attempts to read the receiver as an XML document
#
# The receiver is converted with `to_s` and handed to a new `XMLProcessor`.
# The result is an `XMLDocument` on success, or an `XMLError` otherwise.
fun to_xml: XMLEntity do
	var processor = new XMLProcessor(to_s)
	return processor.parse_document
end