1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Encapsulates Namespace logic for use by applications using SAX, or internally by SAX drivers.
12 module sax
::helpers
::namespace_support
14 # Encapsulates Namespace logic for use by applications using SAX, or internally by SAX drivers.
16 # This class encapsulates the logic of Namespace processing:
17 # it tracks the declarations currently in force for each context
18 # and automatically processes qualified XML names into their
19 # Namespace parts; it can also be used in reverse for generating
20 # XML qualified names from Namespaces.
22 # Namespace support objects are reusable, but the reset method
23 # must be invoked between each session.
25 # Here is a simple session:
27 # var parts: Array[String] = new Array[String].with_capacity(3)
28 # var support = new NamespaceSupport
30 # support.push_context
31 # support.declare_prefix("", "http://www.w3.org/1999/xhtml")
32 # support.declare_prefix("dc", "http://www.purl.org/dc#")
34 # parts = support.process_name("p", parts, false).as(not null)
35 # assert parts[0] == "http://www.w3.org/1999/xhtml"
36 # assert parts[1] == "p"
37 # assert parts[2] == "p"
39 # parts = support.process_name("dc:title", parts, false).as(not null)
40 # assert parts[0] == "http://www.purl.org/dc#"
41 # assert parts[1] == "title"
42 # assert parts[2] == "dc:title"
46 # Note that this class is optimized for the use case where most
47 # elements do not contain Namespace declarations: if the same
48 # prefix/URI mapping is repeated for each context (for example), this
49 # class will be somewhat less efficient.
51 # Although SAX drivers (parsers) may choose to use this class to
52 # implement namespace handling, they are not required to do so.
53 # Applications must track namespace information themselves if they
54 # want to use namespace information.
56 # Note: The original source code and documentation of this class comes, in part,
57 # from [SAX 2.0](http://www.saxproject.org).
58 class NamespaceSupport
59 # The XML Namespace URI as a constant.
61 # The value is `http://www.w3.org/XML/1998/namespace`
62 # as defined in the “Namespaces in XML” specification.
64 # This is the Namespace URI that is automatically mapped
65 # to the `xml` prefix.
# `declare_prefix` checks user-supplied URIs against this value.
var xmlns: String = "http://www.w3.org/XML/1998/namespace"
68 # The namespace declaration URI as a constant.
70 # The value is `http://www.w3.org/xmlns/2000/`, as defined
71 # in an erratum to the “Namespaces in XML” recommendation.
73 # This is the Namespace URI that is automatically mapped
74 # to the `xmlns` prefix.
# `declare_prefix` checks user-supplied URIs against this value.
var nsdecl: String = "http://www.w3.org/xmlns/2000/"
# Stack of Namespace contexts, pre-sized for 32 entries.
private var contexts: Array[Context] = new Array[Context].with_capacity(32)

# The Namespace context currently in force.
private var current_context: Context = new Context

# Index of `current_context` within `contexts`.
private var context_position = 0
82 contexts
.push
(current_context
)
83 current_context
.declare_prefix
("xml", xmlns
)
84 current_context
.declare_prefix
("xmlns", nsdecl
)
87 # Reset this Namespace support object for reuse.
89 # It is necessary to invoke this method before reusing the
90 # Namespace support object for a new session.
95 current_context
= new Context
96 contexts
.push
(current_context
)
97 current_context
.declare_prefix
("xml", xmlns
)
98 current_context
.declare_prefix
("xmlns", nsdecl
)
101 # Start a new Namespace context.
103 # The new context will automatically inherit
104 # the declarations of its parent context, but it will also keep
105 # track of which declarations were made within this context.
107 # Event callback code should start a new context once per element.
108 # This means being ready to call this in either of two places.
109 # For elements that don't include namespace declarations, the
110 # `ContentHandler.start_element` callback is the right place.
111 # For elements with such a declaration, it's done in the first
112 # `ContentHandler.start_prefix_mapping` callback.
113 # A boolean flag can be used to
114 # track whether a context has been started yet. When either of
115 # those methods is called, it checks the flag to see if a new context
116 # needs to be started. If so, it starts the context and sets the
117 # flag. After `ContentHandler.start_element` does that, it always clears the flag.
120 # Normally, SAX drivers would push a new context at the beginning
121 # of each XML element. Then they perform a first pass over the
122 # attributes to process all namespace declarations, making
123 # `ContentHandler.start_prefix_mapping` callbacks.
124 # Then a second pass is made, to determine the namespace-qualified
125 # names for all attributes and for the element name.
126 # Finally all the information for the
127 # `ContentHandler.start_element` callback is available,
128 # so it can then be made.
130 # The Namespace support object always starts with a base context
131 # already in force: in this context, only the `xml` and `xmlns` prefixes are declared.
134 # SEE: `sax::ContentHandler`
138 current_context
.decls_ok
= false
139 context_position
+= 1
141 # Extend the array if necessary.
142 if context_position
>= contexts
.length
then
143 current_context
= new Context
144 contexts
.push
(current_context
)
146 current_context
= contexts
[context_position
]
149 # Set the parent, if any.
150 if context_position
> 0 then
151 current_context
.parent
= contexts
[context_position
- 1]
155 # Revert to the previous Namespace context.
157 # Normally, you should pop the context at the end of each
158 # XML element. After popping the context, all Namespace prefix
159 # mappings that were previously in force are restored.
161 # You must not attempt to declare additional Namespace
162 # prefixes after popping a context, unless you push another context first.
165 # SEE: `push_context`
167 assert stack_not_empty
: context_position
> 0
168 current_context
.clear
169 context_position
-= 1
170 current_context
= contexts
[context_position
]
173 # Declare a Namespace prefix.
175 # All prefixes must be declared before they are referenced.
176 # For example, a SAX driver (parser)
177 # would scan an element's attributes
178 # in two passes: first for namespace declarations,
179 # then a second pass using `process_name` to
180 # interpret prefixes against (potentially redefined) prefixes.
182 # This method declares a prefix in the current Namespace
183 # context; the prefix will remain in force until this context
184 # is popped, unless it is shadowed in a descendant context.
186 # To declare the default element Namespace, use the empty string as the prefix.
189 # Note that you must *not* declare a prefix after
190 # you've pushed and popped another Namespace context, or
191 # treated the declarations phase as complete by processing a name.
194 # Note that there is an asymmetry in this library:
195 # `prefix` will not return the `""` prefix,
196 # even if you have declared a default element namespace.
197 # To check for a default namespace,
198 # you have to look it up explicitly using `uri`.
199 # This asymmetry exists to make it easier to look up prefixes
200 # for attribute names, where the default prefix is not allowed.
204 # * `prefix`: prefix to declare, or the empty string to
205 # indicate the default element namespace. This may never have
206 # the value `xml` or `xmlns`.
207 # * `uri`: The Namespace URI to associate with the prefix.
212 # `true` if the prefix and the URI are legal, `false` otherwise.
214 # SEE: `process_name`
fun declare_prefix(prefix: String, uri: String): Bool do
	# The `xml` and `xmlns` prefixes, and the URIs held by `xmlns` and
	# `nsdecl`, are reserved: refuse them without touching the context.
	if prefix == "xml" or prefix == "xmlns" then return false
	if uri == xmlns or uri == nsdecl then return false

	# Legal declaration: record it in the context currently in force
	# and report success.
	current_context.declare_prefix(prefix, uri)
	return true
end
229 # Process a raw XML qualified name, after all declarations in the current context have been handled by `declare_prefix`.
231 # This method processes a raw XML qualified name in the current
232 # context by removing the prefix and looking it up among the
233 # prefixes currently declared. The return value will be the
234 # array supplied by the caller, filled in as follows:
236 # * `parts[0]`: Namespace URI, or an empty string if none is in use.
237 # * `parts[1]`: local name (without prefix).
238 # * `parts[2]`: original raw name.
240 # If the raw name has a prefix that has not been declared, then
241 # the return value will be `null`.
243 # Note that attribute names are processed differently than
244 # element names: an unprefixed element name will receive the
245 # default Namespace (if any), while an unprefixed attribute name will not.
250 # * `qname`: raw XML qualified name to be processed.
251 # * `parts`: array supplied by the caller. Will be enlarged to 3 elements if
252 # needed. If the specified array contains more than 3 elements, its length
253 # will be kept intact.
254 # * `is_attribute`: flag indicating whether this is an attribute name
255 # (`true`) or an element name (`false`).
257 # SEE: `declare_prefix`
258 fun process_name
(qname
: String, parts
: Array[String], is_attribute
: Bool):
259 nullable Array[String] do
260 var my_parts
= current_context
.process_name
(qname
, is_attribute
)
262 if my_parts
== null then
265 parts
[0] = my_parts
[0]
266 parts
[1] = my_parts
[1]
267 parts
[2] = my_parts
[2]
268 if parts
[0] == "" and qname
== "xmlns" and is_attribute
then
276 # Look up a prefix and get the currently-mapped Namespace URI.
278 # This method looks up the prefix in the current context.
279 # Use the empty string (`""`) for the default Namespace.
283 # * `prefix`: The prefix to look up.
287 # The associated Namespace URI, or `null` if the prefix
288 # is undeclared in this context.
# The lookup is answered by `current_context`.
fun uri(prefix: String): nullable String do return current_context.uri(prefix)
297 # Return all prefixes currently declared.
299 # Note: if there is a default prefix, it will not be
300 # returned in this enumeration; check for the default prefix
301 # using the `uri` with an argument of `""` or use `declared_prefixes`.
305 # All prefixes declared in the current context except
306 # for the empty (default) prefix.
308 # SEE: `declared_prefixes`
fun prefixes: Collection[String] do
	# Answered by the Namespace context currently in force.
	return current_context.prefixes
end
313 # Return one of the prefixes mapped to a Namespace URI.
315 # If more than one prefix is currently mapped to the same
316 # URI, this method will make an arbitrary selection; if you
317 # want all of the prefixes, use the `prefixes_of` method instead.
319 # Note: this will never return the empty (default) prefix;
320 # to check for a default prefix, use the `uri`
321 # method with an argument of `""`.
325 # * `uri`: Namespace URI.
329 # One of the prefixes currently mapped to the URI supplied,
330 # or `null` if none is mapped or if the URI is assigned to
331 # the default Namespace.
# The lookup is answered by `current_context`.
fun prefix(uri: String): nullable String do return current_context.prefix(uri)
340 # Return all prefixes currently declared for an URI.
342 # This method returns prefixes mapped to a specific Namespace
343 # URI. The `xml` prefix will be included. If you want only one
344 # prefix that's mapped to the Namespace URI, and you don't care
345 # which one you get, use the `prefix` method instead.
347 # Note: the empty (default) prefix is *never* included
348 # in this enumeration; to check for the presence of a default
349 # Namespace, use the `uri` method with an argument of `""`.
353 # * `uri`: The Namespace URI.
357 # SEE: `declared_prefixes`
fun prefixes_of(uri: String): Collection[String] do
	# Prefixes whose current binding is exactly `uri`.
	var matching = new Array[String]

	# `self.` is required for `uri`: the parameter `uri` shadows the
	# method of the same name.
	for candidate in self.prefixes do
		if uri == self.uri(candidate) then matching.push(candidate)
	end

	# Possibly empty when no prefix is mapped to `uri`.
	return matching
end
372 # Return all prefixes declared (and undeclared) in this context.
374 # The empty (default) prefix will be included in this
375 # enumeration; note that this behaviour differs from that of
376 # `prefix`, `prefixes` and `prefixes_of`.
# The answer comes from `current_context`.
fun declared_prefixes: Collection[String] do return current_context.declared_prefixes
387 # Internal class for a single Namespace context.
389 # This module caches and reuses Namespace contexts,
390 # so the number allocated
391 # will be equal to the element depth of the document, not to the total
392 # number of elements (i.e. 5-10 rather than tens of thousands).
393 # Also, data structures used to represent contexts are shared when
394 # possible (child contexts without declarations) to further reduce
395 # the amount of memory that's consumed.
397 # Note: The original source code and documentation of this class comes, in part,
398 # from [SAX 2.0](http://www.saxproject.org).
399 private class Context
# Empty collection returned by `declared_prefixes` when `declarations` is `null`.
var empty: Collection[String] = new Array[String].with_capacity(0)

# Prefix -> Namespace URI bindings; `null` until a table is assigned.
var prefix_table: nullable Map[String, String] = null

# Cache of `process_name` results for element names.
#
# `qname -> [uri, local_name, qname]`
var element_name_table: nullable Map[String, Array[String]] = null

# Cache of `process_name` results for attribute names.
#
# `qname -> [uri, local_name, qname]`
var attribute_name_table: nullable Map[String, Array[String]] = null

# Namespace URI applied when a name has no prefix, if any.
var default_ns: nullable String = null

# May prefixes still be declared in this context?
var decls_ok: Bool = true is writable

# Every prefix declared in this context, or `null` if none was recorded yet.
var declarations: nullable Array[String] = null

# Was `copy_tables` called since the last call to `parent=`?
var decl_seen = false

# NOTE(review): presumably the parent context; no use is visible in this excerpt.
var p_parent: nullable Context = null
431 # (Re)set the parent of this Namespace context.
433 # The context must either have been freshly constructed,
434 # or must have been cleared.
438 # * `context`: parent Namespace context object.
439 fun parent
=(parent
: Context) do
442 prefix_table
= parent
.prefix_table
443 element_name_table
= parent
.element_name_table
444 attribute_name_table
= parent
.attribute_name_table
445 default_ns
= parent
.default_ns
450 # Makes associated state become collectible, invalidating this context.
452 # `parent=` must be called before this context may be used again.
456 element_name_table
= null
457 attribute_name_table
= null
462 # Declare a Namespace prefix for this context.
466 # * `prefix`: prefix to declare.
467 # * `uri`: associated Namespace URI.
469 # SEE: `NamespaceSupport.declare_prefix`
470 fun declare_prefix
(prefix
: String, uri
: String) do
471 assert legal_state
: decls_ok
else
472 sys
.stderr
.write
("Can't declare any more prefixes in this context.\n")
476 if not decl_seen
then
486 else if "" == uri
then
487 prefix_table
.keys
.remove
(prefix
)
489 prefix_table
[prefix
] = uri
491 declarations
.push
(prefix
)
494 # Process a raw XML qualified name in this context.
498 # * `qname`: raw XML qualified name.
499 # * `is_attribute`: `true` if this is an attribute name.
503 # An array of three strings containing the URI part (or empty string),
504 # the local part and the raw name, or `null` if there is an undeclared prefix.
507 # SEE: `NamespaceSupport.process_name`
508 fun process_name
(qname
: String, is_attribute
: Bool):
509 nullable Array[String] do
510 var name
: Array[String]
511 var table
: Map[String, Array[String]]
512 var match
: nullable Match
514 # Detect errors in call sequence.
516 # Select the appropriate table.
518 table
= attribute_name_table
.as(not null)
520 table
= element_name_table
.as(not null)
523 # Start by looking in the cache, and
524 # return immediately if the name
525 # is already known in this context.
526 if table
.keys
.has
(qname
) then
530 # We haven't seen this name in this
531 # context before. Maybe in the parent
532 # context, but we can't assume prefix
533 # bindings are the same.
534 name
= new Array[String].with_capacity
(3)
535 match
= qname
.search
(':')
537 if match
== null then
542 name
.push
(default_ns
or else "")
548 var prefix
= qname
.substring
(0, match
.from
)
554 name
.push
(default_ns
or else "")
556 name
.push
(qname
.substring_from
(match
.after
))
558 else if (not is_attribute
) and "xmlns" == prefix
then
560 else if prefix_table
.keys
.has
(prefix
) then
561 name
.push
(prefix_table
[prefix
])
562 name
.push
(qname
.substring_from
(match
.after
))
569 # Save in the cache for future use.
570 # (Could be shared with parent context...)
575 # Look up the URI associated with a prefix in this context.
577 # Return `null` if no URI is associated with a specified prefix.
581 # * `prefix`: prefix to look up.
583 # SEE: `NamespaceSupport.uri`
584 fun uri
(prefix
: String): nullable String do
587 else if prefix_table
== null then
590 return prefix_table
.get_or_null
(prefix
)
594 # Look up one of the prefixes associated with a URI in this context.
596 # Since many prefixes may be mapped to the same URI,
597 # the return value may be unreliable.
601 # * `uri`: URI to look up.
605 # The associated prefix, or `null` if none is declared.
607 # SEE: `NamespaceSupport.prefix`
608 fun prefix
(uri
: String): nullable String do
609 # Note: We do not use the original code from SAX 2.0.1 because it is
610 # buggy with redefined prefixes. For example, with
611 # `<x xmlns:y="1"><z xmlns:y="2" /></x>`, when in `z`, `prefix("1")`
612 # returns `"y"` in the original code while it should return `null`.
613 # Our code is slower, but it works.
614 var all_prefixes
= prefixes
616 for prefix
in all_prefixes
do
617 if uri
== self.uri
(prefix
) then
624 # Return all prefixes declared in this context (possibly empty).
626 # SEE: `NamespaceSupport.declared_prefixes`
fun declared_prefixes: Collection[String] do
	# Fall back to the shared empty collection when nothing was declared.
	var declared = declarations
	if declared == null then return empty
	return declared
end
631 # Return all prefixes currently in force.
633 # The default prefix, if in force, is *not*
634 # returned, and will have to be checked for separately.
636 # SEE: `NamespaceSupport.prefixes`
637 fun prefixes
: Collection[String] do
638 if prefix_table
== null then
641 return prefix_table
.keys
645 # Copy on write for the internal tables in this context.
647 # This class is optimized for the normal case where most
648 # elements do not contain Namespace declarations.
650 if prefix_table
!= null then
651 var old_prefix_table
= prefix_table
.as(not null)
652 prefix_table
= new HashMap[String, String]
653 prefix_table
.recover_with
(old_prefix_table
)
655 prefix_table
= new HashMap[String, String]
657 element_name_table
= new HashMap[String, Array[String]]
658 attribute_name_table
= new HashMap[String, Array[String]]
659 declarations
= new Array[String]