ce8682ece45b938f470e3405936b9640eb286b9a
[nit.git] / lib / sax / helpers / namespace_support.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Encapsulates Namespace logic for use by applications using SAX, or internally by SAX drivers.
12 module sax::helpers::namespace_support
13
# Encapsulates Namespace logic for use by applications using SAX, or internally by SAX drivers.
#
# This class encapsulates the logic of Namespace processing:
# it tracks the declarations currently in force for each context
# and automatically processes qualified XML names into their
# Namespace parts; it can also be used in reverse for generating
# XML qualified names from Namespaces.
#
# Namespace support objects are reusable, but the `reset` method
# must be invoked between each session.
#
# Here is a simple session:
#
#     var parts: Array[String] = new Array[String].with_capacity(3)
#     var support = new NamespaceSupport
#     #
#     support.push_context
#     support.declare_prefix("", "http://www.w3.org/1999/xhtml")
#     support.declare_prefix("dc", "http://www.purl.org/dc#")
#     #
#     parts = support.process_name("p", parts, false).as(not null)
#     assert parts[0] == "http://www.w3.org/1999/xhtml"
#     assert parts[1] == "p"
#     assert parts[2] == "p"
#     #
#     parts = support.process_name("dc:title", parts, false).as(not null)
#     assert parts[0] == "http://www.purl.org/dc#"
#     assert parts[1] == "title"
#     assert parts[2] == "dc:title"
#     #
#     support.pop_context
#
# Note that this class is optimized for the use case where most
# elements do not contain Namespace declarations: if the same
# prefix/URI mapping is repeated for each context (for example), this
# class will be somewhat less efficient.
#
# Although SAX drivers (parsers) may choose to use this class to
# implement namespace handling, they are not required to do so.
# Applications must track namespace information themselves if they
# want to use namespace information.
#
# Note: The original source code and documentation of this class comes, in part,
# from [SAX 2.0](http://www.saxproject.org).
class NamespaceSupport
	# The XML Namespace URI as a constant.
	#
	# The value is `http://www.w3.org/XML/1998/namespace`
	# as defined in the “Namespaces in XML” specification.
	#
	# This is the Namespace URI that is automatically mapped
	# to the `xml` prefix.
	var xmlns = "http://www.w3.org/XML/1998/namespace"

	# The namespace declaration URI as a constant.
	#
	# The value is `http://www.w3.org/xmlns/2000/`, as defined
	# in an erratum to the “Namespaces in XML” recommendation.
	#
	# This is the Namespace URI that is automatically mapped
	# to the `xmlns` prefix.
	var nsdecl = "http://www.w3.org/xmlns/2000/"

	# Stack of contexts, indexed by depth.
	#
	# Entries above `context_position` are popped contexts that are kept
	# around so that `push_context` can reuse them.
	private var contexts = new Array[Context].with_capacity(32)

	# The context currently in force (`contexts[context_position]`).
	private var current_context = new Context

	# Index of `current_context` in `contexts`.
	private var context_position: Int = 0

	init do
		install_base_context
	end

	# Register `current_context` as the base context and declare the built-in prefixes.
	#
	# Shared by `init` and `reset`. `contexts` must be empty and
	# `current_context` must be a fresh context when this is called.
	private fun install_base_context do
		contexts.push(current_context)
		current_context.declare_prefix("xml", xmlns)
		current_context.declare_prefix("xmlns", nsdecl)
	end

	# Reset this Namespace support object for reuse.
	#
	# It is necessary to invoke this method before reusing the
	# Namespace support object for a new session.
	fun reset do
		contexts.clear
		contexts.enlarge(32)
		context_position = 0
		current_context = new Context
		install_base_context
	end

	# Start a new Namespace context.
	#
	# The new context will automatically inherit
	# the declarations of its parent context, but it will also keep
	# track of which declarations were made within this context.
	#
	# Event callback code should start a new context once per element.
	# This means being ready to call this in either of two places.
	# For elements that don't include namespace declarations, the
	# `ContentHandler.start_element` callback is the right place.
	# For elements with such a declaration, it's done in the first
	# `ContentHandler.start_prefix_mapping` callback.
	# A boolean flag can be used to
	# track whether a context has been started yet. When either of
	# those methods is called, it checks the flag to see if a new context
	# needs to be started. If so, it starts the context and sets the
	# flag. After `ContentHandler.start_element` does that, it always clears
	# the flag.
	#
	# Normally, SAX drivers would push a new context at the beginning
	# of each XML element. Then they perform a first pass over the
	# attributes to process all namespace declarations, making
	# `ContentHandler.start_prefix_mapping` callbacks.
	# Then a second pass is made, to determine the namespace-qualified
	# names for all attributes and for the element name.
	# Finally all the information for the
	# `ContentHandler.start_element` callback is available,
	# so it can then be made.
	#
	# The Namespace support object always starts with a base context
	# already in force: in this context, only the `xml` and `xmlns`
	# prefixes are declared.
	#
	# SEE: `sax::ContentHandler`
	#
	# SEE: `pop_context`
	fun push_context do
		# The context being left must not receive further declarations.
		current_context.decls_ok = false
		context_position += 1

		# Reuse the context cached at this depth, or extend the stack.
		if context_position < contexts.length then
			current_context = contexts[context_position]
		else
			current_context = new Context
			contexts.push(current_context)
		end

		# `context_position` was just incremented, so a parent always exists.
		current_context.parent = contexts[context_position - 1]
	end

	# Revert to the previous Namespace context.
	#
	# Normally, you should pop the context at the end of each
	# XML element. After popping the context, all Namespace prefix
	# mappings that were previously in force are restored.
	#
	# You must not attempt to declare additional Namespace
	# prefixes after popping a context, unless you push another
	# context first.
	#
	# The base context cannot be popped: doing so fails the
	# `stack_not_empty` assertion.
	#
	# SEE: `push_context`
	fun pop_context do
		assert stack_not_empty: context_position > 0
		# Release the tables of the popped context; the object itself stays
		# in `contexts` for reuse by a later `push_context`.
		current_context.clear
		context_position -= 1
		current_context = contexts[context_position]
	end

	# Declare a Namespace prefix.
	#
	# All prefixes must be declared before they are referenced.
	# For example, a SAX driver (parser)
	# would scan an element's attributes
	# in two passes: first for namespace declarations,
	# then a second pass using `process_name` to
	# interpret prefixes against (potentially redefined) prefixes.
	#
	# This method declares a prefix in the current Namespace
	# context; the prefix will remain in force until this context
	# is popped, unless it is shadowed in a descendant context.
	#
	# To declare the default element Namespace, use the empty string as
	# the prefix.
	#
	# Note that you must *not* declare a prefix after
	# you've pushed and popped another Namespace context, or
	# treated the declarations phase as complete by processing
	# a prefixed name.
	#
	# Note that there is an asymmetry in this library:
	# `prefix` will not return the `""` prefix,
	# even if you have declared a default element namespace.
	# To check for a default namespace,
	# you have to look it up explicitly using `uri`.
	# This asymmetry exists to make it easier to look up prefixes
	# for attribute names, where the default prefix is not allowed.
	#
	# Parameters:
	#
	# * `prefix`: prefix to declare, or the empty string to
	# indicate the default element namespace. This may never have
	# the value `xml` or `xmlns`.
	# * `uri`: The Namespace URI to associate with the prefix.
	# This may never be the value of `xmlns` or `nsdecl`.
	#
	# Returns:
	#
	# `true` if the prefix and the URI are legal, `false` otherwise.
	# Nothing is declared when `false` is returned.
	#
	# SEE: `process_name`
	#
	# SEE: `uri`
	#
	# SEE: `prefix`
	fun declare_prefix(prefix: String, uri: String): Bool do
		# The built-in prefixes and their URIs can never be (re)bound.
		if prefix == "xml" or prefix == "xmlns" then return false
		if uri == xmlns or uri == nsdecl then return false

		current_context.declare_prefix(prefix, uri)
		return true
	end

	# Process a raw XML qualified name, after all declarations in the current context have been handled by `declare_prefix`.
	#
	# This method processes a raw XML qualified name in the current
	# context by removing the prefix and looking it up among the
	# prefixes currently declared. The return value will be the
	# array supplied by the caller, filled in as follows:
	#
	# * `parts[0]`: Namespace URI, or an empty string if none is in use.
	# * `parts[1]`: local name (without prefix).
	# * `parts[2]`: original raw name.
	#
	# If the raw name has a prefix that has not been declared, then
	# the return value will be `null` and `parts` is left untouched.
	#
	# Note that attribute names are processed differently than
	# element names: an unprefixed element name will receive the
	# default Namespace (if any), while an unprefixed attribute name
	# will not.
	#
	# The unprefixed attribute name `xmlns` is special: it is reported
	# with `nsdecl` as its Namespace URI and an empty local name.
	#
	# Parameters:
	#
	# * `qname`: raw XML qualified name to be processed.
	# * `parts`: array supplied by the caller. Will be enlarged to 3 elements if
	# needed. If the specified array contains more than 3 elements, its length
	# will be kept intact.
	# * `is_attribute`: flag indicating whether this is an attribute name
	# (`true`) or an element name (`false`).
	#
	# SEE: `declare_prefix`
	fun process_name(qname: String, parts: Array[String], is_attribute: Bool):
			nullable Array[String] do
		var my_parts = current_context.process_name(qname, is_attribute)
		if my_parts == null then return null

		# Copy element by element into the caller's array rather than
		# returning the array obtained from the context.
		parts[0] = my_parts[0]
		parts[1] = my_parts[1]
		parts[2] = my_parts[2]

		# A bare `xmlns` attribute is a default-namespace declaration.
		if parts[0] == "" and qname == "xmlns" and is_attribute then
			parts[0] = nsdecl
			parts[1] = ""
		end
		return parts
	end

	# Look up a prefix and get the currently-mapped Namespace URI.
	#
	# This method looks up the prefix in the current context.
	# Use the empty string (`""`) for the default Namespace.
	#
	# Parameters:
	#
	# * `prefix`: The prefix to look up.
	#
	# Returns:
	#
	# The associated Namespace URI, or `null` if the prefix
	# is undeclared in this context.
	#
	# SEE: `prefix`
	#
	# SEE: `prefixes_of`
	fun uri(prefix: String): nullable String do
		return current_context.uri(prefix)
	end

	# Return all prefixes currently declared.
	#
	# Note: if there is a default prefix, it will not be
	# returned in this enumeration; check for the default prefix
	# using the `uri` with an argument of `""` or use `declared_prefixes`.
	#
	# Returns:
	#
	# All prefixes declared in the current context except
	# for the empty (default) prefix.
	#
	# SEE: `declared_prefixes`
	#
	# SEE: `uri`
	fun prefixes: Collection[String] do return current_context.prefixes

	# Return one of the prefixes mapped to a Namespace URI.
	#
	# If more than one prefix is currently mapped to the same
	# URI, this method will make an arbitrary selection; if you
	# want all of the prefixes, use the `prefixes_of` method instead.
	#
	# Note: this will never return the empty (default) prefix;
	# to check for a default prefix, use the `uri`
	# method with an argument of `""`.
	#
	# Parameters:
	#
	# * `uri`: Namespace URI.
	#
	# Returns:
	#
	# One of the prefixes currently mapped to the URI supplied,
	# or `null` if none is mapped or if the URI is assigned to
	# the default Namespace.
	#
	# SEE: `prefixes_of`
	#
	# SEE: `uri`
	fun prefix(uri: String): nullable String do
		return current_context.prefix(uri)
	end

	# Return all prefixes currently declared for an URI.
	#
	# This method returns prefixes mapped to a specific Namespace
	# URI. The `xml` prefix will be included. If you want only one
	# prefix that's mapped to the Namespace URI, and you don't care
	# which one you get, use the `prefix` method instead.
	#
	# Note: the empty (default) prefix is *never* included
	# in this enumeration; to check for the presence of a default
	# Namespace, use the `uri` method with an argument of `""`.
	#
	# Parameters:
	#
	# * `uri`: The Namespace URI.
	#
	# SEE: `prefix`
	#
	# SEE: `declared_prefixes`
	#
	# SEE: `uri`
	fun prefixes_of(uri: String): Collection[String] do
		var matching = new Array[String]

		# `self.uri` is spelled out because the parameter shadows the method.
		for candidate in self.prefixes do
			if uri == self.uri(candidate) then matching.push(candidate)
		end
		return matching
	end

	# Return all prefixes declared (and undeclared) in this context.
	#
	# The empty (default) prefix will be included in this
	# enumeration; note that this behaviour differs from that of
	# `prefix`, `prefixes` and `prefixes_of`.
	#
	# SEE: `prefixes`
	#
	# SEE: `uri`
	fun declared_prefixes: Collection[String] do
		return current_context.declared_prefixes
	end
end
385
386
# Internal class for a single Namespace context.
#
# This module caches and reuses Namespace contexts,
# so the number allocated
# will be equal to the element depth of the document, not to the total
# number of elements (i.e. 5-10 rather than tens of thousands).
# Also, data structures used to represent contexts are shared when
# possible (child contexts without declarations) to further reduce
# the amount of memory that's consumed.
#
# Note: The original source code and documentation of this class comes, in part,
# from [SAX 2.0](http://www.saxproject.org).
private class Context

	# Empty collection returned when there is nothing to enumerate.
	var empty: Collection[String] = new Array[String].with_capacity(0)

	# `prefix` -> `uri`
	#
	# Shared with the parent context until `copy_tables` is called.
	var prefix_table: nullable Map[String, String] = null

	# Cache of `process_name` for elements.
	#
	# `qname -> [uri, local_name, qname]`
	var element_name_table: nullable Map[String, Array[String]] = null

	# Cache of `process_name` for attributes.
	#
	# `qname -> [uri, local_name, qname]`
	var attribute_name_table: nullable Map[String, Array[String]] = null

	# Namespace in absence of prefix.
	var default_ns: nullable String = null

	# Can we currently declare prefixes in this context?
	var decls_ok: Bool = true is writable

	# All prefixes declared in this context.
	var declarations: nullable Array[String] = null

	# Was `copy_tables` called since the last call to `parent=`?
	var decl_seen: Bool = false

	# Parent context.
	var p_parent: nullable Context = null

	# (Re)set the parent of this Namespace context.
	#
	# The context must either have been freshly constructed,
	# or must have been cleared.
	#
	# The tables of the parent are shared (not copied) until the first
	# declaration is made in this context.
	#
	# Parameters:
	#
	# * `parent`: parent Namespace context object.
	fun parent=(parent: Context) do
		p_parent = parent
		declarations = null
		prefix_table = parent.prefix_table
		element_name_table = parent.element_name_table
		attribute_name_table = parent.attribute_name_table
		default_ns = parent.default_ns
		decl_seen = false
		decls_ok = true
	end

	# Makes associated state become collectible, invalidating this context.
	#
	# `parent=` must be called before this context may be used again.
	fun clear do
		p_parent = null
		prefix_table = null
		element_name_table = null
		attribute_name_table = null
		default_ns = null
		declarations = null
	end

	# Declare a Namespace prefix for this context.
	#
	# An empty `prefix` sets the default Namespace, and an empty `uri`
	# removes the binding of `prefix`. In every case, `prefix` is recorded
	# in `declared_prefixes`.
	#
	# Parameters:
	#
	# * `prefix`: prefix to declare.
	# * `uri`: associated Namespace URI.
	#
	# SEE: `NamespaceSupport.declare_prefix`
	fun declare_prefix(prefix: String, uri: String) do
		assert legal_state: decls_ok else
			sys.stderr.write("Can't declare any more prefixes in this context.\n")
		end

		# Lazy processing: stop sharing the tables with the parent only
		# when the first declaration is made.
		if not decl_seen then copy_tables

		if prefix == "" then
			if uri == "" then
				default_ns = null
			else
				default_ns = uri
			end
		else
			var table = prefix_table.as(not null)
			if uri == "" then
				table.keys.remove(prefix)
			else
				table[prefix] = uri
			end
		end
		declarations.as(not null).push(prefix)
	end

	# Process a raw XML qualified name in this context.
	#
	# Calling this method ends the declaration phase of this context.
	#
	# Parameters:
	#
	# * `qname`: raw XML qualified name.
	# * `is_attribute`: `true` if this is an attribute name.
	#
	# Returns:
	#
	# An array of three strings containing the URI part (or empty string),
	# the local part and the raw name, or `null` if there is an undeclared
	# prefix. The returned array is the one stored in the cache: callers
	# must not modify it.
	#
	# SEE: `NamespaceSupport.process_name`
	fun process_name(qname: String, is_attribute: Bool):
			nullable Array[String] do
		# Detect errors in call sequence.
		decls_ok = false

		# Select the appropriate cache.
		var table: Map[String, Array[String]]
		if is_attribute then
			table = attribute_name_table.as(not null)
		else
			table = element_name_table.as(not null)
		end

		# Start by looking in the cache, and return immediately if the
		# name is already known in this context.
		var cached = table.get_or_null(qname)
		if cached != null then return cached

		# We haven't seen this name in this context before. Maybe in the
		# parent context, but we can't assume prefix bindings are the same.

		# Namespace of a name without a usable prefix: attributes never
		# receive the default Namespace.
		var unprefixed_uri = ""
		if not is_attribute then unprefixed_uri = default_ns or else ""

		var ns_uri: String
		var local_name: String
		var match = qname.search(':')

		if match == null then
			# No prefix.
			ns_uri = unprefixed_uri
			local_name = qname
		else
			var name_prefix = qname.substring(0, match.from)

			local_name = qname.substring_from(match.after)
			if name_prefix == "" then
				# A leading colon is handled like the absence of prefix.
				ns_uri = unprefixed_uri
			else if (not is_attribute) and name_prefix == "xmlns" then
				# `xmlns` can only prefix attribute names.
				return null
			else
				var bound = prefix_table.as(not null).get_or_null(name_prefix)

				# Undeclared prefix.
				if bound == null then return null
				ns_uri = bound
			end
		end

		var name = new Array[String].with_capacity(3)
		name.push(ns_uri)
		name.push(local_name)
		name.push(qname)

		# Save in the cache for future use.
		# (Could be shared with parent context...)
		table[qname] = name
		return name
	end

	# Look up the URI associated with a prefix in this context.
	#
	# Return `null` if no URI is associated with a specified prefix.
	#
	# Parameters:
	#
	# * `prefix`: prefix to look up, or `""` for the default Namespace.
	#
	# SEE: `NamespaceSupport.uri`
	fun uri(prefix: String): nullable String do
		if prefix == "" then return default_ns

		var table = prefix_table
		if table == null then return null
		return table.get_or_null(prefix)
	end

	# Look up one of the prefixes associated with a URI in this context.
	#
	# Since many prefixes may be mapped to the same URI,
	# the return value may be unreliable.
	#
	# Parameters:
	#
	# * `uri`: URI to look up.
	#
	# Returns:
	#
	# The associated prefix, or `null` if none is declared.
	#
	# SEE: `NamespaceSupport.prefix`
	fun prefix(uri: String): nullable String do
		# Note: We do not use the original code from SAX 2.0.1 because it is
		# buggy with redefined prefixes. For example, with
		# `<x xmlns:y="1"><z xmlns:y="2" /></x>`, when in `z`, `prefix("1")`
		# returns `"y"` in the original code while it should return `null`.
		# Our code is slower, but it works.
		for candidate in prefixes do
			# `self.uri` is spelled out because the parameter shadows the method.
			if uri == self.uri(candidate) then return candidate
		end
		return null
	end

	# Return all prefixes declared in this context (possibly empty).
	#
	# SEE: `NamespaceSupport.declared_prefixes`
	fun declared_prefixes: Collection[String] do
		return declarations or else empty
	end

	# Return all prefixes currently in force.
	#
	# The default prefix, if in force, is *not*
	# returned, and will have to be checked for separately.
	#
	# SEE: `NamespaceSupport.prefixes`
	fun prefixes: Collection[String] do
		var table = prefix_table
		if table == null then return empty
		return table.keys
	end

	# Copy on write for the internal tables in this context.
	#
	# The prefix table is duplicated, while the name caches and the list of
	# declarations restart empty.
	#
	# This class is optimized for the normal case where most
	# elements do not contain Namespace declarations.
	fun copy_tables do
		var inherited = prefix_table
		var own_table = new HashMap[String, String]

		if inherited != null then own_table.recover_with(inherited)
		prefix_table = own_table
		element_name_table = new HashMap[String, Array[String]]
		attribute_name_table = new HashMap[String, Array[String]]
		declarations = new Array[String]
		decl_seen = true
	end
end