e682321d95752c8f79288067bcaedc42c4672fef
[nit.git] / lib / sax / helpers / namespace_support.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Encapsulates Namespace logic for use by applications using SAX, or internally by SAX drivers.
12 module sax::helpers::namespace_support
13
14 # Encapsulates Namespace logic for use by applications using SAX, or internally by SAX drivers.
15 #
16 # This class encapsulates the logic of Namespace processing:
17 # it tracks the declarations currently in force for each context
18 # and automatically processes qualified XML names into their
19 # Namespace parts; it can also be used in reverse for generating
20 # XML qualified names from Namespaces.
21 #
22 # Namespace support objects are reusable, but the reset method
23 # must be invoked between each session.
24 #
25 # Here is a simple session:
26 #
27 # var parts: Array[String] = new Array[String].with_capacity(3)
28 # var support = new NamespaceSupport
29 # #
30 # support.push_context
31 # support.declare_prefix("", "http://www.w3.org/1999/xhtml")
32 # support.declare_prefix("dc", "http://www.purl.org/dc#")
33 # #
34 # parts = support.process_name("p", parts, false).as(not null)
35 # assert parts[0] == "http://www.w3.org/1999/xhtml"
36 # assert parts[1] == "p"
37 # assert parts[2] == "p"
38 # #
39 # parts = support.process_name("dc:title", parts, false).as(not null)
40 # assert parts[0] == "http://www.purl.org/dc#"
41 # assert parts[1] == "title"
42 # assert parts[2] == "dc:title"
43 # #
44 # support.pop_context
45 #
46 # Note that this class is optimized for the use case where most
47 # elements do not contain Namespace declarations: if the same
48 # prefix/URI mapping is repeated for each context (for example), this
49 # class will be somewhat less efficient.
50 #
51 # Although SAX drivers (parsers) may choose to use this class to
52 # implement namespace handling, they are not required to do so.
53 # Applications must track namespace information themselves if they
54 # want to use namespace information.
55 #
56 # Note: The original source code and documentation of this class comes, in part,
57 # from [SAX 2.0](http://www.saxproject.org).
class NamespaceSupport
	# The XML Namespace URI as a constant.
	#
	# The value is `http://www.w3.org/XML/1998/namespace`
	# as defined in the “Namespaces in XML” specification.
	#
	# This is the Namespace URI that is automatically mapped
	# to the `xml` prefix.
	var xmlns = "http://www.w3.org/XML/1998/namespace"

	# The namespace declaration URI as a constant.
	#
	# The value is `http://www.w3.org/xmlns/2000/`, as defined
	# in an erratum to the “Namespaces in XML” recommendation.
	#
	# This is the Namespace URI that is automatically mapped
	# to the `xmlns` prefix.
	var nsdecl = "http://www.w3.org/xmlns/2000/"

	# Stack of contexts, indexed by depth. Contexts located above
	# `context_position` are cleared and kept around to be reused.
	private var contexts: Array[Context] = new Array[Context].with_capacity(32)

	# The context currently in force (`contexts[context_position]`).
	private var current_context: Context = new Context

	# Depth of `current_context` in `contexts`. `0` is the base context.
	private var context_position: Int = 0

	init do
		contexts.push(current_context)
		declare_reserved_prefixes
	end

	# Reset this Namespace support object for reuse.
	#
	# It is necessary to invoke this method before reusing the
	# Namespace support object for a new session.
	#
	# All the contexts are discarded and a fresh base context, in which
	# only the reserved `xml` and `xmlns` prefixes are bound, is installed.
	fun reset do
		contexts.clear
		contexts.enlarge(32)
		context_position = 0
		current_context = new Context
		contexts.push(current_context)
		declare_reserved_prefixes
	end

	# Bind the reserved `xml` and `xmlns` prefixes in the current context.
	#
	# This goes straight to the context because the public `declare_prefix`
	# rejects these two prefixes.
	private fun declare_reserved_prefixes do
		current_context.declare_prefix("xml", xmlns)
		current_context.declare_prefix("xmlns", nsdecl)
	end

	# Start a new Namespace context.
	#
	# The new context will automatically inherit
	# the declarations of its parent context, but it will also keep
	# track of which declarations were made within this context.
	#
	# Event callback code should start a new context once per element.
	# This means being ready to call this in either of two places.
	# For elements that don't include namespace declarations, the
	# `ContentHandler.start_element` callback is the right place.
	# For elements with such a declaration, it is done in the first
	# `ContentHandler.start_prefix_mapping` callback.
	# A boolean flag can be used to
	# track whether a context has been started yet. When either of
	# those methods is called, it checks the flag to see if a new context
	# needs to be started. If so, it starts the context and sets the
	# flag. After `ContentHandler.start_element` does that, it always clears
	# the flag.
	#
	# Normally, SAX drivers would push a new context at the beginning
	# of each XML element. Then they perform a first pass over the
	# attributes to process all namespace declarations, making
	# `ContentHandler.start_prefix_mapping` callbacks.
	# Then a second pass is made, to determine the namespace-qualified
	# names for all attributes and for the element name.
	# Finally all the information for the
	# `ContentHandler.start_element` callback is available,
	# so it can then be made.
	#
	# The Namespace support object always starts with a base context
	# already in force: in this context, only the reserved `xml` and
	# `xmlns` prefixes are declared.
	#
	# SEE: `sax::ContentHandler`
	#
	# SEE: `pop_context`
	fun push_context do
		# The enclosing context can no longer accept declarations.
		current_context.decls_ok = false
		context_position += 1

		# Reuse the context cached at this depth, or grow the stack.
		if context_position >= contexts.length then
			current_context = new Context
			contexts.push(current_context)
		else
			current_context = contexts[context_position]
		end

		# `context_position` was just incremented, so it is at least 1
		# and the context below always exists.
		current_context.parent = contexts[context_position - 1]
	end

	# Revert to the previous Namespace context.
	#
	# Normally, you should pop the context at the end of each
	# XML element. After popping the context, all Namespace prefix
	# mappings that were previously in force are restored.
	#
	# You must not attempt to declare additional Namespace
	# prefixes after popping a context, unless you push another
	# context first.
	#
	# The base context cannot be popped: doing so violates the
	# `stack_not_empty` assertion.
	#
	# SEE: `push_context`
	fun pop_context do
		assert stack_not_empty: context_position > 0
		# The popped context stays in `contexts` so `push_context` can reuse it.
		current_context.clear
		context_position -= 1
		current_context = contexts[context_position]
	end

	# Declare a Namespace prefix.
	#
	# All prefixes must be declared before they are referenced.
	# For example, a SAX driver (parser)
	# would scan an element's attributes
	# in two passes: first for namespace declarations,
	# then a second pass using `process_name` to
	# interpret prefixes against (potentially redefined) prefixes.
	#
	# This method declares a prefix in the current Namespace
	# context; the prefix will remain in force until this context
	# is popped, unless it is shadowed in a descendant context.
	#
	# To declare the default element Namespace, use the empty string as
	# the prefix.
	#
	# Note that you must *not* declare a prefix after
	# you've pushed and popped another Namespace context, or
	# treated the declarations phase as complete by processing
	# a prefixed name.
	#
	# Note that there is an asymmetry in this library:
	# `prefix` will not return the `""` prefix,
	# even if you have declared a default element namespace.
	# To check for a default namespace,
	# you have to look it up explicitly using `uri`.
	# This asymmetry exists to make it easier to look up prefixes
	# for attribute names, where the default prefix is not allowed.
	#
	# Parameters:
	#
	# * `prefix`: prefix to declare, or the empty string to
	# indicate the default element namespace. This may never have
	# the value `xml` or `xmlns`.
	# * `uri`: The Namespace URI to associate with the prefix.
	# This may never be the value of `xmlns` or `nsdecl`.
	#
	# Returns:
	#
	# `true` if the prefix and the URI are legal, `false` otherwise.
	# Nothing is declared when `false` is returned.
	#
	# SEE: `process_name`
	#
	# SEE: `uri`
	#
	# SEE: `prefix`
	fun declare_prefix(prefix: String, uri: String): Bool do
		# Reserved prefixes and reserved URIs cannot be (re)bound.
		if prefix == "xml" or prefix == "xmlns" then return false
		if uri == xmlns or uri == nsdecl then return false

		current_context.declare_prefix(prefix, uri)
		return true
	end

	# Process a raw XML qualified name, after all declarations in the current context have been handled by `declare_prefix`.
	#
	# This method processes a raw XML qualified name in the current
	# context by removing the prefix and looking it up among the
	# prefixes currently declared. The return value will be the
	# array supplied by the caller, filled in as follows:
	#
	# * `parts[0]`: Namespace URI, or an empty string if none is in use.
	# * `parts[1]`: local name (without prefix).
	# * `parts[2]`: original raw name.
	#
	# If the raw name has a prefix that has not been declared, then
	# the return value will be `null` and `parts` is left untouched.
	#
	# Note that attribute names are processed differently than
	# element names: an unprefixed element name will receive the
	# default Namespace (if any), while an unprefixed attribute name
	# will not. The unprefixed attribute name `xmlns` is special: it is
	# reported in the `nsdecl` Namespace with an empty local name.
	#
	# Parameters:
	#
	# * `qname`: raw XML qualified name to be processed.
	# * `parts`: array supplied by the caller. Will be enlarged to 3 elements if
	# needed. If the specified array contains more than 3 elements, its length
	# will be kept intact.
	# * `is_attribute`: flag indicating whether this is an attribute name
	# (`true`) or an element name (`false`).
	#
	# SEE: `declare_prefix`
	fun process_name(qname: String, parts: Array[String], is_attribute: Bool):
			nullable Array[String] do
		var resolved = current_context.process_name(qname, is_attribute)
		if resolved == null then return null

		# Copy instead of returning `resolved`: that array belongs to the
		# cache of the context and must not be exposed to the caller.
		parts[0] = resolved[0]
		parts[1] = resolved[1]
		parts[2] = resolved[2]

		if parts[0] == "" and qname == "xmlns" and is_attribute then
			parts[0] = nsdecl
			parts[1] = ""
		end
		return parts
	end

	# Look up a prefix and get the currently-mapped Namespace URI.
	#
	# This method looks up the prefix in the current context.
	# Use the empty string (`""`) for the default Namespace.
	#
	# Parameters:
	#
	# * `prefix`: The prefix to look up.
	#
	# Returns:
	#
	# The associated Namespace URI, or `null` if the prefix
	# is undeclared in this context.
	#
	# SEE: `prefix`
	#
	# SEE: `prefixes_of`
	fun uri(prefix: String): nullable String do
		return current_context.uri(prefix)
	end

	# Return all prefixes currently declared.
	#
	# Note: if there is a default prefix, it will not be
	# returned in this enumeration; check for the default prefix
	# using the `uri` with an argument of `""` or use `declared_prefixes`.
	#
	# Returns:
	#
	# All prefixes declared in the current context except
	# for the empty (default) prefix.
	#
	# SEE: `declared_prefixes`
	#
	# SEE: `uri`
	fun prefixes: Collection[String] do return current_context.prefixes

	# Return one of the prefixes mapped to a Namespace URI.
	#
	# If more than one prefix is currently mapped to the same
	# URI, this method will make an arbitrary selection; if you
	# want all of the prefixes, use the `prefixes_of` method instead.
	#
	# Note: this will never return the empty (default) prefix;
	# to check for a default prefix, use the `uri`
	# method with an argument of `""`.
	#
	# Parameters:
	#
	# * `uri`: Namespace URI.
	#
	# Returns:
	#
	# One of the prefixes currently mapped to the URI supplied,
	# or `null` if none is mapped or if the URI is assigned to
	# the default Namespace.
	#
	# SEE: `prefixes_of`
	#
	# SEE: `uri`
	fun prefix(uri: String): nullable String do
		return current_context.prefix(uri)
	end

	# Return all prefixes currently declared for a URI.
	#
	# This method returns prefixes mapped to a specific Namespace
	# URI. The `xml` prefix will be included. If you want only one
	# prefix that's mapped to the Namespace URI, and you don't care
	# which one you get, use the `prefix` method instead.
	#
	# Note: the empty (default) prefix is *never* included
	# in this enumeration; to check for the presence of a default
	# Namespace, use the `uri` method with an argument of `""`.
	#
	# Parameters:
	#
	# * `uri`: The Namespace URI.
	#
	# SEE: `prefix`
	#
	# SEE: `declared_prefixes`
	#
	# SEE: `uri`
	fun prefixes_of(uri: String): Collection[String] do
		var matching = new Array[String]

		# The parameter shadows the `uri` method, hence the explicit `self`.
		for candidate in prefixes do
			if uri == self.uri(candidate) then matching.push(candidate)
		end
		return matching
	end

	# Return all prefixes declared (and undeclared) in this context.
	#
	# The empty (default) prefix will be included in this
	# enumeration; note that this behaviour differs from that of
	# `prefix`, `prefixes` and `prefixes_of`.
	#
	# SEE: `prefixes`
	#
	# SEE: `uri`
	fun declared_prefixes: Collection[String] do
		return current_context.declared_prefixes
	end
end
385
386
387 # Internal class for a single Namespace context.
388 #
389 # This module caches and reuses Namespace contexts,
390 # so the number allocated
391 # will be equal to the element depth of the document, not to the total
392 # number of elements (i.e. 5-10 rather than tens of thousands).
393 # Also, data structures used to represent contexts are shared when
394 # possible (child contexts without declarations) to further reduce
395 # the amount of memory that's consumed.
396 #
397 # Note: The original source code and documentation of this class comes, in part,
398 # from [SAX 2.0](http://www.saxproject.org).
private class Context

	# Empty collection returned by queries when nothing is declared.
	private var empty: Collection[String] = new Array[String].with_capacity(0)

	# Bindings in force: `prefix` -> `uri`
	private var prefix_table: nullable Map[String, String] = null

	# Memoized results of `process_name` for element names.
	#
	# `qname -> [uri, local_name, qname]`
	private var element_name_table: nullable Map[String, Array[String]] = null

	# Memoized results of `process_name` for attribute names.
	#
	# `qname -> [uri, local_name, qname]`
	private var attribute_name_table: nullable Map[String, Array[String]] = null

	# URI of the default Namespace, or `null` if there is none.
	private var default_ns: nullable String = null

	# Is `declare_prefix` still allowed in this context?
	var decls_ok: Bool = true is writable

	# Prefixes declared by this very context, in declaration order.
	private var declarations: nullable Array[String] = null

	# Has `copy_tables` run since the last call to `parent=`?
	private var decl_seen: Bool = false

	# Enclosing context.
	private var p_parent: nullable Context = null

	init do
	end

	# (Re)set the parent of this Namespace context.
	#
	# The context must either have been freshly constructed,
	# or must have been cleared.
	#
	# Parameters:
	#
	# * `parent`: parent Namespace context object.
	fun parent=(parent: Context) do
		p_parent = parent

		# Share the tables of the parent until `copy_tables` is called.
		prefix_table = parent.prefix_table
		element_name_table = parent.element_name_table
		attribute_name_table = parent.attribute_name_table
		default_ns = parent.default_ns

		# Nothing has been declared here yet, and declaring is allowed again.
		declarations = null
		decl_seen = false
		decls_ok = true
	end

	# Drop every reference held by this context so the associated state
	# becomes collectible. The context is invalid afterwards.
	#
	# `parent=` must be called before this context may be used again.
	fun clear do
		declarations = null
		default_ns = null
		attribute_name_table = null
		element_name_table = null
		prefix_table = null
		p_parent = null
	end

	# Declare a Namespace prefix for this context.
	#
	# An empty `prefix` sets the default Namespace. An empty `uri` removes
	# the binding instead of adding one. In every case, `prefix` is appended
	# to the list returned by `declared_prefixes`.
	#
	# Parameters:
	#
	# * `prefix`: prefix to declare.
	# * `uri`: associated Namespace URI.
	#
	# SEE: `NamespaceSupport.declare_prefix`
	fun declare_prefix(prefix: String, uri: String) do
		assert legal_state: decls_ok else
			sys.stderr.write("Can't declare any more prefixes in this context.\n")
		end

		# The tables are only duplicated on the first declaration.
		if not decl_seen then copy_tables

		var unbinding = uri == ""
		if prefix == "" then
			default_ns = if unbinding then null else uri
		else if unbinding then
			prefix_table.keys.remove(prefix)
		else
			prefix_table[prefix] = uri
		end
		declarations.push(prefix)
	end

	# Process a raw XML qualified name in this context.
	#
	# Calling this method ends the declaration phase of the context
	# (`decls_ok` becomes `false`).
	#
	# Parameters:
	#
	# * `qname`: raw XML qualified name.
	# * `is_attribute`: `true` if this is an attribute name.
	#
	# Returns:
	#
	# An array of three strings containing the URI part (or empty string),
	# the local part and the raw name, or `null` if there is an undeclared
	# prefix. The `xmlns` prefix is always rejected on element names.
	# The returned array is the cached one: callers must not modify it.
	#
	# SEE: `NamespaceSupport.process_name`
	fun process_name(qname: String, is_attribute: Bool):
			nullable Array[String] do
		# Detect errors in call sequence.
		decls_ok = false

		# Elements and attributes are resolved differently,
		# so each kind has its own cache.
		var cache: Map[String, Array[String]]
		if is_attribute then
			cache = attribute_name_table.as(not null)
		else
			cache = element_name_table.as(not null)
		end

		# Already resolved with the bindings of this context?
		var known = cache.get_or_null(qname)
		if known != null then return known

		# Split the raw name at the first colon, if any.
		var colon = qname.search(':')
		var prefix = ""
		var local_name = qname
		if colon != null then
			prefix = qname.substring(0, colon.from)
			local_name = qname.substring_from(colon.after)
		end

		# Find the Namespace URI the name belongs to.
		var ns: String
		if prefix == "" then
			# Only element names receive the default Namespace.
			if is_attribute then ns = "" else ns = default_ns or else ""
		else if (not is_attribute) and prefix == "xmlns" then
			return null
		else
			var bound = prefix_table.get_or_null(prefix)
			if bound == null then return null
			ns = bound
		end

		var resolved = new Array[String].with_capacity(3)
		resolved.push(ns)
		resolved.push(local_name)
		resolved.push(qname)

		# Remember the answer. Failed lookups are not cached.
		cache[qname] = resolved
		return resolved
	end

	# Look up the URI associated with a prefix in this context.
	#
	# The empty prefix designates the default Namespace.
	# Return `null` if no URI is associated with a specified prefix.
	#
	# Parameters:
	#
	# * `prefix`: prefix to look up.
	#
	# SEE: `NamespaceSupport.uri`
	fun uri(prefix: String): nullable String do
		if prefix == "" then return default_ns

		var bindings = prefix_table
		if bindings == null then return null
		return bindings.get_or_null(prefix)
	end

	# Look up one of the prefixes associated with a URI in this context.
	#
	# Since many prefixes may be mapped to the same URI,
	# the return value may be unreliable.
	#
	# Parameters:
	#
	# * `uri`: URI to look up.
	#
	# Returns:
	#
	# The associated prefix, or `null` if none is declared.
	#
	# SEE: `NamespaceSupport.prefix`
	fun prefix(uri: String): nullable String do
		# Note: This is not the algorithm of SAX 2.0.1, which mishandles
		# redefined prefixes. For example, with
		# `<x xmlns:y="1"><z xmlns:y="2" /></x>`, when in `z`, `prefix("1")`
		# returns `"y"` in the original code while it should return `null`.
		# Checking every prefix in force is slower, but it works.
		for candidate in prefixes do
			if uri == self.uri(candidate) then return candidate
		end
		return null
	end

	# Return all prefixes declared in this context (possibly empty).
	#
	# SEE: `NamespaceSupport.declared_prefixes`
	fun declared_prefixes: Collection[String] do
		var declared = declarations
		if declared == null then return empty
		return declared
	end

	# Return all prefixes currently in force.
	#
	# The default prefix, if in force, is *not*
	# returned, and will have to be checked for separately.
	#
	# SEE: `NamespaceSupport.prefixes`
	fun prefixes: Collection[String] do
		var bindings = prefix_table
		if bindings == null then return empty
		return bindings.keys
	end

	# Give this context its own tables, so the ones shared with the parent
	# are left untouched by the declarations made here.
	#
	# The prefix bindings are copied. Both name caches restart empty and
	# the list of declarations is created.
	#
	# This class is optimized for the normal case where most
	# elements do not contain Namespace declarations.
	private fun copy_tables do
		var inherited = prefix_table
		var own = new HashMap[String, String]
		if inherited != null then own.recover_with(inherited)
		prefix_table = own

		element_name_table = new HashMap[String, Array[String]]
		attribute_name_table = new HashMap[String, Array[String]]
		declarations = new Array[String]
		decl_seen = true
	end
end