1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Parser for inline markdown
17 # Used to create the AST representation of inline nodes like emphasis, code, links
19 module markdown_inline_parsing
23 # Parser for inline content (text, links, emphasis, etc)
26 # List of delimiter processors to use
27 private var delimiter_processors
: Array[MdDelimiterProcessor] is lazy
do
28 var delimiters
= new Array[MdDelimiterProcessor]
29 delimiters
.add
new MdAsteriskDelimiterProcessor
30 delimiters
.add
new MdUnderscoreDelimiterProcessor
34 # Map special characters to their delimiter processor
35 private var delimiter_processors_map
: Map[Char, MdDelimiterProcessor] is lazy
do
36 var map
= new HashMap[Char, MdDelimiterProcessor]
37 for delimiter_processor
in delimiter_processors
do
38 add_delimiter_processor
(delimiter_processor
, map
)
40 special_characters
.add_all map
.keys
44 # Register a delimiter processor
45 private fun add_delimiter_processor
(delimiter_processor
: MdDelimiterProcessor, map
: Map[Char, MdDelimiterProcessor]) do
46 var opening
= delimiter_processor
.opening_delimiter
47 var closing
= delimiter_processor
.closing_delimiter
48 if opening
== closing
then
49 if map
.has_key
(opening
) then
50 var old
= map
[opening
]
51 if old
.opening_delimiter
== old
.closing_delimiter
then
52 var s
: MdStaggeredDelimiterProcessor
53 if old
isa MdStaggeredDelimiterProcessor then
56 s
= new MdStaggeredDelimiterProcessor(opening
)
59 s
.add delimiter_processor
62 add_delimiter_processor_for_char
(opening
, delimiter_processor
, map
)
65 add_delimiter_processor_for_char
(opening
, delimiter_processor
, map
)
68 add_delimiter_processor_for_char
(opening
, delimiter_processor
, map
)
69 add_delimiter_processor_for_char
(closing
, delimiter_processor
, map
)
# Bind `delimiter_processor` to the special character `delimiter_char` in `map`
#
# A character can be bound only once: the assertion fails, after printing
# a conflict message, when `map` already has an entry for `delimiter_char`.
private fun add_delimiter_processor_for_char(delimiter_char: Char, delimiter_processor: MdDelimiterProcessor, map: Map[Char, MdDelimiterProcessor]) do
	var is_free = not map.has_key(delimiter_char)
	assert is_free else
		print "Delimiter processor conflict with delimiter char `{delimiter_char}`"
	end
	map[delimiter_char] = delimiter_processor
end
81 # List of characters that have a special Markdown meaning
82 private var special_characters
: Array[Char] = ['\n', '`', '[', ']', '\\', '!', '<', '&']
84 # Link references by ID, needs to be built up using `parse_reference` before calling `parse`
85 private var reference_map
= new HashMap[String, MdLink]
87 # Current block under parsing
88 private var block
: MdNode is noinit
90 # Current input string
91 private var input
: String is noinit
94 private var index
: Int is noinit
97 private var line
: Int is noinit
100 private var column
: Int is noinit
102 # Current column offset
103 private var column_offset
: Int is noinit
105 # Top delimiter (emphasis, strong emphasis or custom emphasis)
106 # Brackets are on a separate stack, different from the algorithm described in the spec.
107 private var last_delimiter
: nullable MdDelimiter = null
109 # Top opening bracket (`[` or `![`)
110 private var last_bracket
: nullable MdBracket = null
112 # Parse `input` as inline and add resulting nodes as children to `block`
113 fun parse
(input
: String, offset
: Int, block
: MdNode) do
115 self.input
= input
.trim
117 self.last_delimiter
= null
118 self.last_bracket
= null
119 self.line
= block
.location
.line_start
120 self.column_offset
= offset
121 self.column
= 1 + column_offset
123 var more_to_parse
= parse_inline
124 while more_to_parse
do
125 more_to_parse
= parse_inline
128 process_delimiters
(null)
129 merge_child_text_nodes
(block
)
132 # Advance the current index by `count` characters
133 private fun advance
(count
: Int) do
138 # Attempt to parse a link reference
140 # Return how many characters were parsed as a reference.
142 fun parse_reference
(input
: String): Int do
149 var start_index
= index
152 match_chars
= parse_link_label
153 if match_chars
== 0 then return 0
156 var raw_label
= input
.substring
(0, match_chars
)
159 if peek
!= ':' then return 0
165 dest
= parse_link_destination
.first
166 if dest
== null or dest
.is_empty
then return 0
168 var before_title
= index
169 var before_column
= column
171 title
= parse_link_title
172 if title
== null then
173 # rewind before spaces
175 column
= before_column
178 var at_line_end
= true
179 if index
!= input
.length
and match
(re_line_end
) == null then
180 if title
== null then
183 # the potential title we found is not at the line end,
184 # but it could still be a legal link reference if we discard the title
186 # rewind before spaces
188 column
= before_column
189 # and instead check if the link URL is at the line end
190 at_line_end
= match
(re_line_end
) != null
194 if not at_line_end
then return 0
196 var normalized_label
= raw_label
.normalize_reference
197 if normalized_label
.is_empty
then return 0
199 if not reference_map
.has_key
(normalized_label
) then
200 var link
= new MdLink(new MdLocation(0, 0, 0, 0), dest
, title
)
201 reference_map
[normalized_label
] = link
204 return index
- start_index
208 private var re_line_end
: Regex = "^ *(\n|$)".to_re
210 # Append standard text to the current block
212 # Read `text` between `begin_index` and `end_index`.
213 private fun append_text
(text
: String, begin_index
, end_index
: nullable Int): MdText do
215 if begin_index
!= null and end_index
!= null then
216 var nb_chars
= end_index
- begin_index
217 var string
= text
.substring
(begin_index
, nb_chars
)
223 column
+ nb_chars
- 1
# Add `node` as the last child of the block under parsing
private fun append_node(node: MdNode) do
	block.append_child(node)
end
241 # Parse the next inline element in subject, advancing input index
243 # On success, add the result to block's children and return true.
244 # On failure, return false.
245 private fun parse_inline
: Bool do
248 if c
== '\0' then return false
251 else if c
== '\\' then
252 res
= parse_backslash
253 else if c
== '`' then
254 res
= parse_backticks
255 else if c
== '[' then
256 res
= parse_open_bracket
257 else if c
== '!' then
259 else if c
== ']' then
260 res
= parse_close_bracket
261 else if c
== '<' then
262 res
= parse_auto_link
or parse_html_inline
263 else if c
== '&' then
266 if delimiter_processors_map
.has_key
(c
) then
267 res
= parse_delimiters
(delimiter_processors_map
[c
], c
)
275 # When we get here, it's only for a single special character that turned
276 # out to not have a special meaning.
277 # So we shouldn't have a single surrogate here, hence it should be ok
278 # to turn it into a String
286 # If `re` matches at current index in the input, advance index and return the match
288 private fun match
(re
: Pattern): nullable String do
289 if index
>= input
.length
then return null
290 var match
= input
.search_from
(re
, index
)
291 if match
!= null then
299 # Return the char at the current input index, or `\0`
300 private fun peek
: Char do
301 if index
< input
.length
then
302 return input
.chars
[index
]
307 # Return the char at the current input index + 1, or `\0`
308 private fun peek_next
: Char do
309 if index
+ 1 < input
.length
then
310 return input
.chars
[index
+ 1]
315 # Parse zero or more space characters, including at most one newline
316 private fun spnl
: Bool do
320 if c
== ' ' or c
== '\t' then
323 else if c
== '\n' then
324 if found_nl
then break
336 # If it was preceded by two spaces, return a hard line break,
337 # otherwise a soft line break
338 private fun parse_newline
: Bool do
339 advance
1 # assume we're at a `\n`
341 var last_child
= block
.last_child
343 # check previous text for trailing spaces
344 # the `has_suffix` is an optimization to avoid an RE match in the common case
345 if last_child
!= null and last_child
isa MdText and
346 (last_child
.literal
.has_suffix
(" ")) then
347 var text
= last_child
348 var literal
= text
.literal
349 var match
= literal
.search
(re_final_space
)
350 var spaces
= if match
!= null then match
.length
else 0
352 text
.literal
= literal
.substring
(0, literal
.length
- spaces
)
354 last_child
.location
.column_end
= last_child
.location
.column_end
- spaces
356 append_node
(new MdHardLineBreak(new MdLocation(line
, column
- spaces
- 1, line
, column
- 1), false))
358 append_node
(new MdSoftLineBreak(new MdLocation(line
, column
- spaces
- 1, line
, column
-1)))
361 append_node
(new MdSoftLineBreak(new MdLocation(line
, column
- 1, line
, column
- 1)))
364 column
= 1 + column_offset
366 # gobble leading spaces in next line
373 # Final white spaces pattern
374 private var re_final_space
: Regex = " *$".to_re
376 # Parse a backslash-escaped special character
378 # Add either the escaped characters, a hard line break (if the backslash is followed by
379 # a new line), or a literal backslash to the block's children.
380 private fun parse_backslash
: Bool do
383 append_node
(new MdHardLineBreak(new MdLocation(line
, column
- 1, line
, column
), true))
386 column
= 1 + column_offset
387 else if index
< input
.length
and input
.substring
(index
, 1).has
(re_escapable
) then
388 append_text
(input
, index
, index
+ 1)
396 # Escapable characters pattern
397 private var p_escapable
= "[]!\"#$%&\'()*+,./:;<=>?@\\[\\\\^_`\\\{|\\\}~-]"
399 # Escapable characters regex
400 private var re_escapable
: Regex = "^{p_escapable}".to_re
402 # Attempt to parse backticks
404 # Adding either a backtick code span or a literal sequence of backticks.
405 private fun parse_backticks
: Bool do
406 var column_before
= column
407 var ticks
= match
(re_ticks_here
)
408 if ticks
== null then return false
410 var after_open_ticks
= index
411 var matched
= match
(re_ticks
)
412 while matched
!= null do
413 if matched
== ticks
then
414 var content
= input
.substring
(after_open_ticks
, index
- after_open_ticks
- ticks
.length
)
415 content
= content
.trim
416 content
= content
.replace
(re_whitespace
, " ")
417 var node
= new MdCode(new MdLocation(line
, column_before
, line
, column
), matched
.to_s
, content
.trim
)
422 matched
= match
(re_ticks
)
424 # If we got here, we didn't match a closing backtick sequence
425 index
= after_open_ticks
426 column
= after_open_ticks
+ 1
431 # Backticks starting pattern
432 private var re_ticks_here
: Regex = "^`+".to_re
435 private var re_ticks
: Regex = "`+".to_re
437 # Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters
438 private fun parse_delimiters
(delimiter_processor
: MdDelimiterProcessor, delimiter_char
: Char): Bool do
439 var res
= scan_delimiters
(delimiter_processor
, delimiter_char
)
440 if res
== null then return false
442 var length
= res
.count
443 var start_index
= index
444 var start_column
= column
447 var column_before
= column
448 column
= start_column
449 var node
= append_text
(input
, start_index
, index
)
450 column
= column_before
452 # Add entry to stack for this opener
453 var last_delimiter
= new MdDelimiter(node
, delimiter_char
, res
.can_open
, res
.can_close
, last_delimiter
)
454 last_delimiter
.length
= length
455 last_delimiter
.original_length
= length
457 var prev
= last_delimiter
.prev
459 prev
.next
= last_delimiter
461 self.last_delimiter
= last_delimiter
465 # Add open bracket to delimiter stack and add a text node to block's children
466 private fun parse_open_bracket
: Bool do
467 var start_index
= index
470 var node
= append_text
("[")
472 # Add entry to stack for this opener
473 add_bracket
(new MdBracket.link
(node
, start_index
, column
- 1, last_bracket
, last_delimiter
))
477 # If next character is `[`, add `!` delimiter to delimiter stack and add a text node to
479 # Otherwise just add a text node.
480 private fun parse_bang
: Bool do
481 var start_index
= index
486 var node
= append_text
("![")
488 # Add entry to stack for this opener
489 add_bracket
(new MdBracket.image
(node
, start_index
+ 1, column
- 2, last_bracket
, last_delimiter
))
496 # Try match close bracket against an opening delimiter stack
498 # Add either a link or image, or a plain `[` character, to block's children.
499 # If there is a matching delimiter, remove it from the delimiter stack.
500 private fun parse_close_bracket
: Bool do
502 var start_index
= index
503 var start_column
= column
505 # Get previous `[` or `![`
506 var opener
= last_bracket
507 if opener
== null then
508 # no matching opener, just return a literal
513 if not opener
.allowed
then
514 # matching opener but it's not allowed, just return a literal
520 # check to see if we have a link or image
521 var dest
: nullable Couple[nullable String, Bool] = null
523 var is_link_or_image
= false
525 # maybe an inline link like `[foo](\uri "title")`
529 dest
= parse_link_destination
530 if dest
.first
!= null then
532 # title needs a whitespace before
533 if input
.substring
(index
- 1, 1).has
(re_whitespace
) then
534 title
= parse_link_title
539 is_link_or_image
= true
542 column
= start_column
547 # maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]`
548 if not is_link_or_image
then
549 # see if there's a link label like `[bar]` or `[]`
550 var before_label
= index
551 var label_length
= parse_link_label
554 if label_length
> 2 then
555 ref
= input
.substring
(before_label
, label_length
)
556 else if not opener
.bracket_after
then
557 # If the second label is empty `[foo][]` or missing `[foo]`, then the first label
559 # But it can only be a reference when there's no (unescaped) bracket in it.
560 # If there is, we don't even need to try to lookup the reference.
561 ref
= input
.substring
(opener
.index
, start_index
- opener
.index
)
565 var nref
= ref
.normalize_reference
566 if reference_map
.has_key
(nref
) then
567 var link
= reference_map
[nref
]
568 dest
= new Couple[nullable String, Bool](link
.destination
, false)
570 is_link_or_image
= true
575 if is_link_or_image
then
576 # If we got here, open is a potential opener
577 var link_or_image
: MdLinkOrImage
578 if opener
.is_image
then
579 link_or_image
= new MdImage(new MdLocation(line
, opener
.column
, line
, column
- 1), dest
.as(not null).first
or else "", title
)
581 link_or_image
= new MdLink(new MdLocation(line
, opener
.column
, line
, column
- 1), dest
.as(not null).first
or else "", title
)
583 link_or_image
.has_brackets
= dest
.as(not null).second
585 var node
= opener
.node
.next
586 while node
!= null do
588 link_or_image
.append_child
(node
)
591 append_node
(link_or_image
)
593 # Process delimiters such as emphasis inside a link/image
594 process_delimiters
(opener
.prev_delimiter
)
595 merge_child_text_nodes
(link_or_image
)
596 # We don't need the corresponding text node anymore, we turned it into a node
600 # Links within links are not allowed
601 # We found this link, so there can be no other link around it.
602 if not opener
.is_image
then
603 var bracket
= last_bracket
604 while bracket
!= null do
605 if not bracket
.is_image
then
606 # disallow link opener
607 bracket
.allowed
= false
609 bracket
= bracket
.prev
615 if not is_link_or_image
then
616 if parse_wikilink
then return true
623 column
= start_column
628 private var re_whitespace
: Regex = "\\s+".to_re
# Push `bracket` on top of the `last_bracket` stack
#
# The bracket that was on top before (if any) gets its `bracket_after` flag set.
private fun add_bracket(bracket: MdBracket) do
	var top = last_bracket
	if top != null then top.bracket_after = true
	last_bracket = bracket
end
# Pop the top of the `last_bracket` stack
#
# Does nothing when the stack is empty.
private fun remove_last_bracket do
	var top = last_bracket
	if top != null then last_bracket = top.prev
end
# Hook for wikilink parsing
#
# Always fails here; the real behavior is meant to be defined in a sub module.
private fun parse_wikilink: Bool do
	return false
end
651 # Attempt to parse a link destination, returning the string or null if not match
652 private fun parse_link_destination
: Couple[nullable String, Bool] do
653 var buffer
= new Buffer
658 var has_bracket
= c
== '<'
659 if has_bracket
then advance
1
665 else if c
== ' ' or c
== '\t' or c
== '\n' or c
== '\r' then
666 break # no spaces allowed in urls
667 else if c
== '\\' then
669 if escapable
.has
(next
) then
671 advance
2 # skip over the backslash
674 else if has_bracket
and c
== '>' then
677 else if not has_bracket
and c
== '(' then
679 else if not has_bracket
and c
== ')' then
680 if parens
== 0 then break
682 else if c
== '\0' then
688 return new Couple[nullable String, Bool](buffer
.to_s
, has_bracket
)
691 # Attempt to parse a link title (sans quotes), returning the string or null if no match
692 private fun parse_link_title
: nullable String do
694 if c
!= '\'' and c != '"' and c != '(' then
699 var buffer = new Buffer
703 if c == opener or (opener == '(' and c == ')') then
706 else if c == '\\' then
708 if escapable.has(next) then
713 else if c == '\0' then
# Escapable characters
#
# Characters that may follow a backslash to form an escape
# (looked up with `escapable.has(next)` by the link parsers).
private var escapable = "[]!\"#$%&\'()*+,./:;<=>?@\\^_`\{|\}~-"
724 # Attempt to parse a link label returning number of characters parsed
725 private fun parse_link_label
: Int do
727 while i
< input
.length
do
729 if i
== index
and c
!= '[' then
731 else if c
== '[' and i
!= index
then
732 if input
[i
- 1] != '\\' or (i
- 2 > index
and input
[i
- 2] == '\\') then
735 else if c
== ']' then
736 if i
> 1001 then return 0
737 if input
[i
- 1] != '\\' or (i
- 2 > index
and input
[i
- 2] == '\\') then
738 return (i
- index
) + 1
746 # Attempt to parse an autolink (URL or email in pointy brackets)
747 private fun parse_auto_link
: Bool do
748 var column_before
= column
749 var m
= match
(re_autolink_email
)
751 var dest
= m
.substring
(1, m
.length
- 2)
752 var node
= new MdLink(new MdLocation(line
, column_before
, line
, column
), "mailto:{dest}", null, true)
753 node
.append_child
(new MdText(new MdLocation(line
, column_before
+ 1, line
, column
- 1), dest
))
758 m
= match
(re_autolink_url
)
760 var dest
= m
.substring
(1, m
.length
- 2)
761 var node
= new MdLink(new MdLocation(line
, column_before
, line
, column
), dest
, null, true)
762 node
.append_child
(new MdText(new MdLocation(line
, column_before
+ 1, line
, column
- 1), dest
))
770 # Autolink email pattern
771 private var re_autolink_email
: Regex = "^<([a-zA-Z0-9.!#$%&'*+/=?^_`\{|\}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]\{0,61\}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]\{0,61\}[a-zA-Z0-9])?)*)>".to_re
773 # Autolink url pattern
774 private var re_autolink_url
: Regex = "^<[a-zA-Z][a-zA-Z0-9.+-]\{1,31\}:[^<> ]*>".to_re
776 # Attempt to parse an inline HTML string
777 private fun parse_html_inline
: Bool do
778 var column_before
= column
779 var m
= match
(re_html_tag
)
781 var node
= new MdHtmlInline(new MdLocation(line
, column_before
, line
, column
), m
)
789 private var p_tagname
= "[A-Za-z][A-Za-z0-9-]*"
790 private var p_attribute_name
= "[a-zA-Z_:][a-zA-Z0-9:._-]*"
791 private var p_uquoted_value
= "[^\"'=<>` \t\n]+"
# Pattern of a single-quoted attribute value (used by `p_attribute_value`)
private var p_squoted_value = "'[^']*'"
# Pattern of a double-quoted attribute value (used by `p_attribute_value`)
private var p_dquoted_value = "\"[^\"]*\""
794 private var p_attribute_value
= "({p_uquoted_value}|{p_squoted_value}|{p_dquoted_value})"
795 private var p_attribute_value_spec
= "(\\s*=\\s*{p_attribute_value})"
796 private var p_attribute
= "(\\s{p_attribute_name}{p_attribute_value_spec}?)"
797 private var p_opentag
= "<{p_tagname}{p_attribute}*\\s*/?>"
798 private var p_closetag
= "</{p_tagname}\\s*[>]"
799 private var p_html_comment
= "<!---->|<!--(-?[^>-])(-?[^-])*-->"
800 private var p_processing_instruction
= "[<][?].*?[?][>]"
801 private var p_declaration
= "<![A-Z]+\\s+[^>]*>"
802 private var p_cdata
= "<!\\[CDATA\\[.*\\]\\]>"
803 private var p_html_tag
= "({p_opentag}|{p_closetag}|{p_html_comment}|{p_processing_instruction}|{p_declaration}|{p_cdata})"
806 private var re_html_tag
: Regex do
807 var re
= "^{p_html_tag}".to_re
808 re
.ignore_case
= true
812 # Attempt to parse an HTML entity
813 private fun parse_entity
: Bool do
814 var m
= match
(re_entity_here
)
822 # HTML entity pattern
823 private var re_entity_here
: Regex do
824 var re
= "^&(#x[a-f0-9]\{1,8\}|#[0-9]\{1,8\}|[a-z][a-z0-9]\{1,31\});".to_re
825 re
.ignore_case
= true
829 # Parse a run of ordinary characters
831 # Or a single character with a special meaning in markdown, as a plain string.
832 private fun parse_string
: Bool do
834 var begin_column
= column
835 var length
= input
.length
836 while index
!= length
do
837 if special_characters
.has
(input
.chars
[index
]) then
842 if begin
!= index
then
843 var column_before
= column
844 column
= begin_column
845 append_text
(input
, begin
, index
)
846 column
= column_before
852 # Scan a sequence of characters with code `delimiter_char`
854 # Return information about the number of delimiters and whether they are positioned
855 # such as they can open and/or close emphasis or strong emphasis.
856 private fun scan_delimiters
(delimiter_processor
: MdDelimiterProcessor, delimiter_char
: Char): nullable MdDelimiterData do
857 var start_index
= index
858 var start_column
= column
860 var delimiter_count
= 0
861 while peek
== delimiter_char
do
866 if delimiter_count
< delimiter_processor
.min_length
then
868 column
= start_column
873 if start_index
> 0 then
874 before
= input
.substring
(start_index
- 1, 1)
877 var char_after
= peek
879 if char_after
!= '\0' then
880 after
= char_after
.to_s
883 var before_is_punctuation
= before
.has
(re_punctuation
)
884 var before_is_whitespace
= before
.has
(re_whitespace_char
)
885 var after_is_punctuation
= after
.has
(re_punctuation
)
886 var after_is_whitespace
= after
.has
(re_whitespace_char
)
888 var left_flanking
= not after_is_whitespace
and
889 (not after_is_punctuation
or before_is_whitespace
or before_is_punctuation
)
890 var right_flanking
= not before_is_whitespace
and
891 (not before_is_punctuation
or after_is_whitespace
or after_is_punctuation
)
895 if delimiter_char
== '_' then
896 can_open
= left_flanking
and (not right_flanking
or before_is_punctuation
)
897 can_close
= right_flanking
and (not left_flanking
or after_is_punctuation
)
899 can_open
= left_flanking
and delimiter_char
== delimiter_processor
.opening_delimiter
900 can_close
= right_flanking
and delimiter_char
== delimiter_processor
.closing_delimiter
904 column
= start_column
905 return new MdDelimiterData(delimiter_count
, can_open
, can_close
)
908 # Punctuation pattern
909 private var re_punctuation
: Regex = "^[]!\"#\\$%&'()*+,.:;<=>?@^_`\{|\}~[-]".to_re
# Whitespace character start pattern
#
# Matches a leading space, no-break space (U+00A0), tab, carriage return or newline.
# NOTE(review): the second character of the class is a literal U+00A0; it had been
# mis-decoded as `Â` followed by a space-like character, which wrongly accepted U+00C2.
private var re_whitespace_char: Regex = "^[  \t\r\n]".to_re
914 # Process the stack of delimiters
915 private fun process_delimiters
(stack_bottom
: nullable MdDelimiter) do
916 var openers_bottom
= new HashMap[Char, nullable MdDelimiter]
918 # find first closer above stack bottom
919 var closer
= last_delimiter
920 while closer
!= null and closer
.prev
!= stack_bottom
do
923 # move forward, looking for closers, and handling each
924 while closer
!= null do
925 var delimiter_char
= closer
.delimiter_char
927 if not closer
.can_close
then
932 if not delimiter_processors_map
.has_key
(delimiter_char
) then
937 var delimiter_processor
= delimiter_processors_map
[delimiter_char
]
938 var opening_delimiter_char
= delimiter_processor
.opening_delimiter
940 # Found delimiter closer. Now look back for first matching opener
942 var opener_found
= false
943 var potential_opener_found
= false
944 var opener
= closer
.prev
946 while opener
!= null and opener
!= stack_bottom
and (not openers_bottom
.has_key
(delimiter_char
) or opener
!= openers_bottom
[delimiter_char
]) do
948 if opener
.can_open
and opener
.delimiter_char
== opening_delimiter_char
then
949 potential_opener_found
= true
950 use_delims
= delimiter_processor
.delimiter_use
(opener
, closer
)
951 if use_delims
> 0 then
959 if not opener_found
then
960 if not potential_opener_found
then
961 # Set lower bound for future searches for openers.
962 # Only do this when we didn't even have a potential opener
963 # (one that matches the character and can open).
964 # If an opener was rejected because of the number of delimiters
965 # (e.g. because of the "multiple of 3" rule),
966 # we want to consider it next time because the number of delimiter
967 # can change as we continue processing.
968 openers_bottom
[delimiter_char
] = closer
.prev
969 if not closer
.can_open
then
970 # We can remove a closer that can't be an opener,
971 # once we've seen there's no matching opener.
972 remove_delimiters_keep_node
(closer
)
979 var opener_node
= opener
.as(not null).node
980 var closer_node
= closer
.node
982 # Remove number of used delimiters from stack and inline nodes
983 opener
.as(not null).length
-= use_delims
984 closer
.length
-= use_delims
985 opener_node
.literal
= opener_node
.literal
.substring
(0,
986 opener_node
.literal
.length
- use_delims
)
987 closer_node
.literal
= closer_node
.literal
.substring
(0,
988 closer_node
.literal
.length
- use_delims
)
990 remove_delimiters_between
(opener
, closer
)
991 # The delimiter processor can re-parent the nodes between opener and closer,
992 # so make sure they're contiguous already.
993 # Exclusive because we want to keep opener/closer themselves.
994 merge_text_nodes_between_exclusive
(opener_node
, closer_node
)
995 delimiter_processor
.process
(opener_node
, closer_node
, use_delims
)
997 # No delimiter characters left to process, so we can remove
998 # the delimiter and the now empty node
999 if opener
.as(not null).length
== 0 then
1000 remove_delimiters_and_node
(opener
)
1003 if closer
.length
== 0 then
1004 var next
= closer
.next
1005 remove_delimiters_and_node
(closer
)
1010 # Remove all delimiters
1011 while last_delimiter
!= null and last_delimiter
!= stack_bottom
do
1012 remove_delimiters_keep_node
(last_delimiter
)
# Drop every delimiter located strictly between `opener` and `closer`
#
# Walks backwards from `closer.prev` until `opener` (or the bottom of the
# stack) is reached. The text nodes of the dropped delimiters are kept.
# Does nothing if either bound is null.
private fun remove_delimiters_between(opener, closer: nullable MdDelimiter) do
	if opener == null then return
	if closer == null then return

	var current = closer.prev
	loop
		if current == null then break
		if current == opener then break
		# read the link first, before `current` is taken off the stack
		var before = current.prev
		remove_delimiters_keep_node(current)
		current = before
	end
end
1028 # Remove the delimiter and the corresponding text node
1030 # For used delimiters, e.g. `*` in `*foo*`.
1031 private fun remove_delimiters_and_node
(delim
: nullable MdDelimiter) do
1032 if delim
== null then return
1034 var node
= delim
.node
1036 remove_delimiter
(delim
)
# Take `delim` off the delimiter stack while leaving its node in place as text
#
# Applies to delimiters that end up unused, like the `_` of `foo_bar`.
private fun remove_delimiters_keep_node(delim: nullable MdDelimiter) do remove_delimiter(delim)
1046 # Remove the delimiter `delim`
1047 private fun remove_delimiter
(delim
: nullable MdDelimiter) do
1048 if delim
== null then return
1050 var prev
= delim
.prev
1051 if prev
!= null then
1052 prev
.next
= delim
.next
1054 var next
= delim
.next
1055 if next
== null then
1057 last_delimiter
= prev
# Merge the nodes lying strictly between `from` and `to`
#
# `from` and `to` themselves are left out of the merge.
# Does nothing if either bound is null or if no node separates them.
private fun merge_text_nodes_between_exclusive(from, to: nullable MdNode) do
	if from == null then return
	if to == null then return
	var after_from = from.next
	# nothing lies strictly between the two bounds
	if from == to or after_from == to then return
	merge_text_nodes_inclusive(after_from, to.prev)
end
# Merge the children of `node`, from its first child to its last one
#
# Does nothing if `node` is null or has fewer than two children.
private fun merge_child_text_nodes(node: nullable MdNode) do
	if node == null then return
	var head = node.first_child
	var tail = node.last_child
	# zero or one child: there is nothing to merge
	if head == tail then return
	merge_text_nodes_inclusive(head, tail)
end
1079 # Merge all nodes between `from` and `to` including `from` and `to`
1080 private fun merge_text_nodes_inclusive
(from
, to
: nullable MdNode) do
1085 while node
!= null do
1086 if node
isa MdText then
1088 if first
== null then first
= text
1091 merge_if_needed
(first
, last
)
1095 if node
== to
then break
1098 merge_if_needed
(first
, last
)
1101 # Merge all nodes between `first` and `last`
1102 private fun merge_if_needed
(first
, last
: nullable MdText) do
1103 if first
!= null and last
!= null and first
!= last
then
1104 var buffer
= new Buffer
1105 buffer
.append
(first
.literal
)
1106 var node
= first
.next
1107 var stop
= last
.next
1108 while node
!= null and node
!= stop
do
1109 buffer
.append
(node
.as(MdText).literal
)
1110 first
.location
.line_end
= node
.location
.line_end
1111 first
.location
.column_end
= node
.location
.column_end
1116 var literal
= buffer
.write_to_string
1117 first
.literal
= literal
1122 # Custom delimiter processor for additional delimiters besides `_` and `*`
1123 interface MdDelimiterProcessor
1125 # The character that marks the beginning of a delimited node
1127 # Must not clash with any built-in special characters.
1128 fun opening_delimiter
: Char is abstract
1130 # The character that marks the ending of a delimited node
1132 # Must not clash with any built-in special characters.
1133 fun closing_delimiter
: Char is abstract
1135 # Minimum number of delimiter characters that are needed to activate this
1137 # Must be at least one.
1138 fun min_length
: Int is abstract
1140 # Determine how many (if any) of the delimiter characters should be used
1142 # This allows implementations to decide how many characters to use based on the
1143 # properties of the delimiter runs.
1145 # An implementation can also return 0 when it doesn't want to allow this particular
1146 # combination of delimiter runs.
1147 fun delimiter_use
(opener
, closer
: MdDelimiter): Int is abstract
1149 # Process the matched delimiters
1151 # For example, by wrapping the nodes between `opener` and `closer` in a new node,
1152 # or appending a new node after the opener.
1154 # Note that removal of the delimiter from the delimiter nodes and unlinking
1155 # them is done by the caller.
1156 fun process
(opener
, closer
: MdText, delimiter_use
: Int) is abstract
1159 # A delimiter is one or more of the same delimiter character
1161 # Used for paired delimiters like emphasis or strong emphasis.
1164 # Node containing the delimiter
1167 # Character used as delimiter
1168 var delimiter_char
: Char
1170 # Can `self` open a delimiter?
1173 # Can `self` close a delimiter?
1176 # Previous delimiter found
1177 var prev
: nullable MdDelimiter
1179 # Next delimiter found
1180 var next
: nullable MdDelimiter
1182 # The number of characters in this delimiter run that are left for processing
1185 # The number of characters originally in this delimiter run
1187 # At the start of processing, this is the same as `length`.
1188 var original_length
= 1
1191 # Opening bracket for links and images
1194 # Node containing the bracket
1197 # Index of the bracket in the original string
1200 # Column of the bracket
1203 # Is this bracket opening an image?
1207 var prev
: nullable MdBracket
1209 # Previous delimiter
1210 var prev_delimiter
: nullable MdDelimiter
1212 # Whether this bracket is allowed to form a link/image
1215 # Whether there is an unescaped bracket (opening or closing) anywhere after this bracket
1216 var bracket_after
= false
1218 # Create a new bracket for a link
1219 init link
(node
: MdText, index
: Int, column
: Int, prev
: nullable MdBracket, prev_delimiter
: nullable MdDelimiter) do
1220 init(node
, index
, column
, false, prev
, prev_delimiter
)
1223 # Create a new bracket for an image
# Forwards every argument to the default `init`, passing `true` in fourth position
# (presumably the image flag; the `link` constructor passes `false` there).
1224 init image
(node
: MdText, index
: Int, column
: Int, prev
: nullable MdBracket, prev_delimiter
: nullable MdDelimiter) do
1225 init(node
, index
, column
, true, prev
, prev_delimiter
)
1229 # Data gathered while parsing a delimiter
1230 private class MdDelimiterData
1232 # Number of successive delimiter characters found
1235 # Can this delimiter open an inline construct?
1238 # Can this delimiter close an inline construct?
1242 # An implementation of MdDelimiterProcessor that dispatches all calls to others
1244 # The sub processor called depends on the length of the delimiter run.
1245 # All child processors must have different minimum lengths.
1246 # A given delimiter run is dispatched to the child with the largest acceptable minimum length.
1247 # If no child is applicable, the one with the largest minimum length is chosen.
1248 class MdStaggeredDelimiterProcessor
1249 super MdDelimiterProcessor
1251 # Delimiter character
1254 # Sub processors to apply
# `find_processor` scans this array in order and falls back on its first element.
1255 var processors
= new Array[MdDelimiterProcessor]
1257 redef var min_length
= 0
1258 redef fun opening_delimiter
do return delim
1259 redef fun closing_delimiter
do return delim
1261 # Add a new sub delimiter processor
# An assertion fails if a registered sub processor already has the same `min_length` as `dp`.
# `dp` is inserted at the index where the scan stops; presumably the first index whose
# processor has a smaller `min_length`, which would keep `processors` sorted by
# decreasing `min_length` (TODO confirm).
1262 fun add
(dp
: MdDelimiterProcessor) do
1263 var len
= dp
.min_length
1265 while i
< processors
.length
do
1266 var p
= processors
[i
]
# NOTE(review): the message below spells "mininimum"; it is runtime output and is left untouched here.
1267 assert len
!= p
.min_length
else
1268 print
"Cannot add two delimiter processor for `{delim}` " +
1269 "and mininimum length `{len}`"
1271 if len
> p
.min_length
then
1276 processors
.insert
(dp
, i
)
1279 # Find the corresponding processor for a length of `len` delimiter characters
# Returns the first element of `processors` whose `min_length` is at most `len`,
# or `processors.first` when no element qualifies.
1280 fun find_processor
(len
: Int): MdDelimiterProcessor do
1281 for processor
in processors
do
1282 if processor
.min_length
<= len
then return processor
1284 return processors
.first
# Delegates to the sub processor selected from the `length` of `opener`.
1287 redef fun delimiter_use
(opener
, closer
) do
1288 return find_processor
(opener
.length
).delimiter_use
(opener
, closer
)
# Delegates to the sub processor selected from `delimiter_use`.
1291 redef fun process
(opener
, closer
, delimiter_use
) do
1292 find_processor
(delimiter_use
).process
(opener
, closer
, delimiter_use
)
1296 # A processor for emphasis tokens
# The same `delimiter_char` is used as opening and closing delimiter.
# `process` builds either an `MdEmphasis` or an `MdStrongEmphasis` node.
1297 class MdEmphasisDelimiterProcessor
1298 super MdDelimiterProcessor
1300 # Delimiter character
1301 var delimiter_char
: Char
1303 redef var min_length
= 1
1304 redef fun opening_delimiter
do return delimiter_char
1305 redef fun closing_delimiter
do return delimiter_char
1307 redef fun delimiter_use
(opener
, closer
) do
1308 # "multiple of 3" rule for internal delimiter runs:
# applies when the opener can also close or the closer can also open,
# and the sum of both original lengths is divisible by 3
1309 if (opener
.can_close
or closer
.can_open
) and
1310 ((opener
.original_length
+ closer
.original_length
) % 3 == 0) then
1313 # calculate actual number of delimiters used from this closer
# (checks whether both runs still have at least 2 characters left)
1314 if opener
.length
>= 2 and closer
.length
>= 2 then
# Wrap the nodes found between `opener` and `closer` in a new emphasis node.
# `delimiter_use == 1` builds an `MdEmphasis`; the other branch builds an
# `MdStrongEmphasis` whose delimiter string is the delimiter doubled.
1320 redef fun process
(opener
, closer
, delimiter_use
) do
1321 var single_delimiter
= opening_delimiter
.to_s
1322 var emphasis
: MdNode
1323 if delimiter_use
== 1 then
1324 emphasis
= new MdEmphasis(
1326 opener
.location
.line_start
,
1327 opener
.location
.column_start
,
1328 closer
.location
.line_end
,
1329 closer
.location
.column_end
),
# NOTE(review): unlike the branch above, the columns below are shifted by the
# length of the opener/closer literals; confirm this asymmetry is intended.
1332 emphasis
= new MdStrongEmphasis(
1334 opener
.location
.line_start
,
1335 opener
.location
.column_start
+ opener
.literal
.length
,
1336 closer
.location
.line_end
,
1337 closer
.location
.column_end
- closer
.literal
.length
),
1338 "{single_delimiter}{single_delimiter}")
# Starting from the node after `opener`, append nodes to `emphasis` until `closer` (excluded) or the end is reached
1340 var tmp
= opener
.next
1341 while tmp
!= null and tmp
!= closer
do
1343 emphasis
.append_child
(tmp
)
# The new node takes place right after the opener
1346 opener
.insert_after
(emphasis
)
1350 # Asterisk delimiters processor
# Emphasis processor whose `delimiter_char` is `*`.
1351 class MdAsteriskDelimiterProcessor
1352 super MdEmphasisDelimiterProcessor
1355 redef var delimiter_char
= '*'
1358 # Underscore delimiters processor
# Emphasis processor whose `delimiter_char` is `_`.
1359 class MdUnderscoreDelimiterProcessor
1360 super MdEmphasisDelimiterProcessor
1363 redef var delimiter_char
= '_'
1370 # Remove escaping backslashes from `self`
# Each match of `re_escaped` is replaced by the single character that follows
# its first character. `self` is returned as is when there is no match.
1371 fun unescape_string
: String do
1372 if not has
(re_escaped
) then return self
1374 var buffer
= new Buffer
1375 var match
= search
(re_escaped
)
1377 while match
!= null do
# copy the text located between the previous match and this one
1378 buffer
.append substring
(last_end
, match
.from
- last_end
)
# keep only the character right after the start of the match (skips the backslash)
1379 buffer
.append substring
(match
.from
+ 1, 1)
1380 last_end
= match
.after
1381 match
= search_from
(re_escaped
, last_end
)
# copy what remains after the last match
1383 if last_end
< length
then
1384 buffer
.append substring
(last_end
, length
- last_end
)
1389 # Normalize link reference names
# Drops the first and last characters of `self` (presumably the enclosing
# brackets; confirm against callers), trims the result, lowercases it, and
# replaces each match of `re_whitespace` by a single space.
1390 private fun normalize_reference
: String do
1391 var stripped
= self.substring
(1, length
- 2).trim
1392 var lowercase
= stripped
.to_lower
# TODO utf-8
1393 return lowercase
.replace
(re_whitespace
, " ")
# Bracket expression listing the characters accepted after a backslash by `re_escaped`
1398 private var p_escapable
= "[]!\"#$%&\'()*+,./:;<=>?@\\[\\\\^_`\\\{|\\\}~-]"
# Regex matching a backslash followed by one character of `p_escapable`
1399 private var re_escaped
: Regex = "\\\\{p_escapable}".to_re
# Regex matching one or more whitespace characters
1400 private var re_whitespace
: Regex = "\\s+".to_re