markdown2 :: MdInlineParser :: parse_reference
Attempt to parse a link referencemarkdown2 $ MdInlineParser :: SELF
Type of this instance, automatically specialized in every classcore :: Object :: class_factory
Implementation used byget_class
to create the specific class.
core :: Object :: defaultinit
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
core :: Object :: output_class_name
Display class name on stdout (debug only).markdown2 :: MdInlineParser :: parse_reference
Attempt to parse a link reference
# Parser for inline content (text, links, emphasis, etc)
class MdInlineParser
# List of delimiter processors to use
private var delimiter_processors: Array[MdDelimiterProcessor] is lazy do
var delimiters = new Array[MdDelimiterProcessor]
delimiters.add new MdAsteriskDelimiterProcessor
delimiters.add new MdUnderscoreDelimiterProcessor
return delimiters
end
# Map special characters to their delimiter processor
private var delimiter_processors_map: Map[Char, MdDelimiterProcessor] is lazy do
var map = new HashMap[Char, MdDelimiterProcessor]
for delimiter_processor in delimiter_processors do
add_delimiter_processor(delimiter_processor, map)
end
special_characters.add_all map.keys
return map
end
# Register a delimiter processor
private fun add_delimiter_processor(delimiter_processor: MdDelimiterProcessor, map: Map[Char, MdDelimiterProcessor]) do
var opening = delimiter_processor.opening_delimiter
var closing = delimiter_processor.closing_delimiter
if opening == closing then
if map.has_key(opening) then
var old = map[opening]
if old.opening_delimiter == old.closing_delimiter then
var s: MdStaggeredDelimiterProcessor
if old isa MdStaggeredDelimiterProcessor then
s = old
else
s = new MdStaggeredDelimiterProcessor(opening)
s.add old
end
s.add delimiter_processor
map[opening] = s
else
add_delimiter_processor_for_char(opening, delimiter_processor, map)
end
else
add_delimiter_processor_for_char(opening, delimiter_processor, map)
end
else
add_delimiter_processor_for_char(opening, delimiter_processor, map)
add_delimiter_processor_for_char(closing, delimiter_processor, map)
end
end
# Register a delimiter processor for a special character
private fun add_delimiter_processor_for_char(delimiter_char: Char, delimiter_processor: MdDelimiterProcessor, map: Map[Char, MdDelimiterProcessor]) do
assert not map.has_key(delimiter_char) else
print "Delimiter processor conflict with delimiter char `{delimiter_char}`"
end
map[delimiter_char] = delimiter_processor
end
# List of characters that have a special Markdown meaning
private var special_characters: Array[Char] = ['\n', '`', '[', ']', '\\', '!', '<', '&']
# Link references by ID, needs to be built up using `parse_reference` before calling `parse`
private var reference_map = new HashMap[String, MdLink]
# Current block under parsing
private var block: MdNode is noinit
# Current input string
private var input: String is noinit
# Current index
private var index: Int is noinit
# Current line
private var line: Int is noinit
# Current column
private var column: Int is noinit
# Current column offset
private var column_offset: Int is noinit
# Top delimiter (emphasis, strong emphasis or custom emphasis)
# Brackets are on a separate stack, different from the algorithm described in the spec.
private var last_delimiter: nullable MdDelimiter = null
# Top opening bracket (`[` or `![`)
private var last_bracket: nullable MdBracket = null
# Parse `input` as inline and add resulting nodes as children to `block`
fun parse(input: String, offset: Int, block: MdNode) do
self.block = block
self.input = input.trim
self.index = 0
self.last_delimiter = null
self.last_bracket = null
self.line = block.location.line_start
self.column_offset = offset
self.column = 1 + column_offset
var more_to_parse = parse_inline
while more_to_parse do
more_to_parse = parse_inline
end
process_delimiters(null)
merge_child_text_nodes(block)
end
# Advance the current index of `count` characters
private fun advance(count: Int) do
index += count
column += count
end
# Attempt to parse a link reference
#
# Return how many characters were parsed as a reference.
# Returns 0 if none.
fun parse_reference(input: String): Int do
self.input = input
self.index = 0
self.column = 0
var dest
var title
var match_chars
var start_index = index
# label
match_chars = parse_link_label
if match_chars == 0 then return 0
advance match_chars
var raw_label = input.substring(0, match_chars)
# colon
if peek != ':' then return 0
advance 1
# link url
spnl
dest = parse_link_destination.first
if dest == null or dest.is_empty then return 0
var before_title = index
var before_column = column
spnl
title = parse_link_title
if title == null then
# rewind before spaces
index = before_title
column = before_column
end
var at_line_end = true
if index != input.length and match(re_line_end) == null then
if title == null then
at_line_end = false
else
# the potential title we found is not at the line end,
# but it could still be a legal link reference if we discard the title
title = null
# rewind before spaces
index = before_title
column = before_column
# and instead check if the link URL is at the line end
at_line_end = match(re_line_end) != null
end
end
if not at_line_end then return 0
var normalized_label = raw_label.normalize_reference
if normalized_label.is_empty then return 0
if not reference_map.has_key(normalized_label) then
var link = new MdLink(new MdLocation(0, 0, 0, 0), dest, title)
reference_map[normalized_label] = link
end
return index - start_index
end
# Line end pattern
private var re_line_end: Regex = "^ *(\n|$)".to_re
# Append standard text to the current block
#
# Read `text` between `begin_index` and `end_index`.
private fun append_text(text: String, begin_index, end_index: nullable Int): MdText do
var node: MdText
if begin_index != null and end_index != null then
var nb_chars = end_index - begin_index
var string = text.substring(begin_index, nb_chars)
node = new MdText(
new MdLocation(
line,
column,
line,
column + nb_chars - 1
), string)
else
node = new MdText(
new MdLocation(
line,
column,
line,
column + text.length
), text)
end
append_node(node)
return node
end
# Append `node` to the current block
private fun append_node(node: MdNode) do block.append_child(node)
# Parse the next inline element in subject, advancing input index
#
# On success, add the result to block's children and return true.
# On failure, return false.
private fun parse_inline: Bool do
var res: Bool
var c = peek
if c == '\0' then return false
if c == '\n' then
res = parse_newline
else if c == '\\' then
res = parse_backslash
else if c == '`' then
res = parse_backticks
else if c == '[' then
res = parse_open_bracket
else if c == '!' then
res = parse_bang
else if c == ']' then
res = parse_close_bracket
else if c == '<' then
res = parse_auto_link or parse_html_inline
else if c == '&' then
res = parse_entity
else
if delimiter_processors_map.has_key(c) then
res = parse_delimiters(delimiter_processors_map[c], c)
else
res = parse_string
end
end
if not res then
advance 1
# When we get here, it's only for a single special character that turned
# out to not have a special meaning.
# So we shouldn't have a single surrogate here, hence it should be ok
# to turn it into a String
var literal = c.to_s
append_text(literal)
end
return true
end
# If `re` matches at current index in the input, advance index and return the match
# Else return null.
private fun match(re: Pattern): nullable String do
if index >= input.length then return null
var match = input.search_from(re, index)
if match != null then
index = match.after
column = match.after
return match.to_s
end
return null
end
# Return the char at the current input index, or `\0`
private fun peek: Char do
if index < input.length then
return input.chars[index]
end
return '\0'
end
# Return the char at the current input index + 1, or `\0`
private fun peek_next: Char do
if index + 1 < input.length then
return input.chars[index + 1]
end
return '\0'
end
# Parse zero or more space characters, incuding at most one newline
private fun spnl: Bool do
var found_nl = false
loop
var c = peek
if c == ' ' or c == '\t' then
advance 1
continue
else if c == '\n' then
if found_nl then break
found_nl = true
advance 1
continue
end
break
end
return true
end
# Parse a new line
#
# If it was preceded by two spaces, return a hard line break,
# otherwise a soft line break
private fun parse_newline: Bool do
advance 1 # assume we're at a `\n`
var last_child = block.last_child
# check previous text for trailing spaces
# the `has_suffix` is an optimization to avoid an RE match in the common case
if last_child != null and last_child isa MdText and
(last_child.literal.has_suffix(" ")) then
var text = last_child
var literal = text.literal
var match = literal.search(re_final_space)
var spaces = if match != null then match.length else 0
if spaces > 0 then
text.literal = literal.substring(0, literal.length - spaces)
end
last_child.location.column_end = last_child.location.column_end - spaces
if spaces >= 2 then
append_node(new MdHardLineBreak(new MdLocation(line, column - spaces - 1, line, column - 1), false))
else
append_node(new MdSoftLineBreak(new MdLocation(line, column - spaces - 1, line, column -1)))
end
else
append_node(new MdSoftLineBreak(new MdLocation(line, column - 1, line, column - 1)))
end
line += 1
column = 1 + column_offset
# gobble leading spaces in next line
while peek == ' ' do
advance 1
end
return true
end
# Final white spaces pattern
private var re_final_space: Regex = " *$".to_re
# Parse a backslash-escaped special character
#
# Add either the escaped characters, a hard line break (if the backslash is followed by
# a new line), or a literal backslash to the block's children.
private fun parse_backslash: Bool do
advance 1
if peek == '\n' then
append_node(new MdHardLineBreak(new MdLocation(line, column - 1, line, column), true))
advance 1
line += 1
column = 1 + column_offset
else if index < input.length and input.substring(index, 1).has(re_escapable) then
append_text(input, index, index + 1)
advance 1
else
append_text("\\")
end
return true
end
# Escapable characters pattern
private var p_escapable = "[]!\"#$%&\'()*+,./:;<=>?@\\[\\\\^_`\\\{|\\\}~-]"
# Escapable characters regex
private var re_escapable: Regex = "^{p_escapable}".to_re
# Attempt to parse backticks
#
# Adding either a backtick code span or a literal sequence of backticks.
private fun parse_backticks: Bool do
var column_before = column
var ticks = match(re_ticks_here)
if ticks == null then return false
var after_open_ticks = index
var matched = match(re_ticks)
while matched != null do
if matched == ticks then
var content = input.substring(after_open_ticks, index - after_open_ticks - ticks.length)
content = content.trim
content = content.replace(re_whitespace, " ")
var node = new MdCode(new MdLocation(line, column_before, line, column), matched.to_s, content.trim)
append_node(node)
column += 1
return true
end
matched = match(re_ticks)
end
# If we got here, we didn't match a closing backtick sequence
index = after_open_ticks
column = after_open_ticks + 1
append_text(ticks)
return true
end
# Backticks starting pattern
private var re_ticks_here: Regex = "^`+".to_re
# Backticks pattern
private var re_ticks: Regex = "`+".to_re
# Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters
private fun parse_delimiters(delimiter_processor: MdDelimiterProcessor, delimiter_char: Char): Bool do
var res = scan_delimiters(delimiter_processor, delimiter_char)
if res == null then return false
var length = res.count
var start_index = index
var start_column = column
advance length
var column_before = column
column = start_column
var node = append_text(input, start_index, index)
column = column_before
# Add entry to stack for this opener
var last_delimiter = new MdDelimiter(node, delimiter_char, res.can_open, res.can_close, last_delimiter)
last_delimiter.length = length
last_delimiter.original_length = length
var prev = last_delimiter.prev
if prev != null then
prev.next = last_delimiter
end
self.last_delimiter = last_delimiter
return true
end
# Add open bracket to delimiter stack and add a text node to block's children
private fun parse_open_bracket: Bool do
var start_index = index
advance 1
var node = append_text("[")
# Add entry to stack for this opener
add_bracket(new MdBracket.link(node, start_index, column - 1, last_bracket, last_delimiter))
return true
end
# If next character is `[`, add `!` delimiter to delimiter stack and add a text node to
# block's children.
# Otherwise just add a text node.
private fun parse_bang: Bool do
var start_index = index
advance 1
if peek == '[' then
advance 1
var node = append_text("![")
# Add entry to stack for this opener
add_bracket(new MdBracket.image(node, start_index + 1, column - 2, last_bracket, last_delimiter))
else
append_text("!")
end
return true
end
# Try match close bracket against an opening delimiter stack
#
# Add either a link or image, or a plan `[` character, to block's children.
# If there is a matching delimiter, remove it from the delimiter stack.
private fun parse_close_bracket: Bool do
advance 1
var start_index = index
var start_column = column
# Get previous `[` or `![`
var opener = last_bracket
if opener == null then
# no matching opener, just return a literal
append_text("]")
return true
end
if not opener.allowed then
# matching opener but it's not allowed, juste return a literal
append_text("]")
remove_last_bracket
return true
end
# check to see if we have a link or image
var dest: nullable Couple[nullable String, Bool] = null
var title = null
var is_link_or_image = false
# maybe an inline link like `[foo](\uri "title")`
if peek == '(' then
advance 1
spnl
dest = parse_link_destination
if dest.first != null then
spnl
# title needs a whitespace before
if input.substring(index - 1, 1).has(re_whitespace) then
title = parse_link_title
spnl
end
if peek == ')' then
advance 1
is_link_or_image = true
else
index = start_index
column = start_column
end
end
end
# maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]`
if not is_link_or_image then
# see if there's a link label like `[bar]` or `[]`
var before_label = index
var label_length = parse_link_label
advance label_length
var ref = null
if label_length > 2 then
ref = input.substring(before_label, label_length)
else if not opener.bracket_after then
# If the second label is empty `[foo][]` or missing `[foo]`, then the first label
# is the reference.
# But it can only be a reference when there's no (unescaped) bracket in it.
# If there is, we don't even need to try to lookup the reference.
ref = input.substring(opener.index, start_index - opener.index)
end
if ref != null then
var nref = ref.normalize_reference
if reference_map.has_key(nref) then
var link = reference_map[nref]
dest = new Couple[nullable String, Bool](link.destination, false)
title = link.title
is_link_or_image = true
end
end
end
if is_link_or_image then
# If we got here, open is a potential opener
var link_or_image: MdLinkOrImage
if opener.is_image then
link_or_image = new MdImage(new MdLocation(line, opener.column, line, column - 1), dest.as(not null).first or else "", title)
else
link_or_image = new MdLink(new MdLocation(line, opener.column, line, column - 1), dest.as(not null).first or else "", title)
end
link_or_image.has_brackets = dest.as(not null).second
var node = opener.node.next
while node != null do
var next = node.next
link_or_image.append_child(node)
node = next
end
append_node(link_or_image)
# Process delimiters such as emphasis inside a link/image
process_delimiters(opener.prev_delimiter)
merge_child_text_nodes(link_or_image)
# We don't need the corresponding text node anymore, we turned it into a node
opener.node.unlink
remove_last_bracket
# Links within links are not allowed
# We found this link, so there can be no other link around it.
if not opener.is_image then
var bracket = last_bracket
while bracket != null do
if not bracket.is_image then
# disallow link opener
bracket.allowed = false
end
bracket = bracket.prev
end
end
return true
end
if not is_link_or_image then
if parse_wikilink then return true
end
# no link or image
append_text("]")
remove_last_bracket
index = start_index
column = start_column
return true
end
# Whitespace pattern
private var re_whitespace: Regex = "\\s+".to_re
# Add a bracket token on top of the `last_bracket` stack
private fun add_bracket(bracket: MdBracket) do
var last_bracket = self.last_bracket
if last_bracket != null then
last_bracket.bracket_after = true
end
self.last_bracket = bracket
end
# Remove the last bracket on the `last_bracket` stack
private fun remove_last_bracket do
var last_bracket = self.last_bracket
if last_bracket == null then return
self.last_bracket = last_bracket.prev
end
# Wikilink placeholder
#
# Will be defined in sub module.
private fun parse_wikilink: Bool do return false
# Attempt to parse a link destination, returning the string or null if not match
private fun parse_link_destination: Couple[nullable String, Bool] do
var buffer = new Buffer
var c = peek
var parens = 0
var has_bracket = c == '<'
if has_bracket then advance 1
loop
c = peek
if c == '\0' then
break # end of input
else if c == ' ' or c == '\t' or c == '\n' or c == '\r' then
break # no spaces allowed in urls
else if c == '\\' then
var next = peek_next
if escapable.has(next) then
buffer.add next
advance 2 # skip over the backslash
continue
end
else if has_bracket and c == '>' then
advance 1
break
else if not has_bracket and c == '(' then
parens += 1
else if not has_bracket and c == ')' then
if parens == 0 then break
parens -= 1
else if c == '\0' then
break
end
buffer.add c
advance 1
end
return new Couple[nullable String, Bool](buffer.to_s, has_bracket)
end
# Attempt to parse a link title (sans quotes), returning the string or null if no match
private fun parse_link_title: nullable String do
var c = peek
if c != '\'' and c != '"' and c != '(' then
return null
end
var opener = c
var buffer = new Buffer
loop
advance 1
c = peek
if c == opener or (opener == '(' and c == ')') then
advance 1
break
else if c == '\\' then
var next = peek_next
if escapable.has(next) then
buffer.add next
advance 1
continue
end
else if c == '\0' then
return null
end
buffer.add c
end
return buffer.to_s
end
# Escapable characters
private var escapable = "[]!\"#$%&\'()*+,./:;<=>?@\\^_`\{|\}~-"
# Attempt to parse a link label returning number of characters parsed
private fun parse_link_label: Int do
var i = index
while i < input.length do
var c = input[i]
if i == index and c != '[' then
return 0
else if c == '[' and i != index then
if input[i - 1] != '\\' or (i - 2 > index and input[i - 2] == '\\') then
return 0
end
else if c == ']' then
if i > 1001 then return 0
if input[i - 1] != '\\' or (i - 2 > index and input[i - 2] == '\\') then
return (i - index) + 1
end
end
i += 1
end
return 0
end
# Attempt to parse an autolink (URL or email in pointy brackets)
private fun parse_auto_link: Bool do
var column_before = column
var m = match(re_autolink_email)
if m != null then
var dest = m.substring(1, m.length - 2)
var node = new MdLink(new MdLocation(line, column_before, line, column), "mailto:{dest}", null, true)
node.append_child(new MdText(new MdLocation(line, column_before + 1, line, column - 1), dest))
column += 1
append_node(node)
return true
end
m = match(re_autolink_url)
if m != null then
var dest = m.substring(1, m.length - 2)
var node = new MdLink(new MdLocation(line, column_before, line, column), dest, null, true)
node.append_child(new MdText(new MdLocation(line, column_before + 1, line, column - 1), dest))
column += 1
append_node(node)
return true
end
return false
end
# Autolink email pattern
private var re_autolink_email: Regex = "^<([a-zA-Z0-9.!#$%&'*+/=?^_`\{|\}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]\{0,61\}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]\{0,61\}[a-zA-Z0-9])?)*)>".to_re
# Autolink url pattern
private var re_autolink_url: Regex = "^<[a-zA-Z][a-zA-Z0-9.+-]\{1,31\}:[^<> ]*>".to_re
# Attempt to parse an inline HTML string
private fun parse_html_inline: Bool do
var column_before = column
var m = match(re_html_tag)
if m != null then
var node = new MdHtmlInline(new MdLocation(line, column_before, line, column), m)
column += 1
append_node(node)
return true
end
return false
end
private var p_tagname = "[A-Za-z][A-Za-z0-9-]*"
private var p_attribute_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
private var p_uquoted_value = "[^\"'=<>` \t\n]+"
private var p_squoted_value = "'[^']*'"
private var p_dquoted_value = "\"[^\"]*\""
private var p_attribute_value = "({p_uquoted_value}|{p_squoted_value}|{p_dquoted_value})"
private var p_attribute_value_spec = "(\\s*=\\s*{p_attribute_value})"
private var p_attribute = "(\\s{p_attribute_name}{p_attribute_value_spec}?)"
private var p_opentag = "<{p_tagname}{p_attribute}*\\s*/?>"
private var p_closetag = "</{p_tagname}\\s*[>]"
private var p_html_comment = "<!---->|<!--(-?[^>-])(-?[^-])*-->"
private var p_processing_instruction = "[<][?].*?[?][>]"
private var p_declaration = "<![A-Z]+\\s+[^>]*>"
private var p_cdata = "<!\\[CDATA\\[.*\\]\\]>"
private var p_html_tag = "({p_opentag}|{p_closetag}|{p_html_comment}|{p_processing_instruction}|{p_declaration}|{p_cdata})"
# HTML tag pattern
private var re_html_tag: Regex do
var re = "^{p_html_tag}".to_re
re.ignore_case = true
return re
end
# Attempt to parse an HTML entity
private fun parse_entity: Bool do
var m = match(re_entity_here)
if m != null then
append_text(m)
return true
end
return false
end
# HTML entity pattern
private var re_entity_here: Regex do
var re = "^&(#x[a-f0-9]\{1,8\}|#[0-9]\{1,8\}|[a-z][a-z0-9]\{1,31\});".to_re
re.ignore_case = true
return re
end
# Parse a run of ordinary characters
#
# Or a single character with a special meaning in markdown, as a plain string.
private fun parse_string: Bool do
var begin = index
var begin_column = column
var length = input.length
while index != length do
if special_characters.has(input.chars[index]) then
break
end
advance 1
end
if begin != index then
var column_before = column
column = begin_column
append_text(input, begin, index)
column = column_before
return true
end
return false
end
# Scan a sequence of characters with code `delimiter_char`
#
# Return information about the number of delimiters and whether they are positioned
# such as they can open and/or close emphasis or strong emphasis.
private fun scan_delimiters(delimiter_processor: MdDelimiterProcessor, delimiter_char: Char): nullable MdDelimiterData do
var start_index = index
var start_column = column
var delimiter_count = 0
while peek == delimiter_char do
delimiter_count += 1
advance 1
end
if delimiter_count < delimiter_processor.min_length then
index = start_index
column = start_column
return null
end
var before = "\n"
if start_index > 0 then
before = input.substring(start_index - 1, 1)
end
var char_after = peek
var after = "\n"
if char_after != '\0' then
after = char_after.to_s
end
var before_is_punctuation = before.has(re_punctuation)
var before_is_whitespace = before.has(re_whitespace_char)
var after_is_punctuation = after.has(re_punctuation)
var after_is_whitespace = after.has(re_whitespace_char)
var left_flanking = not after_is_whitespace and
(not after_is_punctuation or before_is_whitespace or before_is_punctuation)
var right_flanking = not before_is_whitespace and
(not before_is_punctuation or after_is_whitespace or after_is_punctuation)
var can_open
var can_close
if delimiter_char == '_' then
can_open = left_flanking and (not right_flanking or before_is_punctuation)
can_close = right_flanking and (not left_flanking or after_is_punctuation)
else
can_open = left_flanking and delimiter_char == delimiter_processor.opening_delimiter
can_close = right_flanking and delimiter_char == delimiter_processor.closing_delimiter
end
index = start_index
column = start_column
return new MdDelimiterData(delimiter_count, can_open, can_close)
end
# Punctuation pattern
private var re_punctuation: Regex = "^[]!\"#\\$%&'()*+,.:;<=>?@^_`\{|\}~[-]".to_re
# Whitespace character start pattern
private var re_whitespace_char: Regex = "^[ Â \t\r\n]".to_re
# Process the stack of delimiters
private fun process_delimiters(stack_bottom: nullable MdDelimiter) do
var openers_bottom = new HashMap[Char, nullable MdDelimiter]
# find first closer above stack bottom
var closer = last_delimiter
while closer != null and closer.prev != stack_bottom do
closer = closer.prev
end
# move forward, looking for closers, and handling each
while closer != null do
var delimiter_char = closer.delimiter_char
if not closer.can_close then
closer = closer.next
continue
end
if not delimiter_processors_map.has_key(delimiter_char) then
closer = closer.next
continue
end
var delimiter_processor = delimiter_processors_map[delimiter_char]
var opening_delimiter_char = delimiter_processor.opening_delimiter
# Found delimiter closer. Now look back for first matching opener
var use_delims = 0
var opener_found = false
var potential_opener_found = false
var opener = closer.prev
while opener != null and opener != stack_bottom and (not openers_bottom.has_key(delimiter_char) or opener != openers_bottom[delimiter_char]) do
if opener.can_open and opener.delimiter_char == opening_delimiter_char then
potential_opener_found = true
use_delims = delimiter_processor.delimiter_use(opener, closer)
if use_delims > 0 then
opener_found = true
break
end
end
opener = opener.prev
end
if not opener_found then
if not potential_opener_found then
# Set lower bound for future searches for openers.
# Only do this when we didn't even have a potential opener
# (one that matches the character and can open).
# If an opener was rejected because of the number of delimiters
# (e.g. because of the "multiple of 3" rule),
# we want to consider it next time because the number of delimiter
# can change as we continue processing.
openers_bottom[delimiter_char] = closer.prev
if not closer.can_open then
# We can remove a closer that can't be an opener,
# once we've seen there's no matching opener.
remove_delimiters_keep_node(closer)
end
end
closer = closer.next
continue
end
var opener_node = opener.as(not null).node
var closer_node = closer.node
# Remove number of used delimieters from stack and inline nodes
opener.as(not null).length -= use_delims
closer.length -= use_delims
opener_node.literal = opener_node.literal.substring(0,
opener_node.literal.length - use_delims)
closer_node.literal = closer_node.literal.substring(0,
closer_node.literal.length - use_delims)
remove_delimiters_between(opener, closer)
# The delimieter processor can re-parent the nodes between opener and closer,
# so make sure they're contiguous already.
# Exclusive because we want to keep opener/closer themselves.
merge_text_nodes_between_exclusive(opener_node, closer_node)
delimiter_processor.process(opener_node, closer_node, use_delims)
# Node delimieter characters left to process, so we can remove
# delimieter and the now empty node
if opener.as(not null).length == 0 then
remove_delimiters_and_node(opener)
end
if closer.length == 0 then
var next = closer.next
remove_delimiters_and_node(closer)
closer = next
end
end
# Remove all delimiters
while last_delimiter != null and last_delimiter != stack_bottom do
remove_delimiters_keep_node(last_delimiter)
end
end
# Remove all delimiters between `opener` and `closer`
private fun remove_delimiters_between(opener, closer: nullable MdDelimiter) do
if opener == null or closer == null then return
var delimiter = closer.prev
while delimiter != null and delimiter != opener do
var previous_delimiter = delimiter.prev
remove_delimiters_keep_node(delimiter)
delimiter = previous_delimiter
end
end
# Remove the delimiter and the corresponding text node
#
# For used delimiters, e.g. `*` in `*foo*`.
private fun remove_delimiters_and_node(delim: nullable MdDelimiter) do
if delim == null then return
var node = delim.node
node.unlink
remove_delimiter(delim)
end
# Remove the delimiter but keep the corresponding node as text
#
# For unused delimiters such as `_` in `foo_bar`.
private fun remove_delimiters_keep_node(delim: nullable MdDelimiter) do
remove_delimiter(delim)
end
# Remove the delimiter `delim`
private fun remove_delimiter(delim: nullable MdDelimiter) do
if delim == null then return
var prev = delim.prev
if prev != null then
prev.next = delim.next
end
var next = delim.next
if next == null then
# top of stack
last_delimiter = prev
else
next.prev = prev
end
end
# Merge all nodes between `from` and `to` excluding `from` and `to`
private fun merge_text_nodes_between_exclusive(from, to: nullable MdNode) do
if from == null or to == null then return
# no node between them
if from == to or from.next == to then return
merge_text_nodes_inclusive(from.next, to.prev)
end
# Merge all child nodes of `node` into one
private fun merge_child_text_nodes(node: nullable MdNode) do
if node == null then return
# no children or just one child node, no need for merging
if node.first_child == node.last_child then return
merge_text_nodes_inclusive(node.first_child, node.last_child)
end
# Merge all nodes between `from` and `to` including `from` and `to`
private fun merge_text_nodes_inclusive(from, to: nullable MdNode) do
var first = null
var last = null
var node = from
while node != null do
if node isa MdText then
var text = node
if first == null then first = text
last = text
else
merge_if_needed(first, last)
first = null
last = null
end
if node == to then break
node = node.next
end
merge_if_needed(first, last)
end
# Merge all nodes between `first` and `last`
private fun merge_if_needed(first, last: nullable MdText) do
if first != null and last != null and first != last then
var buffer = new Buffer
buffer.append(first.literal)
var node = first.next
var stop = last.next
while node != null and node != stop do
buffer.append(node.as(MdText).literal)
first.location.line_end = node.location.line_end
first.location.column_end = node.location.column_end
var unlink = node
node = node.next
unlink.unlink
end
var literal = buffer.write_to_string
first.literal = literal
end
end
end
lib/markdown2/markdown_inline_parsing.nit:23,1--1120,3
redef class MdInlineParser
# Enable Github mode
private var github_mode = false
redef var delimiter_processors is lazy do
var delimiters = super
if github_mode then
delimiters.add new MdStrikeProcessor
delimiters.add new MdSuperProcessor
end
return delimiters
end
end
lib/markdown2/markdown_github.nit:40,1--53,3
redef class MdInlineParser
# Enable wikilinks mode
private var wikilinks_mode = false
redef fun parse_wikilink do
if not wikilinks_mode then return false
# do we have two opening bracket?
var last_bracket = self.last_bracket
if last_bracket == null then return false
var first_bracket = last_bracket.prev
if first_bracket == null then return false
# was the first bracket an image?
if first_bracket.is_image then return false
# do we have two closing brackets?
if index >= input.length or input.chars[index] != ']' then return false
advance 1 # skip last bracket
var start_index = first_bracket.index + 2
var end_index = index - 2
# create wikilink node
var content = input.substring(start_index, end_index - start_index)
var parts = content.split("|")
var title = if parts.length > 1 then parts.first.trim else null
var link = parts.last.trim
var wikilink = new MdWikilink(
new MdLocation(
first_bracket.node.location.line_start,
first_bracket.node.location.column_start - 1,
line,
column - 1),
link, title)
var node = last_bracket.node.next
var in_link = false
while node != null do
var next = node.next
if not in_link then
if node isa MdText and node.literal.has("|") then
var buf = new Buffer
for c in node.literal.chars do
if c == '|' then
in_link = true
break
end
buf.add c
end
node.literal = buf.write_to_string.r_trim
end
wikilink.append_child(node)
else
node.unlink
end
node = next
end
append_node(wikilink)
# Process delimiters such as emphasis inside a link/image
process_delimiters(last_bracket.prev_delimiter)
merge_child_text_nodes(wikilink)
# remove brackets
first_bracket.node.unlink
last_bracket.node.unlink
return true
end
end
lib/markdown2/markdown_wikilinks.nit:35,1--108,3