Parser for inline markdown

Used to create the AST representation of inline nodes like emphasis, code, links, images, etc.

Introduced classes

class MdAsteriskDelimiterProcessor

markdown2 :: MdAsteriskDelimiterProcessor

Asterisk delimiters processor
class MdBracket

markdown2 :: MdBracket

Opening bracket for links and images
class MdDelimiter

markdown2 :: MdDelimiter

A delimiter is one or more of the same delimiter character
interface MdDelimiterProcessor

markdown2 :: MdDelimiterProcessor

Custom delimiter processor for additional delimiters besides _ and *
class MdEmphasisDelimiterProcessor

markdown2 :: MdEmphasisDelimiterProcessor

A processor for emphasis tokens
class MdInlineParser

markdown2 :: MdInlineParser

Parser for inline content (text, links, emphasis, etc)
class MdStaggeredDelimiterProcessor

markdown2 :: MdStaggeredDelimiterProcessor

An implementation of MdDelimiterProcessor that dispatches all calls to others

Redefined classes

redef abstract class String

markdown2 :: markdown_inline_parsing $ String

Immutable sequence of characters.
redef class Sys

markdown2 :: markdown_inline_parsing $ Sys

The main class of the program.

All class definitions

class MdBracket

markdown2 $ MdBracket

Opening bracket for links and images
class MdDelimiter

markdown2 $ MdDelimiter

A delimiter is one or more of the same delimiter character
interface MdDelimiterProcessor

markdown2 $ MdDelimiterProcessor

Custom delimiter processor for additional delimiters besides _ and *
class MdEmphasisDelimiterProcessor

markdown2 $ MdEmphasisDelimiterProcessor

A processor for emphasis tokens
class MdInlineParser

markdown2 $ MdInlineParser

Parser for inline content (text, links, emphasis, etc)
class MdStaggeredDelimiterProcessor

markdown2 $ MdStaggeredDelimiterProcessor

An implementation of MdDelimiterProcessor that dispatches all calls to others
redef abstract class String

markdown2 :: markdown_inline_parsing $ String

Immutable sequence of characters.
redef class Sys

markdown2 :: markdown_inline_parsing $ Sys

The main class of the program.
package_diagram markdown2::markdown_inline_parsing markdown_inline_parsing markdown2::markdown_ast markdown_ast markdown2::markdown_inline_parsing->markdown2::markdown_ast core core markdown2::markdown_ast->core ...core ... ...core->core markdown2::markdown_block_parsing markdown_block_parsing markdown2::markdown_block_parsing->markdown2::markdown_inline_parsing markdown2::markdown_github markdown_github markdown2::markdown_github->markdown2::markdown_block_parsing markdown2::markdown_wikilinks markdown_wikilinks markdown2::markdown_wikilinks->markdown2::markdown_block_parsing markdown2::markdown_github... ... markdown2::markdown_github...->markdown2::markdown_github markdown2::markdown_wikilinks... ... markdown2::markdown_wikilinks...->markdown2::markdown_wikilinks

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both ends of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module defines several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handles doubly linked lists
module math

core :: math

Mathematical operations
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module markdown_ast

markdown2 :: markdown_ast

Markdown AST representation

Children

module markdown_block_parsing

markdown2 :: markdown_block_parsing

Markdown blocks parsing

Descendants

module a_star-m

a_star-m

module markdown_github

markdown2 :: markdown_github

Markdown Github mode
module markdown_html_rendering

markdown2 :: markdown_html_rendering

HTML rendering of Markdown documents
module markdown_latex_rendering

markdown2 :: markdown_latex_rendering

LaTeX rendering of Markdown documents
module markdown_man_rendering

markdown2 :: markdown_man_rendering

Manpages rendering of Markdown documents
module markdown_md_rendering

markdown2 :: markdown_md_rendering

Markdown rendering of Markdown documents
module nitmd

markdown2 :: nitmd

A Markdown parser for Nit.
module test_markdown

markdown2 :: test_markdown

Test suites for module markdown
# Parser for inline markdown
#
# Used to create the AST representation of inline nodes like emphasis, code, links,
# images, etc.
module markdown_inline_parsing

import markdown_ast

# Parser for inline content (text, links, emphasis, etc)
class MdInlineParser

	# Delimiter processors registered by default
	#
	# Built on first access with a `MdAsteriskDelimiterProcessor` and a
	# `MdUnderscoreDelimiterProcessor`, in that order.
	private var delimiter_processors: Array[MdDelimiterProcessor] is lazy do
		var processors = new Array[MdDelimiterProcessor]
		processors.add(new MdAsteriskDelimiterProcessor)
		processors.add(new MdUnderscoreDelimiterProcessor)
		return processors
	end

	# Delimiter processors indexed by the delimiter characters they handle
	#
	# Built on first access from `delimiter_processors`.
	# As a side effect, every delimiter character of the map is appended to
	# `special_characters`.
	private var delimiter_processors_map: Map[Char, MdDelimiterProcessor] is lazy do
		var by_char = new HashMap[Char, MdDelimiterProcessor]
		for processor in delimiter_processors do add_delimiter_processor(processor, by_char)
		special_characters.add_all(by_char.keys)
		return by_char
	end

	# Register `delimiter_processor` in `map` for its opening and closing characters
	#
	# When two processors share the same character as both opening and closing
	# delimiter, they are grouped behind a `MdStaggeredDelimiterProcessor`.
	# Any other collision ends up in `add_delimiter_processor_for_char`, which
	# asserts that the character is still free.
	private fun add_delimiter_processor(delimiter_processor: MdDelimiterProcessor, map: Map[Char, MdDelimiterProcessor]) do
		var opening = delimiter_processor.opening_delimiter
		var closing = delimiter_processor.closing_delimiter

		# distinct opening and closing characters: register both
		if opening != closing then
			add_delimiter_processor_for_char(opening, delimiter_processor, map)
			add_delimiter_processor_for_char(closing, delimiter_processor, map)
			return
		end

		# symmetric delimiter on a character that is still free
		if not map.has_key(opening) then
			add_delimiter_processor_for_char(opening, delimiter_processor, map)
			return
		end

		# the character is taken by a processor that is not symmetric
		var previous = map[opening]
		if previous.opening_delimiter != previous.closing_delimiter then
			add_delimiter_processor_for_char(opening, delimiter_processor, map)
			return
		end

		# two symmetric processors on the same character: stagger them
		var staggered: MdStaggeredDelimiterProcessor
		if previous isa MdStaggeredDelimiterProcessor then
			staggered = previous
		else
			staggered = new MdStaggeredDelimiterProcessor(opening)
			staggered.add previous
		end
		staggered.add delimiter_processor
		map[opening] = staggered
	end

	# Register `delimiter_processor` in `map` under `delimiter_char`
	#
	# The character must not be registered yet: otherwise a conflict message is
	# printed and the assertion fails.
	private fun add_delimiter_processor_for_char(delimiter_char: Char, delimiter_processor: MdDelimiterProcessor, map: Map[Char, MdDelimiterProcessor]) do
		assert not map.has_key(delimiter_char) else
			print "Delimiter processor conflict with delimiter char `{delimiter_char}`"
		end
		map[delimiter_char] = delimiter_processor
	end

	# List of characters that have a special Markdown meaning
	#
	# `parse_string` stops a run of plain text at any of these characters.
	# The delimiter characters are appended to this list when
	# `delimiter_processors_map` is built.
	private var special_characters: Array[Char] = ['\n', '`', '[', ']', '\\', '!', '<', '&']

	# Link references by ID, needs to be built up using `parse_reference` before calling `parse`
	#
	# Keys are the normalized labels (see `parse_reference`).
	private var reference_map = new HashMap[String, MdLink]

	# Current block under parsing
	#
	# Set by `parse`; parsed nodes are appended to it as children.
	private var block: MdNode is noinit

	# Current input string
	#
	# Set by `parse` (trimmed) and by `parse_reference` (as is).
	private var input: String is noinit

	# Current index
	#
	# Position of the next character to read in `input`.
	private var index: Int is noinit

	# Current line
	#
	# Used to build the location of the created nodes.
	private var line: Int is noinit

	# Current column
	#
	# Used to build the location of the created nodes.
	private var column: Int is noinit

	# Current column offset
	#
	# Added to 1 to reset `column` each time a new line starts.
	private var column_offset: Int is noinit

	# Top delimiter (emphasis, strong emphasis or custom emphasis)
	# Brackets are on a separate stack, different from the algorithm described in the spec.
	private var last_delimiter: nullable MdDelimiter = null

	# Top opening bracket (`[` or `![`)
	private var last_bracket: nullable MdBracket = null

	# Parse `input` as inline content and add the resulting nodes as children of `block`
	#
	# `input` is trimmed before being parsed.
	# `offset` is the column offset applied at the beginning of each line when
	# computing node locations.
	# Once the whole input is consumed, the remaining delimiters are processed
	# and `merge_child_text_nodes` is run on `block`.
	fun parse(input: String, offset: Int, block: MdNode) do
		# reset the parser state
		self.block = block
		self.input = input.trim
		index = 0
		last_delimiter = null
		last_bracket = null
		line = block.location.line_start
		column_offset = offset
		column = 1 + offset

		# consume inline elements until the end of the input
		loop
			if not parse_inline then break
		end

		process_delimiters(null)
		merge_child_text_nodes(block)
	end

	# Move both `index` and `column` forward by `count` characters
	private fun advance(count: Int) do
		index = index + count
		column = column + count
	end

	# Attempt to parse a link reference definition at the beginning of `input`
	#
	# The expected form is a link label, a colon, a destination and an optional
	# title, ending at the end of a line.
	# On success the reference is stored in `reference_map` under its normalized
	# label, unless that label is already defined.
	#
	# Return how many characters were parsed as a reference.
	# Returns 0 if none.
	#
	# This replaces the current `input` and resets `index` and `column`.
	fun parse_reference(input: String): Int do
		self.input = input
		self.index = 0
		self.column = 0
		var dest
		var title
		var match_chars
		var start_index = index

		# label
		match_chars = parse_link_label
		if match_chars == 0 then return 0
		advance match_chars

		# the label starts at index 0, brackets included
		var raw_label = input.substring(0, match_chars)

		# colon
		if peek != ':' then return 0
		advance 1

		# link url
		spnl

		dest = parse_link_destination.first
		if dest == null or dest.is_empty then return 0

		# remember the position right after the destination so the optional
		# title (and the spaces before it) can be discarded later
		var before_title = index
		var before_column = column
		spnl
		title = parse_link_title
		if title == null then
			# rewind before spaces
			index = before_title
			column = before_column
		end

		var at_line_end = true
		if index != input.length and match(re_line_end) == null then
			if title == null then
				at_line_end = false
			else
				# the potential title we found is not at the line end,
				# but it could still be a legal link reference if we discard the title
				title = null
				# rewind before spaces
				index = before_title
				column = before_column
				# and instead check if the link URL is at the line end
				at_line_end = match(re_line_end) != null
			end
		end

		if not at_line_end then return 0

		var normalized_label = raw_label.normalize_reference
		if normalized_label.is_empty then return 0

		# the first definition of a label wins
		if not reference_map.has_key(normalized_label) then
			var link = new MdLink(new MdLocation(0, 0, 0, 0), dest, title)
			reference_map[normalized_label] = link
		end

		return index - start_index
	end

	# Line end pattern
	#
	# Optional spaces followed by either a newline or the end of the text.
	private var re_line_end: Regex = "^ *(\n|$)".to_re

	# Append a text node to the current block and return it
	#
	# When both `begin_index` and `end_index` are given, only the part of `text`
	# between them (`end_index` excluded) is used and the node ends at column
	# `column + length - 1`.
	# Otherwise the whole `text` is used and the node ends at column
	# `column + text.length`.
	# In both cases the node starts at the current `line` and `column`.
	private fun append_text(text: String, begin_index, end_index: nullable Int): MdText do
		var literal = text
		var last_column = column + text.length
		if begin_index != null and end_index != null then
			var nb_chars = end_index - begin_index
			literal = text.substring(begin_index, nb_chars)
			last_column = column + nb_chars - 1
		end
		var node = new MdText(new MdLocation(line, column, line, last_column), literal)
		append_node(node)
		return node
	end

	# Add `node` as the last child of the block under parsing
	private fun append_node(node: MdNode) do
		block.append_child(node)
	end

	# Parse the next inline element in subject, advancing input index
	#
	# The parser to use is selected from the character at the current index.
	# If that parser fails, the character is added as plain text.
	#
	# Return false only when the end of the input is reached, true otherwise.
	private fun parse_inline: Bool do
		var res: Bool
		var c = peek
		# `peek` returns `\0` at the end of the input
		if c == '\0' then return false
		if c == '\n' then
			res = parse_newline
		else if c == '\\' then
			res = parse_backslash
		else if c == '`' then
			res = parse_backticks
		else if c == '[' then
			res = parse_open_bracket
		else if c == '!' then
			res = parse_bang
		else if c == ']' then
			res = parse_close_bracket
		else if c == '<' then
			# an autolink is tried first, then raw inline HTML
			res = parse_auto_link or parse_html_inline
		else if c == '&' then
			res = parse_entity
		else
			# either a registered delimiter character or ordinary text
			if delimiter_processors_map.has_key(c) then
				res = parse_delimiters(delimiter_processors_map[c], c)
			else
				res = parse_string
			end
		end

		if not res then
			advance 1
			# When we get here, it's only for a single special character that turned
			# out to not have a special meaning.
			# So we shouldn't have a single surrogate here, hence it should be ok
			# to turn it into a String
			var literal = c.to_s
			append_text(literal)
		end

		return true
	end

	# Search `re` in the input from the current index
	#
	# On a match, both `index` and `column` are set to the position right after
	# the match and the matched text is returned.
	# Return null if there is no match or if the end of the input is reached.
	private fun match(re: Pattern): nullable String do
		if index >= input.length then return null
		var found = input.search_from(re, index)
		if found == null then return null
		index = found.after
		column = found.after
		return found.to_s
	end

	# Character at the current index, or `\0` past the end of the input
	private fun peek: Char do
		if index >= input.length then return '\0'
		return input.chars[index]
	end

	# Character following the current index, or `\0` past the end of the input
	private fun peek_next: Char do
		var next_index = index + 1
		if next_index >= input.length then return '\0'
		return input.chars[next_index]
	end

	# Skip spaces and tabs, including at most one newline
	#
	# Stops at the first other character, or at a second newline.
	# Always return true.
	private fun spnl: Bool do
		var newline_seen = false
		loop
			var c = peek
			if c == '\n' then
				# only the first newline is consumed
				if newline_seen then break
				newline_seen = true
			else if c != ' ' and c != '\t' then
				break
			end
			advance 1
		end
		return true
	end

	# Parse a new line
	#
	# If it was preceded by two spaces or more, add a hard line break,
	# otherwise a soft line break.
	# Trailing spaces are removed from the preceding text node, and the leading
	# spaces of the next line are skipped.
	#
	# Always return true.
	private fun parse_newline: Bool do
		advance 1 # assume we're at a `\n`

		var last_child = block.last_child

		# check previous text for trailing spaces
		# the `has_suffix` is an optimization to avoid an RE match in the common case
		if last_child != null and last_child isa MdText and
		   (last_child.literal.has_suffix(" ")) then
			var text = last_child
			var literal = text.literal
			var match = literal.search(re_final_space)
			var spaces = if match != null then match.length else 0
			if spaces > 0 then
				# strip the trailing spaces from the text node
				text.literal = literal.substring(0, literal.length - spaces)
			end
			last_child.location.column_end = last_child.location.column_end - spaces
			if spaces >= 2 then
				append_node(new MdHardLineBreak(new MdLocation(line, column - spaces - 1, line, column - 1), false))
			else
				append_node(new MdSoftLineBreak(new MdLocation(line, column - spaces - 1, line, column -1)))
			end
		else
			append_node(new MdSoftLineBreak(new MdLocation(line, column - 1, line, column - 1)))
		end
		# move to the beginning of the next line
		line += 1
		column = 1 + column_offset

		# gobble leading spaces in next line
		while peek == ' ' do
			advance 1
		end
		return true
	end

	# Final white spaces pattern
	#
	# A run of spaces located at the end of the text.
	private var re_final_space: Regex = " *$".to_re

	# Parse a backslash and the character it may escape
	#
	# Depending on what follows the backslash, add to the block's children:
	# * a hard line break, if it is followed by a new line;
	# * the escaped character, if it matches `re_escapable`;
	# * a literal backslash otherwise.
	#
	# Always return true.
	private fun parse_backslash: Bool do
		advance 1 # skip the backslash itself
		if peek == '\n' then
			append_node(new MdHardLineBreak(new MdLocation(line, column - 1, line, column), true))
			advance 1
			line += 1
			column = 1 + column_offset
			return true
		end

		var escaped = index < input.length and input.substring(index, 1).has(re_escapable)
		if escaped then
			append_text(input, index, index + 1)
			advance 1
		else
			append_text("\\")
		end
		return true
	end

	# Escapable characters pattern
	#
	# Character class, in regex syntax, of the characters that a backslash can escape.
	private var p_escapable = "[]!\"#$%&\'()*+,./:;<=>?@\\[\\\\^_`\\\{|\\\}~-]"

	# Escapable characters regex
	#
	# `p_escapable` anchored at the start of the text.
	private var re_escapable: Regex = "^{p_escapable}".to_re

	# Attempt to parse backticks
	#
	# Adding either a backtick code span or a literal sequence of backticks.
	#
	# Return false if there is no backtick at the current index, true otherwise.
	private fun parse_backticks: Bool do
		var column_before = column
		# opening run of backticks
		var ticks = match(re_ticks_here)
		if ticks == null then return false

		var after_open_ticks = index
		# look for a closing run of exactly the same length
		var matched = match(re_ticks)
		while matched != null do
			if matched == ticks then
				# content between the opening and the closing runs
				var content = input.substring(after_open_ticks, index - after_open_ticks - ticks.length)
				content = content.trim
				# collapse each run of whitespace into a single space
				content = content.replace(re_whitespace, " ")
				var node = new MdCode(new MdLocation(line, column_before, line, column), matched.to_s, content.trim)
				append_node(node)
				column += 1
				return true
			end
			matched = match(re_ticks)
		end
		# If we got here, we didn't match a closing backtick sequence
		index = after_open_ticks
		column = after_open_ticks + 1
		append_text(ticks)
		return true
	end

	# Backticks starting pattern
	#
	# A run of one or more backticks anchored at the start of the text.
	private var re_ticks_here: Regex = "^`+".to_re

	# Backticks pattern
	#
	# A run of one or more backticks, not anchored.
	private var re_ticks: Regex = "`+".to_re

	# Attempt to parse a run of delimiters (emphasis, strong emphasis or custom delimiters)
	#
	# The run is added to the block as a text node and pushed on top of the
	# delimiter stack; it is resolved later by `process_delimiters`.
	#
	# Return false if `scan_delimiters` rejects the run.
	private fun parse_delimiters(delimiter_processor: MdDelimiterProcessor, delimiter_char: Char): Bool do
		var scan = scan_delimiters(delimiter_processor, delimiter_char)
		if scan == null then return false

		var run_length = scan.count
		var run_start = index
		var run_column = column

		# the text node is located at the start of the run,
		# while the parser ends up right after it
		advance run_length
		var column_after = column
		column = run_column
		var node = append_text(input, run_start, index)
		column = column_after

		# push the run on top of the delimiter stack
		var delimiter = new MdDelimiter(node, delimiter_char, scan.can_open, scan.can_close, self.last_delimiter)
		delimiter.length = run_length
		delimiter.original_length = run_length

		var below = delimiter.prev
		if below != null then below.next = delimiter
		self.last_delimiter = delimiter
		return true
	end

	# Parse a `[`: add it as a text node and push a link bracket on the bracket stack
	#
	# Always return true.
	private fun parse_open_bracket: Bool do
		var bracket_index = index
		advance 1
		var node = append_text("[")

		# push the opener on top of the bracket stack
		var bracket = new MdBracket.link(node, bracket_index, column - 1, last_bracket, last_delimiter)
		add_bracket(bracket)
		return true
	end

	# Parse a `!`
	#
	# When it is followed by `[`, add `![` as a text node and push an image
	# bracket on the bracket stack.
	# Otherwise add `!` as a text node.
	#
	# Always return true.
	private fun parse_bang: Bool do
		var bang_index = index
		advance 1

		# a lone `!` is plain text
		if peek != '[' then
			append_text("!")
			return true
		end

		advance 1
		var node = append_text("![")

		# push the opener on top of the bracket stack
		var bracket = new MdBracket.image(node, bang_index + 1, column - 2, last_bracket, last_delimiter)
		add_bracket(bracket)
		return true
	end

	# Try to match a close bracket against the stack of opening brackets
	#
	# Add either a link or image, or a plain `]` character, to block's children.
	# If there is a matching opener, remove it from the bracket stack.
	#
	# An inline link or image (`[foo](/uri "title")`) is tried first, then a
	# reference (`[foo][bar]`, `[foo][]` or `[foo]`), then `parse_wikilink`.
	#
	# Always return true.
	private fun parse_close_bracket: Bool do
		advance 1
		# position right after the `]`, used to rewind failed attempts
		var start_index = index
		var start_column = column

		# Get previous `[` or `![`
		var opener = last_bracket
		if opener == null then
			# no matching opener, just return a literal
			append_text("]")
			return true
		end

		if not opener.allowed then
			# matching opener but it's not allowed, just return a literal
			append_text("]")
			remove_last_bracket
			return true
		end

		# check to see if we have a link or image
		var dest: nullable Couple[nullable String, Bool] = null
		var title = null
		var is_link_or_image = false

		# maybe an inline link like `[foo](/uri "title")`
		if peek == '(' then
			advance 1
			spnl
			dest = parse_link_destination
			if dest.first != null then
				spnl
				# title needs a whitespace before
				if input.substring(index - 1, 1).has(re_whitespace) then
					title = parse_link_title
					spnl
				end
				if peek == ')' then
					advance 1
					is_link_or_image = true
				else
					# not an inline link: rewind right after the `]`
					index = start_index
					column = start_column
				end
			end
		end

		# maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]`
		if not is_link_or_image then
			# see if there's a link label like `[bar]` or `[]`
			var before_label = index
			var label_length = parse_link_label
			advance label_length
			var ref = null
			if label_length > 2 then
				ref = input.substring(before_label, label_length)
			else if not opener.bracket_after then
				# If the second label is empty `[foo][]` or missing `[foo]`, then the first label
				# is the reference.
				# But it can only be a reference when there's no (unescaped) bracket in it.
				# If there is, we don't even need to try to lookup the reference.
				ref = input.substring(opener.index, start_index - opener.index)
			end

			if ref != null then
				var nref = ref.normalize_reference
				if reference_map.has_key(nref) then
					var link = reference_map[nref]
					dest = new Couple[nullable String, Bool](link.destination, false)
					title = link.title
					is_link_or_image = true
				end
			end
		end

		if is_link_or_image then
			# If we got here, open is a potential opener
			var link_or_image: MdLinkOrImage
			if opener.is_image then
				link_or_image = new MdImage(new MdLocation(line, opener.column, line, column - 1), dest.as(not null).first or else "", title)
			else
				link_or_image = new MdLink(new MdLocation(line, opener.column, line, column - 1), dest.as(not null).first or else "", title)
			end
			link_or_image.has_brackets = dest.as(not null).second

			# move every node that follows the opener into the link or image
			var node = opener.node.next
			while node != null do
				var next = node.next
				link_or_image.append_child(node)
				node = next
			end
			append_node(link_or_image)

			# Process delimiters such as emphasis inside a link/image
			process_delimiters(opener.prev_delimiter)
			merge_child_text_nodes(link_or_image)
			# We don't need the corresponding text node anymore, we turned it into a node
			opener.node.unlink
			remove_last_bracket

			# Links within links are not allowed
			# We found this link, so there can be no other link around it.
			if not opener.is_image then
				var bracket = last_bracket
				while bracket != null do
					if not bracket.is_image then
						# disallow link opener
						bracket.allowed = false
					end
					bracket = bracket.prev
				end
			end
			return true
		end

		# last chance: `parse_wikilink` (does nothing unless redefined)
		if not is_link_or_image then
			if parse_wikilink then return true
		end

		# no link or image
		append_text("]")
		remove_last_bracket
		index = start_index
		column = start_column
		return true
	end

	# Whitespace pattern
	#
	# A run of one or more whitespace characters, not anchored.
	private var re_whitespace: Regex = "\\s+".to_re

	# Push `bracket` on top of the `last_bracket` stack
	#
	# The previous top, if any, is flagged as having a bracket after it.
	private fun add_bracket(bracket: MdBracket) do
		var top = self.last_bracket
		if top != null then top.bracket_after = true
		self.last_bracket = bracket
	end

	# Pop the top of the `last_bracket` stack
	#
	# Does nothing if the stack is empty.
	private fun remove_last_bracket do
		var top = self.last_bracket
		if top != null then self.last_bracket = top.prev
	end

	# Wikilink placeholder
	#
	# Will be defined in sub module.
	# This default implementation parses nothing and returns false.
	private fun parse_wikilink: Bool do return false

	# Attempt to parse a link destination at the current index
	#
	# Characters are read until the end of the input, a whitespace, the closing
	# `>` of a destination opened with `<`, or an unbalanced `)` when the
	# destination is not enclosed in pointy brackets.
	# A backslash followed by a character of `escapable` yields that character only.
	#
	# Return a couple made of the destination read, which may be empty but is
	# never null, and of a flag telling whether it started with `<`.
	private fun parse_link_destination: Couple[nullable String, Bool] do
		var buffer = new Buffer

		var c = peek
		var parens = 0

		var has_bracket = c == '<'
		if has_bracket then advance 1

		loop
			c = peek
			if c == '\0' then
				break # end of input
			else if c == ' ' or c == '\t' or c == '\n' or c == '\r' then
				break # no spaces allowed in urls
			else if c == '\\' then
				var next = peek_next
				if escapable.has(next) then
					buffer.add next
					advance 2 # skip over the backslash
					continue
				end
				# not an escape sequence: the backslash is kept as is below
			else if has_bracket and c == '>' then
				advance 1
				break
			else if not has_bracket and c == '(' then
				parens += 1
			else if not has_bracket and c == ')' then
				# an unbalanced `)` belongs to the enclosing inline link
				if parens == 0 then break
				parens -= 1
			end
			buffer.add c
			advance 1
		end
		return new Couple[nullable String, Bool](buffer.to_s, has_bracket)
	end

	# Attempt to parse a link title (sans quotes), returning the string or null if no match
	#
	# A title is enclosed in single quotes, double quotes or parentheses.
	# A backslash followed by a character of `escapable` yields that character only.
	#
	# Return null if there is no title opener at the current index, if the end
	# of the input is reached before the closing character, or if a
	# parenthesized title contains an unescaped `(`.
	# The index is not rewound when null is returned after the opener was
	# consumed: callers are expected to restore it.
	private fun parse_link_title: nullable String do
		var opener = peek
		if opener != '\'' and opener != '"' and opener != '(' then
			return null
		end
		# a parenthesized title is closed by `)`, a quoted one by the same quote
		var closer = opener
		if opener == '(' then closer = ')'

		var buffer = new Buffer
		loop
			advance 1
			var c = peek
			if c == closer then
				advance 1
				break
			else if c == '\\' then
				var next = peek_next
				if escapable.has(next) then
					buffer.add next
					# skip the backslash, the escaped char is skipped by the next iteration
					advance 1
					continue
				end
			else if c == '\0' then
				# end of input before the closing character
				return null
			else if opener == '(' and c == '(' then
				# an unescaped `(` is not allowed inside a parenthesized title
				return null
			end
			buffer.add c
		end
		return buffer.to_s
	end

	# Escapable characters
	#
	# Plain list of the characters that lose their backslash when escaped in a
	# link destination or a link title.
	private var escapable = "[]!\"#$%&\'()*+,./:;<=>?@\\^_`\{|\}~-"

	# Attempt to parse a link label at the current index
	#
	# A label starts with `[` and ends at the first `]` that is not escaped by a
	# backslash. It must not contain another unescaped `[`, and its closing `]`
	# must be at most 1001 characters after its opening `[`.
	#
	# Return the number of characters of the label, brackets included, or 0 if
	# there is no label. The index is not moved.
	private fun parse_link_label: Int do
		var i = index
		while i < input.length do
			var c = input[i]
			if i == index and c != '[' then
				return 0
			else if c == '[' and i != index then
				# nested opening bracket, unless it is escaped
				if input[i - 1] != '\\' or (i - 2 > index and input[i - 2] == '\\') then
					return 0
				end
			else if c == ']' then
				# the size limit applies to the label itself,
				# not to its position in the input
				if i - index > 1001 then return 0
				# closing bracket, unless it is escaped
				if input[i - 1] != '\\' or (i - 2 > index and input[i - 2] == '\\') then
					return (i - index) + 1
				end
			end
			i += 1
		end
		return 0
	end

	# Attempt to parse an autolink (URL or email in pointy brackets)
	#
	# An email address is tried first and gets a `mailto:` destination, then a URL.
	# The link created contains a single text node holding the address.
	#
	# Return false if there is no autolink at the current index.
	private fun parse_auto_link: Bool do
		var column_before = column

		# the email form comes first, the URL form is the fallback
		var prefix = "mailto:"
		var m = match(re_autolink_email)
		if m == null then
			prefix = ""
			m = match(re_autolink_url)
		end
		if m == null then return false

		# strip the surrounding `<` and `>`
		var dest = m.substring(1, m.length - 2)
		var node = new MdLink(new MdLocation(line, column_before, line, column), prefix + dest, null, true)
		node.append_child(new MdText(new MdLocation(line, column_before + 1, line, column - 1), dest))
		column += 1
		append_node(node)
		return true
	end

	# Autolink email pattern
	#
	# An email address between `<` and `>`, anchored at the start of the text.
	private var re_autolink_email: Regex = "^<([a-zA-Z0-9.!#$%&'*+/=?^_`\{|\}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]\{0,61\}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]\{0,61\}[a-zA-Z0-9])?)*)>".to_re

	# Autolink url pattern
	#
	# A scheme of 2 to 32 characters, a colon, then any characters but `<`, `>`
	# and space, all between `<` and `>` and anchored at the start of the text.
	private var re_autolink_url: Regex = "^<[a-zA-Z][a-zA-Z0-9.+-]\{1,31\}:[^<> ]*>".to_re

	# Attempt to parse raw inline HTML matching `re_html_tag`
	#
	# On success, add a `MdHtmlInline` node holding the matched text.
	# Return false if there is no match.
	private fun parse_html_inline: Bool do
		var column_before = column
		var tag = match(re_html_tag)
		if tag == null then return false

		var node = new MdHtmlInline(new MdLocation(line, column_before, line, column), tag)
		column += 1
		append_node(node)
		return true
	end

	# Tag name pattern: a letter followed by letters, digits or hyphens
	private var p_tagname = "[A-Za-z][A-Za-z0-9-]*"
	# Attribute name pattern
	private var p_attribute_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
	# Unquoted attribute value pattern
	private var p_uquoted_value = "[^\"'=<>` \t\n]+"
	# Single-quoted attribute value pattern
	private var p_squoted_value = "'[^']*'"
	# Double-quoted attribute value pattern
	private var p_dquoted_value = "\"[^\"]*\""
	# Attribute value pattern: unquoted, single-quoted or double-quoted
	private var p_attribute_value = "({p_uquoted_value}|{p_squoted_value}|{p_dquoted_value})"
	# Attribute value specification pattern: `=` and a value, with optional whitespace around `=`
	private var p_attribute_value_spec = "(\\s*=\\s*{p_attribute_value})"
	# Attribute pattern: a whitespace, a name and an optional value specification
	private var p_attribute = "(\\s{p_attribute_name}{p_attribute_value_spec}?)"
	# Open tag pattern, possibly self-closing
	private var p_opentag = "<{p_tagname}{p_attribute}*\\s*/?>"
	# Closing tag pattern
	private var p_closetag = "</{p_tagname}\\s*[>]"
	# HTML comment pattern
	private var p_html_comment = "<!---->|<!--(-?[^>-])(-?[^-])*-->"
	# Processing instruction pattern (`<?` ... `?>`)
	private var p_processing_instruction = "[<][?].*?[?][>]"
	# Declaration pattern (`<!` followed by an uppercase name)
	private var p_declaration = "<![A-Z]+\\s+[^>]*>"
	# CDATA section pattern
	private var p_cdata = "<!\\[CDATA\\[.*\\]\\]>"
	# Any of the inline HTML constructs above
	private var p_html_tag = "({p_opentag}|{p_closetag}|{p_html_comment}|{p_processing_instruction}|{p_declaration}|{p_cdata})"

	# HTML tag pattern
	#
	# `p_html_tag` anchored at the start of the text, matched case-insensitively.
	private var re_html_tag: Regex do
		var re = "^{p_html_tag}".to_re
		re.ignore_case = true
		return re
	end

	# Attempt to parse an HTML entity matching `re_entity_here`
	#
	# On success, the matched text is added as is to the block as a text node.
	# Return false if there is no match.
	private fun parse_entity: Bool do
		var entity = match(re_entity_here)
		if entity == null then return false
		append_text(entity)
		return true
	end

	# HTML entity pattern
	#
	# `&`, then a hexadecimal (`#x...`), decimal (`#...`) or named reference,
	# then `;`. Anchored at the start of the text and matched case-insensitively.
	private var re_entity_here: Regex do
		var re = "^&(#x[a-f0-9]\{1,8\}|#[0-9]\{1,8\}|[a-z][a-z0-9]\{1,31\});".to_re
		re.ignore_case = true
		return re
	end

	# Parse a run of ordinary characters
	#
	# The run stops at the first character of `special_characters` or at the end
	# of the input, and is added to the block as a single text node.
	#
	# Return false if the run is empty, i.e. the current character is special.
	private fun parse_string: Bool do
		var start_index = index
		var start_column = column
		var limit = input.length
		while index != limit and not special_characters.has(input.chars[index]) do
			advance 1
		end
		if start_index == index then return false

		# the text node is located at the start of the run
		var end_column = column
		column = start_column
		append_text(input, start_index, index)
		column = end_column
		return true
	end

	# Scan a sequence of characters with code `delimiter_char`
	#
	# Return information about the number of delimiters and whether they are positioned
	# such as they can open and/or close emphasis or strong emphasis.
	#
	# Return null if the run is shorter than `delimiter_processor.min_length`.
	# In all cases `index` and `column` are restored to their initial values.
	private fun scan_delimiters(delimiter_processor: MdDelimiterProcessor, delimiter_char: Char): nullable MdDelimiterData do
		var start_index = index
		var start_column = column

		# count the delimiters of the run
		var delimiter_count = 0
		while peek == delimiter_char do
			delimiter_count += 1
			advance 1
		end

		if delimiter_count < delimiter_processor.min_length then
			index = start_index
			column = start_column
			return null
		end

		# character before the run, the start of the input counts as a newline
		var before = "\n"
		if start_index > 0 then
			before = input.substring(start_index - 1, 1)
		end

		# character after the run, the end of the input counts as a newline
		var char_after = peek
		var after = "\n"
		if char_after != '\0' then
			after = char_after.to_s
		end

		var before_is_punctuation = before.has(re_punctuation)
		var before_is_whitespace = before.has(re_whitespace_char)
		var after_is_punctuation = after.has(re_punctuation)
		var after_is_whitespace = after.has(re_whitespace_char)

		var left_flanking = not after_is_whitespace and
			(not after_is_punctuation or before_is_whitespace or before_is_punctuation)
		var right_flanking = not before_is_whitespace and
			(not before_is_punctuation or after_is_whitespace or after_is_punctuation)

		var can_open
		var can_close
		if delimiter_char == '_' then
			# `_` has stricter rules than the other delimiters
			can_open = left_flanking and (not right_flanking or before_is_punctuation)
			can_close = right_flanking and (not left_flanking or after_is_punctuation)
		else
			can_open = left_flanking and delimiter_char == delimiter_processor.opening_delimiter
			can_close = right_flanking and delimiter_char == delimiter_processor.closing_delimiter
		end

		# this is only a scan: the run is consumed by the caller
		index = start_index
		column = start_column
		return new MdDelimiterData(delimiter_count, can_open, can_close)
	end

	# Punctuation pattern
	#
	# A single punctuation character anchored at the start of the text.
	private var re_punctuation: Regex = "^[]!\"#\\$%&'()*+,.:;<=>?@^_`\{|\}~[-]".to_re

	# Pattern matching a whitespace character at the start of a string
	private var re_whitespace_char: Regex = "^[  \t\r\n]".to_re

	# Process the stack of delimiters
	#
	# Every delimiter above `stack_bottom` is considered, from the oldest to the most recent.
	# Each delimiter that can close is matched against the closest previous compatible
	# opener, and the matched pair is handed to the delimiter processor registered for
	# its character in `delimiter_processors_map`.
	#
	# When the method returns, all the delimiters above `stack_bottom` have been removed
	# from the stack. The nodes of the unused ones are kept as text.
	private fun process_delimiters(stack_bottom: nullable MdDelimiter) do
		# For each delimiter character, lower bound of the search for openers
		var openers_bottom = new HashMap[Char, nullable MdDelimiter]

		# find first closer above stack bottom
		var closer = last_delimiter
		while closer != null and closer.prev != stack_bottom do
			closer = closer.prev
		end
		# move forward, looking for closers, and handling each
		while closer != null do
			var delimiter_char = closer.delimiter_char

			# Skip delimiters that cannot close anything
			if not closer.can_close then
				closer = closer.next
				continue
			end

			# Skip delimiters without a registered processor
			if not delimiter_processors_map.has_key(delimiter_char) then
				closer = closer.next
				continue
			end

			var delimiter_processor = delimiter_processors_map[delimiter_char]
			var opening_delimiter_char = delimiter_processor.opening_delimiter

			# Found delimiter closer. Now look back for first matching opener
			var use_delims = 0
			var opener_found = false
			var potential_opener_found = false
			var opener = closer.prev

			# Stop at the stack bottom or at the lower bound recorded for this character
			while opener != null and opener != stack_bottom and (not openers_bottom.has_key(delimiter_char) or opener != openers_bottom[delimiter_char]) do

				if opener.can_open and opener.delimiter_char == opening_delimiter_char then
					potential_opener_found = true
					# The processor decides how many characters this pair consumes,
					# 0 meaning that the pair is rejected.
					use_delims = delimiter_processor.delimiter_use(opener, closer)
					if use_delims > 0 then
						opener_found = true
						break
					end
				end
				opener = opener.prev
			end

			if not opener_found then
				if not potential_opener_found then
					# Set lower bound for future searches for openers.
					# Only do this when we didn't even have a potential opener
					# (one that matches the character and can open).
					# If an opener was rejected because of the number of delimiters
					# (e.g. because of the "multiple of 3" rule),
					# we want to consider it next time because the number of delimiters
					# can change as we continue processing.
					openers_bottom[delimiter_char] = closer.prev
					if not closer.can_open then
						# We can remove a closer that can't be an opener,
						# once we've seen there's no matching opener.
						remove_delimiters_keep_node(closer)
					end
				end
				closer = closer.next
				continue
			end

			var opener_node = opener.as(not null).node
			var closer_node = closer.node

			# Remove number of used delimiters from stack and inline nodes
			opener.as(not null).length -= use_delims
			closer.length -= use_delims
			opener_node.literal = opener_node.literal.substring(0,
				opener_node.literal.length - use_delims)
			closer_node.literal = closer_node.literal.substring(0,
				closer_node.literal.length - use_delims)

			remove_delimiters_between(opener, closer)
			# The delimiter processor can re-parent the nodes between opener and closer,
			# so make sure they're contiguous already.
			# Exclusive because we want to keep opener/closer themselves.
			merge_text_nodes_between_exclusive(opener_node, closer_node)
			delimiter_processor.process(opener_node, closer_node, use_delims)

			# No delimiter characters left to process, so we can remove
			# the delimiter and the now empty node
			if opener.as(not null).length == 0 then
				remove_delimiters_and_node(opener)
			end

			# An exhausted closer is dropped and the scan moves on to the next delimiter.
			# Otherwise the same closer is examined again with its remaining characters.
			if closer.length == 0 then
				var next = closer.next
				remove_delimiters_and_node(closer)
				closer = next
			end
		end

		# Remove all delimiters
		while last_delimiter != null and last_delimiter != stack_bottom do
			remove_delimiters_keep_node(last_delimiter)
		end
	end

	# Drop every delimiter located strictly between `opener` and `closer`
	#
	# The text nodes of the dropped delimiters are kept.
	# Nothing is done when `opener` or `closer` is `null`.
	private fun remove_delimiters_between(opener, closer: nullable MdDelimiter) do
		if opener == null or closer == null then return

		# Walk backward from the closer until the opener, or the end of the chain, is reached
		var current = closer.prev
		loop
			if current == null or current == opener then break
			# Fetch the predecessor before taking the current delimiter off the stack
			var below = current.prev
			remove_delimiters_keep_node(current)
			current = below
		end
	end

	# Take `delim` off the delimiter stack and unlink its text node
	#
	# For used delimiters, e.g. `*` in `*foo*`.
	# Nothing is done when `delim` is `null`.
	private fun remove_delimiters_and_node(delim: nullable MdDelimiter) do
		if delim != null then
			delim.node.unlink
			remove_delimiter(delim)
		end
	end

	# Remove the delimiter but keep the corresponding node as text
	#
	# For unused delimiters such as `_` in `foo_bar`.
	#
	# Only the delimiter stack is updated, the node of `delim` is left untouched.
	private fun remove_delimiters_keep_node(delim: nullable MdDelimiter) do
		remove_delimiter(delim)
	end

	# Take `delim` out of the doubly linked stack of delimiters
	#
	# The neighbors of `delim` are linked to each other.
	# If `delim` was the top of the stack, `last_delimiter` becomes its predecessor.
	# Nothing is done when `delim` is `null`.
	private fun remove_delimiter(delim: nullable MdDelimiter) do
		if delim == null then return

		var below = delim.prev
		var above = delim.next

		if below != null then below.next = above

		if above != null then
			above.prev = below
		else
			# top of stack
			last_delimiter = below
		end
	end

	# Merge the text nodes standing strictly between `from` and `to`
	#
	# `from` and `to` themselves are never merged.
	# Nothing is done when one of them is `null` or when no node stands between them.
	private fun merge_text_nodes_between_exclusive(from, to: nullable MdNode) do
		if from == null or to == null then return

		var has_inner_nodes = from != to and from.next != to
		if has_inner_nodes then merge_text_nodes_inclusive(from.next, to.prev)
	end

	# Merge the adjacent text nodes found among the children of `node`
	#
	# Nothing is done when `node` is `null`.
	private fun merge_child_text_nodes(node: nullable MdNode) do
		if node == null then return

		var head = node.first_child
		var tail = node.last_child
		# Zero or one child: there is nothing to merge
		if head == tail then return
		merge_text_nodes_inclusive(head, tail)
	end

	# Merge the text nodes found from `from` to `to`, both included
	#
	# Each run of consecutive `MdText` nodes is merged on its own:
	# a node of another kind ends the current run.
	private fun merge_text_nodes_inclusive(from, to: nullable MdNode) do
		# Bounds of the run of text nodes being collected
		var run_start: nullable MdText = null
		var run_end: nullable MdText = null

		var current = from
		while current != null do
			if current isa MdText then
				if run_start == null then run_start = current
				run_end = current
			else
				# Not a text node: flush the run collected so far
				merge_if_needed(run_start, run_end)
				run_start = null
				run_end = null
			end
			if current == to then break
			current = current.next
		end
		# Flush the run that may still be pending
		merge_if_needed(run_start, run_end)
	end

	# Merge the text nodes from `first` to `last` into `first`
	#
	# The literals are concatenated into `first` and the end of its location is
	# moved to the end of the last merged node. The other nodes are unlinked.
	# Nothing is done when a bound is `null` or when both bounds are the same node.
	private fun merge_if_needed(first, last: nullable MdText) do
		if first == null or last == null or first == last then return

		var merged = new Buffer
		merged.append(first.literal)

		# First node that must not be merged
		var boundary = last.next
		var current = first.next
		while current != null and current != boundary do
			merged.append(current.as(MdText).literal)
			first.location.line_end = current.location.line_end
			first.location.column_end = current.location.column_end
			# Read the successor before unlinking the merged node
			var following = current.next
			current.unlink
			current = following
		end
		first.literal = merged.write_to_string
	end
end

# Custom delimiter processor for additional delimiters besides `_` and `*`
interface MdDelimiterProcessor

	# The character that marks the beginning of a delimited node
	#
	# Must not clash with any built-in special characters.
	fun opening_delimiter: Char is abstract

	# The character that marks the ending of a delimited node
	#
	# Must not clash with any built-in special characters.
	fun closing_delimiter: Char is abstract

	# Minimum number of delimiter characters that are needed to activate this processor
	#
	# Must be at least one.
	fun min_length: Int is abstract

	# Determine how many (if any) of the delimiter characters should be used
	#
	# This allows implementations to decide how many characters to use based on the
	# properties of the delimiter runs.
	#
	# An implementation can also return 0 when it doesn't want to allow this particular
	# combination of delimiter runs.
	fun delimiter_use(opener, closer: MdDelimiter): Int is abstract

	# Process the matched delimiters
	#
	# For example, by wrapping the nodes between `opener` and `closer` in a new node,
	# or appending a new node after the opener.
	#
	# `delimiter_use` is the number of delimiter characters used by the match.
	#
	# Note that removal of the delimiter from the delimiter nodes and unlinking
	# them is done by the caller.
	fun process(opener, closer: MdText, delimiter_use: Int) is abstract
end

# A delimiter is one or more of the same delimiter character
#
# Used for paired delimiters like emphasis or strong emphasis.
# Delimiters are chained to each other through `prev` and `next`.
class MdDelimiter

	# Node containing the delimiter
	var node: MdText

	# Character used as delimiter
	var delimiter_char: Char

	# Can `self` open a delimiter?
	var can_open: Bool

	# Can `self` close a delimiter?
	var can_close: Bool

	# Previous delimiter found
	var prev: nullable MdDelimiter

	# Next delimiter found
	var next: nullable MdDelimiter

	# The number of characters in this delimiter run that are left for processing
	var length = 1

	# The number of characters originally in this delimiter run
	#
	# At the start of processing, this is the same as `length`.
	var original_length = 1
end

# Opening bracket for links and images
class MdBracket

	# Node containing the bracket
	var node: MdText

	# Index of the bracket in the original string
	var index: Int

	# Column of the bracket
	var column: Int

	# Is this bracket opening an image?
	var is_image: Bool

	# Previous bracket
	var prev: nullable MdBracket

	# Previous delimiter
	var prev_delimiter: nullable MdDelimiter

	# Whether this bracket is allowed to form a link/image
	var allowed = true

	# Whether there is an unescaped bracket (opening or closing) anywhere after this bracket
	var bracket_after = false

	# Create a new bracket for a link
	#
	# Same as the default constructor with `is_image` set to `false`.
	init link(node: MdText, index: Int, column: Int, prev: nullable MdBracket, prev_delimiter: nullable MdDelimiter) do
		init(node, index, column, false, prev, prev_delimiter)
	end

	# Create a new bracket for an image
	#
	# Same as the default constructor with `is_image` set to `true`.
	init image(node: MdText, index: Int, column: Int, prev: nullable MdBracket, prev_delimiter: nullable MdDelimiter) do
		init(node, index, column, true, prev, prev_delimiter)
	end
end

# Data about a scanned run of delimiter characters
#
# This is what `MdInlineParser::scan_delimiters` returns.
private class MdDelimiterData

	# Number of successive delimiters found
	var count: Int

	# Can this delimiter open an inline construct?
	var can_open: Bool

	# Can this delimiter close an inline construct?
	var can_close: Bool
end

# An implementation of MdDelimiterProcessor that dispatches all calls to others
#
# The sub processor called depends on the length of the delimiter run.
# All child processors must have different minimum lengths.
# A given delimiter run is dispatched to the child with the largest acceptable minimum length.
# If no child is applicable, the one with the largest minimum length is chosen.
class MdStaggeredDelimiterProcessor
	super MdDelimiterProcessor

	# Delimiter character
	var delim: Char

	# Sub processors to apply, sorted by decreasing `min_length`
	var processors = new Array[MdDelimiterProcessor]

	# Smallest `min_length` among the sub processors registered with `add`
	#
	# Stays at 0 as long as no sub processor was added.
	redef var min_length = 0
	redef fun opening_delimiter do return delim
	redef fun closing_delimiter do return delim

	# Add a new sub delimiter processor
	#
	# `dp` is inserted so that `processors` stays sorted by decreasing `min_length`.
	# Fails if a sub processor with the same `min_length` is already registered.
	fun add(dp: MdDelimiterProcessor) do
		var len = dp.min_length
		var i = 0
		while i < processors.length do
			var p = processors[i]
			assert len != p.min_length else
				print "Cannot add two delimiter processor for `{delim}` " +
					"and mininimum length `{len}`"
			end
			if len > p.min_length then
				break
			end
			i += 1
		end
		# Reaching the tail means that `dp` has the smallest minimum length so far:
		# runs shorter than that cannot be handled by any sub processor.
		if i == processors.length then min_length = len
		processors.insert(dp, i)
	end

	# Find the corresponding processor for a length of `len` delimiter characters
	#
	# Returns the first sub processor accepting `len`, that is the one with the
	# largest `min_length` not greater than `len`.
	# Falls back to the first sub processor when none accepts `len`.
	fun find_processor(len: Int): MdDelimiterProcessor do
		for processor in processors do
			if processor.min_length <= len then return processor
		end
		return processors.first
	end

	# Delegate to the sub processor selected from the current length of `opener`
	redef fun delimiter_use(opener, closer) do
		return find_processor(opener.length).delimiter_use(opener, closer)
	end

	# Delegate to the sub processor selected from the number of delimiters used
	redef fun process(opener, closer, delimiter_use) do
		find_processor(delimiter_use).process(opener, closer, delimiter_use)
	end
end

# A processor for emphasis tokens
#
# Matched delimiters are turned into `MdEmphasis` or `MdStrongEmphasis` nodes.
class MdEmphasisDelimiterProcessor
	super MdDelimiterProcessor

	# Delimiter character
	var delimiter_char: Char

	redef var min_length = 1
	redef fun opening_delimiter do return delimiter_char
	redef fun closing_delimiter do return delimiter_char

	# Returns 2 when both runs have at least two characters left, 1 otherwise
	#
	# Returns 0 when the pair is rejected by the "multiple of 3" rule.
	redef fun delimiter_use(opener, closer) do
		# "multiple of 3" rule for internal delimiter runs
		if (opener.can_close or closer.can_open) and
		   ((opener.original_length + closer.original_length) % 3 == 0) then
			return 0
		end
		# calculate actual number of delimiters used from this closer
		if opener.length >= 2 and closer.length >= 2 then
			return 2
		end
		return 1
	end

	# Wrap the nodes standing between `opener` and `closer` in a new emphasis node
	#
	# A `delimiter_use` of 1 creates a `MdEmphasis`, any other value a `MdStrongEmphasis`.
	# The new node is inserted right after `opener`.
	redef fun process(opener, closer, delimiter_use) do
		var single_delimiter = opening_delimiter.to_s

		# The literals of `opener` and `closer` only hold the delimiter characters
		# that were not consumed by this match. Those characters stand outside of
		# the emphasis, so they are excluded from its location in both cases.
		var location = new MdLocation(
			opener.location.line_start,
			opener.location.column_start + opener.literal.length,
			closer.location.line_end,
			closer.location.column_end - closer.literal.length)

		var emphasis: MdNode
		if delimiter_use == 1 then
			emphasis = new MdEmphasis(location, single_delimiter)
		else
			emphasis = new MdStrongEmphasis(location,
				"{single_delimiter}{single_delimiter}")
		end

		# Move every node between the delimiters under the emphasis node
		var tmp = opener.next
		while tmp != null and tmp != closer do
			# Read the sibling before the node gets re-parented
			var next = tmp.next
			emphasis.append_child(tmp)
			tmp = next
		end
		opener.insert_after(emphasis)
	end
end

# Asterisk delimiters processor
#
# An emphasis processor whose delimiter character is `*`.
class MdAsteriskDelimiterProcessor
	super MdEmphasisDelimiterProcessor
	noautoinit

	redef var delimiter_char = '*'
end

# Underscore delimiters processor
#
# An emphasis processor whose delimiter character is `_`.
class MdUnderscoreDelimiterProcessor
	super MdEmphasisDelimiterProcessor
	noautoinit

	redef var delimiter_char = '_'
end

# Utils

redef class String

	# Drop the backslash of every backslash-escaped character found in `self`
	#
	# The escaped characters themselves are kept.
	# Returns `self` when it holds no escape sequence.
	fun unescape_string: String do
		if not has(re_escaped) then return self

		var res = new Buffer
		# Position of the first character not copied yet
		var cursor = 0
		var escape = search(re_escaped)
		while escape != null do
			# Text standing before the escape sequence
			res.append(substring(cursor, escape.from - cursor))
			# The escaped character, without its backslash
			res.append(substring(escape.from + 1, 1))
			cursor = escape.after
			escape = search_from(re_escaped, cursor)
		end
		# Text remaining after the last escape sequence
		if cursor < length then res.append(substring(cursor, length - cursor))
		return res.to_s
	end

	# Normalize link reference names
	#
	# The first and last characters of `self` are dropped, the remaining text is
	# trimmed and lowercased, then each run of whitespace becomes a single space.
	private fun normalize_reference: String do
		var label = substring(1, length - 2).trim
		# TODO utf-8
		return label.to_lower.replace(re_whitespace, " ")
	end
end

redef class Sys
	# Bracket expression listing the characters accepted after an escaping backslash
	private var p_escapable = "[]!\"#$%&\'()*+,./:;<=>?@\\[\\\\^_`\\\{|\\\}~-]"
	# Pattern matching a backslash followed by one character of `p_escapable`
	private var re_escaped: Regex = "\\\\{p_escapable}".to_re
	# Pattern matching a run of one or more whitespace characters
	private var re_whitespace: Regex = "\\s+".to_re
end
lib/markdown2/markdown_inline_parsing.nit:15,1--1401,3