Markdown parsing.

# Markdown parsing.
module markdown

import template

# Parse a markdown string and split it in blocks.
#
# Blocks are then emitted through a `Decorator`.
#
# Usage:
#
#    var proc = new MarkdownProcessor
#    var html = proc.process("**Hello World!**")
#    assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor

	# Work in extended mode (default).
	#
	# Behavior changes when using extended mode:
	#
	# * Lists and code blocks end a paragraph
	#
	#   In normal markdown the following:
	#
	# ~~~md
	# This is a paragraph
	# * and this is not a list
	# ~~~
	#
	#   Will produce:
	#
	# ~~~html
	# <p>This is a paragraph
	# * and this is not a list</p>
	# ~~~
	#
	#   When using extended mode this changes to:
	#
	# ~~~html
	# <p>This is a paragraph</p>
	# <ul>
	# <li>and this is not a list</li>
	# </ul>
	# ~~~
	#
	# * Fenced code blocks
	#
	#   If you don't want to indent all your code with 4 spaces,
	#   you can wrap your code in ``` ``` ``` or `~~~`.
	#
	#   Here's an example:
	#
	# ~~~md
	# fun test do
	#    print "Hello World!"
	# end
	# ~~~
	#
	# * Code blocks meta
	#
	#   If you want to use syntax highlighting tools, most of them need to know what kind
	#   of language they are highlighting.
	#   You can add an optional language identifier after the fence declaration to output
	#   it in the HTML render.
	#
	# ```nit
	# import markdown
	#
	# print "# Hello World!".md_to_html
	# ```
	#
	#   Becomes
	#
	# ~~~html
	# <pre class="nit"><code>import markdown
	#
	# print "# Hello World!".md_to_html
	# </code></pre>
	# ~~~
	#
	# * Underscores (Emphasis)
	#
	#   Underscores in the middle of a word like:
	#
	# ~~~md
	# Con_cat_this
	# ~~~
	#
	#   normally produces this:
	#
	# ~~~html
	# <p>Con<em>cat</em>this</p>
	# ~~~
	#
	#   With extended mode they don't result in emphasis.
	#
	# ~~~html
	# <p>Con_cat_this</p>
	# ~~~
	#
	# * Strikethrough
	#
	#   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
	#   a strikethrough span is marked with `~~`.
	#
	# ~~~md
	# ~~Mistaken text.~~
	# ~~~
	#
	#   becomes
	#
	# ~~~html
	# <del>Mistaken text.</del>
	# ~~~
	var ext_mode = true

	# Disable attaching MDLocation to Tokens
	#
	# Locations are useful for some tools but they may
	# cause an important time and space overhead.
	#
	# Default = `false`
	var no_location = false is writable

	# Process the markdown `input` string and return the processed output.
	#
	# The per-document state (`link_refs`, `last_link_ref`, `current_line`
	# and `current_block`) is reset first, so the same processor can be
	# used for several inputs.
	fun process(input: String): Writable do
		# init processor
		link_refs.clear
		last_link_ref = null
		current_line = null
		current_block = null
		# parse markdown
		var parent = read_lines(input)
		parent.remove_surrounding_empty_lines
		recurse(parent, false)
		# output processed text
		# headlines recorded by the decorator are reset before emitting
		decorator.headlines.clear
		return emit(parent.kind)
	end

	# Cut `input` into `MDLine`s and gather them in a new root `MDBlock`.
	#
	# While reading:
	#
	# * `\n` terminates a line and `\r` is dropped;
	# * tabs are expanded with spaces up to the next multiple of 4 columns;
	# * lines accepted by `check_link_ref` are not added to the block.
	private fun read_lines(input: String): MDBlock do
		var root = new MDBlock(new MDLocation(1, 1, 1, 1))
		var text = new FlatBuffer
		var length = input.length
		var index = 0
		var line_number = 0

		while index < length do
			text.clear
			# `column` is the width of the expanded text,
			# `consumed` counts the raw characters read for this line.
			var column = 0
			var consumed = 0
			loop
				if index >= length then break
				var c = input[index]
				index += 1
				consumed += 1
				if c == '\n' then break
				if c == '\r' then continue
				if c == '\t' then
					var stop = column + (4 - (column & 3))
					while column < stop do
						text.add ' '
						column += 1
					end
				else
					column += 1
					text.add c
				end
			end
			line_number += 1

			var location = new MDLocation(line_number, 1, line_number, consumed)
			var line = new MDLine(location, text.write_to_string)
			# Link references are recorded by `check_link_ref`, not kept as content.
			if not check_link_ref(line) then root.add_line line
		end
		return root
	end

	# Check if line is a block link definition.
	# Return `true` if line contains a valid link ref and save it into `link_refs`.
	#
	# A line made only of a quoted or parenthesized title is also consumed
	# (and `true` is returned) when a previous link ref without title is
	# pending in `last_link_ref`: the title is then attached to that link ref.
	private fun check_link_ref(line: MDLine): Bool do
		var md = line.value
		var is_link_ref = false
		var id = new FlatBuffer
		var link = new FlatBuffer
		var comment = new FlatBuffer
		var pos = -1
		# A definition starts with `[` after at most 3 leading spaces.
		if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
			pos = line.leading + 1
			# NOTE(review): `read_until` presumably returns the index of the
			# delimiter, or a negative value when it is missing — confirm.
			pos = md.read_until(id, pos, ']')
			if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
				# The closing `]` must be directly followed by `:`.
				if line.value[pos + 1] == ':' then
					pos += 2
					pos = md.skip_spaces(pos)
					# The link is either wrapped in `<...>` or ends at the next space.
					if pos >= 0 and line.value[pos] == '<' then
						pos += 1
						pos = md.read_until(link, pos, '>')
						pos += 1
					else if pos >= 0 then
						pos = md.read_until(link, pos, ' ', '\n')
					end
					if not link.is_empty then
						pos = md.skip_spaces(pos)
						if pos > 0 and pos < line.value.length then
							# Something follows the link: it must be a title
							# delimited by `"`, `'` or parentheses.
							var c = line.value[pos]
							if c == '\"' or c == '\'' or c == '(' then
								pos += 1
								if c == '(' then
									pos = md.read_until(comment, pos, ')')
								else
									pos = md.read_until(comment, pos, c)
								end
								if pos > 0 then is_link_ref = true
							end
						else
							# Nothing after the link: definition without title.
							is_link_ref = true
						end
					end
				end
			end
		end
		if is_link_ref and not id.is_empty and not link.is_empty then
			var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
			add_link_ref(id.write_to_string, lr)
			# Remember title-less refs: the title may come on the next line.
			if comment.is_empty then last_link_ref = lr
			return true
		else
			# Not a definition: try to read the line as the title of the
			# pending link ref.
			comment = new FlatBuffer
			if not line.is_empty and last_link_ref != null then
				pos = line.leading
				var c = line.value[pos]
				if c == '\"' or c == '\'' or c ==  '(' then
					pos += 1
					if c == '(' then
						pos = md.read_until(comment, pos, ')')
					else
						pos = md.read_until(comment, pos, c)
					end
				end
				# Local copy of the attribute so that it can be null-checked.
				var last_link_ref = self.last_link_ref
				if not comment.is_empty and last_link_ref != null then
					last_link_ref.title = comment.write_to_string
				end
			end
			if comment.is_empty then return false
			return true
		end
	end

	# Known link refs
	# This list will be needed during output to expand links.
	var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

	# Last encountered link ref (for multiline definitions)
	#
	# Markdown allows link refs to be defined over two lines:
	#
	# ~~~md
	# [id]: http://example.com/longish/path/to/resource/here
	#	"Optional Title Here"
	# ~~~
	#
	private var last_link_ref: nullable LinkRef = null

	# Register `ref` in `link_refs` under the lower-cased `key`.
	fun add_link_ref(key: String, ref: LinkRef) do
		var id = key.to_lower
		link_refs[id] = ref
	end

	# Recursively split a block.
	#
	# The `root` block is split according to the kind of lines it contains
	# (see `line_kind`). Some blocks, like lists, can be split again recursively.
	# The `in_list` mode is used to recurse on lists and build
	# nested paragraphs or code blocks.
	#
	# The previous values of `in_list` and `current_block` are restored
	# before returning.
	fun recurse(root: MDBlock, in_list: Bool) do
		var old_mode = self.in_list
		var old_root = self.current_block
		self.in_list = in_list

		# Skip the leading empty lines.
		var line = root.first_line
		while line != null and line.is_empty do
			line = line.next
			if line == null then
				# Only empty lines: there is nothing to process, but the
				# list mode set above must not leak to the caller.
				self.in_list = old_mode
				return
			end
		end

		current_line = line
		current_block = root
		# Each line kind processes the current line(s); the loop relies on
		# `process` moving `current_line` forward.
		while current_line != null do
			line_kind(current_line.as(not null)).process(self)
		end
		self.in_list = old_mode
		self.current_block = old_root
	end

	# Currently processed line.
	# Used when visiting blocks with `recurse`.
	var current_line: nullable MDLine = null is writable

	# Currently processed block.
	# Used when visiting blocks with `recurse`.
	var current_block: nullable MDBlock = null is writable

	# Is the current recursion in list mode?
	# Used when visiting blocks with `recurse`
	private var in_list = false

	# The kind of line `md` is.
	#
	# The checks are ordered: the first matching kind is returned and
	# `LineOther` is the fallback.
	# Fences are only recognized when `ext_mode` is enabled.
	fun line_kind(md: MDLine): Line do
		var value = md.value
		var leading = md.leading
		var trailing = md.trailing
		if md.is_empty then return new LineEmpty
		# More than 3 leading spaces: code line.
		if md.leading > 3 then return new LineCode
		if value[leading] == '#' then return new LineHeadline
		if value[leading] == '>' then return new LineBlockquote

		if ext_mode then
			# A fence needs at least 3 `` ` `` or `~` at the start of the line.
			if value.length - leading - trailing > 2 then
				if value[leading] == '`' and md.count_chars_start('`') >= 3 then
					return new LineFence
				end
				if value[leading] == '~' and md.count_chars_start('~') >= 3 then
					return new LineFence
				end
			end
		end

		# Horizontal rule: a line starting with `*`, `-` or `_` for which
		# `count_chars` reports at least 3 occurrences.
		if value.length - leading - trailing > 2 and
		   (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
		   if md.count_chars(value[leading]) >= 3 then
				return new LineHR
		   end
		end

		# Unordered list: `*`, `-` or `+` followed by a space.
		if value.length - leading >= 2 and value[leading + 1] == ' ' then
			var c = value[leading]
			if c == '*' or c == '-' or c == '+' then return new LineUList
		end

		# Ordered list: digits followed by `. `.
		if value.length - leading >= 3 and value[leading].is_digit then
			var i = leading + 1
			while i < value.length and value[i].is_digit do i += 1
			if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
				return new LineOList
			end
		end

		if value[leading] == '<' and md.check_html then return new LineXML

		# Headlines underlined by the next line with `=` or `-`.
		# NOTE(review): relies on `count_chars` returning 0 for lines that
		# are not made of the given char only — confirm in `MDLine`.
		var next = md.next
		if next != null and not next.is_empty then
			if next.count_chars('=') > 0 then
				return new LineHeadline1
			end
			if next.count_chars('-') > 0 then
				return new LineHeadline2
			end
		end
		return new LineOther
	end

	# Get the token kind at `pos` in `text`.
	#
	# The decision is based on the char at `pos` (`c`), the previous
	# char (`c0`) and the two following ones (`c1` and `c2`).
	# Chars outside of `text` are considered to be spaces.
	#
	# Unless `no_location` is set, the token gets a location computed
	# from `current_loc`, so a location must have been pushed before.
	fun token_at(text: Text, pos: Int): Token do
		var c0: Char
		var c1: Char
		var c2: Char

		if pos > 0 then
			c0 = text[pos - 1]
		else
			c0 = ' '
		end
		var c = text[pos]

		if pos + 1 < text.length then
			c1 = text[pos + 1]
		else
			c1 = ' '
		end
		if pos + 2 < text.length then
			c2 = text[pos + 2]
		else
			c2 = ' '
		end

		var loc
		if no_location then
			loc = null
		else
			# One-char location on the current line.
			loc = new MDLocation(
				current_loc.line_start,
				current_loc.column_start + pos,
				current_loc.line_start,
				current_loc.column_start + pos)
		end

		if c == '*' then
			# `**` is strong unless surrounded by spaces.
			if c1 == '*' then
				if c0 != ' ' or c2 != ' ' then
					return new TokenStrongStar(loc, pos, c)
				else
					return new TokenEmStar(loc, pos, c)
				end
			end
			# A lone `*` surrounded by spaces is plain text.
			if c0 != ' ' or c1 != ' ' then
				return new TokenEmStar(loc, pos, c)
			else
				return new TokenNone(loc, pos, c)
			end
		else if c == '_' then
			if c1 == '_' then
				if c0 != ' ' or c2 != ' ' then
					return new TokenStrongUnderscore(loc, pos, c)
				else
					return new TokenEmUnderscore(loc, pos, c)
				end
			end
			if ext_mode then
				# In extended mode, an underscore between two alphanumeric
				# chars is not an emphasis.
				if (c0.is_letter or c0.is_digit) and c0 != '_' and
				   (c1.is_letter or c1.is_digit) then
					return new TokenNone(loc, pos, c)
				else
					return new TokenEmUnderscore(loc, pos, c)
				end
			end
			if c0 != ' ' or c1 != ' ' then
				return new TokenEmUnderscore(loc, pos, c)
			else
				return new TokenNone(loc, pos, c)
			end
		else if c == '!' then
			# `![` starts an image.
			if c1 == '[' then return new TokenImage(loc, pos, c)
			return new TokenNone(loc, pos, c)
		else if c == '[' then
			return new TokenLink(loc, pos, c)
		else if c == ']' then
			return new TokenNone(loc, pos, c)
		else if c == '`' then
			if c1 == '`' then
				return new TokenCodeDouble(loc, pos, c)
			else
				return new TokenCodeSingle(loc, pos, c)
			end
		else if c == '\\' then
			# A backslash is an escape only before one of these chars.
			if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
				return new TokenEscape(loc, pos, c)
			else
				return new TokenNone(loc, pos, c)
			end
		else if c == '<' then
			return new TokenHTML(loc, pos, c)
		else if c == '&' then
			return new TokenEntity(loc, pos, c)
		else
			if ext_mode then
				# `~~` starts a strikethrough span (extended mode only).
				if c == '~' and c1 == '~' then
					return new TokenStrike(loc, pos, c)
				end
			end
			return new TokenNone(loc, pos, c)
		end
	end

	# Find the position of the first token of the same type as `token` in `text`.
	#
	# The search starts at index `start`.
	# Return `-1` if no such token is found.
	fun find_token(text: Text, start: Int, token: Token): Int do
		for i in [start..text.length[ do
			if token_at(text, i).is_same_type(token) then return i
		end
		return -1
	end

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	#
	# Default is `HTMLDecorator`.
	# The default instance is created lazily, on the first read, if no
	# decorator was assigned before.
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownProcessor` using a custom `decorator`.
	init with_decorator(decorator: DECORATOR) do
		self.decorator = decorator
	end

	# Output `block` using `decorator` and return the result.
	#
	# A fresh buffer is pushed for the duration of the emission and
	# popped afterwards: this buffer is the returned value.
	fun emit(block: Block): Text do
		var buffer = push_buffer
		block.emit(self)
		pop_buffer
		return buffer
	end

	# Output the content of `block` (delegates to `Block::emit_in`).
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	#
	# Same as `emit_text_until` from position 0 and without stop token.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of `text` if `token` is null.
	#
	# Return the position of the stop token, or `-1` if the end of
	# `text` was reached.
	#
	# An emphasis stop token (`TokenEmStar`, `TokenEmUnderscore`) is
	# also matched by the corresponding strong token.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var old_text = current_text
		var old_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			if text[current_pos] == '\n' then
				# Move the current location to the next line; columns are
				# then counted from this line break.
				current_loc.line_start += 1
				current_loc.column_start = -current_pos
			end
			var mt = token_at(text, current_pos)
			if (token != null and not token isa TokenNone) and
			(mt.is_same_type(token) or
			(token isa TokenEmStar and mt isa TokenStrongStar) or
			(token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
				# NOTE(review): `current_text` and `current_pos` are not
				# restored on this path; presumably the token that started
				# the span repositions `current_pos` itself — confirm.
				return current_pos
			end
			mt.emit(self)
			current_pos += 1
		end
		current_text = old_text
		current_pos = old_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Create an empty buffer, stack it as the current one and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Unstack the last pushed buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Buffer currently receiving the output: the last pushed one.
	#
	# At least one buffer must have been pushed.
	private fun current_buffer: FlatBuffer do
		var stack = buffer_stack
		assert not stack.is_empty
		return stack.last
	end

	# Stacked locations.
	private var loc_stack = new List[MDLocation]

	# Stack `location` as the current location.
	private fun push_loc(location: MDLocation) do
		loc_stack.add location
	end

	# Unstack and return the last pushed location.
	private fun pop_loc: MDLocation do
		var top = loc_stack.pop
		return top
	end

	# Location currently being processed: the last pushed one.
	#
	# At least one location must have been pushed.
	private fun current_loc: MDLocation do
		var stack = loc_stack
		assert not stack.is_empty
		return stack.last
	end

	# Append `e` to the current buffer.
	#
	# A `Text` is appended as is, any other `Writable` is first
	# converted with `write_to_string`.
	fun add(e: Writable) do
		var buffer = current_buffer
		if e isa Text then
			buffer.append e
		else
			buffer.append e.write_to_string
		end
	end

	# Append the single char `c` to the current buffer.
	fun addc(c: Char) do
		var buffer = current_buffer
		buffer.add c
	end

	# Append a "\n" line break to the current buffer.
	fun addn do
		addc '\n'
	end
end

# A link reference.
#
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link with a title.
	init with_title(link: String, title: nullable String) do
		init(link)
		self.title = title
	end
end

# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Each `add_*` service receives the processor `v` it must write to.
interface Decorator

	# Kind of processor used
	type PROCESSOR: MarkdownProcessor

	# Render a single plain char.
	#
	# Redefine this method to add special escaping for plain text.
	fun add_char(v: PROCESSOR, c: Char) do v.addc c

	# Render a ruler block.
	fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: PROCESSOR, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract

	# Render an emphasis text.
	fun add_em(v: PROCESSOR, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: PROCESSOR, text: Text) is abstract

	# Render a strike text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: PROCESSOR, text: Text) is abstract

	# Render a link.
	fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image.
	fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an abbreviation.
	fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract

	# Render a code span reading from a buffer.
	fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: PROCESSOR, value: Text) is abstract

	# Render code text from buffer and escape it.
	fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: PROCESSOR, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: PROCESSOR) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Found headlines during the processing labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end

# Class representing a markdown headline.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end

# `Decorator` that renders markdown as HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	redef fun add_headline(v, block) do
		# Record the headline under a unique id before rendering it.
		var first = block.block.first_line
		if first == null then return
		var title = first.value
		var id = strip_id(title)
		var lvl = block.depth
		headlines[id] = new HeadLine(id, title, lvl)
		# Render the heading element.
		v.add "<h{lvl} id=\"{id}\">"
		v.emit_in block
		v.add "</h{lvl}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		# The optional fence meta is output as the `class` of the `pre`.
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	# `text` is appended as is, without escaping.
	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	# `text` is appended as is, without escaping.
	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	# `text` is appended as is, without escaping.
	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		# The `title` attribute is only output for a non-empty comment.
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		# The `title` attribute is only output for a non-empty comment.
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add ">"
		# The link text is itself processed as markdown.
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# Replace `&`, `<`, `>`, `"` and `'` by the matching entity.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Only `&`, `<` and `>` are escaped in code; quotes are kept as is.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	redef fun strip_id(txt) do
		# Spaces become `_`; other chars are kept only if they are
		# letters, digits or one of `allowed_id_chars`.
		var buf = new FlatBuffer
		for c in txt do
			if c == ' ' then
				buf.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				buf.add c
			end
		end
		var res = buf.to_s
		# Append `_1`, `_2`... until the id is not used by a known headline.
		var key = res
		var i = 0
		while headlines.has_key(key) do
			i += 1
			key = "{res}_{i}"
		end
		return key
	end

	# Chars, other than letters and digits, that `strip_id` keeps.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end

# Span of a Markdown input delimited by a start and an end position.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Format as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var head = "{line_start},{column_start}"
		var tail = "{line_end},{column_end}"
		return "{head}--{tail}"
	end

	# Return a new location with the same four values as `self`.
	fun copy: MDLocation do
		var clone = new MDLocation(line_start, column_start, line_end, column_end)
		return clone
	end
end

# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks.
# Lines and sub-blocks are both kept as linked lists.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	#
	# NOTE(review): `split` links sub-blocks through `next` only and
	# never sets `prev` — confirm whether this is intended.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Number of direct sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var cur = first_block
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Number of lines directly held by this block.
	fun count_lines: Int do
		var n = 0
		var cur = first_line
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Split `self`, creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved into the new block,
	# which is appended to the sub-blocks of `self` and returned.
	# `self` must have at least one line.
	fun split(line: MDLine): MDBlock do
		var head = first_line.as(not null)
		# The sub-block spans from the current first line to `line`.
		var sub = new MDBlock(new MDLocation(
			head.location.line_start,
			head.location.column_start,
			line.location.line_end,
			line.location.column_end))
		sub.first_line = head
		sub.last_line = line

		# Detach the moved lines from `self`.
		var rest = line.next
		first_line = rest
		line.next = null
		if rest == null then
			last_line = null
		else
			rest.prev = null
			# `self` now starts at the first remaining line.
			location.line_start = rest.location.line_start
			location.column_start = rest.location.column_start
		end

		# Append the new block to the sub-blocks.
		if first_block == null then
			first_block = sub
		else
			last_block.as(not null).next = sub
		end
		last_block = sub
		return sub
	end

	# Append `line` after the current last line.
	#
	# The `next_empty` and `prev_empty` flags of the two lines that
	# become neighbors are updated.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Unlink `line` from this block.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		loop
			var head = first_line
			if head == null or not head.is_empty then break
			remove_line head
			removed = true
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		loop
			var tail = last_line
			if tail == null or not tail.is_empty then break
			remove_line tail
			removed = true
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var leading = remove_leading_empty_lines
		var trailing = remove_trailing_empty_lines
		return leading or trailing
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					# List lines: the line kind extracts the value after the marker.
					line.value = kind.extract_value(line)
				else
					var cut = line.leading.min(4)
					line.value = line.value.substring_from(cut)
				end
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect the text of the lines then of the sub-blocks.
	#
	# Each line and each sub-block text is followed by a "\n".
	fun text: String do
		var out = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then out.append line.text
			out.append "\n"
			line = line.next
		end
		var sub = first_block
		while sub != null do
			out.append sub.text
			out.append "\n"
			sub = sub.next
		end
		return out.write_to_string
	end
end

# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is emitted, see `emit_in`.
	fun emit(v: MarkdownProcessor) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# If `block` still has lines they are emitted, else its sub-blocks are.
	fun emit_in(v: MarkdownProcessor) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first gathered in a temporary buffer, without their
	# leading and trailing spaces, then the whole text is processed
	# with `MarkdownProcessor::emit_text`.
	# A line break is added after lines ending with at least 2 spaces.
	fun emit_lines(v: MarkdownProcessor) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a count, not an end index: the leading
				# spaces must be subtracted too, else trailing spaces are
				# kept when the line is indented.
				var count = line.value.length - line.trailing - line.leading
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is the current location while
	# the sub-block is emitted.
	fun emit_blocks(v: MarkdownProcessor) do
		var block = self.block.first_block
		while block != null do
			v.push_loc(block.location)
			block.kind.emit(v)
			v.pop_loc
			block = block.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Outside of fences, a 4 spaces prefix is removed from the lines.
	# Each line is followed by a "\n".
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				if not infence and str.has_prefix("    ") then
					# NOTE(review): the second argument of `substring` is a
					# count, so this only drops trailing spaces beyond the
					# fourth one — confirm whether trailing spaces should be
					# kept (`substring_from(4)`) or removed.
					text.append str.substring(4, str.length - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end

# A block without any markdown specificities.
#
# It uses the default implementation of `Block` as is:
# this class is only used for typing purposes.
class BlockNone
	super Block
end

# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove the blockquote markers from the lines of `block`.
	#
	# For each non-empty line starting with `>`, the marker and one
	# following space, if any, are dropped.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			var value = line.value
			var mark = line.leading
			if not line.is_empty and value[mark] == '>' then
				var cut = mark + 1
				if cut < value.length and value[cut] == ' ' then cut += 1
				line.value = value.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end

# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Number of chars to skip at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Code lines are output escaped, without inline markdown processing.
	#
	# Every line, even an empty one, is followed by a line break.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end

# A markdown code-fence block.
#
# It uses the same implementation as `BlockCode` except for `line_start`;
# this class is mainly used for typing purposes.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end

# A markdown headline.
class BlockHeadline
	super Block

	# The headline is emitted with a location shifted by `start`,
	# so that it begins at the headline text.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the first char of the headline text in its original line.
	#
	# Set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# The leading `#` (their count, up to 6, gives the `depth`), the
	# following spaces and the closing `#` are removed.
	# Nothing is done if `depth` is already set.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null then return
		if line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			var nend = line.value.length - line.trailing - 1
			# Never scan back before `start`: with a text made only of
			# closing marks (like in `# #`) the scan would otherwise run
			# into the opening marks and yield a negative length.
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			line.value = line.value.substring(start, nend - start + 1)
			line.leading = 0
			line.trailing = 0
		end
		self.start = start
		depth = level.min(6)
	end
end

# An item of a markdown list.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end

# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# A new item starts on each list line, and on each non-empty, non-indented
	# line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var first = block.first_line
		if first == null then return
		var line = first.next
		while line != null do
			# `line_kind` is queried for every line, as in the test below.
			var t = v.line_kind(line)
			var starts_item = t isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# What remains is the last item.
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# When at least one list item contains a `BlockParagraph`,
	# every `BlockNone` found in the list items becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in any item.
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not found do
					if child.kind isa BlockParagraph then found = true
					child = child.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote the untyped blocks.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end

# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end

# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end

# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end

# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end

# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Output the value of each non-empty line unchanged, one output line per source line.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end

# A markdown line.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# `leading` is always computed; lines that are not made only of spaces
	# are flagged as non-empty and get their `trailing` computed as well.
	init do
		leading = process_leading
		var is_blank = leading == value.length
		if not is_blank then
			is_empty = false
			trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbour lines, if any, are told that they now touch an empty line.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		var before = prev
		if before != null then before.next_empty = true
		var after = next
		if after != null then after.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# Returns the number of spaces found at the start of `value`.
	fun process_leading: Int do
		var text = self.value
		var count = 0
		while count < text.length and text[count] == ' ' do
			count += 1
		end
		# NOTE(review): this compares the current `leading` attribute, not the
		# `count` just computed; looks like `count` was intended. Confirm
		# before changing: `clear` resets `value`, which the `init` test on
		# `leading != value.length` depends on.
		if leading == text.length then clear
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns the number of spaces found at the end of `value`.
	# For a value made only of spaces (or an empty one) the whole length is
	# returned instead of reading before the first char.
	fun process_trailing: Int do
		var value = self.value
		var count = 0
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	#
	# Any other char makes the result 0.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then return 0
			count += 1
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	#
	# Counting stops at the first char that is neither `ch` nor a space.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then break
			count += 1
		end
		return count
	end

	# Last XML line if any.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# The markup may span several lines: following lines are reached through `next`.
	# When `true` is returned from the tag scan, `xml_end_line` holds the line
	# on which the last opened block tag was closed (`self` for a lone `hr`).
	# Comments are delegated to `read_xml_comment`.
	private fun check_html: Bool do
		# Stack of the block tags opened and not yet closed.
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		# A `!` in second position: try to read the line as a comment first.
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			# Only tags listed as HTML blocks are accepted as first tag.
			if not tag.is_html_block then
				return false
			end
			# `hr` is accepted right away, no closing tag is searched.
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				# Move to the next `<` of the current line.
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# No more tag on this line.
					# A `/` two chars before `pos` pops the last opened tag.
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						# Only block tags other than `hr` take part in the nesting check.
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# Closing tag: it must match the last opened one.
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						# Not a readable tag, skip the `<`.
						pos += 1
					end
				end
			end
			# Still opened tags at the end of the lines means invalid markup.
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the index of the `<` that opens the comment in `first_line`.
	# The two dashes of `<!--` are looked for relative to `start`, so that
	# comments preceded by leading spaces are recognized too.
	#
	# The end mark `-->` is searched on `first_line` and on the following lines.
	# When found, `first_line.xml_end_line` is set to the line holding it and
	# the position right after it is returned.
	# Returns -1 otherwise.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var opening = first_line.value
		if start + 3 >= opening.length then return -1
		if opening[start + 2] != '-' or opening[start + 3] != '-' then return -1
		var line: nullable MDLine = first_line
		var pos = start + 4
		while line != null do
			# Move to the next dash of the current line.
			while pos < line.value.length and line.value[pos] != '-' do
				pos += 1
			end
			if pos == line.value.length then
				line = line.next
				pos = 0
			else
				if pos + 2 < line.value.length then
					if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
						first_line.xml_end_line = line
						return pos + 3
					end
				end
				pos += 1
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	#
	# `substring` expects a number of chars, not an end index:
	# both `leading` and `trailing` are removed from the length.
	fun text: String do
		return value.substring(leading, value.length - leading - trailing)
	end
end

# A kind of markdown line.
#
# Each implementation knows how to process the lines of its kind.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end

# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves to the next one.
	redef fun process(v) do
		var line = v.current_line.as(not null)
		v.current_line = line.next
	end
end

# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the following lines into a block.
	#
	# The block is a `BlockNone` when inside a list, not preceded by an empty
	# line and not ended by an empty line; a `BlockParagraph` otherwise.
	redef fun process(v) do
		var first = v.current_line.as(not null)
		var was_empty = first.prev_empty
		# go to block end
		var line: nullable MDLine = first
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			var list_start = (v.in_list or v.ext_mode) and t isa LineList
			var code_start = v.ext_mode and (t isa LineCode or t isa LineFence)
			var other_start = t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
				t isa LineHR or t isa LineBlockquote or t isa LineXML
			if list_start or code_start or other_start then break
			line = line.next
		end
		# build block
		var current_block = v.current_block.as(not null)
		var block: MDBlock
		var untyped: Bool
		if line == null then
			# ran out of lines: take everything that is left
			block = current_block.split(current_block.last_line.as(not null))
			untyped = v.in_list and not was_empty
		else if line.is_empty then
			# stopped on an empty line: it is part of the block
			block = current_block.split(line)
			untyped = false
		else
			# stopped on another construct: it is left out of the block
			block = current_block.split(line.prev.as(not null))
			untyped = v.in_list and not was_empty
		end
		if untyped then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end

# A line of markdown code.
class LineCode
	super Line

	# Gather the code lines, and the empty lines between them, into a `BlockCode`.
	redef fun process(v) do
		# lookup block end
		var line = v.current_line
		while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
			line = line.next
		end
		# split at block end line
		var current_block = v.current_block.as(not null)
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var block = current_block.split(last)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = current_block.first_line
	end
end

# A line of raw XML.
class LineXML
	super Line

	# Isolate the lines up to `xml_end_line` into a `BlockXML`.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# what comes before the markup is split away first
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var xml_block = current_block.split(line.xml_end_line.as(not null))
		xml_block.kind = new BlockXML(xml_block)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end

# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the quoted lines into a `BlockQuote` and process its content recursively.
	redef fun process(v) do
		var line = v.current_line
		var current_block = v.current_block.as(not null)
		# go to bquote end:
		# a non-empty, non-indented line following an empty one and that is
		# not itself a blockquote line
		while line != null do
			var outside = not line.is_empty and line.prev_empty and line.leading == 0
			if outside and not v.line_kind(line) isa LineBlockquote then break
			line = line.next
		end
		# build sub block
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var block = current_block.split(last)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = current_block.first_line
	end
end

# A markdown ruler line.
class LineHR
	super Line

	# Isolate the line into a `BlockRuler`.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# what comes before the ruler is split away first
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var ruler = current_block.split(line)
		ruler.kind = new BlockRuler(ruler)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end

# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the closing fence into a `BlockFence`.
	#
	# The text found on the opening fence line becomes the block `meta`.
	# Fence lines themselves are cleared.
	redef fun process(v) do
		var line = v.current_line.as(not null).next
		var current_block = v.current_block.as(not null)
		# go to fence end
		while line != null and not v.line_kind(line) isa LineFence do
			line = line.next
		end
		# step over the closing fence, if any
		if line != null then
			line = line.next
		end
		# build fence block
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var block = current_block.split(last)
		block.remove_surrounding_empty_lines
		var opening = block.first_line.as(not null)
		var meta = opening.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		opening.clear
		var closing = block.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			closing.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end

# A markdown headline line.
class LineHeadline
	super Line

	# Isolate the line into a `BlockHeadline` and strip its headline marks.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# what comes before the headline is split away first
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var headline = current_block.split(line)
		var kind = new BlockHeadline(headline)
		headline.kind = kind
		kind.transform_headline(headline)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end

# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the line into a `BlockHeadline` of depth 1.
	#
	# The line that follows is required and is cleared.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# what comes before the headline is split away first
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var underline = line.next.as(not null)
		underline.clear
		var headline = current_block.split(line)
		var kind = new BlockHeadline(headline)
		kind.depth = 1
		kind.transform_headline(headline)
		headline.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end

# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the line into a `BlockHeadline` of depth 2.
	#
	# The line that follows is required and is cleared.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# what comes before the headline is split away first
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var underline = line.next.as(not null)
		underline.clear
		var headline = current_block.split(line)
		var kind = new BlockHeadline(headline)
		kind.depth = 2
		kind.transform_headline(headline)
		headline.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end

# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Gather the list lines into a list block, split it into items and
	# process each item recursively.
	redef fun process(v) do
		var line = v.current_line
		# go to list end:
		# a non-empty, non-indented line following an empty one and that is
		# not a list line
		while line != null do
			# the kind is queried for every line, even when not needed by the test
			var t = v.line_kind(line)
			var outside = not line.is_empty and line.prev_empty and line.leading == 0
			if outside and not t isa LineList then break
			line = line.next
		end
		# build list block
		var current_block = v.current_block.as(not null)
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var list = current_block.split(last)
		var kind = block_kind(list)
		list.kind = kind
		# the list borders are reset before and after trimming the empty lines
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		kind.init_block(v)
		# process the items
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end

# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two chars after the first `.` of the line.
	redef fun extract_value(line) do
		var value = line.value
		var start = value.index_of('.') + 2
		return value.substring_from(start)
	end
end

# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two chars after the leading spaces of the line.
	redef fun extract_value(line) do
		var value = line.value
		var start = line.leading + 2
		return value.substring_from(start)
	end
end

# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: nullable MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default `char` is given to the decorator as is.
	fun emit(v: MarkdownProcessor) do
		v.decorator.add_char(v, char)
	end
end

# A token without a specific meaning.
#
# Adds nothing to `Token`: the default `emit` is used.
class TokenNone
	super Token
end

# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as emphasis.
	#
	# When no match is found, `char` is output as plain text.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end

# An emphasis star token.
#
# Adds nothing to `TokenEm`, it only exists as a distinct type.
class TokenEmStar
	super TokenEm
end

# An emphasis underscore token.
#
# Adds nothing to `TokenEm`, it only exists as a distinct type.
class TokenEmUnderscore
	super TokenEm
end

# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as strong text.
	#
	# The content starts 2 chars after `pos`.
	# When no match is found, `char` is output as plain text.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end

# A strong star token.
#
# Adds nothing to `TokenStrong`, it only exists as a distinct type.
class TokenStrongStar
	super TokenStrong
end

# A strong underscore token.
#
# Adds nothing to `TokenStrong`, it only exists as a distinct type.
class TokenStrongUnderscore
	super TokenStrong
end

# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as span code.
	#
	# Spaces around the content are dropped; nothing is emitted for a
	# content made only of spaces.
	# When no match is found, `char` is output as plain text.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var skip = next_pos
		var first = pos + skip + 1
		var last = v.find_token(text, first, self)
		if last <= 0 then
			v.addc char
			return
		end
		v.current_pos = last + skip
		while first < last and text[first] == ' ' do
			first += 1
		end
		if first >= last then return
		while text[last - 1] == ' ' do
			last -= 1
		end
		v.decorator.add_span_code(v, text, first, last)
	end

	# Number of extra chars taken by the token after `pos`.
	private fun next_pos: Int is abstract
end

# A span code token.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end

# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end

# A link or image token.
# This class is mainly used to factorize work between images and links.
#
# `link`, `name`, `comment` and `is_abbrev` are filled by `check_link`
# before `emit_hyper` is called.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image when `check_link` succeeds, `char` as plain text otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownProcessor) is abstract

	# Check if the link is a valid link.
	#
	# Reads the construct starting at `start` in `v.current_text` and fills
	# `name`, `link`, `comment` and `is_abbrev`.
	# Returns the position where the construct ends, or -1 if it is not a valid link.
	#
	# Most failures are detected with `pos < start`: the reading helpers return -1
	# (presumably always lower than `start`; assumes `start >= 0`).
	#
	# NOTE(review): `out` is never used in this method.
	private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		if md == null then return -1
		var pos
		# The text starts 1 char after `start` for links, 2 chars for other tokens.
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		# Position of the `]` closing the name.
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing but spaces follows the name: look it up in the link references.
			var tid = name.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form: `(address "title")`.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			# The address may be wrapped in `<` and `>`.
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# Optional title between double quotes.
				if pos > start and md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[id]`, an empty id means that the name is the id.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Anything else follows the name: look it up in the link references.
			# NOTE(review): unlike the first branch, line breaks are replaced by
			# spaces in the key and `is_abbrev` is not copied from the
			# reference; confirm whether the difference is intended.
			var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		# An unknown reference in the `[id]` form leaves `link` unset.
		if link == null then return -1
		return pos
	end
end

# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when `is_abbrev` is set and a `comment` exists, a link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
			return
		end
		v.decorator.add_link(v, link.as(not null), name.as(not null), title)
	end
end

# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image through the decorator; `link` and `name` must be set.
	redef fun emit_hyper(v) do
		var source = link.as(not null)
		var text = name.as(not null)
		v.decorator.add_image(v, source, text, comment)
	end
end

# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup read by `check_html`, or escape `char` when nothing valid is found.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links are emitted right away through the decorator.
	# Inline markup is appended to `out`.
	# Returns the position where the construct ends, or -1.
	private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var address = new FlatBuffer
		var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and address.is_link_prefix then
			pos = md.read_until(address, pos, '>')
			if pos != -1 then
				var link = address.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end

# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity unchanged when it is well formed, escape `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`.
	# The first char read is not checked (presumably the `&` of the token).
	#
	# Accepted forms, after the first char:
	#
	# * `#x` or `#X` followed by at least one hexadecimal digit
	# * `#` followed by decimal digits only
	# * letters and digits only
	#
	# On success `;` is appended to `out` and the position of the `;` in `md`
	# is returned. Returns -1 otherwise.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# Reject any char outside of the three hexadecimal ranges.
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
		end
		out.add ';'
		return pos
	end
end

# A markdown escape token.
class TokenEscape
	super Token

	# Move to the char that follows the token and output it as is.
	redef fun emit(v) do
		var escaped_pos = v.current_pos + 1
		v.current_pos = escaped_pos
		v.addc v.current_text.as(not null)[escaped_pos]
	end
end

# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as struck text.
	#
	# The content starts 2 chars after `pos`.
	# When no match is found, `char` is output as plain text.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = stop + 1
	end
end

redef class Text

	# Get the position of the next non-space character.
	#
	# Spaces and line breaks are skipped from `start`.
	# Returns -1 when the end of `self` is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		loop
			if pos <= -1 or pos >= length then break
			var c = self[pos]
			if c != ' ' and c != '\n' then break
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Returns the position of the end char found, or -1 if the end of `self` is reached.
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				# `escape` moves past the escaped char when it consumes it
				pos = escape(out, self[pos + 1], pos)
			else
				if nend.has(c) then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Returns the position of the end char found, or -1 if the end of `self` is reached.
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if nend.has(c) then break
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Chars are copied unchanged to `out`.
	# The chars of `to` are ignored inside single or double quoted strings.
	# Inside a string, a backslash and the char that follows it are copied
	# together, so that an escaped quote does not close the string.
	# A string is only closed by the quote char that opened it.
	#
	# Returns the position of the end char found, or -1 if the end of `self` is reached.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					out.add c
					pos += 1
					if pos < length then
						# copy the escaped char itself, not the backslash again
						out.add self[pos]
						pos += 1
					end
					continue
				end
				if c == str_char then in_str = false
			else if c == '"' or c == '\'' then
				# a quote only opens a string when none is currently open
				in_str = true
				str_char = c
			else if to.has(c) then
				break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the char opening the tag.
	# Returns the position of the `>` closing the tag, or -1 if no tag can be read.
	# For a tag starting with `<!`, only `<!` is appended and `start + 1` is returned.
	#
	# In safe mode, tags listed as unsafe are appended with `&lt;` and `&gt;`
	# in place of `<` and `>`.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		var pos = 0
		var is_valid = true
		var is_close_tag = false
		if start + 1 >= length then return -1
		# Find where the tag name starts.
		if self[start + 1] == '/' then
			is_close_tag = true
			pos = start + 2
		else if self[start + 1] == '!' then
			out.append "<!"
			return start + 1
		else
			is_close_tag = false
			pos = start + 1
		end
		if safe_mode then
			# The tag name is read apart to be checked before anything is appended.
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if not tag.is_valid_html_tag then
				# Not a tag name: `&lt;` is appended and the reading fails below.
				out.append "&lt;"
				pos = -1
			else if tag.is_html_unsafe then
				is_valid = false
				out.append "&lt;"
				if is_close_tag then out.add '/'
				out.append tmp
			else
				out.append "<"
				if is_close_tag then out.add '/'
				out.append tmp
			end
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# Copy what follows the tag name up to `/` or `>`.
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = self.read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			if is_valid then
				out.add '>'
			else
				out.append "&gt;"
			end
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# Parentheses are counted: the address ends on a space found outside of
	# nested parentheses, or on the `)` matching the opening one.
	# Returns the position of that end char, or -1 if the end of `self` is reached.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		# the opening parenthesis was read by the caller
		var depth = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if c == '(' then
					depth += 1
				else if c == ' ' then
					if depth == 1 then break
				else if c == ')' then
					depth -= 1
					if depth == 0 then break
				end
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Brackets are counted: the text ends on the `]` matching the opening one.
	# Nested brackets are copied to `out`.
	# Returns the position of the closing `]`, or -1 if the end of `self` is reached.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		# the opening bracket was read by the caller
		var depth = 1
		while pos < length do
			var c = self[pos]
			if c == '[' then
				depth += 1
			else if c == ']' then
				depth -= 1
				if depth == 0 then break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Extract the XML tag name from a XML tag.
	#
	# The first char is skipped, as well as a `/` in second position.
	# The name is made of the letters and digits that follow, the last char
	# of `self` excluded, and is returned in lower case.
	private fun xml_tag: String do
		var name = new FlatBuffer
		var pos = 1
		if length > 1 and self[1] == '/' then pos = 2
		loop
			if pos >= length - 1 then break
			var c = self[pos]
			if not c.is_digit and not c.is_letter then break
			name.add c
			pos += 1
		end
		return name.write_to_string.to_lower
	end

	# Is `self` a non-empty sequence of alphabetic chars only?
	private fun is_valid_html_tag: Bool do
		if is_empty then return false
		for c in self do
			if c.is_alpha then continue
			return false
		end
		return true
	end

	# Read and escape the markdown contained in `self`.
	#
	# `c` is the char found after a backslash located at `pos`.
	# If `c` can be escaped, it is appended to `out` and `pos + 1` is returned.
	# Otherwise a backslash is appended to `out` and `pos` is returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.', '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
		if not escapable.has(c) then
			out.add '\\'
			return pos
		end
		out.add c
		return pos + 1
	end

	# Extract string found at end of fence opening.
	#
	# Returns what follows the leading spaces, backquotes and tildes, trimmed;
	# or `null` when `self` contains nothing else.
	private fun meta_from_fence: nullable Text do
		var i = 0
		while i < length do
			var c = self[i]
			if c == ' ' or c == '`' or c == '~' then
				i += 1
			else
				return substring_from(i).trim
			end
		end
		return null
	end

	# Is `self` an unsafe HTML element?
	#
	# The comparison is made against `html_unsafe_tags`.
	private fun is_html_unsafe: Bool do
		var tag = self.write_to_string
		return html_unsafe_tags.has(tag)
	end

	# Is `self` a HTML block element?
	#
	# The comparison is made against `html_block_tags`.
	private fun is_html_block: Bool do
		var tag = self.write_to_string
		return html_block_tags.has(tag)
	end

	# Is `self` a link prefix?
	#
	# The comparison is made against `html_link_prefixes`.
	private fun is_link_prefix: Bool do
		var prefix = self.write_to_string
		return html_link_prefixes.has(prefix)
	end

	# Names of the HTML elements considered as unsafe.
	private fun html_unsafe_tags: Array[String] do
		var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
		return tags
	end

	# Names of the HTML elements considered as blocks.
	private fun html_block_tags: Array[String] do
		var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
		return tags
	end

	# Prefixes accepted at the start of an auto link.
	private fun html_link_prefixes: Array[String] do
		var prefixes = once ["http", "https", "ftp", "ftps"]
		return prefixes
	end
end

redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#    var md = "**Hello World!**"
	#    var html = md.md_to_html
	#    assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		return (new MarkdownProcessor).process(self)
	end
end

lib/markdown/markdown.nit:15,1--2614,3

module markdown

Summary

Markdown parsing.

Introduced classes

Block

BlockCode

BlockFence

BlockHeadline

BlockList

BlockListItem

BlockNone

BlockOrderedList

BlockParagraph

BlockQuote

BlockRuler

BlockUnorderedList

BlockXML

Decorator

HTMLDecorator

HeadLine

Line

LineBlockquote

LineCode

LineEmpty

LineFence

LineHR

LineHeadline

LineHeadline1

LineHeadline2

LineList

LineOList

LineOther

LineUList

LineXML

LinkRef

MDBlock

MDLine

MDLocation

MarkdownProcessor

Token

TokenCode

TokenCodeDouble

TokenCodeSingle

TokenEm

TokenEmStar

TokenEmUnderscore

TokenEntity

TokenEscape

TokenHTML

TokenImage

TokenLink

TokenLinkOrImage

TokenNone

TokenStrike

TokenStrong

TokenStrongStar

TokenStrongUnderscore

Redefined classes

String

Text

Markdown parsing.

Introduced classes

abstract class Block

class BlockCode

class BlockFence

class BlockHeadline

abstract class BlockList

class BlockListItem

class BlockNone

class BlockOrderedList

class BlockParagraph

class BlockQuote

class BlockRuler

class BlockUnorderedList

class BlockXML

interface Decorator

class HTMLDecorator

class HeadLine

interface Line

class LineBlockquote