# Parse a markdown string and split it in blocks.
#
# Blocks are then output by a `MarkdownEmitter`.
#
# Usage:
#
# var proc = new MarkdownProcessor
# var html = proc.process("**Hello World!**")
# assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor
# Work in extended mode (default).
#
# Behavior changes when using extended mode:
#
# * Lists and code blocks end a paragraph
#
# In normal markdown the following:
#
# ~~~md
# This is a paragraph
# * and this is not a list
# ~~~
#
# Will produce:
#
# ~~~html
# <p>This is a paragraph
# * and this is not a list</p>
# ~~~
#
# When using extended mode this changes to:
#
# ~~~html
# <p>This is a paragraph</p>
# <ul>
# <li>and this is not a list</li>
# </ul>
# ~~~
#
# * Fenced code blocks
#
# If you don't want to indent all your code with 4 spaces,
# you can wrap your code in ``` ``` ``` or `~~~`.
#
# Here's an example:
#
# ~~~md
# fun test do
# print "Hello World!"
# end
# ~~~
#
# * Code blocks meta
#
# If you want to use syntax highlighting tools, most of them need to know what kind
# of language they are highlighting.
# You can add an optional language identifier after the fence declaration to output
# it in the HTML render.
#
# ```nit
# import markdown
#
# print "# Hello World!".md_to_html
# ```
#
# Becomes
#
# ~~~html
# <pre class="nit"><code>import markdown
#
# print "# Hello World!".md_to_html
# </code></pre>
# ~~~
#
# * Underscores (Emphasis)
#
# Underscores in the middle of a word like:
#
# ~~~md
# Con_cat_this
# ~~~
#
# normally produces this:
#
# ~~~html
# <p>Con<em>cat</em>this</p>
# ~~~
#
# With extended mode they don't result in emphasis.
#
# ~~~html
# <p>Con_cat_this</p>
# ~~~
#
# * Strikethrough
#
# Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
# a strikethrough span is marked with `~~`.
#
# ~~~md
# ~~Mistaken text.~~
# ~~~
#
# becomes
#
# ~~~html
# <del>Mistaken text.</del>
# ~~~
var ext_mode = true
# Disable attaching `MDLocation` to tokens.
#
# Locations are useful for some tools but they may
# cause a significant time and space overhead.
#
# When set to `true`, `token_at` builds its tokens with a `null` location.
#
# Default = `false`
var no_location = false is writable
# Process the markdown `input` string and return the processed output.
#
# The state kept from a previous call (link refs, current line and block)
# is reset first, then `input` is parsed into blocks and emitted through
# the `decorator`.
fun process(input: String): Writable do
	# Forget everything left over from a previous run.
	current_block = null
	current_line = null
	last_link_ref = null
	link_refs.clear
	# Build the block tree from the input lines.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)
	# Render the tree, starting with a clean list of headlines.
	decorator.headlines.clear
	return emit(root.kind)
end
# Split the `input` string into `MDLine`s and gather them in a new parent `MDBlock`.
#
# Carriage returns are dropped and tabs are expanded with spaces up to the
# next multiple of 4 columns.
# Lines accepted by `check_link_ref` are not added to the returned block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var text = new FlatBuffer
	var length = input.length
	var index = 0
	var line_number = 0
	while index < length do
		text.clear
		# Number of characters stored in `text` for the current line.
		var width = 0
		# Number of input characters consumed for the current line,
		# including `\r` and the terminating `\n`.
		var consumed = 0
		loop
			if index >= length then break
			var c = input[index]
			index += 1
			consumed += 1
			if c == '\n' then break
			if c == '\r' then continue
			if c == '\t' then
				# Pad with spaces up to the next tab stop.
				var tab_stop = width + (4 - (width & 3))
				while width < tab_stop do
					text.add ' '
					width += 1
				end
				continue
			end
			text.add c
			width += 1
		end
		line_number += 1
		var location = new MDLocation(line_number, 1, line_number, consumed)
		var line = new MDLine(location, text.write_to_string)
		# Link ref definitions are recorded by `check_link_ref`, not kept as content.
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
# Check if line is a block link definition.
#
# Return `true` if `line` contains a valid link ref and save it into `link_refs`.
#
# Also return `true` when `line` is not a definition by itself but provides a
# title (delimited by `"`, `'` or parentheses) for the link ref remembered in
# `last_link_ref`; the title of that link ref is then updated.
#
# NOTE(review): the position handling below relies on the return values of
# `read_until` and `skip_spaces`, which are defined elsewhere; the code treats
# a negative result as "nothing found" — confirm against their implementation.
private fun check_link_ref(line: MDLine): Bool do
var md = line.value
var is_link_ref = false
# Buffers receiving the three parts of a definition: `[id]: link "comment"`.
var id = new FlatBuffer
var link = new FlatBuffer
var comment = new FlatBuffer
var pos = -1
# A definition needs `[` at index `line.leading`, with `line.leading` below 4.
if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
pos = line.leading + 1
# Read the id up to the closing `]`.
pos = md.read_until(id, pos, ']')
if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
# The closing `]` must be directly followed by `:`.
if line.value[pos + 1] == ':' then
pos += 2
pos = md.skip_spaces(pos)
if pos >= 0 and line.value[pos] == '<' then
# Link written as `<link>`: read up to `>` and step over it.
pos += 1
pos = md.read_until(link, pos, '>')
pos += 1
else if pos >= 0 then
# Bare link: read up to the next space or line break.
pos = md.read_until(link, pos, ' ', '\n')
end
if not link.is_empty then
pos = md.skip_spaces(pos)
if pos > 0 and pos < line.value.length then
# Something follows the link: accept it only as a delimited title.
var c = line.value[pos]
if c == '\"' or c == '\'' or c == '(' then
pos += 1
if c == '(' then
pos = md.read_until(comment, pos, ')')
else
pos = md.read_until(comment, pos, c)
end
if pos > 0 then is_link_ref = true
end
else
# No usable position after the link: the link alone makes a definition.
is_link_ref = true
end
end
end
end
end
if is_link_ref and not id.is_empty and not link.is_empty then
var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
add_link_ref(id.write_to_string, lr)
# Remember a title-less ref so that a following line can provide its title.
if comment.is_empty then last_link_ref = lr
return true
else
# Not a definition: try to read the line as the title of the previous link ref.
comment = new FlatBuffer
if not line.is_empty and last_link_ref != null then
pos = line.leading
var c = line.value[pos]
if c == '\"' or c == '\'' or c == '(' then
pos += 1
if c == '(' then
pos = md.read_until(comment, pos, ')')
else
pos = md.read_until(comment, pos, c)
end
end
# Local copy of the attribute, checked against `null` before use.
var last_link_ref = self.last_link_ref
if not comment.is_empty and last_link_ref != null then
last_link_ref.title = comment.write_to_string
end
end
# The line is consumed only if a title was actually read.
if comment.is_empty then return false
return true
end
end
# Known link refs
#
# This map is needed during output to expand links.
# `add_link_ref` stores the refs under their lowercased id.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
# Last encountered link ref (for multiline definitions)
#
# Markdown allows link refs to be defined over two lines:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
# "Optional Title Here"
# ~~~
#
# `check_link_ref` sets it when a definition without title is read,
# and `process` resets it to `null`.
private var last_link_ref: nullable LinkRef = null
# Register `ref` in `link_refs` under the lowercased `key`.
fun add_link_ref(key: String, ref: LinkRef) do
	var id = key.to_lower
	link_refs[id] = ref
end
# Recursively split a `block`.
#
# The block is split according to the type of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# The previous list mode and the previous `current_block` are restored
# before returning, including when `root` contains only empty lines.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list
	# Skip the leading empty lines.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: nothing to process.
			# Restore the list mode so it does not leak into the caller.
			self.in_list = old_mode
			return
		end
	end
	current_line = line
	current_block = root
	# Each line kind processes itself and is in charge of moving `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
# Currently processed line.
#
# Used when visiting blocks with `recurse`, which loops until it becomes `null`.
var current_line: nullable MDLine = null is writable
# Currently processed block.
#
# Used when visiting blocks with `recurse`, which restores the previous
# value once the block has been visited.
var current_block: nullable MDBlock = null is writable
# Is the current recursion in list mode?
#
# Used when visiting blocks with `recurse`.
private var in_list = false
# Get the kind of the line `md`.
#
# The checks are done in this order, the first match wins:
# empty line, code (more than 3 leading characters), `#` headline, blockquote,
# fence (extended mode only), horizontal rule, unordered list, ordered list,
# XML, then headlines underlined with `=` or `-` on the next line.
# Any other line is a `LineOther`.
fun line_kind(md: MDLine): Line do
	var text = md.value
	var start = md.leading
	var tail = md.trailing
	if md.is_empty then return new LineEmpty
	if start > 3 then return new LineCode
	# First significant character of the line.
	var first = text[start]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote
	# Length of the line without its leading and trailing parts.
	var inner_length = text.length - start - tail
	if ext_mode and inner_length > 2 then
		if first == '`' and md.count_chars_start('`') >= 3 then return new LineFence
		if first == '~' and md.count_chars_start('~') >= 3 then return new LineFence
	end
	if inner_length > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end
	# Length of the line from its first significant character.
	var remaining = text.length - start
	if remaining >= 2 and text[start + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end
	if remaining >= 3 and first.is_digit then
		# Skip the digits, then expect ". ".
		var i = start + 1
		while i < text.length and text[i].is_digit do i += 1
		if i + 1 < text.length and text[i] == '.' and text[i + 1] == ' ' then
			return new LineOList
		end
	end
	if first == '<' and md.check_html then return new LineXML
	# Setext-style headlines are detected from the following line.
	var following = md.next
	if following != null and not following.is_empty then
		if following.count_chars('=') > 0 then return new LineHeadline1
		if following.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
# Get the token kind at `pos` in `text`.
#
# The decision uses the character at `pos`, the one before it and the two
# after it; neighbours outside of `text` are considered to be spaces.
# Unless `no_location` is set, the token is located from `current_loc`.
fun token_at(text: Text, pos: Int): Token do
	# Previous character.
	var c0 = if pos > 0 then text[pos - 1] else ' '
	var c = text[pos]
	# Next two characters.
	var c1 = if pos + 1 < text.length then text[pos + 1] else ' '
	var c2 = if pos + 2 < text.length then text[pos + 2] else ' '

	var loc: nullable MDLocation = null
	if not no_location then
		var here = current_loc
		var column = here.column_start + pos
		loc = new MDLocation(here.line_start, column, here.line_start, column)
	end

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(loc, pos, c)
			return new TokenEmStar(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode an underscore between two alphanumeric
			# characters is not an emphasis marker.
			var after_word = (c0.is_letter or c0.is_digit) and c0 != '_'
			var before_word = c1.is_letter or c1.is_digit
			if after_word and before_word then return new TokenNone(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)
	if c == ']' then return new TokenNone(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# A backslash is an escape only in front of one of these characters.
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	# `~~` marks a strikethrough in extended mode only.
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)
	return new TokenNone(loc, pos, c)
end
# Find the position in `text` of the first token with the same type as `token`.
#
# The search begins at `start`.
# Return `-1` if no such token is found.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start .. text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
# Kind of decorator used for decoration.
#
# Virtual type bound to `Decorator`.
type DECORATOR: Decorator
# Decorator used for output.
#
# Default is `HTMLDecorator`, lazily created on the first read
# if no decorator was set before.
var decorator: DECORATOR is writable, lazy do
return new HTMLDecorator
end
# Create a new `MarkdownProcessor` using a custom `decorator`.
init with_decorator(decorator: DECORATOR) do
self.decorator = decorator
end
# Output `block` using `decorator` and return the produced text.
#
# A new buffer is pushed on the buffer stack for the time of the emission,
# then popped and returned.
fun emit(block: Block): Text do
var buffer = push_buffer
block.emit(self)
pop_buffer
return buffer
end
# Output the content of `block`.
fun emit_in(block: Block) do block.emit_in(self)
# Transform and emit the whole markdown `text`.
#
# Same as `emit_text_until` from position 0 and without a stop token.
fun emit_text(text: Text) do emit_text_until(text, 0, null)
# Transform and emit markdown `text` starting at `start` and
# until a token with the same type as `token` is found.
#
# Go until the end of `text` if `token` is null or is a `TokenNone`.
# A strong star (or underscore) token also stops the emission when `token`
# is an emphasis star (or underscore) token.
#
# Return the position of the stop token, or `-1` if the end of `text`
# was reached.
#
# NOTE(review): `current_text` and `current_pos` are restored only when the
# end of `text` is reached, not when a stop token is found. Callers may rely
# on this; confirm before changing it.
fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
	var saved_text = current_text
	var saved_pos = current_pos
	current_text = text
	current_pos = start
	loop
		if current_pos >= text.length then break
		if text[current_pos] == '\n' then
			# Move the current location to the beginning of the next line.
			var loc = current_loc
			loc.line_start += 1
			loc.column_start = -current_pos
		end
		var found = token_at(text, current_pos)
		if token != null and not token isa TokenNone then
			var stops = found.is_same_type(token) or
				(token isa TokenEmStar and found isa TokenStrongStar) or
				(token isa TokenEmUnderscore and found isa TokenStrongUnderscore)
			if stops then return current_pos
		end
		# Emitting a token may move `current_pos` forward.
		found.emit(self)
		current_pos += 1
	end
	current_text = saved_text
	current_pos = saved_pos
	return -1
end
# Currently processed position in `current_text`.
#
# Used when visiting inline production with `emit_text_until`.
# Set to `-1` until a text is processed.
private var current_pos: Int = -1
# Currently processed text.
#
# Used when visiting inline production with `emit_text_until`.
private var current_text: nullable Text = null
# Stacked buffers.
#
# The last one is the current output buffer.
private var buffer_stack = new List[FlatBuffer]
# Push a new empty buffer on the stack and return it.
private fun push_buffer: FlatBuffer do
var buffer = new FlatBuffer
buffer_stack.add buffer
return buffer
end
# Pop the last buffer.
private fun pop_buffer do buffer_stack.pop
# Current output buffer.
#
# The buffer stack must not be empty.
private fun current_buffer: FlatBuffer do
assert not buffer_stack.is_empty
return buffer_stack.last
end
# Stacked locations.
#
# The last one is the current location.
private var loc_stack = new List[MDLocation]
# Push a new MDLocation on the stack.
private fun push_loc(location: MDLocation) do loc_stack.add location
# Pop the last location and return it.
private fun pop_loc: MDLocation do return loc_stack.pop
# Current location.
#
# The location stack must not be empty.
private fun current_loc: MDLocation do
assert not loc_stack.is_empty
return loc_stack.last
end
# Append `e` to the current buffer.
#
# A `Text` is appended as is, any other `Writable` is first
# converted with `write_to_string`.
fun add(e: Writable) do
	var out = current_buffer
	var text: Text
	if e isa Text then
		text = e
	else
		text = e.write_to_string
	end
	out.append text
end
# Append the character `c` to the current buffer.
fun addc(c: Char) do
current_buffer.add c
end
# Append a "\n" line break to the current buffer.
fun addn do addc '\n'
end
lib/markdown/markdown.nit:20,1--611,3