# This file is part of NIT ( http://www.nitlanguage.org ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Markdown parsing. module markdown import template # Parse a markdown string and split it in blocks. # # Blocks are then outputed by an `MarkdownEmitter`. # # Usage: # # var proc = new MarkdownProcessor # var html = proc.process("**Hello World!**") # assert html == "
Hello World!
\n" # # SEE: `String::md_to_html` for a shortcut. class MarkdownProcessor # `MarkdownEmitter` used for ouput. var emitter: MarkdownEmitter is noinit, protected writable # Work in extended mode (default). # # Behavior changes when using extended mode: # # * Lists and code blocks end a paragraph # # In normal markdown the following: # # ~~~md # This is a paragraph # * and this is not a list # ~~~ # # Will produce: # # ~~~html #This is a paragraph # * and this is not a list
# ~~~ # # When using extended mode this changes to: # # ~~~html #This is a paragraph
#import markdown
#
# print "Hello World!".md_to_html
#
# ~~~
#
# * Underscores (Emphasis)
#
# Underscores in the middle of a word like:
#
# ~~~md
# Con_cat_this
# ~~~
#
# normally produces this:
#
# ~~~html
# Concatthis
# ~~~ # # With extended mode they don't result in emphasis. # # ~~~html #Con_cat_this
# ~~~ # # * Strikethrough # # Like in [GFM](https://help.github.com/articles/github-flavored-markdown), # strikethrought span is marked with `~~`. # # ~~~md # ~~Mistaken text.~~ # ~~~ # # becomes # # ~~~html #" v.emit_in block v.add "
\n" end
# Emit `block` as a preformatted HTML code block.
#
# When the block carries metadata (`block.meta`), it is escaped with
# `append_value` and emitted as the `class` attribute of the `<pre>` element.
redef fun add_code(v, block) do
	var meta = block.meta
	if meta != null then
		v.add "<pre class=\""
		append_value(v, meta)
		v.add "\"><code>"
	else
		v.add "<pre><code>"
	end
	v.emit_in block
	v.add "</code></pre>\n"
end
# Emit the content of `block` wrapped in a `<blockquote>` element.
redef fun add_blockquote(v, block) do
	v.add "<blockquote>\n"
	v.emit_in block
	v.add "</blockquote>\n"
end
# Emit the content of `block` wrapped in a `<ul>` element.
redef fun add_unorderedlist(v, block) do
	v.add "<ul>\n"
	v.emit_in block
	v.add "</ul>\n"
end
# Emit the content of `block` wrapped in an `<ol>` element.
redef fun add_orderedlist(v, block) do
	v.add "<ol>\n"
	v.emit_in block
	v.add "</ol>\n"
end
# Emit the content of `block` wrapped in an `<li>` element.
redef fun add_listitem(v, block) do
	v.add "<li>"
	v.emit_in block
	v.add "</li>\n"
end
# Emit `text` wrapped in an `<em>` element.
#
# `text` is added as is: it is not escaped here.
redef fun add_em(v, text) do
	v.add "<em>"
	v.add text
	v.add "</em>"
end
# Emit `text` wrapped in a `<strong>` element.
#
# `text` is added as is: it is not escaped here.
redef fun add_strong(v, text) do
	v.add "<strong>"
	v.add text
	v.add "</strong>"
end
# Emit `text` wrapped in a `<del>` element (strikethrough).
#
# `text` is added as is: it is not escaped here.
redef fun add_strike(v, text) do
	v.add "<del>"
	v.add text
	v.add "</del>"
end
# Emit an `<img>` element whose `src` is `link` and whose `alt` is `name`.
#
# A `title` attribute is added when `comment` is neither null nor empty.
# Every attribute value goes through `append_value` so that it is escaped.
redef fun add_image(v, link, name, comment) do
	v.add "<img src=\""
	append_value(v, link)
	v.add "\" alt=\""
	append_value(v, name)
	v.add "\""
	if comment != null and not comment.is_empty then
		v.add " title=\""
		append_value(v, comment)
		v.add "\""
	end
	v.add "/>"
end
# Emit an `<a>` element pointing to `link` with `name` as its content.
#
# A `title` attribute is added when `comment` is neither null nor empty.
# Attribute values are escaped with `append_value`; `name` is processed by
# `v.emit_text` so that markup inside the link text is rendered.
redef fun add_link(v, link, name, comment) do
	v.add "<a href=\""
	append_value(v, link)
	v.add "\""
	if comment != null and not comment.is_empty then
		v.add " title=\""
		append_value(v, comment)
		v.add "\""
	end
	v.add ">"
	v.emit_text(name)
	v.add "</a>"
end
# Emit an `<abbr>` element with `comment` as its `title` and `name` as content.
#
# `comment` is escaped with `append_value`; `name` is processed by `v.emit_text`.
redef fun add_abbr(v, name, comment) do
	v.add "<abbr title=\""
	append_value(v, comment)
	v.add "\">"
	v.emit_text(name)
	v.add "</abbr>"
end
# Emit the characters of `text` in `[from..to[` as an inline `<code>` element.
#
# The content is written by `append_code`.
redef fun add_span_code(v, text, from, to) do
	v.add "<code>"
	append_code(v, text, from, to)
	v.add "</code>"
end
# Emit a forced line break as a `<br/>` element.
redef fun add_line_break(v) do
	v.add "<br/>"
end
# Write `text` to `v`, one character at a time, through `escape_char`.
redef fun append_value(v, text) do
	for ch in text do
		escape_char(v, ch)
	end
end
# Write `c` to `v`, replacing HTML-significant characters by entities.
#
# `&`, `<`, `>`, `"` and `'` become `&amp;`, `&lt;`, `&gt;`, `&quot;` and
# `&apos;`. Any other character is written unchanged.
redef fun escape_char(v, c) do
	if c == '&' then
		v.add "&amp;"
	else if c == '<' then
		v.add "&lt;"
	else if c == '>' then
		v.add "&gt;"
	else if c == '"' then
		v.add "&quot;"
	else if c == '\'' then
		v.add "&apos;"
	else
		v.addc c
	end
end
# Write the characters of `buffer` in `[from..to[` to `v` as code content.
#
# Only `&`, `<` and `>` are replaced by entities (`&amp;`, `&lt;`, `&gt;`);
# quotes are left untouched, unlike in `escape_char`.
redef fun append_code(v, buffer, from, to) do
	for i in [from..to[ do
		var c = buffer[i]
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else
			v.addc c
		end
	end
end
# Build a unique identifier from the headline text `txt`.
#
# Spaces become underscores; any other character is kept only when it is a
# letter, a digit or one of `allowed_id_chars`. When the resulting identifier
# is already a key of `headlines`, the suffixes `_1`, `_2`, ... are tried
# until an unused key is found.
redef fun strip_id(txt) do
	var buf = new FlatBuffer
	for ch in txt do
		if ch == ' ' then
			buf.add '_'
			continue
		end
		if ch.is_letter or ch.is_digit or allowed_id_chars.has(ch) then
			buf.add ch
		end
	end
	var base = buf.to_s
	# The bare identifier is still free: use it directly.
	if not headlines.has_key(base) then return base
	# Otherwise pick the first numbered variant that is not taken yet.
	var n = 1
	var candidate = "{base}_{n}"
	while headlines.has_key(candidate) do
		n += 1
		candidate = "{base}_{n}"
	end
	return candidate
end
# Non-alphanumeric characters that `strip_id` keeps in generated identifiers.
private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
# A span of a Markdown input, delimited by a start and an end position.
class MDLocation

	# Line on which the span starts (counted from 1).
	var line_start: Int

	# Column on which the span starts (counted from 1).
	var column_start: Int

	# Line on which the span stops (counted from 1).
	var line_end: Int

	# Column on which the span stops (counted from 1).
	var column_end: Int

	# Format `self` as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var start_part = "{line_start},{column_start}"
		var end_part = "{line_end},{column_end}"
		return "{start_part}--{end_part}"
	end

	# Build a new `MDLocation` holding the same four coordinates as `self`.
	fun copy: MDLocation do
		var dup = new MDLocation(line_start, column_start, line_end, column_end)
		return dup
	end
end
# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks.
# Lines are kept in a doubly linked list (`first_line`..`last_line`, chained
# through `MDLine::prev` and `MDLine::next`); sub-blocks are kept in a list
# going from `first_block` to `last_block`, chained through `next`.
class MDBlock
# Position of `self` in the input.
var location: MDLocation
# Kind of block.
# See `Block`.
#
# Defaults to a `BlockNone` attached to `self`.
var kind: Block = new BlockNone(self) is writable
# First line if any.
var first_line: nullable MDLine = null is writable
# Last line if any.
var last_line: nullable MDLine = null is writable
# First sub-block if any.
var first_block: nullable MDBlock = null is writable
# Last sub-block if any.
var last_block: nullable MDBlock = null is writable
# Previous block if any.
#
# NOTE(review): nothing in this class assigns `prev` (`split` only links
# `next`); confirm whether it is maintained elsewhere.
var prev: nullable MDBlock = null is writable
# Next block if any.
var next: nullable MDBlock = null is writable
# Does this block contain subblocks?
fun has_blocks: Bool do return first_block != null
# Count sub-blocks by walking the `next` chain from `first_block`.
fun count_blocks: Int do
var count = 0
var block = first_block
while block != null do
count += 1
block = block.next
end
return count
end
# Does this block contain lines?
fun has_lines: Bool do return first_line != null
# Count block lines by walking the `next` chain from `first_line`.
fun count_lines: Int do
var count = 0
var line = first_line
while line != null do
count += 1
line = line.next
end
return count
end
# Split `self`, creating a new sub-block having `line` as `last_line`.
#
# The lines from `first_line` up to and including `line` are moved to the
# new block, which is appended to the sub-blocks of `self` and returned.
# The remaining lines (after `line`) stay in `self`.
#
# `first_line` is dereferenced without a null check: `self` must contain
# at least one line.
fun split(line: MDLine): MDBlock do
# location for new block
var new_loc = new MDLocation(
first_line.location.line_start,
first_line.location.column_start,
line.location.line_end,
line.location.column_end)
# create block
var block = new MDBlock(new_loc)
block.first_line = first_line
block.last_line = line
# detach the moved lines from the ones that remain in `self`
first_line = line.next
line.next = null
if first_line == null then
# every line was moved
last_line = null
else
first_line.prev = null
# update current block loc
location.line_start = first_line.location.line_start
location.column_start = first_line.location.column_start
end
# append the new block to the sub-block list
if first_block == null then
first_block = block
last_block = block
else
last_block.next = block
last_block = block
end
return block
end
# Add a `line` at the end of this block.
#
# Also keeps the `prev_empty` / `next_empty` flags of the two adjacent
# lines in sync.
fun add_line(line: MDLine) do
if last_line == null then
first_line = line
last_line = line
else
last_line.next_empty = line.is_empty
line.prev_empty = last_line.is_empty
line.prev = last_line
last_line.next = line
last_line = line
end
end
# Remove `line` from this block.
#
# The neighbours of `line` are linked together and `line` is left with
# neither `prev` nor `next`. The `prev_empty` / `next_empty` flags of the
# neighbours are not updated here.
fun remove_line(line: MDLine) do
if line.prev == null then
first_line = line.next
else
line.prev.next = line.next
end
if line.next == null then
last_line = line.prev
else
line.next.prev = line.prev
end
line.prev = null
line.next = null
end
# Remove leading empty lines.
#
# Returns true if at least one line was removed.
fun remove_leading_empty_lines: Bool do
var was_empty = false
var line = first_line
while line != null and line.is_empty do
remove_line line
line = first_line
was_empty = true
end
return was_empty
end
# Remove trailing empty lines.
#
# Returns true if at least one line was removed.
fun remove_trailing_empty_lines: Bool do
var was_empty = false
var line = last_line
while line != null and line.is_empty do
remove_line line
line = last_line
was_empty = true
end
return was_empty
end
# Remove leading and trailing empty lines.
#
# Returns true if at least one line was removed on either side.
fun remove_surrounding_empty_lines: Bool do
var was_empty = false
if remove_leading_empty_lines then was_empty = true
if remove_trailing_empty_lines then was_empty = true
return was_empty
end
# Remove list markers and up to 4 leading spaces.
# Used to clean nested lists.
#
# Lines recognized by `v.line_kind` as `LineList` get their value replaced
# by `extract_value`; other non-empty lines lose at most 4 leading
# characters. `leading` is recomputed afterwards.
fun remove_list_indent(v: MarkdownProcessor) do
var line = first_line
while line != null do
if not line.is_empty then
var kind = v.line_kind(line)
if kind isa LineList then
line.value = kind.extract_value(line)
else
line.value = line.value.substring_from(line.leading.min(4))
end
line.leading = line.process_leading
end
line = line.next
end
end
# Collect block line text.
#
# Each line contributes its `text` (nothing for an empty line) followed
# by a `\n`.
fun text: String do
var text = new FlatBuffer
var line = first_line
while line != null do
if not line.is_empty then
text.append line.text
end
text.append "\n"
line = line.next
end
return text.write_to_string
end
end
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` on `v`.
	#
	# The default implementation delegates to `v.emit_in`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the content of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first. If lines remain they are
	# emitted with `emit_lines`, otherwise the sub-blocks are emitted with
	# `emit_blocks`.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines are first gathered in a temporary buffer (`v.push_buffer`),
	# separated by `v.addn`, then the whole buffer is handed to `v.emit_text`.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				v.add line.value.substring(line.leading, line.value.length - line.trailing)
				# two or more trailing spaces request a line break
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is pushed on `v` while it is emitted.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			v.push_loc(block.location)
			block.kind.emit(v)
			v.pop_loc
			block = block.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Outside of a `BlockFence`, lines starting with a 4-space indentation
	# have these 4 characters removed. Every line, empty or not, is followed
	# by a `\n`.
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				# Only strip a full 4-space indentation: testing a shorter
				# prefix would drop real content on lines indented by 1 to 3
				# spaces.
				if not infence and str.has_prefix("    ") then
					text.append str.substring(4, str.length - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
# A block without any markdown specificities.
#
# Declares no member of its own: everything is inherited from `Block`.
# This class is only used for typing purposes.
class BlockNone
super Block
end
# A markdown blockquote.
class BlockQuote
	super Block

	# Render through the decorator's `add_blockquote` hook.
	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Strip the `>` marker from the lines of `block`.
	#
	# For each non-empty line whose first non-space character is `>`, the
	# marker and, if present, the single space that follows it are removed;
	# `leading` is then recomputed.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var cur = block.first_line
		while cur != null do
			if not cur.is_empty and cur.value[cur.leading] == '>' then
				# index of the first character kept after the marker
				var skip = cur.leading + 1
				if skip < cur.value.length and cur.value[skip] == ' ' then skip += 1
				cur.value = cur.value.substring_from(skip)
				cur.leading = cur.process_leading
			end
			cur = cur.next
		end
	end
end
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Number of characters skipped at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	# Render through the decorator's `add_code` hook.
	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line from `line_start` to its end with `append_code`.
	#
	# Every line, empty or not, is followed by `v.addn`.
	redef fun emit_lines(v) do
		var cur = block.first_line
		while cur != null do
			if not cur.is_empty then
				var src = cur.value
				v.decorator.append_code(v, src, line_start, src.length)
			end
			v.addn
			cur = cur.next
		end
	end
end
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`; only `line_start` differs.
# The class also lets fenced blocks be recognized with `isa BlockFence`.
class BlockFence
super BlockCode
# Fence code lines start at 0 spaces.
redef var line_start = 0
end
# A markdown headline.
class BlockHeadline
	super Block

	# Emit through the decorator's `add_headline` hook.
	#
	# The location pushed on `v` is a copy of the block location whose
	# `column_start` is shifted by `start`.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the first character of the headline text in the original line.
	#
	# Set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# Does nothing when `depth` is already set (> 0) or when the first line
	# is empty. Otherwise the leading `#` marks are counted to set `depth`
	# (capped at 6), and the marks, the spaces after them, and any closing
	# `#` marks and spaces are stripped from the line value.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			var nend = line.value.length - line.trailing - 1
			# Never trim further back than `start`: a headline such as `# #`
			# would otherwise produce a negative substring length.
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			if nend < start then
				# only closing marks were found: there is no text
				line.is_empty = true
			else
				line.value = line.value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		self.start = start
		depth = level.min(6)
	end
end
# A markdown list item block.
class BlockListItem
	super Block

	# Render through the decorator's `add_listitem` hook.
	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split the list block into one `BlockListItem` sub-block per item.
	#
	# Starting from the second line, a new item begins on each line that
	# `v.line_kind` reports as a `LineList`, or on a non-empty, non-indented
	# line that follows an empty one. Whatever remains at the end becomes the
	# last item.
	private fun init_block(v: MarkdownProcessor) do
		var cur = block.first_line.next
		while cur != null do
			var kind = v.line_kind(cur)
			var starts_item = kind isa LineList
			if not starts_item then
				starts_item = not cur.is_empty and cur.prev_empty and cur.leading == 0
			end
			if starts_item then
				# everything before `cur` forms the previous item
				var item = block.split(cur.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			cur = cur.next
		end
		var tail = block.split(block.last_line.as(not null))
		tail.kind = new BlockListItem(tail)
	end

	# Expand list items as paragraphs if needed.
	#
	# If any list item of `block` contains a `BlockParagraph`, every
	# `BlockNone` found in the list items is turned into a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in any list item.
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null and not found do
					if sub.kind isa BlockParagraph then found = true
					sub = sub.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote plain blocks to paragraphs.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
					sub = sub.next
				end
			end
			item = item.next
		end
	end
end
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	# Render through the decorator's `add_orderedlist` hook.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	# Render through the decorator's `add_unorderedlist` hook.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
# A markdown paragraph block.
class BlockParagraph
	super Block

	# Render through the decorator's `add_paragraph` hook.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
# A markdown ruler.
class BlockRuler
	super Block

	# Render through the decorator's `add_ruler` hook.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the lines verbatim: the value of each non-empty line is added
	# without escaping, and every line is followed by `v.addn`.
	redef fun emit_lines(v) do
		var cur = block.first_line
		while cur != null do
			if not cur.is_empty then
				v.add cur.value
			end
			v.addn
			cur = cur.next
		end
	end
end
# A markdown line.
#
# Lines are chained together through `prev` and `next` inside a `MDBlock`.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# Computes `leading`; when the line is not made only of spaces, it is
	# flagged as non-empty and `trailing` is computed too.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The `next_empty` / `prev_empty` flags of the neighbours are updated too.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.next_empty = true
		if next != null then next.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# Returns the number of spaces at the beginning of `value`.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# NOTE(review): this compares the previously stored `leading`, not the
		# freshly computed `count`; kept as is, confirm the intent.
		if leading == value.length then clear
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns the number of spaces at the end of `value`. For a value made
	# only of spaces (or empty), the whole length is returned.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# The bound prevents indexing before the beginning of `value`.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment` when markup is recognized.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# On success, `xml_end_line` is set to the line on which the markup ends.
	# A stack of open tag names is kept while following lines are scanned.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				# look for the next tag opening on the current line
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# closing tag: it must match the last opened one
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the index of the `<` in `first_line`. Returns the position
	# just after the closing `-->` and sets `first_line.xml_end_line` to the
	# line holding it, or returns -1 when no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.value.length then
			# The two dashes of `<!--` are relative to `start`, so that a
			# comment preceded by leading spaces is recognized too.
			if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do return value.substring(leading, value.length - trailing)
end
# The kind of a markdown line.
#
# Each implementation knows how to consume the current line(s) of a
# `MarkdownProcessor` and turn them into blocks.
interface Line
# Parse the line.
# See `MarkdownProcessor::recurse`.
fun process(v: MarkdownProcessor) is abstract
end
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the following one.
	redef fun process(v) do
		var following = v.current_line.next
		v.current_line = following
	end
end
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather consecutive lines into a `BlockParagraph` or a `BlockNone`.
	#
	# Lines are consumed until an empty line, the end of the block, or a line
	# that starts another construct (list, code, fence, headline, ruler,
	# blockquote, XML; some of them only in list or extended mode).
	# The new block is a `BlockNone` only inside a list, when the first line
	# did not follow an empty line and the scan did not stop on an empty line.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
				t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# build block
		var block: MDBlock
		var plain: Bool
		if line == null then
			# reached the end: take everything that is left
			block = v.current_block.split(v.current_block.last_line.as(not null))
			plain = v.in_list and not was_empty
		else if line.is_empty then
			# stopped on an empty line: it is included in the block
			block = v.current_block.split(line)
			plain = false
		else
			# stopped on another construct: it is left for the next step
			block = v.current_block.split(line.prev.as(not null))
			plain = v.in_list and not was_empty
		end
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A line of markdown code.
class LineCode
	super Line

	# Gather the following code lines (and interleaved empty lines) into a
	# `BlockCode`, then strip its surrounding empty lines.
	redef fun process(v) do
		# lookup block end
		var line = v.current_line
		while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
			line = line.next
		end
		# last line that belongs to the code block
		var cut: MDLine
		if line == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = line.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A line of raw XML.
class LineXML
	super Line

	# Isolate the lines from the current one to its `xml_end_line` in a
	# `BlockXML`. Lines located before the current one are split off first.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		var xml = v.current_block.split(line.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote in a `BlockQuote` and process its content.
	#
	# The quote stops before a non-empty, non-indented line that follows an
	# empty line and is not itself a blockquote line. Quote markers are
	# removed, then the block content is parsed with `v.recurse`.
	redef fun process(v) do
		# go to bquote end
		var line = v.current_line
		while line != null do
			var quote_ends = not line.is_empty and (line.prev_empty and
				line.leading == 0 and
				not (v.line_kind(line) isa LineBlockquote))
			if quote_ends then break
			line = line.next
		end
		# build sub block
		var cut: MDLine
		if line == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = line.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
# A markdown ruler line.
class LineHR
	super Line

	# Isolate the current line in a `BlockRuler`.
	# Lines located before it are split off first.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		var ruler = v.current_block.split(line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the closing fence in a `BlockFence`.
	#
	# The metadata is read from the opening fence line with
	# `meta_from_fence`. The opening line is cleared, as is the last line
	# when it is a fence too; surrounding empty lines are then removed.
	redef fun process(v) do
		# go to fence end
		var line = v.current_line.next
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# step over the closing fence, if one was found
		if line != null then line = line.next
		# build fence block
		var cut: MDLine
		if line == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = line.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		block.remove_surrounding_empty_lines
		var meta = block.first_line.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.clear
		var closing = block.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			block.last_line.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
# A markdown headline.
class LineHeadline
	super Line

	# Isolate the current line in a `BlockHeadline` and strip its marks
	# with `transform_headline`. Lines located before it are split off first.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		block.kind = headline
		headline.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line in a `BlockHeadline` of depth 1.
	#
	# The line that follows the current one is cleared before the split.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 1
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line in a `BlockHeadline` of depth 2.
	#
	# The line that follows the current one is cleared before the split.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 2
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Gather the lines of the list in a list block and process its items.
	#
	# The list stops before a non-empty, non-indented line that follows an
	# empty line and is not a list line. The block is then split into items
	# (`init_block`); each item is unindented and parsed with `v.recurse`.
	redef fun process(v) do
		# go to list end
		var line = v.current_line
		while line != null do
			var t = v.line_kind(line)
			var list_ends = not line.is_empty and (line.prev_empty and
				line.leading == 0 and not (t isa LineList))
			if list_ends then break
			line = line.next
		end
		# build list block
		var cut: MDLine
		if line == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = line.prev.as(not null)
		end
		var list = v.current_block.split(cut)
		var kind = block_kind(list)
		list.kind = kind
		# reset the boundary flags, before and after trimming, since the
		# first and last lines may change
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
# An ordered list line.
class LineOList
	super LineList

	# Lines of this kind produce a `BlockOrderedList`.
	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# Return what follows the first `.` of the line and the character after it.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
# An unordered list line.
class LineUList
	super LineList

	# Lines of this kind produce a `BlockUnorderedList`.
	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# Return the line value without its leading spaces and the 2 characters
	# that follow them.
	redef fun extract_value(line) do
		var content_start = line.leading + 2
		return line.value.substring_from(content_start)
	end
end
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: MDLocation

	# Position of `self` in the input, independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default, `char` is passed to the decorator's `add_char` hook.
	fun emit(v: MarkdownEmitter) do
		v.decorator.add_char(v, char)
	end
end
# A token without a specific meaning.
#
# Inherits the default `Token::emit`.
class TokenNone
super Token
end
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as emphasis.
	#
	# The text following the token is emitted in a temporary buffer with
	# `emit_text_until`. When it returns a position > 0, the buffer is handed
	# to the decorator's `add_em` hook and the emitter jumps to that
	# position; otherwise the token character is written as is.
	redef fun emit(v) do
		var inner = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, inner)
		v.current_pos = stop
	end
end
# An emphasis star token.
#
# Behaves as `TokenEm`; the distinct class identifies the marker used.
class TokenEmStar
super TokenEm
end
# An emphasis underscore token.
#
# Behaves as `TokenEm`; the distinct class identifies the marker used.
class TokenEmUnderscore
super TokenEm
end
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as strong emphasis.
	#
	# The text located 2 characters after `pos` is emitted in a temporary
	# buffer with `emit_text_until`. When it returns a position > 0, the
	# buffer is handed to the decorator's `add_strong` hook and the emitter
	# jumps one character past that position; otherwise the token character
	# is written as is.
	redef fun emit(v) do
		var inner = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, inner)
		v.current_pos = stop + 1
	end
end
# A strong star token.
#
# Behaves as `TokenStrong`; the distinct class identifies the marker used.
class TokenStrongStar
super TokenStrong
end
# A strong underscore token.
#
# Behaves as `TokenStrong`; the distinct class identifies the marker used.
class TokenStrongUnderscore
super TokenStrong
end
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a code span.
	#
	# The closing token is searched with `find_token`. When found (> 0), the
	# emitter jumps to it, spaces are trimmed on both sides of the span and a
	# non-empty span is handed to the decorator's `add_span_code` hook.
	# Otherwise the token character is written as is.
	redef fun emit(v) do
		var lo = pos + next_pos + 1
		var hi = v.processor.find_token(v.current_text.as(not null), lo, self)
		if hi <= 0 then
			v.addc char
			return
		end
		v.current_pos = hi + next_pos
		# trim leading spaces
		while lo < hi and v.current_text[lo] == ' ' do lo += 1
		if lo >= hi then return
		# trim trailing spaces
		while v.current_text[hi - 1] == ' ' do hi -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), lo, hi)
	end

	# Number of extra characters in the marker (see the subclasses).
	private fun next_pos: Int is abstract
end
# A span code token.
class TokenCodeSingle
	super TokenCode

	# No extra marker character.
	redef fun next_pos do
		return 0
	end
end
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# One extra marker character.
	redef fun next_pos do
		return 1
	end
end
# A link or image token.
# This class is mainly used to factorize work between images and links.
#
# `emit` validates the construct with `check_link`, which fills `link`,
# `name`, `comment` and `is_abbrev`, then delegates the output to `emit_hyper`.
abstract class TokenLinkOrImage
super Token
# Link address
var link: nullable Text = null
# Link text
var name: nullable Text = null
# Link title
var comment: nullable Text = null
# Is the link construct an abbreviation?
var is_abbrev = false
# Emit the construct with `emit_hyper` when `check_link` accepts it
# (result > 0) and move the emitter to the returned position.
# Otherwise the token character is written as is.
redef fun emit(v) do
var tmp = new FlatBuffer
var b = check_link(v, tmp, pos, self)
if b > 0 then
emit_hyper(v)
v.current_pos = b
else
v.addc char
end
end
# Emit the hyperlink as link or image.
private fun emit_hyper(v: MarkdownEmitter) is abstract
# Check if the link is a valid link.
#
# Parses `v.current_text` from `start` and returns the position of the last
# character of the construct, or -1 if it is not a valid link or image.
# On success `name`, `link` and possibly `comment` and `is_abbrev` are set.
#
# Three forms are handled after the bracketed name: an inline `(...)` target
# with an optional quoted title, a `[...]` reference looked up in
# `v.processor.link_refs`, and a bare name looked up in `link_refs`.
#
# The `out` parameter is not used in this method.
# NOTE(review): the `pos < start` tests presumably catch the negative value
# returned by the reading helpers on failure (`skip_spaces` returns -1 at
# end of text); confirm for `read_md_link_id`, `read_until`, `read_md_link`
# and `read_raw_until`.
private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
var md = v.current_text
var pos
# the bracketed name starts 1 character after `start` for a link, 2 for an image
if token isa TokenLink then
pos = start + 1
else
pos = start + 2
end
var tmp = new FlatBuffer
pos = md.read_md_link_id(tmp, pos)
if pos < start then return -1
name = tmp
var old_pos = pos
pos += 1
pos = md.skip_spaces(pos)
if pos < start then
# nothing follows the name: it must be a known reference
var tid = name.write_to_string.to_lower
if v.processor.link_refs.has_key(tid) then
var lr = v.processor.link_refs[tid]
is_abbrev = lr.is_abbrev
link = lr.link
comment = lr.title
pos = old_pos
else
return -1
end
else if md[pos] == '(' then
# inline form: `(link "optional title")`
pos += 1
pos = md.skip_spaces(pos)
if pos < start then return -1
tmp = new FlatBuffer
# the target may be wrapped in `<` `>`
var use_lt = md[pos] == '<'
if use_lt then
pos = md.read_until(tmp, pos + 1, '>')
else
pos = md.read_md_link(tmp, pos)
end
if pos < start then return -1
if use_lt then pos += 1
link = tmp.write_to_string
if md[pos] == ' ' then
pos = md.skip_spaces(pos)
if pos > start and md[pos] == '"' then
pos += 1
tmp = new FlatBuffer
pos = md.read_until(tmp, pos, '"')
if pos < start then return -1
comment = tmp.write_to_string
pos += 1
pos = md.skip_spaces(pos)
if pos == -1 then return -1
end
end
if pos < start then return -1
if md[pos] != ')' then return -1
else if md[pos] == '[' then
# reference form: `[id]`, an empty id falls back to the name
pos += 1
tmp = new FlatBuffer
pos = md.read_raw_until(tmp, pos, ']')
if pos < start then return -1
var id
if tmp.length > 0 then
id = tmp
else
id = name
end
var tid = id.write_to_string.to_lower
if v.processor.link_refs.has_key(tid) then
var lr = v.processor.link_refs[tid]
link = lr.link
comment = lr.title
end
else
# something else follows: the name itself must be a known reference
var tid = name.write_to_string.replace("\n", " ").to_lower
if v.processor.link_refs.has_key(tid) then
var lr = v.processor.link_refs[tid]
link = lr.link
comment = lr.title
pos = old_pos
else
return -1
end
end
# an unknown reference leaves `link` unset
if link == null then return -1
return pos
end
end
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the reference is flagged as such and has a
	# comment, a regular link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if not as_abbr then
			v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
			return
		end
		v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
	end
end
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image through the decorator's `add_image` hook.
	redef fun emit_hyper(v) do
		var target = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, target, label, comment)
	end
end
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit inline HTML or an automatic link.
	#
	# When `check_html` returns a position > 0, the collected markup is added
	# and the emitter jumps to that position; otherwise the token character
	# is escaped with the decorator's `escape_char`.
	redef fun emit(v) do
		var buf = new FlatBuffer
		var stop = check_html(v, buf, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add buf
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An automatic link is emitted directly with the decorator's `add_link`
	# (`out` is left untouched in that case). Inline HTML is read into `out`
	# with `read_xml`. Returns the position reached, or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var scheme = new FlatBuffer
		var pos = md.read_until(scheme, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and scheme.is_link_prefix then
			pos = md.read_until(scheme, pos, '>')
			if pos != -1 then
				var link = scheme.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity unchanged when it is well formed.
	#
	# When `check_entity` returns a position > 0, the collected entity is
	# added and the emitter jumps to that position; otherwise the token
	# character is escaped with the decorator's `escape_char`.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`. Three forms are
	# accepted: hexadecimal (`&#x...`, hexadecimal digits only), decimal
	# (`&#...`, decimal digits only) and named (letters and digits only).
	# On success the closing `;` is appended to `out` and the position
	# returned by `read_until` is returned; otherwise -1 is returned.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# A character is rejected unless it lies in one of the
					# three hexadecimal ranges.
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
			out.add ';'
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			out.add ';'
			# TODO check that the name is a known entity
		end
		return pos
	end
end
# A markdown escape token.
class TokenEscape
	super Token

	# Step over the escaping character and emit the character that follows it.
	redef fun emit(v) do
		var escaped_at = v.current_pos + 1
		v.current_pos = escaped_at
		v.addc v.current_text[escaped_at]
	end
end
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Render the text up to the matching strike token into a separate buffer
	# and hand it to the decorator's `add_strike`.
	# When no match is reported, only the token character is emitted.
	redef fun emit(v) do
		var struck = v.push_buffer
		var text = v.current_text.as(not null)
		# `pos + 2` starts the scan two characters after this token's position.
		var close = v.emit_text_until(text, pos + 2, self)
		v.pop_buffer
		if close <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, struck)
		v.current_pos = close + 1
	end
end
redef class Text
# Get the position of the next character that is neither a space nor a newline.
#
# Scanning starts at `start`. Returns -1 when the end of `self` is reached
# first (a negative `start` of -1 is returned unchanged).
private fun skip_spaces(start: Int): Int do
	var idx = start
	loop
		if idx < 0 or idx >= length then break
		var c = self[idx]
		if c != ' ' and c != '\n' then break
		idx += 1
	end
	if idx >= length then return -1
	return idx
end
# Copy `self` into `out`, starting at `start`, until one of `nend` is met.
#
# Backslash sequences are resolved through `escape`.
# Returns the position of the terminating character, or -1 when the end of
# `self` is reached without finding one.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var idx = start
	while idx < length do
		var c = self[idx]
		if c == '\\' and idx + 1 < length then
			# `escape` returns the last position it consumed.
			idx = escape(out, self[idx + 1], idx) + 1
			continue
		end
		if nend.has(c) then break
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
# Copy `self` verbatim into `out`, starting at `start`, until one of `nend`
# is met. Backslashes get no special treatment.
#
# Returns the position of the terminating character, or -1 when the end of
# `self` is reached without finding one.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var idx = start
	while idx < length do
		var c = self[idx]
		if nend.has(c) then break
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
# Read `self` as XML until one of `to` and append it to the `out` buffer.
#
# Text inside single or double quotes is copied verbatim: terminators found
# there are ignored, and a backslash protects the character that follows it.
# A quoted string is only closed by the same quote character that opened it.
#
# Returns the position of the terminating character, or -1 when the end of
# `self` is reached without finding one.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				# Copy the backslash and then the character it escapes
				# (not the backslash a second time).
				out.add c
				pos += 1
				if pos < length then
					out.add self[pos]
					pos += 1
				end
				continue
			end
			if c == str_char then
				in_str = false
				out.add c
				pos += 1
				continue
			end
			# Any other character, including the other kind of quote,
			# is plain string content.
		else if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
# Read `self` as XML and append it to the `out` buffer.
# Safe mode can be activated to limit reading to valid xml.
#
# The character at `start + 1` selects the kind of construct: `/` marks a
# closing tag, `!` is handled separately, anything else is a regular tag.
# On the path that reads a complete tag, the position of its closing `>` is
# returned; -1 is returned when a reader hits the end of `self`, when the tag
# name is rejected, or when no `>` is found.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
var pos = 0
var is_valid = true
var is_close_tag = false
if start + 1 >= length then return -1
if self[start + 1] == '/' then
is_close_tag = true
pos = start + 2
else if self[start + 1] == '!' then
# NOTE(review): the next statement looks truncated in this copy: the string
# literal is unterminated, and the rest of this branch, the default branch
# and the `if safe_mode` header that the later `else` pairs with are missing.
# Restore these lines from the upstream file before changing this method.
out.append "')
if pos == -1 then return -1
var tag = tmp.write_to_string.trim.to_lower
if not tag.is_valid_html_tag then
out.append "<"
# Forces the `pos == -1` check after this conditional to return -1.
pos = -1
else if tag.is_html_unsafe then
# Remembered so the closing `>` takes the `else` branch at the end.
# NOTE(review): in this copy the invalid and unsafe branches append the same
# literal `<` / `>` as the valid branch; presumably they were HTML-escaped
# entities originally. Confirm against upstream.
is_valid = false
out.append "<"
if is_close_tag then out.add '/'
out.append tmp
else
out.append "<"
if is_close_tag then out.add '/'
out.append tmp
end
else
# Tag name is copied straight to `out` without validation.
out.add '<'
if is_close_tag then out.add '/'
pos = read_xml_until(out, pos, ' ', '/', '>')
end
if pos == -1 then return -1
# Copy what follows the tag name up to `/` or `>`.
pos = read_xml_until(out, pos, '/', '>')
if pos == -1 then return -1
if self[pos] == '/' then
# A `/` before the end of the tag is written as ` /`.
out.append " /"
pos = self.read_xml_until(out, pos + 1, '>')
if pos == -1 then return -1
end
if self[pos] == '>' then
if is_valid then
out.add '>'
else
out.append ">"
end
return pos
end
return -1
end
# Read a markdown link address and append it to the `out` buffer.
#
# Reading stops at a space found outside nested parentheses, or at the `)`
# that balances the initial depth of 1. Backslash sequences are resolved
# through `escape`.
# Returns the position of the terminating character, or -1 when the end of
# `self` is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var idx = start
	var depth = 1
	while idx < length do
		var c = self[idx]
		if c == '\\' and idx + 1 < length then
			idx = escape(out, self[idx + 1], idx) + 1
			continue
		end
		if c == '(' then
			depth += 1
		else if c == ')' then
			depth -= 1
			if depth == 0 then break
		else if c == ' ' and depth == 1 then
			break
		end
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
# Read a markdown link text and append it to the `out` buffer.
#
# Nested `[` `]` pairs are copied to `out`; reading stops at the `]` that
# balances the initial depth of 1, which is not copied.
# Returns the position of that `]`, or -1 when the end of `self` is reached
# first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var idx = start
	var depth = 1
	while idx < length do
		var c = self[idx]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
# Extract the XML tag name from a XML tag.
#
# The first character is skipped, as is a `/` found right after it. Letters
# and digits are then collected, never reading the last character of `self`.
# The name is returned in lower case.
private fun xml_tag: String do
	var name = new FlatBuffer
	var idx = 1
	if idx < length and self[1] == '/' then idx = 2
	var last = length - 1
	loop
		if idx >= last then break
		var c = self[idx]
		if not c.is_digit and not c.is_letter then break
		name.add c
		idx += 1
	end
	return name.write_to_string.to_lower
end
# Is `self` a non-empty text made only of alphabetic characters?
private fun is_valid_html_tag: Bool do
	var valid = not is_empty
	if valid then
		for c in self do
			if c.is_alpha then continue
			valid = false
			break
		end
	end
	return valid
end
# Resolve the backslash sequence found at `pos`, where `c` is the character
# that follows the backslash.
#
# When `c` is one of the escapable markdown characters, only `c` is appended
# to `out` and `pos + 1` is returned (both characters are consumed).
# Otherwise the backslash itself is appended and `pos` is returned.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.', '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
	if escapable.has(c) then
		out.add c
		return pos + 1
	end
	out.add '\\'
	return pos
end
# Extract string found at end of fence opening.
#
# Leading spaces, backticks and tildes are skipped; the remainder is returned
# trimmed. Returns `null` when `self` holds nothing but those characters.
private fun meta_from_fence: nullable Text do
	var cs = chars
	var idx = 0
	while idx < cs.length do
		var c = cs[idx]
		if c == ' ' or c == '`' or c == '~' then
			idx += 1
			continue
		end
		return substring_from(idx).trim
	end
	return null
end
# Is `self` an unsafe HTML element?
private fun is_html_unsafe: Bool do
	var tag = self.write_to_string
	return html_unsafe_tags.has(tag)
end

# Is `self` a HTML block element?
private fun is_html_block: Bool do
	var tag = self.write_to_string
	return html_block_tags.has(tag)
end

# Is `self` a link prefix?
private fun is_link_prefix: Bool do
	var prefix = self.write_to_string
	return html_link_prefixes.has(prefix)
end
# Tag names matched by `is_html_unsafe`.
private fun html_unsafe_tags: Array[String] do
	return once ["applet", "head", "body", "frame", "frameset", "iframe",
		"script", "object"]
end

# Tag names matched by `is_html_block`.
private fun html_block_tags: Array[String] do
	return once ["address", "article", "aside", "audio", "blockquote",
		"canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure",
		"footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header",
		"hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section",
		"table", "tfoot", "ul", "video"]
end

# Prefixes matched by `is_link_prefix`.
private fun html_link_prefixes: Array[String] do
	return once ["http", "https", "ftp", "ftps"]
end
end
redef class String
	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html.write_to_string
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	#
	# A new `MarkdownProcessor` is created for each call.
	fun md_to_html: Writable do
		var processor = new MarkdownProcessor
		return processor.process(self)
	end
end