# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor
- var emitter: MarkdownEmitter is noinit
+ # `MarkdownEmitter` used for output.
+ var emitter: MarkdownEmitter is noinit, protected writable
+
+ # Work in extended mode (default).
+ #
+ # Behavior changes when using extended mode:
+ #
+ # * Lists and code blocks end a paragraph
+ #
+ # In normal markdown the following:
+ #
+ # This is a paragraph
+ # * and this is not a list
+ #
+ # Will produce:
+ #
+ # <p>This is a paragraph
+ # * and this is not a list</p>
+ #
+ # When using extended mode this changes to:
+ #
+ # <p>This is a paragraph</p>
+ # <ul>
+ # <li>and this is not a list</li>
+ # </ul>
+ #
+ # * Fenced code blocks
+ #
+ # If you don't want to indent all your code with 4 spaces,
+ # you can wrap your code in ``` ``` ``` or `~~~`.
+ #
+ # Here's an example:
+ #
+ # ```
+ # fun test do
+ # print "Hello World!"
+ # end
+ # ```
+ #
+ # * Code blocks meta
+ #
+ # If you want to use syntax highlighting tools, most of them need to know what kind
+ # of language they are highlighting.
+ # You can add an optional language identifier after the fence declaration to output
+ # it in the HTML render.
+ #
+ # ```nit
+ # import markdown
+ #
+ # print "# Hello World!".md_to_html
+ # ```
+ #
+ # Becomes
+ #
+ # <pre class="nit"><code>import markdown
+ #
+ # print "# Hello World!".md_to_html
+ # </code></pre>
+ #
+ # * Underscores (Emphasis)
+ #
+ # Underscores in the middle of a word like:
+ #
+ # Con_cat_this
+ #
+ # normally produces this:
+ #
+ # <p>Con<em>cat</em>this</p>
+ #
+ # With extended mode they don't result in emphasis.
+ #
+ # <p>Con_cat_this</p>
+ #
+ # * Strikethrough
+ #
+ # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
+ # a strikethrough span is marked with `~~`.
+ #
+ # ~~Mistaken text.~~
+ #
+ # becomes
+ #
+ # <del>Mistaken text.</del>
+ var ext_mode = true
init do self.emitter = new MarkdownEmitter(self)
# Process the markdown `input` string and return the processed output.
- fun process(input: String): Streamable do
+ fun process(input: String): Writable do
# init processor
link_refs.clear
last_link_ref = null
if value[leading] == '#' then return new LineHeadline
if value[leading] == '>' then return new LineBlockquote
- if value.length - leading - trailing > 2 then
- if value[leading] == '`' and md.count_chars_start('`') >= 3 then
- return new LineFence
- end
- if value[leading] == '~' and md.count_chars_start('~') >= 3 then
- return new LineFence
+ if ext_mode then
+ if value.length - leading - trailing > 2 then
+ if value[leading] == '`' and md.count_chars_start('`') >= 3 then
+ return new LineFence
+ end
+ if value[leading] == '~' and md.count_chars_start('~') >= 3 then
+ return new LineFence
+ end
end
end
return new LineOther
end
+ # Get the token kind at `pos`.
+ #
+ # The kind is decided from the character at `pos` (`c`), the character
+ # just before it (`c0`) and the two characters after it (`c1`, `c2`).
+ # Neighbours that fall outside of `text` are treated as spaces.
+ # `text[pos]` itself is read without any bounds check, so `pos` is
+ # expected to be a valid index of `text`.
+ #
+ # Returns a new token built from `pos` and `c`.
+ # `TokenNone` is returned when `c` does not start any markup.
+ #
+ # `ext_mode` changes two decisions: underscores between letters or digits
+ # are not emphasis, and `~~` yields a `TokenStrike`.
+ fun token_at(text: Text, pos: Int): Token do
+ var c0: Char
+ var c1: Char
+ var c2: Char
+
+ # Previous character, or a space at the very beginning of `text`.
+ if pos > 0 then
+ c0 = text[pos - 1]
+ else
+ c0 = ' '
+ end
+ var c = text[pos]
+
+ # Next two characters, or spaces past the end of `text`.
+ if pos + 1 < text.length then
+ c1 = text[pos + 1]
+ else
+ c1 = ' '
+ end
+ if pos + 2 < text.length then
+ c2 = text[pos + 2]
+ else
+ c2 = ' '
+ end
+
+ if c == '*' then
+ # `**`: strong unless the pair is surrounded by spaces.
+ if c1 == '*' then
+ if c0 != ' ' or c2 != ' ' then
+ return new TokenStrongStar(pos, c)
+ else
+ return new TokenEmStar(pos, c)
+ end
+ end
+ # Single `*`: emphasis unless it is surrounded by spaces.
+ if c0 != ' ' or c1 != ' ' then
+ return new TokenEmStar(pos, c)
+ else
+ return new TokenNone(pos, c)
+ end
+ else if c == '_' then
+ # `__`: strong unless the pair is surrounded by spaces.
+ if c1 == '_' then
+ if c0 != ' ' or c2 != ' 'then
+ return new TokenStrongUnderscore(pos, c)
+ else
+ return new TokenEmUnderscore(pos, c)
+ end
+ end
+ # Extended mode: an underscore between two letters or digits is plain text.
+ # Any other single underscore is reported as emphasis here, including one
+ # that stands between two spaces (unlike the non-extended rule below).
+ if ext_mode then
+ if (c0.is_letter or c0.is_digit) and c0 != '_' and
+ (c1.is_letter or c1.is_digit) then
+ return new TokenNone(pos, c)
+ else
+ return new TokenEmUnderscore(pos, c)
+ end
+ end
+ # Non-extended mode: emphasis unless surrounded by spaces.
+ if c0 != ' ' or c1 != ' ' then
+ return new TokenEmUnderscore(pos, c)
+ else
+ return new TokenNone(pos, c)
+ end
+ else if c == '!' then
+ # `![` starts an image, a lone `!` is plain text.
+ if c1 == '[' then return new TokenImage(pos, c)
+ return new TokenNone(pos, c)
+ else if c == '[' then
+ return new TokenLink(pos, c)
+ else if c == ']' then
+ # A closing bracket never starts a token by itself.
+ return new TokenNone(pos, c)
+ else if c == '`' then
+ # Two backticks in a row open a double code span, otherwise a single one.
+ if c1 == '`' then
+ return new TokenCodeDouble(pos, c)
+ else
+ return new TokenCodeSingle(pos, c)
+ end
+ else if c == '\\' then
+ # A backslash is an escape only when followed by one of the characters listed below.
+ if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
+ return new TokenEscape(pos, c)
+ else
+ return new TokenNone(pos, c)
+ end
+ else if c == '<' then
+ return new TokenHTML(pos, c)
+ else if c == '&' then
+ return new TokenEntity(pos, c)
+ else
+ # `~~` starts a strikethrough span, in extended mode only.
+ if ext_mode then
+ if c == '~' and c1 == '~' then
+ return new TokenStrike(pos, c)
+ end
+ end
+ return new TokenNone(pos, c)
+ end
+ end
+
+ # Find the first position in `text`, at or after `start`, where
+ # `token_at` yields a token for which `is_same_type(token)` holds.
+ #
+ # Returns `-1` when no such position exists.
+ fun find_token(text: Text, start: Int, token: Token): Int do
+ for i in [start..text.length[ do
+ var candidate = token_at(text, i)
+ if candidate.is_same_type(token) then return i
+ end
+ return -1
+ end
end
# Emit output corresponding to blocks content.
# Default is `HTMLDecorator`
var decorator: Decorator = new HTMLDecorator is writable
- # Create a new `MardownEmitter` using the default `HTMLDecorator`
- init(processor: MarkdownProcessor) do
- self.processor = processor
- end
-
# Create a new `MarkdownEmitter` using a custom `decorator`.
init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
init processor
current_text = text
current_pos = start
while current_pos < text.length do
- var mt = text.token_at(current_pos)
+ var mt = processor.token_at(text, current_pos)
if (token != null and not token isa TokenNone) and
(mt.is_same_type(token) or
(token isa TokenEmStar and mt isa TokenStrongStar) or
end
# Append `e` to current buffer.
- fun add(e: Streamable) do
+ fun add(e: Writable) do
if e isa Text then
current_buffer.append e
else
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
-# Example:
-#
-# [1]: http://example.com/ "Optional title"
+# ~~~raw
+# [1]: http://example.com/ "Optional title"
+# ~~~
class LinkRef
# Link href
# Render a strong text.
fun add_strong(v: MarkdownEmitter, text: Text) is abstract
- # Render a super text.
- fun add_super(v: MarkdownEmitter, text: Text) is abstract
+ # Render a strike text.
+ #
+ # Extended mode only (see `MarkdownProcessor::ext_mode`)
+ fun add_strike(v: MarkdownEmitter, text: Text) is abstract
# Render a link.
fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
end
redef fun add_code(v, block) do
- v.add "<pre><code>"
+ if block isa BlockFence and block.meta != null then
+ v.add "<pre class=\"{block.meta.to_s}\"><code>"
+ else
+ v.add "<pre><code>"
+ end
v.emit_in block
v.add "</code></pre>\n"
end
v.add "</strong>"
end
- redef fun add_super(v, text) do
- v.add "<sup>"
+ redef fun add_strike(v, text) do
+ v.add "<del>"
v.add text
- v.add "</sup>"
+ v.add "</del>"
end
redef fun add_image(v, link, name, comment) do
class BlockFence
super BlockCode
+ # Any string found after fence token.
+ var meta: nullable Text
+
# Fence code lines start at 0 spaces.
redef var line_start = 0
end
var next_empty: Bool = false is writable
# Initialize a new MDLine from its string value
- init(value: String) do
- self.value = value
+ init do
self.leading = process_leading
if leading != value.length then
self.is_empty = false
var was_empty = line.prev_empty
while line != null and not line.is_empty do
var t = v.line_kind(line)
- if v.in_list and t isa LineList then
+ if (v.in_list or v.ext_mode) and t isa LineList then
break
end
- if t isa LineCode or t isa LineFence then
+ if v.ext_mode and (t isa LineCode or t isa LineFence) then
break
end
if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
line = line.next
end
# build block
- var bk: Block
if line != null and not line.is_empty then
var block = v.current_block.split(line.prev.as(not null))
if v.in_list and not was_empty then
else
block = v.current_block.split(v.current_block.last_line.as(not null))
end
- block.kind = new BlockFence(block)
+ var meta = block.first_line.value.meta_from_fence
+ block.kind = new BlockFence(block, meta)
block.first_line.clear
var last = block.last_line
if last != null and v.line_kind(last) isa LineFence then
redef fun emit(v) do
var a = pos + next_pos + 1
- var b = v.current_text.find_token(a, self)
+ var b = v.processor.find_token(v.current_text.as(not null), a, self)
if b > 0 then
v.current_pos = b + next_pos
while a < b and v.current_text[a] == ' ' do a += 1
end
end
-# A markdown super token.
-class TokenSuper
+# A markdown strike token.
+#
+# Extended mode only (see `MarkdownProcessor::ext_mode`)
+class TokenStrike
super Token
redef fun emit(v) do
var tmp = v.push_buffer
- var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
+ var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
v.pop_buffer
if b > 0 then
- v.decorator.add_super(v, tmp)
- v.current_pos = b
+ v.decorator.add_strike(v, tmp)
+ v.current_pos = b + 1
else
v.addc char
end
redef class Text
- # Get the token kind at `pos`.
- private fun token_at(pos: Int): Token do
- var c0: Char
- var c1: Char
- var c2: Char
- var c3: Char
-
- if pos > 0 then
- c0 = self[pos - 1]
- else
- c0 = ' '
- end
- var c = self[pos]
-
- if pos + 1 < length then
- c1 = self[pos + 1]
- else
- c1 = ' '
- end
- if pos + 2 < length then
- c2 = self[pos + 2]
- else
- c2 = ' '
- end
- if pos + 3 < length then
- c3 = self[pos + 3]
- else
- c3 = ' '
- end
-
- if c == '*' then
- if c1 == '*' then
- if c0 != ' ' or c2 != ' ' then
- return new TokenStrongStar(pos, c)
- else
- return new TokenEmStar(pos, c)
- end
- end
- if c0 != ' ' or c1 != ' ' then
- return new TokenEmStar(pos, c)
- else
- return new TokenNone(pos, c)
- end
- else if c == '_' then
- if c1 == '_' then
- if c0 != ' ' or c2 != ' 'then
- return new TokenStrongUnderscore(pos, c)
- else
- return new TokenEmUnderscore(pos, c)
- end
- end
- if c0 != ' ' or c1 != ' ' then
- return new TokenEmUnderscore(pos, c)
- else
- return new TokenNone(pos, c)
- end
- else if c == '!' then
- if c1 == '[' then return new TokenImage(pos, c)
- return new TokenNone(pos, c)
- else if c == '[' then
- return new TokenLink(pos, c)
- else if c == ']' then
- return new TokenNone(pos, c)
- else if c == '`' then
- if c1 == '`' then
- return new TokenCodeDouble(pos, c)
- else
- return new TokenCodeSingle(pos, c)
- end
- else if c == '\\' then
- if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
- return new TokenEscape(pos, c)
- else
- return new TokenNone(pos, c)
- end
- else if c == '<' then
- return new TokenHTML(pos, c)
- else if c == '&' then
- return new TokenEntity(pos, c)
- else if c == '^' then
- if c0 == '^' or c1 == '^' then
- return new TokenNone(pos, c)
- else
- return new TokenSuper(pos, c)
- end
- else
- return new TokenNone(pos, c)
- end
- end
-
- # Find the position of a `token` in `self`.
- private fun find_token(start: Int, token: Token): Int do
- var pos = start
- while pos < length do
- if token_at(pos).is_same_type(token) then
- return pos
- end
- pos += 1
- end
- return -1
- end
-
# Get the position of the next non-space character.
private fun skip_spaces(start: Int): Int do
var pos = start
# Safe mode can be activated to limit reading to valid xml.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
var pos = 0
+ var is_valid = true
var is_close_tag = false
if start + 1 >= length then return -1
if self[start + 1] == '/' then
pos = read_xml_until(tmp, pos, ' ', '/', '>')
if pos == -1 then return -1
var tag = tmp.write_to_string.trim.to_lower
- if tag.is_html_unsafe then
+ if not tag.is_valid_html_tag then
+ out.append "<"
+ pos = -1
+ else if tag.is_html_unsafe then
+ is_valid = false
out.append "<"
if is_close_tag then out.add '/'
out.append tmp
if pos == -1 then return -1
end
if self[pos] == '>' then
- out.add '>'
+ if is_valid then
+ out.add '>'
+ else
+ out.append ">"
+ end
return pos
end
return -1
return tpl.write_to_string.to_lower
end
+ # Is `self` non-empty and made only of characters accepted by `Char::is_alpha`?
+ #
+ # NOTE(review): assuming `is_alpha` accepts letters only, tag names that
+ # contain digits (e.g. `h1`) are reported as invalid; confirm this is intended.
+ private fun is_valid_html_tag: Bool do
+ # An empty name is never valid; otherwise valid until proven wrong.
+ var valid = not is_empty
+ for c in self do
+ if c.is_alpha then continue
+ valid = false
+ break
+ end
+ return valid
+ end
+
# Read and escape the markdown contained in `self`.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
return pos
end
+ # Extract the text that follows the fence marker on a fence opening line.
+ #
+ # Leading spaces, backticks and tildes are skipped; what remains is
+ # returned trimmed. Returns `null` when the line holds nothing else.
+ private fun meta_from_fence: nullable Text do
+ var len = chars.length
+ var i = 0
+ # Skip everything that belongs to the fence marker itself.
+ while i < len and (chars[i] == ' ' or chars[i] == '`' or chars[i] == '~') do i += 1
+ if i >= len then return null
+ return substring_from(i).trim
+ end
+
# Is `self` an unsafe HTML element?
private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
# var md = "**Hello World!**"
# var html = md.md_to_html
# assert html == "<p><strong>Hello World!</strong></p>\n"
- fun md_to_html: Streamable do
+ fun md_to_html: Writable do
var processor = new MarkdownProcessor
return processor.process(self)
end