X-Git-Url: http://nitlanguage.org
diff --git a/lib/markdown/markdown.nit b/lib/markdown/markdown.nit
index 41e2441..d63f3e4 100644
--- a/lib/markdown/markdown.nit
+++ b/lib/markdown/markdown.nit
@@ -31,12 +31,120 @@ import template
class MarkdownProcessor
# `MarkdownEmitter` used for ouput.
- var emitter: MarkdownEmitter is noinit
+ var emitter: MarkdownEmitter is noinit, protected writable
+
+ # Work in extended mode (default).
+ #
+ # Behavior changes when using extended mode:
+ #
+ # * Lists and code blocks end a paragraph
+ #
+ # In normal markdown the following:
+ #
+ # ~~~md
+ # This is a paragraph
+ # * and this is not a list
+ # ~~~
+ #
+ # Will produce:
+ #
+ # ~~~html
+ # <p>This is a paragraph
+ # * and this is not a list</p>
+ # ~~~
+ #
+ # When using extended mode this changes to:
+ #
+ # ~~~html
+ # <p>This is a paragraph</p>
+ # <ul>
+ # <li>and this is not a list</li>
+ # </ul>
+ # ~~~
+ #
+ # * Fenced code blocks
+ #
+ # If you don't want to indent all your code with 4 spaces,
+ # you can wrap your code in ``` ``` ``` or `~~~`.
+ #
+ # Here's an example:
+ #
+ # ~~~md
+ # fun test do
+ # print "Hello World!"
+ # end
+ # ~~~
+ #
+ # * Code blocks meta
+ #
+ # If you want to use syntax highlighting tools, most of them need to know what kind
+ # of language they are highlighting.
+ # You can add an optional language identifier after the fence declaration to output
+ # it in the HTML render.
+ #
+ # ```nit
+ # import markdown
+ #
+ # print "# Hello World!".md_to_html
+ # ```
+ #
+ # Becomes
+ #
+ # ~~~html
+ # <pre class="nit"><code>import markdown
+ #
+ # print "# Hello World!".md_to_html
+ # </code></pre>
+ # ~~~
+ #
+ # * Underscores (Emphasis)
+ #
+ # Underscores in the middle of a word like:
+ #
+ # ~~~md
+ # Con_cat_this
+ # ~~~
+ #
+ # normally produces this:
+ #
+ # ~~~html
+ # Con<em>cat</em>this
+ # ~~~
+ #
+ # With extended mode they don't result in emphasis.
+ #
+ # ~~~html
+ # Con_cat_this
+ # ~~~
+ #
+ # * Strikethrough
+ #
+ # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
+ # a strikethrough span is marked with `~~`.
+ #
+ # ~~~md
+ # ~~Mistaken text.~~
+ # ~~~
+ #
+ # becomes
+ #
+ # ~~~html
+ # <del>Mistaken text.</del>
+ # ~~~
+ var ext_mode = true
+
+ # Disable attaching MDLocation to Tokens
+ #
+ # Locations are useful for some tools but they may
+ # cause a significant time and space overhead.
+ #
+ # Default = `false`
+ var no_location = false is writable
init do self.emitter = new MarkdownEmitter(self)
# Process the mardown `input` string and return the processed output.
- fun process(input: String): Streamable do
+ fun process(input: String): Writable do
# init processor
link_refs.clear
last_link_ref = null
@@ -52,36 +160,43 @@ class MarkdownProcessor
# Split `input` string into `MDLines` and create a parent `MDBlock` with it.
private fun read_lines(input: String): MDBlock do
- var block = new MDBlock
+ var block = new MDBlock(new MDLocation(1, 1, 1, 1))
var value = new FlatBuffer
var i = 0
+
+ # Counters used to build the `MDLocation` of each line:
+ # `line_pos` is incremented before use (so the first line is 1) and
+ # `col_pos` counts every character read on the current line.
+ var line_pos = 0
+ var col_pos = 0
+
while i < input.length do
value.clear
var pos = 0
var eol = false
while not eol and i < input.length do
+ col_pos += 1
var c = input[i]
if c == '\n' then
- i += 1
eol = true
+ else if c == '\r' then
+ # Carriage returns are skipped: nothing is appended to `value`.
else if c == '\t' then
- var np = pos + (4 - (pos.bin_and(3)))
+ var np = pos + (4 - (pos & 3))
while pos < np do
value.add ' '
pos += 1
end
- i += 1
else
pos += 1
value.add c
- i += 1
end
+ i += 1
end
+ line_pos += 1
- var line = new MDLine(value.write_to_string)
+ # The line spans from column 1 to the number of input characters
+ # consumed for it, the end-of-line character included.
+ var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
+ var line = new MDLine(loc, value.write_to_string)
var is_link_ref = check_link_ref(line)
# Skip link refs
if not is_link_ref then block.add_line line
+ col_pos = 0
end
return block
end
@@ -162,8 +277,10 @@ class MarkdownProcessor
#
# Markdown allows link refs to be defined over two lines:
#
- # [id]: http://example.com/longish/path/to/resource/here
- # "Optional Title Here"
+ # ~~~md
+ # [id]: http://example.com/longish/path/to/resource/here
+ # "Optional Title Here"
+ # ~~~
#
private var last_link_ref: nullable LinkRef = null
@@ -219,12 +336,14 @@ class MarkdownProcessor
if value[leading] == '#' then return new LineHeadline
if value[leading] == '>' then return new LineBlockquote
- if value.length - leading - trailing > 2 then
- if value[leading] == '`' and md.count_chars_start('`') >= 3 then
- return new LineFence
- end
- if value[leading] == '~' and md.count_chars_start('~') >= 3 then
- return new LineFence
+ if ext_mode then
+ if value.length - leading - trailing > 2 then
+ if value[leading] == '`' and md.count_chars_start('`') >= 3 then
+ return new LineFence
+ end
+ if value[leading] == '~' and md.count_chars_start('~') >= 3 then
+ return new LineFence
+ end
end
end
@@ -286,57 +405,81 @@ class MarkdownProcessor
c2 = ' '
end
+ var loc
+ if no_location then
+ loc = null
+ else
+ loc = new MDLocation(
+ current_loc.line_start,
+ current_loc.column_start + pos,
+ current_loc.line_start,
+ current_loc.column_start + pos)
+ end
+
if c == '*' then
if c1 == '*' then
if c0 != ' ' or c2 != ' ' then
- return new TokenStrongStar(pos, c)
+ return new TokenStrongStar(loc, pos, c)
else
- return new TokenEmStar(pos, c)
+ return new TokenEmStar(loc, pos, c)
end
end
if c0 != ' ' or c1 != ' ' then
- return new TokenEmStar(pos, c)
+ return new TokenEmStar(loc, pos, c)
else
- return new TokenNone(pos, c)
+ return new TokenNone(loc, pos, c)
end
else if c == '_' then
if c1 == '_' then
- if c0 != ' ' or c2 != ' 'then
- return new TokenStrongUnderscore(pos, c)
+ if c0 != ' ' or c2 != ' ' then
+ return new TokenStrongUnderscore(loc, pos, c)
else
- return new TokenEmUnderscore(pos, c)
+ return new TokenEmUnderscore(loc, pos, c)
+ end
+ end
+ if ext_mode then
+ if (c0.is_letter or c0.is_digit) and c0 != '_' and
+ (c1.is_letter or c1.is_digit) then
+ return new TokenNone(loc, pos, c)
+ else
+ return new TokenEmUnderscore(loc, pos, c)
end
end
if c0 != ' ' or c1 != ' ' then
- return new TokenEmUnderscore(pos, c)
+ return new TokenEmUnderscore(loc, pos, c)
else
- return new TokenNone(pos, c)
+ return new TokenNone(loc, pos, c)
end
else if c == '!' then
- if c1 == '[' then return new TokenImage(pos, c)
- return new TokenNone(pos, c)
+ if c1 == '[' then return new TokenImage(loc, pos, c)
+ return new TokenNone(loc, pos, c)
else if c == '[' then
- return new TokenLink(pos, c)
+ return new TokenLink(loc, pos, c)
else if c == ']' then
- return new TokenNone(pos, c)
+ return new TokenNone(loc, pos, c)
else if c == '`' then
if c1 == '`' then
- return new TokenCodeDouble(pos, c)
+ return new TokenCodeDouble(loc, pos, c)
else
- return new TokenCodeSingle(pos, c)
+ return new TokenCodeSingle(loc, pos, c)
end
else if c == '\\' then
if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
- return new TokenEscape(pos, c)
+ return new TokenEscape(loc, pos, c)
else
- return new TokenNone(pos, c)
+ return new TokenNone(loc, pos, c)
end
else if c == '<' then
- return new TokenHTML(pos, c)
+ return new TokenHTML(loc, pos, c)
else if c == '&' then
- return new TokenEntity(pos, c)
+ return new TokenEntity(loc, pos, c)
else
- return new TokenNone(pos, c)
+ if ext_mode then
+ if c == '~' and c1 == '~' then
+ return new TokenStrike(loc, pos, c)
+ end
+ end
+ return new TokenNone(loc, pos, c)
end
end
@@ -351,6 +494,12 @@ class MarkdownProcessor
end
return -1
end
+
+ # Location at which the next token is parsed.
+ #
+ # The value is owned by the emitter, which moves it forward
+ # each time it meets a `\n` in the input.
+ private fun current_loc: MDLocation do
+ return emitter.current_loc
+ end
end
# Emit output corresponding to blocks content.
@@ -359,15 +508,23 @@ end
# The emitter use a `Decorator` to select the output format.
class MarkdownEmitter
+ # Kind of processor used for parsing.
+ type PROCESSOR: MarkdownProcessor
+
# Processor containing link refs.
- var processor: MarkdownProcessor
+ var processor: PROCESSOR
+
+ # Kind of decorator used for decoration.
+ type DECORATOR: Decorator
# Decorator used for output.
# Default is `HTMLDecorator`
- var decorator: Decorator = new HTMLDecorator is writable
+ var decorator: DECORATOR is writable, lazy do
+ return new HTMLDecorator
+ end
# Create a new `MarkdownEmitter` using a custom `decorator`.
- init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
+ init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
init processor
self.decorator = decorator
end
@@ -384,19 +541,21 @@ class MarkdownEmitter
fun emit_in(block: Block) do block.emit_in(self)
# Transform and emit mardown text
- fun emit_text(text: Text) do
- emit_text_until(text, 0, null)
- end
+ fun emit_text(text: Text) do emit_text_until(text, 0, null)
- # Transform and emit mardown text starting at `from` and
+ # Transform and emit markdown text starting at `start` and
# until a token with the same type as `token` is found.
- # Go until the end of text if `token` is null.
+ # Go until the end of `text` if `token` is null.
fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
var old_text = current_text
var old_pos = current_pos
current_text = text
current_pos = start
while current_pos < text.length do
+ if text[current_pos] == '\n' then
+ current_loc.line_start += 1
+ current_loc.column_start = -current_pos
+ end
var mt = processor.token_at(text, current_pos)
if (token != null and not token isa TokenNone) and
(mt.is_same_type(token) or
@@ -439,8 +598,23 @@ class MarkdownEmitter
return buffer_stack.last
end
+ # Stacked locations.
+ private var loc_stack = new List[MDLocation]
+
+ # Put `location` on top of the location stack.
+ private fun push_loc(location: MDLocation) do
+ loc_stack.add(location)
+ end
+
+ # Remove the last location pushed on the stack and return it.
+ private fun pop_loc: MDLocation do return loc_stack.pop
+
+ # Location on top of the stack, without removing it.
+ #
+ # The location stack must not be empty.
+ private fun current_loc: MDLocation do
+ assert not loc_stack.is_empty
+ return loc_stack.last
+ end
+
# Append `e` to current buffer.
- fun add(e: Streamable) do
+ fun add(e: Writable) do
if e isa Text then
current_buffer.append e
else
@@ -449,18 +623,20 @@ class MarkdownEmitter
end
# Append `c` to current buffer.
- fun addc(c: Char) do current_buffer.add c
+ fun addc(c: Char) do
+ current_buffer.add c
+ end
# Append a "\n" line break.
- fun addn do current_buffer.add '\n'
+ fun addn do addc '\n'
end
# A Link Reference.
# Links that are specified somewhere in the mardown document to be reused as shortcuts.
#
-# Example:
-#
-# [1]: http://example.com/ "Optional title"
+# ~~~raw
+# [1]: http://example.com/ "Optional title"
+# ~~~
class LinkRef
# Link href
@@ -474,7 +650,7 @@ class LinkRef
# Create a link with a title.
init with_title(link: String, title: nullable String) do
- self.link = link
+ init(link)
self.title = title
end
end
@@ -483,59 +659,72 @@ end
# Default decorator used is `HTMLDecorator`.
interface Decorator
+ # Kind of emitter used for decoration.
+ type EMITTER: MarkdownEmitter
+
+ # Emit the plain character `c` through the emitter `v`.
+ #
+ # Decorators can redefine this method to escape plain text in a special way.
+ fun add_char(v: EMITTER, c: Char) do
+ v.addc(c)
+ end
+
# Render a ruler block.
- fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
+ fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
# Render a headline block with corresponding level.
- fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
+ fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
# Render a paragraph block.
- fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
+ fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
# Render a code or fence block.
- fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
+ fun add_code(v: EMITTER, block: BlockCode) is abstract
# Render a blockquote.
- fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
+ fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
# Render an unordered list.
- fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
+ fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
# Render an ordered list.
- fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
+ fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
# Render a list item.
- fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
+ fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
# Render an emphasis text.
- fun add_em(v: MarkdownEmitter, text: Text) is abstract
+ fun add_em(v: EMITTER, text: Text) is abstract
# Render a strong text.
- fun add_strong(v: MarkdownEmitter, text: Text) is abstract
+ fun add_strong(v: EMITTER, text: Text) is abstract
+
+ # Render a strike text.
+ #
+ # Extended mode only (see `MarkdownProcessor::ext_mode`)
+ fun add_strike(v: EMITTER, text: Text) is abstract
# Render a link.
- fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
+ fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
# Render an image.
- fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
+ fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
# Render an abbreviation.
- fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
+ fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
# Render a code span reading from a buffer.
- fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
+ fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
# Render a text and escape it.
- fun append_value(v: MarkdownEmitter, value: Text) is abstract
+ fun append_value(v: EMITTER, value: Text) is abstract
# Render code text from buffer and escape it.
- fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
+ fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
# Render a character escape.
- fun escape_char(v: MarkdownEmitter, char: Char) is abstract
+ fun escape_char(v: EMITTER, char: Char) is abstract
# Render a line break
- fun add_line_break(v: MarkdownEmitter) is abstract
+ fun add_line_break(v: EMITTER) is abstract
# Generate a new html valid id from a `String`.
fun strip_id(txt: String): String is abstract
@@ -585,7 +774,14 @@ class HTMLDecorator
end
redef fun add_code(v, block) do
- v.add "<pre><code>"
+ # Expose the meta of the block (presumably the language name written
+ # after the fence), when there is one, as the class of the `<pre>` element.
+ # The meta is escaped because it is written inside an HTML attribute.
+ var meta = block.meta
+ if meta != null then
+ v.add "<pre class=\"{meta.html_escape}\"><code>"
+ else
+ v.add "<pre><code>"
+ end
v.emit_in block
v.add "</code></pre>\n"
end
@@ -626,6 +822,12 @@ class HTMLDecorator
v.add ""
end
+ # Render a strikethrough span by wrapping `text` in a `<del>` element.
+ redef fun add_strike(v, text) do
+ v.add "<del>"
+ v.add text
+ v.add "</del>"
+ end
+
redef fun add_image(v, link, name, comment) do
v.add " 0 then
+ v.decorator.add_strike(v, tmp)
+ v.current_pos = b + 1
+ else
+ v.addc char
+ end
+ end
+end
+
redef class Text
# Get the position of the next non-space character.
@@ -2044,18 +2345,11 @@ redef class Text
if c == '\\' and pos + 1 < length then
pos = escape(out, self[pos + 1], pos)
else
- var end_reached = false
- for n in nend do
- if c == n then
- end_reached = true
- break
- end
- end
- if end_reached then break
+ for n in nend do if c == n then break label
out.add c
end
pos += 1
- end
+ end label
if pos == length then return -1
return pos
end
@@ -2131,6 +2425,7 @@ redef class Text
# Safe mode can be activated to limit reading to valid xml.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
var pos = 0
+ var is_valid = true
var is_close_tag = false
if start + 1 >= length then return -1
if self[start + 1] == '/' then
@@ -2148,7 +2443,11 @@ redef class Text
pos = read_xml_until(tmp, pos, ' ', '/', '>')
if pos == -1 then return -1
var tag = tmp.write_to_string.trim.to_lower
- if tag.is_html_unsafe then
+ if not tag.is_valid_html_tag then
+ out.append "<"
+ pos = -1
+ else if tag.is_html_unsafe then
+ is_valid = false
out.append "<"
if is_close_tag then out.add '/'
out.append tmp
@@ -2171,7 +2470,11 @@ redef class Text
if pos == -1 then return -1
end
if self[pos] == '>' then
- out.add '>'
+ if is_valid then
+ out.add '>'
+ else
+ out.append ">"
+ end
return pos
end
return -1
@@ -2243,6 +2546,14 @@ redef class Text
return tpl.write_to_string.to_lower
end
+ # Is `self` a well-formed HTML tag name?
+ #
+ # A tag name starts with a letter and then contains only letters and digits:
+ # `em` and `h1` are accepted, the empty string, `1a` and `a-b` are rejected.
+ private fun is_valid_html_tag: Bool do
+ if is_empty then return false
+ # A leading digit is never a tag (think of `1 <2> 3`).
+ if not self[0].is_alpha then return false
+ # Digits are accepted afterwards so that `h1`..`h6` are not rejected.
+ for c in self do
+ if not c.is_alpha and not c.is_digit then return false
+ end
+ return true
+ end
+
# Read and escape the markdown contained in `self`.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
@@ -2256,6 +2567,17 @@ redef class Text
return pos
end
+ # Get the meta information written after a fence opening.
+ #
+ # Leading spaces and fence characters (backquotes and `~`) are skipped,
+ # what follows is returned trimmed.
+ # Return `null` when `self` contains nothing else.
+ private fun meta_from_fence: nullable Text do
+ var i = 0
+ while i < chars.length do
+ var c = chars[i]
+ var is_fence_char = c == ' ' or c == '`' or c == '~'
+ if not is_fence_char then return substring_from(i).trim
+ i += 1
+ end
+ return null
+ end
+
# Is `self` an unsafe HTML element?
private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
@@ -2279,7 +2601,7 @@ redef class String
# var md = "**Hello World!**"
# var html = md.md_to_html
# assert html == "<p><strong>Hello World!</strong></p>\n"
- fun md_to_html: Streamable do
+ fun md_to_html: Writable do
var processor = new MarkdownProcessor
return processor.process(self)
end