X-Git-Url: http://nitlanguage.org diff --git a/lib/markdown/markdown.nit b/lib/markdown/markdown.nit index add43e1..0d264cb 100644 --- a/lib/markdown/markdown.nit +++ b/lib/markdown/markdown.nit @@ -31,12 +31,94 @@ import template class MarkdownProcessor # `MarkdownEmitter` used for ouput. - var emitter: MarkdownEmitter is noinit + var emitter: MarkdownEmitter is noinit, protected writable + + # Work in extended mode (default). + # + # Behavior changes when using extended mode: + # + # * Lists and code blocks end a paragraph + # + # In normal markdown the following: + # + # This is a paragraph + # * and this is not a list + # + # Will produce: + # + #

<p>This is a paragraph + # * and this is not a list</p>

+ # + # When using extended mode this changes to: + # + #

<p>This is a paragraph</p> + # <ul> + # <li>and this is not a list</li> + # </ul>

+ # + # + # * Fences code blocks + # + # If you don't want to indent your all your code with 4 spaces, + # you can wrap your code in ``` ``` ``` or `~~~`. + # + # Here's an example: + # + # ``` + # fun test do + # print "Hello World!" + # end + # ``` + # + # * Code blocks meta + # + # If you want to use syntax highlighting tools, most of them need to know what kind + # of language they are highlighting. + # You can add an optional language identifier after the fence declaration to output + # it in the HTML render. + # + # ```nit + # import markdown + # + # print "# Hello World!".md_to_html + # ``` + # + # Becomes + # + #
<pre class="nit"><code>import markdown
+	#
+	#		print "# Hello World!".md_to_html
+	#		</code></pre>
+ # + # * Underscores (Emphasis) + # + # Underscores in the middle of a word like: + # + # Con_cat_this + # + # normally produces this: + # + #

<p>Con<em>cat</em>this</p>

+ # + # With extended mode they don't result in emphasis. + # + #

<p>Con_cat_this</p>

+ # + # * Strikethrough + # + # Like in [GFM](https://help.github.com/articles/github-flavored-markdown), + # strikethrought span is marked with `~~`. + # + # ~~Mistaken text.~~ + # + # becomes + # + # Mistaken text. + var ext_mode = true init do self.emitter = new MarkdownEmitter(self) # Process the mardown `input` string and return the processed output. - fun process(input: String): Streamable do + fun process(input: String): Writable do # init processor link_refs.clear last_link_ref = null @@ -52,17 +134,21 @@ class MarkdownProcessor # Split `input` string into `MDLines` and create a parent `MDBlock` with it. private fun read_lines(input: String): MDBlock do - var block = new MDBlock + var block = new MDBlock(new MDLocation(1, 1, 1, 1)) var value = new FlatBuffer var i = 0 + + var line_pos = 0 + var col_pos = 0 + while i < input.length do value.clear var pos = 0 var eol = false while not eol and i < input.length do + col_pos += 1 var c = input[i] if c == '\n' then - i += 1 eol = true else if c == '\t' then var np = pos + (4 - (pos.bin_and(3))) @@ -70,18 +156,20 @@ class MarkdownProcessor value.add ' ' pos += 1 end - i += 1 else pos += 1 value.add c - i += 1 end + i += 1 end + line_pos += 1 - var line = new MDLine(value.write_to_string) + var loc = new MDLocation(line_pos, 1, line_pos, col_pos) + var line = new MDLine(loc, value.write_to_string) var is_link_ref = check_link_ref(line) # Skip link refs if not is_link_ref then block.add_line line + col_pos = 0 end return block end @@ -162,7 +250,7 @@ class MarkdownProcessor # # Markdown allows link refs to be defined over two lines: # - # [id]: http://example.com/longish/path/to/resource/here + # [id]: http://example.com/longish/path/to/resource/here # "Optional Title Here" # private var last_link_ref: nullable LinkRef = null @@ -219,12 +307,14 @@ class MarkdownProcessor if value[leading] == '#' then return new LineHeadline if value[leading] == '>' then return new LineBlockquote - if value.length - leading - 
trailing > 2 then - if value[leading] == '`' and md.count_chars_start('`') >= 3 then - return new LineFence - end - if value[leading] == '~' and md.count_chars_start('~') >= 3 then - return new LineFence + if ext_mode then + if value.length - leading - trailing > 2 then + if value[leading] == '`' and md.count_chars_start('`') >= 3 then + return new LineFence + end + if value[leading] == '~' and md.count_chars_start('~') >= 3 then + return new LineFence + end end end @@ -286,63 +376,72 @@ class MarkdownProcessor c2 = ' ' end + var loc = text.pos_to_loc(pos) + if c == '*' then if c1 == '*' then if c0 != ' ' or c2 != ' ' then - return new TokenStrongStar(pos, c) + return new TokenStrongStar(loc, pos, c) else - return new TokenEmStar(pos, c) + return new TokenEmStar(loc, pos, c) end end if c0 != ' ' or c1 != ' ' then - return new TokenEmStar(pos, c) + return new TokenEmStar(loc, pos, c) else - return new TokenNone(pos, c) + return new TokenNone(loc, pos, c) end else if c == '_' then if c1 == '_' then if c0 != ' ' or c2 != ' 'then - return new TokenStrongUnderscore(pos, c) + return new TokenStrongUnderscore(loc, pos, c) else - return new TokenEmUnderscore(pos, c) + return new TokenEmUnderscore(loc, pos, c) + end + end + if ext_mode then + if (c0.is_letter or c0.is_digit) and c0 != '_' and + (c1.is_letter or c1.is_digit) then + return new TokenNone(loc, pos, c) + else + return new TokenEmUnderscore(loc, pos, c) end end if c0 != ' ' or c1 != ' ' then - return new TokenEmUnderscore(pos, c) + return new TokenEmUnderscore(loc, pos, c) else - return new TokenNone(pos, c) + return new TokenNone(loc, pos, c) end else if c == '!' 
then - if c1 == '[' then return new TokenImage(pos, c) - return new TokenNone(pos, c) + if c1 == '[' then return new TokenImage(loc, pos, c) + return new TokenNone(loc, pos, c) else if c == '[' then - return new TokenLink(pos, c) + return new TokenLink(loc, pos, c) else if c == ']' then - return new TokenNone(pos, c) + return new TokenNone(loc, pos, c) else if c == '`' then if c1 == '`' then - return new TokenCodeDouble(pos, c) + return new TokenCodeDouble(loc, pos, c) else - return new TokenCodeSingle(pos, c) + return new TokenCodeSingle(loc, pos, c) end else if c == '\\' then if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then - return new TokenEscape(pos, c) + return new TokenEscape(loc, pos, c) else - return new TokenNone(pos, c) + return new TokenNone(loc, pos, c) end else if c == '<' then - return new TokenHTML(pos, c) + return new TokenHTML(loc, pos, c) else if c == '&' then - return new TokenEntity(pos, c) - else if c == '^' then - if c0 == '^' or c1 == '^' then - return new TokenNone(pos, c) - else - return new TokenSuper(pos, c) - end + return new TokenEntity(loc, pos, c) else - return new TokenNone(pos, c) + if ext_mode then + if c == '~' and c1 == '~' then + return new TokenStrike(loc, pos, c) + end + end + return new TokenNone(loc, pos, c) end end @@ -365,15 +464,23 @@ end # The emitter use a `Decorator` to select the output format. class MarkdownEmitter + # Kind of processor used for parsing. + type PROCESSOR: MarkdownProcessor + # Processor containing link refs. - var processor: MarkdownProcessor + var processor: PROCESSOR + + # Kind of decorator used for decoration. + type DECORATOR: Decorator # Decorator used for output. 
# Default is `HTMLDecorator` - var decorator: Decorator = new HTMLDecorator is writable + var decorator: DECORATOR is writable, lazy do + return new HTMLDecorator + end # Create a new `MarkdownEmitter` using a custom `decorator`. - init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do + init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do init processor self.decorator = decorator end @@ -390,9 +497,7 @@ class MarkdownEmitter fun emit_in(block: Block) do block.emit_in(self) # Transform and emit mardown text - fun emit_text(text: Text) do - emit_text_until(text, 0, null) - end + fun emit_text(text: Text) do emit_text_until(text, 0, null) # Transform and emit mardown text starting at `from` and # until a token with the same type as `token` is found. @@ -446,7 +551,7 @@ class MarkdownEmitter end # Append `e` to current buffer. - fun add(e: Streamable) do + fun add(e: Writable) do if e isa Text then current_buffer.append e else @@ -455,18 +560,18 @@ class MarkdownEmitter end # Append `c` to current buffer. - fun addc(c: Char) do current_buffer.add c + fun addc(c: Char) do add c.to_s # Append a "\n" line break. - fun addn do current_buffer.add '\n' + fun addn do add "\n" end # A Link Reference. # Links that are specified somewhere in the mardown document to be reused as shortcuts. # -# Example: -# -# [1]: http://example.com/ "Optional title" +# ~~~raw +# [1]: http://example.com/ "Optional title" +# ~~~ class LinkRef # Link href @@ -489,62 +594,67 @@ end # Default decorator used is `HTMLDecorator`. interface Decorator + # Kind of emitter used for decoration. + type EMITTER: MarkdownEmitter + # Render a ruler block. - fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract + fun add_ruler(v: EMITTER, block: BlockRuler) is abstract # Render a headline block with corresponding level. 
- fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract + fun add_headline(v: EMITTER, block: BlockHeadline) is abstract # Render a paragraph block. - fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract + fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract # Render a code or fence block. - fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract + fun add_code(v: EMITTER, block: BlockCode) is abstract # Render a blockquote. - fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract + fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract # Render an unordered list. - fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract + fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract # Render an ordered list. - fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract + fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract # Render a list item. - fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract + fun add_listitem(v: EMITTER, block: BlockListItem) is abstract # Render an emphasis text. - fun add_em(v: MarkdownEmitter, text: Text) is abstract + fun add_em(v: EMITTER, text: Text) is abstract # Render a strong text. - fun add_strong(v: MarkdownEmitter, text: Text) is abstract + fun add_strong(v: EMITTER, text: Text) is abstract - # Render a super text. - fun add_super(v: MarkdownEmitter, text: Text) is abstract + # Render a strike text. + # + # Extended mode only (see `MarkdownProcessor::ext_mode`) + fun add_strike(v: EMITTER, text: Text) is abstract # Render a link. - fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract + fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract # Render an image. 
- fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract + fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract # Render an abbreviation. - fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract + fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract # Render a code span reading from a buffer. - fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract + fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract # Render a text and escape it. - fun append_value(v: MarkdownEmitter, value: Text) is abstract + fun append_value(v: EMITTER, value: Text) is abstract # Render code text from buffer and escape it. - fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract + fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract # Render a character escape. - fun escape_char(v: MarkdownEmitter, char: Char) is abstract + fun escape_char(v: EMITTER, char: Char) is abstract # Render a line break - fun add_line_break(v: MarkdownEmitter) is abstract + fun add_line_break(v: EMITTER) is abstract # Generate a new html valid id from a `String`. fun strip_id(txt: String): String is abstract @@ -594,7 +704,11 @@ class HTMLDecorator end redef fun add_code(v, block) do - v.add "
"
+		if block isa BlockFence and block.meta != null then
+			v.add "
"
+		else
+			v.add "
"
+		end
 		v.emit_in block
 		v.add "
\n" end @@ -635,10 +749,10 @@ class HTMLDecorator v.add "" end - redef fun add_super(v, text) do - v.add "" + redef fun add_strike(v, text) do + v.add "" v.add text - v.add "" + v.add "" end redef fun add_image(v, link, name, comment) do @@ -750,9 +864,31 @@ class HTMLDecorator private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.'] end +# Location in a Markdown input. +class MDLocation + + # Starting line number (starting from 1). + var line_start: Int + + # Starting column number (starting from 1). + var column_start: Int + + # Stopping line number (starting from 1). + var line_end: Int + + # Stopping column number (starting from 1). + var column_end: Int + + redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}" +end + # A block of markdown lines. # A `MDBlock` can contains lines and/or sub-blocks. class MDBlock + + # Position of `self` in the input. + var location: MDLocation + # Kind of block. # See `Block`. var kind: Block = new BlockNone(self) is writable @@ -805,7 +941,14 @@ class MDBlock # Split `self` creating a new sub-block having `line` has `last_line`. fun split(line: MDLine): MDBlock do - var block = new MDBlock + # location for new block + var new_loc = new MDLocation( + first_line.location.line_start, + first_line.location.column_start, + line.location.line_end, + line.location.column_end) + # create block + var block = new MDBlock(new_loc) block.first_line = first_line block.last_line = line first_line = line.next @@ -814,6 +957,9 @@ class MDBlock last_line = null else first_line.prev = null + # update current block loc + location.line_start = first_line.location.line_start + location.column_start = first_line.location.column_start end if first_block == null then first_block = block @@ -1032,6 +1178,9 @@ end class BlockFence super BlockCode + # Any string found after fence token. + var meta: nullable Text + # Fence code lines start at 0 spaces. 
redef var line_start = 0 end @@ -1183,6 +1332,9 @@ end # A markdown line. class MDLine + # Location of `self` in the original input. + var location: MDLocation + # Text contained in this line. var value: String is writable @@ -1409,10 +1561,10 @@ class LineOther var was_empty = line.prev_empty while line != null and not line.is_empty do var t = v.line_kind(line) - if v.in_list and t isa LineList then + if (v.in_list or v.ext_mode) and t isa LineList then break end - if t isa LineCode or t isa LineFence then + if v.ext_mode and (t isa LineCode or t isa LineFence) then break end if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or @@ -1551,7 +1703,8 @@ class LineFence else block = v.current_block.split(v.current_block.last_line.as(not null)) end - block.kind = new BlockFence(block) + var meta = block.first_line.value.meta_from_fence + block.kind = new BlockFence(block, meta) block.first_line.clear var last = block.last_line if last != null and v.line_kind(last) isa LineFence then @@ -1689,7 +1842,10 @@ end # Some tokens have a specific markup behaviour that is handled here. abstract class Token - # Position of `self` in markdown input. + # Location of `self` in the original input. + var location: MDLocation + + # Position of `self` in input independant from lines. var pos: Int # Character found at `pos` in the markdown input. @@ -1899,7 +2055,7 @@ abstract class TokenLinkOrImage comment = lr.title end else - var tid = name.write_to_string.replace("\n", " ").to_lower + var tid = name.write_to_string.replace("\n", " ").to_lower if v.processor.link_refs.has_key(tid) then var lr = v.processor.link_refs[tid] link = lr.link @@ -2038,17 +2194,19 @@ class TokenEscape end end -# A markdown super token. -class TokenSuper +# A markdown strike token. 
+# +# Extended mode only (see `MarkdownProcessor::ext_mode`) +class TokenStrike super Token redef fun emit(v) do var tmp = v.push_buffer - var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self) + var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self) v.pop_buffer if b > 0 then - v.decorator.add_super(v, tmp) - v.current_pos = b + v.decorator.add_strike(v, tmp) + v.current_pos = b + 1 else v.addc char end @@ -2163,6 +2321,7 @@ redef class Text # Safe mode can be activated to limit reading to valid xml. private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do var pos = 0 + var is_valid = true var is_close_tag = false if start + 1 >= length then return -1 if self[start + 1] == '/' then @@ -2180,7 +2339,11 @@ redef class Text pos = read_xml_until(tmp, pos, ' ', '/', '>') if pos == -1 then return -1 var tag = tmp.write_to_string.trim.to_lower - if tag.is_html_unsafe then + if not tag.is_valid_html_tag then + out.append "<" + pos = -1 + else if tag.is_html_unsafe then + is_valid = false out.append "<" if is_close_tag then out.add '/' out.append tmp @@ -2203,7 +2366,11 @@ redef class Text if pos == -1 then return -1 end if self[pos] == '>' then - out.add '>' + if is_valid then + out.add '>' + else + out.append ">" + end return pos end return -1 @@ -2275,6 +2442,14 @@ redef class Text return tpl.write_to_string.to_lower end + private fun is_valid_html_tag: Bool do + if is_empty then return false + for c in self do + if not c.is_alpha then return false + end + return true + end + # Read and escape the markdown contained in `self`. private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or @@ -2288,6 +2463,35 @@ redef class Text return pos end + # Extract string found at end of fence opening. 
+ private fun meta_from_fence: nullable Text do + for i in [0..chars.length[ do + var c = chars[i] + if c != ' ' and c != '`' and c != '~' then + return substring_from(i).trim + end + end + return null + end + + # Init a `MDLocation` instance at `pos` in `self`. + private fun pos_to_loc(pos: Int): MDLocation do + assert pos <= length + var line = 1 + var col = 0 + var i = 0 + while i <= pos do + col += 1 + var c = self[i] + if c == '\n' then + line +=1 + col = 0 + end + i +=1 + end + return new MDLocation(line, col, line, col) + end + # Is `self` an unsafe HTML element? private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string) @@ -2311,7 +2515,7 @@ redef class String # var md = "**Hello World!**" # var html = md.md_to_html # assert html == "

<p><strong>Hello World!</strong></p>

\n" - fun md_to_html: Streamable do + fun md_to_html: Writable do var processor = new MarkdownProcessor return processor.process(self) end