X-Git-Url: http://nitlanguage.org diff --git a/lib/markdown/markdown.nit b/lib/markdown/markdown.nit index bd5e3be..2426d53 100644 --- a/lib/markdown/markdown.nit +++ b/lib/markdown/markdown.nit @@ -30,12 +30,116 @@ import template # SEE: `String::md_to_html` for a shortcut. class MarkdownProcessor - var emitter: MarkdownEmitter is noinit + # Work in extended mode (default). + # + # Behavior changes when using extended mode: + # + # * Lists and code blocks end a paragraph + # + # In normal markdown the following: + # + # ~~~md + # This is a paragraph + # * and this is not a list + # ~~~ + # + # Will produce: + # + # ~~~html + #

This is a paragraph + # * and this is not a list

+ # ~~~ + # + # When using extended mode this changes to: + # + # ~~~html + #

This is a paragraph

+ # + # ~~~ + # + # * Fences code blocks + # + # If you don't want to indent your all your code with 4 spaces, + # you can wrap your code in ``` ``` ``` or `~~~`. + # + # Here's an example: + # + # ~~~md + # fun test do + # print "Hello World!" + # end + # ~~~ + # + # * Code blocks meta + # + # If you want to use syntax highlighting tools, most of them need to know what kind + # of language they are highlighting. + # You can add an optional language identifier after the fence declaration to output + # it in the HTML render. + # + # ```nit + # import markdown + # + # print "# Hello World!".md_to_html + # ``` + # + # Becomes + # + # ~~~html + #
import markdown
+	#
+	# print "Hello World!".md_to_html
+	# 
+ # ~~~ + # + # * Underscores (Emphasis) + # + # Underscores in the middle of a word like: + # + # ~~~md + # Con_cat_this + # ~~~ + # + # normally produces this: + # + # ~~~html + #

Concatthis

+ # ~~~ + # + # With extended mode they don't result in emphasis. + # + # ~~~html + #

Con_cat_this

+ # ~~~ + # + # * Strikethrough + # + # Like in [GFM](https://help.github.com/articles/github-flavored-markdown), + # strikethrought span is marked with `~~`. + # + # ~~~md + # ~~Mistaken text.~~ + # ~~~ + # + # becomes + # + # ~~~html + # Mistaken text. + # ~~~ + var ext_mode = true - init do self.emitter = new MarkdownEmitter(self) + # Disable attaching MDLocation to Tokens + # + # Locations are useful for some tools but they may + # cause an important time and space overhead. + # + # Default = `false` + var no_location = false is writable # Process the mardown `input` string and return the processed output. - fun process(input: String): Streamable do + fun process(input: String): Writable do # init processor link_refs.clear last_link_ref = null @@ -46,41 +150,49 @@ class MarkdownProcessor parent.remove_surrounding_empty_lines recurse(parent, false) # output processed text - return emitter.emit(parent.kind) + decorator.headlines.clear + return emit(parent.kind) end # Split `input` string into `MDLines` and create a parent `MDBlock` with it. 
private fun read_lines(input: String): MDBlock do - var block = new MDBlock + var block = new MDBlock(new MDLocation(1, 1, 1, 1)) var value = new FlatBuffer var i = 0 + + var line_pos = 0 + var col_pos = 0 + while i < input.length do value.clear var pos = 0 var eol = false while not eol and i < input.length do + col_pos += 1 var c = input[i] if c == '\n' then - i += 1 eol = true + else if c == '\r' then else if c == '\t' then - var np = pos + (4 - (pos.bin_and(3))) + var np = pos + (4 - (pos & 3)) while pos < np do value.add ' ' pos += 1 end - i += 1 else pos += 1 value.add c - i += 1 end + i += 1 end + line_pos += 1 - var line = new MDLine(value.write_to_string) + var loc = new MDLocation(line_pos, 1, line_pos, col_pos) + var line = new MDLine(loc, value.write_to_string) var is_link_ref = check_link_ref(line) # Skip link refs if not is_link_ref then block.add_line line + col_pos = 0 end return block end @@ -97,15 +209,15 @@ class MarkdownProcessor if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then pos = line.leading + 1 pos = md.read_until(id, pos, ']') - if not id.is_empty and pos + 2 < line.value.length then + if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then if line.value[pos + 1] == ':' then pos += 2 pos = md.skip_spaces(pos) - if line.value[pos] == '<' then + if pos >= 0 and line.value[pos] == '<' then pos += 1 pos = md.read_until(link, pos, '>') pos += 1 - else + else if pos >= 0 then pos = md.read_until(link, pos, ' ', '\n') end if not link.is_empty then @@ -146,7 +258,10 @@ class MarkdownProcessor pos = md.read_until(comment, pos, c) end end - if not comment.is_empty then last_link_ref.title = comment.write_to_string + var last_link_ref = self.last_link_ref + if not comment.is_empty and last_link_ref != null then + last_link_ref.title = comment.write_to_string + end end if comment.is_empty then return false return true @@ -161,8 +276,10 @@ class MarkdownProcessor # # Markdown allows link refs to be defined 
over two lines: # - # [id]: http://example.com/longish/path/to/resource/here - # "Optional Title Here" + # ~~~md + # [id]: http://example.com/longish/path/to/resource/here + # "Optional Title Here" + # ~~~ # private var last_link_ref: nullable LinkRef = null @@ -218,12 +335,14 @@ class MarkdownProcessor if value[leading] == '#' then return new LineHeadline if value[leading] == '>' then return new LineBlockquote - if value.length - leading - trailing > 2 then - if value[leading] == '`' and md.count_chars_start('`') >= 3 then - return new LineFence - end - if value[leading] == '~' and md.count_chars_start('~') >= 3 then - return new LineFence + if ext_mode then + if value.length - leading - trailing > 2 then + if value[leading] == '`' and md.count_chars_start('`') >= 3 then + return new LineFence + end + if value[leading] == '~' and md.count_chars_start('~') >= 3 then + return new LineFence + end end end @@ -261,29 +380,131 @@ class MarkdownProcessor return new LineOther end -end + # Get the token kind at `pos`. + fun token_at(text: Text, pos: Int): Token do + var c0: Char + var c1: Char + var c2: Char -# Emit output corresponding to blocks content. -# -# Blocks are created by a previous pass in `MarkdownProcessor`. -# The emitter use a `Decorator` to select the output format. 
-class MarkdownEmitter + if pos > 0 then + c0 = text[pos - 1] + else + c0 = ' ' + end + var c = text[pos] + + if pos + 1 < text.length then + c1 = text[pos + 1] + else + c1 = ' ' + end + if pos + 2 < text.length then + c2 = text[pos + 2] + else + c2 = ' ' + end + + var loc + if no_location then + loc = null + else + loc = new MDLocation( + current_loc.line_start, + current_loc.column_start + pos, + current_loc.line_start, + current_loc.column_start + pos) + end + + if c == '*' then + if c1 == '*' then + if c0 != ' ' or c2 != ' ' then + return new TokenStrongStar(loc, pos, c) + else + return new TokenEmStar(loc, pos, c) + end + end + if c0 != ' ' or c1 != ' ' then + return new TokenEmStar(loc, pos, c) + else + return new TokenNone(loc, pos, c) + end + else if c == '_' then + if c1 == '_' then + if c0 != ' ' or c2 != ' ' then + return new TokenStrongUnderscore(loc, pos, c) + else + return new TokenEmUnderscore(loc, pos, c) + end + end + if ext_mode then + if (c0.is_letter or c0.is_digit) and c0 != '_' and + (c1.is_letter or c1.is_digit) then + return new TokenNone(loc, pos, c) + else + return new TokenEmUnderscore(loc, pos, c) + end + end + if c0 != ' ' or c1 != ' ' then + return new TokenEmUnderscore(loc, pos, c) + else + return new TokenNone(loc, pos, c) + end + else if c == '!' then + if c1 == '[' then return new TokenImage(loc, pos, c) + return new TokenNone(loc, pos, c) + else if c == '[' then + return new TokenLink(loc, pos, c) + else if c == ']' then + return new TokenNone(loc, pos, c) + else if c == '`' then + if c1 == '`' then + return new TokenCodeDouble(loc, pos, c) + else + return new TokenCodeSingle(loc, pos, c) + end + else if c == '\\' then + if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' 
or c1 == '`' or c1 == '~' or c1 == '^' then + return new TokenEscape(loc, pos, c) + else + return new TokenNone(loc, pos, c) + end + else if c == '<' then + return new TokenHTML(loc, pos, c) + else if c == '&' then + return new TokenEntity(loc, pos, c) + else + if ext_mode then + if c == '~' and c1 == '~' then + return new TokenStrike(loc, pos, c) + end + end + return new TokenNone(loc, pos, c) + end + end + + # Find the position of a `token` in `self`. + fun find_token(text: Text, start: Int, token: Token): Int do + var pos = start + while pos < text.length do + if token_at(text, pos).is_same_type(token) then + return pos + end + pos += 1 + end + return -1 + end - # Processor containing link refs. - var processor: MarkdownProcessor + # Kind of decorator used for decoration. + type DECORATOR: Decorator # Decorator used for output. # Default is `HTMLDecorator` - var decorator: Decorator = new HTMLDecorator is writable - - # Create a new `MardownEmitter` using the default `HTMLDecorator` - init(processor: MarkdownProcessor) do - self.processor = processor + var decorator: DECORATOR is writable, lazy do + return new HTMLDecorator end # Create a new `MarkdownEmitter` using a custom `decorator`. - init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do - init processor + init with_decorator(decorator: DECORATOR) do self.decorator = decorator end @@ -299,20 +520,22 @@ class MarkdownEmitter fun emit_in(block: Block) do block.emit_in(self) # Transform and emit mardown text - fun emit_text(text: Text) do - emit_text_until(text, 0, null) - end + fun emit_text(text: Text) do emit_text_until(text, 0, null) - # Transform and emit mardown text starting at `from` and + # Transform and emit mardown text starting at `start` and # until a token with the same type as `token` is found. - # Go until the end of text if `token` is null. + # Go until the end of `text` if `token` is null. 
fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do var old_text = current_text var old_pos = current_pos current_text = text current_pos = start while current_pos < text.length do - var mt = text.token_at(current_pos) + if text[current_pos] == '\n' then + current_loc.line_start += 1 + current_loc.column_start = -current_pos + end + var mt = token_at(text, current_pos) if (token != null and not token isa TokenNone) and (mt.is_same_type(token) or (token isa TokenEmStar and mt isa TokenStrongStar) or @@ -354,8 +577,23 @@ class MarkdownEmitter return buffer_stack.last end + # Stacked locations. + private var loc_stack = new List[MDLocation] + + # Push a new MDLocation on the stack. + private fun push_loc(location: MDLocation) do loc_stack.add location + + # Pop the last buffer. + private fun pop_loc: MDLocation do return loc_stack.pop + + # Current output buffer. + private fun current_loc: MDLocation do + assert not loc_stack.is_empty + return loc_stack.last + end + # Append `e` to current buffer. - fun add(e: Streamable) do + fun add(e: Writable) do if e isa Text then current_buffer.append e else @@ -364,18 +602,20 @@ class MarkdownEmitter end # Append `c` to current buffer. - fun addc(c: Char) do current_buffer.add c + fun addc(c: Char) do + current_buffer.add c + end # Append a "\n" line break. - fun addn do current_buffer.add '\n' + fun addn do addc '\n' end # A Link Reference. # Links that are specified somewhere in the mardown document to be reused as shortcuts. # -# Example: -# -# [1]: http://example.com/ "Optional title" +# ~~~raw +# [1]: http://example.com/ "Optional title" +# ~~~ class LinkRef # Link href @@ -389,7 +629,7 @@ class LinkRef # Create a link with a title. init with_title(link: String, title: nullable String) do - self.link = link + init(link) self.title = title end end @@ -398,62 +638,72 @@ end # Default decorator used is `HTMLDecorator`. 
interface Decorator + # Kind of processor used + type PROCESSOR: MarkdownProcessor + + # Render a single plain char. + # + # Redefine this method to add special escaping for plain text. + fun add_char(v: PROCESSOR, c: Char) do v.addc c + # Render a ruler block. - fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract + fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract # Render a headline block with corresponding level. - fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract + fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract # Render a paragraph block. - fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract + fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract # Render a code or fence block. - fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract + fun add_code(v: PROCESSOR, block: BlockCode) is abstract # Render a blockquote. - fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract + fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract # Render an unordered list. - fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract + fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract # Render an ordered list. - fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract + fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract # Render a list item. - fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract + fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract # Render an emphasis text. - fun add_em(v: MarkdownEmitter, text: Text) is abstract + fun add_em(v: PROCESSOR, text: Text) is abstract # Render a strong text. - fun add_strong(v: MarkdownEmitter, text: Text) is abstract + fun add_strong(v: PROCESSOR, text: Text) is abstract - # Render a super text. - fun add_super(v: MarkdownEmitter, text: Text) is abstract + # Render a strike text. 
+ # + # Extended mode only (see `MarkdownProcessor::ext_mode`) + fun add_strike(v: PROCESSOR, text: Text) is abstract # Render a link. - fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract + fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract # Render an image. - fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract + fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract # Render an abbreviation. - fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract + fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract # Render a code span reading from a buffer. - fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract + fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract # Render a text and escape it. - fun append_value(v: MarkdownEmitter, value: Text) is abstract + fun append_value(v: PROCESSOR, value: Text) is abstract # Render code text from buffer and escape it. - fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract + fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract # Render a character escape. - fun escape_char(v: MarkdownEmitter, char: Char) is abstract + fun escape_char(v: PROCESSOR, char: Char) is abstract # Render a line break - fun add_line_break(v: MarkdownEmitter) is abstract + fun add_line_break(v: PROCESSOR) is abstract # Generate a new html valid id from a `String`. 
fun strip_id(txt: String): String is abstract @@ -486,7 +736,9 @@ class HTMLDecorator redef fun add_headline(v, block) do # save headline - var txt = block.block.first_line.value + var line = block.block.first_line + if line == null then return + var txt = line.value var id = strip_id(txt) var lvl = block.depth headlines[id] = new HeadLine(id, txt, lvl) @@ -503,7 +755,14 @@ class HTMLDecorator end redef fun add_code(v, block) do - v.add "
"
+		var meta = block.meta
+		if meta != null then
+			v.add "
"
+		else
+			v.add "
"
+		end
 		v.emit_in block
 		v.add "
\n" end @@ -544,10 +803,10 @@ class HTMLDecorator v.add "" end - redef fun add_super(v, text) do - v.add "" + redef fun add_strike(v, text) do + v.add "" v.add text - v.add "" + v.add "" end redef fun add_image(v, link, name, comment) do @@ -659,9 +918,36 @@ class HTMLDecorator private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.'] end +# Location in a Markdown input. +class MDLocation + + # Starting line number (starting from 1). + var line_start: Int + + # Starting column number (starting from 1). + var column_start: Int + + # Stopping line number (starting from 1). + var line_end: Int + + # Stopping column number (starting from 1). + var column_end: Int + + redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}" + + # Return a copy of `self`. + fun copy: MDLocation do + return new MDLocation(line_start, column_start, line_end, column_end) + end +end + # A block of markdown lines. # A `MDBlock` can contains lines and/or sub-blocks. class MDBlock + + # Position of `self` in the input. + var location: MDLocation + # Kind of block. # See `Block`. var kind: Block = new BlockNone(self) is writable @@ -714,7 +1000,14 @@ class MDBlock # Split `self` creating a new sub-block having `line` has `last_line`. 
fun split(line: MDLine): MDBlock do - var block = new MDBlock + # location for new block + var new_loc = new MDLocation( + first_line.as(not null).location.line_start, + first_line.as(not null).location.column_start, + line.location.line_end, + line.location.column_end) + # create block + var block = new MDBlock(new_loc) block.first_line = first_line block.last_line = line first_line = line.next @@ -722,13 +1015,16 @@ class MDBlock if first_line == null then last_line = null else - first_line.prev = null + first_line.as(not null).prev = null + # update current block loc + location.line_start = first_line.as(not null).location.line_start + location.column_start = first_line.as(not null).location.column_start end if first_block == null then first_block = block last_block = block else - last_block.next = block + last_block.as(not null).next = block last_block = block end return block @@ -740,10 +1036,10 @@ class MDBlock first_line = line last_line = line else - last_line.next_empty = line.is_empty - line.prev_empty = last_line.is_empty + last_line.as(not null).next_empty = line.is_empty + line.prev_empty = last_line.as(not null).is_empty line.prev = last_line - last_line.next = line + last_line.as(not null).next = line last_line = line end end @@ -753,12 +1049,12 @@ class MDBlock if line.prev == null then first_line = line.next else - line.prev.next = line.next + line.prev.as(not null).next = line.next end if line.next == null then last_line = line.prev else - line.next.prev = line.prev + line.next.as(not null).prev = line.prev end line.prev = null line.next = null @@ -825,6 +1121,12 @@ class MDBlock text.append "\n" line = line.next end + var block = first_block + while block != null do + text.append block.text + text.append "\n" + block = block.next + end return text.write_to_string end end @@ -837,10 +1139,10 @@ abstract class Block var block: MDBlock # Output `self` using `v.decorator`. 
- fun emit(v: MarkdownEmitter) do v.emit_in(self) + fun emit(v: MarkdownProcessor) do v.emit_in(self) # Emit the containts of `self`, lines or blocks. - fun emit_in(v: MarkdownEmitter) do + fun emit_in(v: MarkdownProcessor) do block.remove_surrounding_empty_lines if block.has_lines then emit_lines(v) @@ -850,7 +1152,7 @@ abstract class Block end # Emit lines contained in `block`. - fun emit_lines(v: MarkdownEmitter) do + fun emit_lines(v: MarkdownProcessor) do var tpl = v.push_buffer var line = block.first_line while line != null do @@ -868,13 +1170,35 @@ abstract class Block end # Emit sub-blocks contained in `block`. - fun emit_blocks(v: MarkdownEmitter) do + fun emit_blocks(v: MarkdownProcessor) do var block = self.block.first_block while block != null do + v.push_loc(block.location) block.kind.emit(v) + v.pop_loc block = block.next end end + + # The raw content of the block as a multi-line string. + fun raw_content: String do + var infence = self isa BlockFence + var text = new FlatBuffer + var line = self.block.first_line + while line != null do + if not line.is_empty then + var str = line.value + if not infence and str.has_prefix(" ") then + text.append str.substring(4, str.length - line.trailing) + else + text.append str + end + end + text.append "\n" + line = line.next + end + return text.write_to_string + end end # A block without any markdown specificities. @@ -915,6 +1239,9 @@ end class BlockCode super Block + # Any string found after fence token. + var meta: nullable Text + # Number of char to skip at the beginning of the line. # # Block code lines start at 4 spaces. @@ -949,7 +1276,15 @@ end class BlockHeadline super Block - redef fun emit(v) do v.decorator.add_headline(v, self) + redef fun emit(v) do + var loc = block.location.copy + loc.column_start += start + v.push_loc(loc) + v.decorator.add_headline(v, self) + v.pop_loc + end + + private var start = 0 # Depth of the headline used to determine the headline level. 
var depth = 0 @@ -959,6 +1294,7 @@ class BlockHeadline if depth > 0 then return var level = 0 var line = block.first_line + if line == null then return if line.is_empty then return var start = line.leading while start < line.value.length and line.value[start] == '#' do @@ -978,6 +1314,7 @@ class BlockHeadline line.leading = 0 line.trailing = 0 end + self.start = start depth = level.min(6) end end @@ -997,6 +1334,7 @@ abstract class BlockList # Split list block into list items sub-blocks. private fun init_block(v: MarkdownProcessor) do var line = block.first_line + if line == null then return line = line.next while line != null do var t = v.line_kind(line) @@ -1092,6 +1430,9 @@ end # A markdown line. class MDLine + # Location of `self` in the original input. + var location: MDLocation + # Text contained in this line. var value: String is writable @@ -1112,8 +1453,7 @@ class MDLine var next_empty: Bool = false is writable # Initialize a new MDLine from its string value - init(value: String) do - self.value = value + init do self.leading = process_leading if leading != value.length then self.is_empty = false @@ -1127,8 +1467,8 @@ class MDLine leading = 0 trailing = 0 is_empty = true - if prev != null then prev.next_empty = true - if next != null then next.prev_empty = true + if prev != null then prev.as(not null).next_empty = true + if next != null then next.as(not null).prev_empty = true end # Number or leading spaces on this line. @@ -1262,8 +1602,8 @@ class MDLine # Used by `check_html`. 
private fun read_xml_comment(first_line: MDLine, start: Int): Int do var line: nullable MDLine = first_line - if start + 3 < line.value.length then - if line.value[2] == '-' and line.value[3] == '-' then + if start + 3 < line.as(not null).value.length then + if line.as(not null).value[2] == '-' and line.as(not null).value[3] == '-' then var pos = start + 4 while line != null do while pos < line.value.length and line.value[pos] != '-' do @@ -1304,7 +1644,7 @@ class LineEmpty super Line redef fun process(v) do - v.current_line = v.current_line.next + v.current_line = v.current_line.as(not null).next end end @@ -1316,13 +1656,13 @@ class LineOther redef fun process(v) do var line = v.current_line # go to block end - var was_empty = line.prev_empty + var was_empty = line.as(not null).prev_empty while line != null and not line.is_empty do var t = v.line_kind(line) - if v.in_list and t isa LineList then + if (v.in_list or v.ext_mode) and t isa LineList then break end - if t isa LineCode or t isa LineFence then + if v.ext_mode and (t isa LineCode or t isa LineFence) then break end if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or @@ -1332,30 +1672,30 @@ class LineOther line = line.next end # build block - var bk: Block + var current_block = v.current_block.as(not null) if line != null and not line.is_empty then - var block = v.current_block.split(line.prev.as(not null)) + var block = current_block.split(line.prev.as(not null)) if v.in_list and not was_empty then block.kind = new BlockNone(block) else block.kind = new BlockParagraph(block) end - v.current_block.remove_leading_empty_lines + current_block.remove_leading_empty_lines else var block: MDBlock if line != null then - block = v.current_block.split(line) + block = current_block.split(line) else - block = v.current_block.split(v.current_block.last_line.as(not null)) + block = current_block.split(current_block.last_line.as(not null)) end if v.in_list and (line == null or not line.is_empty) and not 
was_empty then block.kind = new BlockNone(block) else block.kind = new BlockParagraph(block) end - v.current_block.remove_leading_empty_lines + current_block.remove_leading_empty_lines end - v.current_line = v.current_block.first_line + v.current_line = current_block.first_line end end @@ -1370,15 +1710,16 @@ class LineCode line = line.next end # split at block end line + var current_block = v.current_block.as(not null) var block: MDBlock if line != null then - block = v.current_block.split(line.prev.as(not null)) + block = current_block.split(line.prev.as(not null)) else - block = v.current_block.split(v.current_block.last_line.as(not null)) + block = current_block.split(current_block.last_line.as(not null)) end block.kind = new BlockCode(block) block.remove_surrounding_empty_lines - v.current_line = v.current_block.first_line + v.current_line = current_block.first_line end end @@ -1388,12 +1729,14 @@ class LineXML redef fun process(v) do var line = v.current_line + if line == null then return + var current_block = v.current_block.as(not null) var prev = line.prev - if prev != null then v.current_block.split(prev) - var block = v.current_block.split(line.xml_end_line.as(not null)) + if prev != null then current_block.split(prev) + var block = current_block.split(line.xml_end_line.as(not null)) block.kind = new BlockXML(block) - v.current_block.remove_leading_empty_lines - v.current_line = v.current_block.first_line + current_block.remove_leading_empty_lines + v.current_line = current_block.first_line end end @@ -1403,6 +1746,7 @@ class LineBlockquote redef fun process(v) do var line = v.current_line + var current_block = v.current_block.as(not null) # go to bquote end while line != null do if not line.is_empty and (line.prev_empty and @@ -1413,9 +1757,9 @@ class LineBlockquote # build sub block var block: MDBlock if line != null then - block = v.current_block.split(line.prev.as(not null)) + block = current_block.split(line.prev.as(not null)) else - block = 
v.current_block.split(v.current_block.last_line.as(not null)) + block = current_block.split(current_block.last_line.as(not null)) end var kind = new BlockQuote(block) block.kind = kind @@ -1423,7 +1767,7 @@ class LineBlockquote kind.remove_block_quote_prefix(block) v.current_line = line v.recurse(block, false) - v.current_line = v.current_block.first_line + v.current_line = current_block.first_line end end @@ -1433,11 +1777,13 @@ class LineHR redef fun process(v) do var line = v.current_line - if line.prev != null then v.current_block.split(line.prev.as(not null)) - var block = v.current_block.split(line.as(not null)) + if line == null then return + var current_block = v.current_block.as(not null) + if line.prev != null then current_block.split(line.prev.as(not null)) + var block = current_block.split(line) block.kind = new BlockRuler(block) - v.current_block.remove_leading_empty_lines - v.current_line = v.current_block.first_line + current_block.remove_leading_empty_lines + v.current_line = current_block.first_line end end @@ -1447,7 +1793,8 @@ class LineFence redef fun process(v) do # go to fence end - var line = v.current_line.next + var line = v.current_line.as(not null).next + var current_block = v.current_block.as(not null) while line != null do if v.line_kind(line) isa LineFence then break line = line.next @@ -1458,15 +1805,17 @@ class LineFence # build fence block var block: MDBlock if line != null then - block = v.current_block.split(line.prev.as(not null)) + block = current_block.split(line.prev.as(not null)) else - block = v.current_block.split(v.current_block.last_line.as(not null)) + block = current_block.split(current_block.last_line.as(not null)) end - block.kind = new BlockFence(block) - block.first_line.clear + block.remove_surrounding_empty_lines + var meta = block.first_line.as(not null).value.meta_from_fence + block.kind = new BlockFence(block, meta) + block.first_line.as(not null).clear var last = block.last_line if last != null and 
v.line_kind(last) isa LineFence then - block.last_line.clear + block.last_line.as(not null).clear end block.remove_surrounding_empty_lines v.current_line = line @@ -1479,14 +1828,16 @@ class LineHeadline redef fun process(v) do var line = v.current_line + if line == null then return + var current_block = v.current_block.as(not null) var lprev = line.prev - if lprev != null then v.current_block.split(lprev) - var block = v.current_block.split(line.as(not null)) + if lprev != null then current_block.split(lprev) + var block = current_block.split(line) var kind = new BlockHeadline(block) block.kind = kind kind.transform_headline(block) - v.current_block.remove_leading_empty_lines - v.current_line = v.current_block.first_line + current_block.remove_leading_empty_lines + v.current_line = current_block.first_line end end @@ -1496,16 +1847,18 @@ class LineHeadline1 redef fun process(v) do var line = v.current_line + if line == null then return + var current_block = v.current_block.as(not null) var lprev = line.prev - if lprev != null then v.current_block.split(lprev) - line.next.clear - var block = v.current_block.split(line.as(not null)) + if lprev != null then current_block.split(lprev) + line.next.as(not null).clear + var block = current_block.split(line) var kind = new BlockHeadline(block) kind.depth = 1 kind.transform_headline(block) block.kind = kind - v.current_block.remove_leading_empty_lines - v.current_line = v.current_block.first_line + current_block.remove_leading_empty_lines + v.current_line = current_block.first_line end end @@ -1515,22 +1868,24 @@ class LineHeadline2 redef fun process(v) do var line = v.current_line + if line == null then return + var current_block = v.current_block.as(not null) var lprev = line.prev - if lprev != null then v.current_block.split(lprev) - line.next.clear - var block = v.current_block.split(line.as(not null)) + if lprev != null then current_block.split(lprev) + line.next.as(not null).clear + var block = 
current_block.split(line) var kind = new BlockHeadline(block) kind.depth = 2 kind.transform_headline(block) block.kind = kind - v.current_block.remove_leading_empty_lines - v.current_line = v.current_block.first_line + current_block.remove_leading_empty_lines + v.current_line = current_block.first_line end end # A markdown list line. # Mainly used to factorize code between ordered and unordered lists. -class LineList +abstract class LineList super Line redef fun process(v) do @@ -1543,19 +1898,20 @@ class LineList line = line.next end # build list block + var current_block = v.current_block.as(not null) var list: MDBlock if line != null then - list = v.current_block.split(line.prev.as(not null)) + list = current_block.split(line.prev.as(not null)) else - list = v.current_block.split(v.current_block.last_line.as(not null)) + list = current_block.split(current_block.last_line.as(not null)) end var kind = block_kind(list) list.kind = kind - list.first_line.prev_empty = false - list.last_line.next_empty = false + list.first_line.as(not null).prev_empty = false + list.last_line.as(not null).next_empty = false list.remove_surrounding_empty_lines - list.first_line.prev_empty = false - list.last_line.next_empty = false + list.first_line.as(not null).prev_empty = false + list.last_line.as(not null).next_empty = false kind.init_block(v) var block = list.first_block while block != null do @@ -1600,14 +1956,17 @@ end # Some tokens have a specific markup behaviour that is handled here. abstract class Token - # Position of `self` in markdown input. + # Location of `self` in the original input. + var location: nullable MDLocation + + # Position of `self` in input independant from lines. var pos: Int # Character found at `pos` in the markdown input. var char: Char # Output that token using `MarkdownEmitter::decorator`. - fun emit(v: MarkdownEmitter) do v.addc char + fun emit(v: MarkdownProcessor) do v.decorator.add_char(v, char) end # A token without a specific meaning. 
@@ -1675,14 +2034,15 @@ abstract class TokenCode super Token redef fun emit(v) do + var current_text = v.current_text.as(not null) var a = pos + next_pos + 1 - var b = v.current_text.find_token(a, self) + var b = v.find_token(current_text, a, self) if b > 0 then v.current_pos = b + next_pos - while a < b and v.current_text[a] == ' ' do a += 1 + while a < b and current_text[a] == ' ' do a += 1 if a < b then - while v.current_text[b - 1] == ' ' do b -= 1 - v.decorator.add_span_code(v, v.current_text.as(not null), a, b) + while current_text[b - 1] == ' ' do b -= 1 + v.decorator.add_span_code(v, current_text, a, b) end else v.addc char @@ -1735,11 +2095,12 @@ abstract class TokenLinkOrImage end # Emit the hyperlink as link or image. - private fun emit_hyper(v: MarkdownEmitter) is abstract + private fun emit_hyper(v: MarkdownProcessor) is abstract # Check if the link is a valid link. - private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do + private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do var md = v.current_text + if md == null then return -1 var pos if token isa TokenLink then pos = start + 1 @@ -1754,9 +2115,9 @@ abstract class TokenLinkOrImage pos += 1 pos = md.skip_spaces(pos) if pos < start then - var tid = name.write_to_string.to_lower - if v.processor.link_refs.has_key(tid) then - var lr = v.processor.link_refs[tid] + var tid = name.as(not null).write_to_string.to_lower + if v.link_refs.has_key(tid) then + var lr = v.link_refs[tid] is_abbrev = lr.is_abbrev link = lr.link comment = lr.title @@ -1791,6 +2152,7 @@ abstract class TokenLinkOrImage if pos == -1 then return -1 end end + if pos < start then return -1 if md[pos] != ')' then return -1 else if md[pos] == '[' then pos += 1 @@ -1803,16 +2165,16 @@ abstract class TokenLinkOrImage else id = name end - var tid = id.write_to_string.to_lower - if v.processor.link_refs.has_key(tid) then - var lr = v.processor.link_refs[tid] + var 
tid = id.as(not null).write_to_string.to_lower + if v.link_refs.has_key(tid) then + var lr = v.link_refs[tid] link = lr.link comment = lr.title end else - var tid = name.write_to_string.replace("\n", " ").to_lower - if v.processor.link_refs.has_key(tid) then - var lr = v.processor.link_refs[tid] + var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower + if v.link_refs.has_key(tid) then + var lr = v.link_refs[tid] link = lr.link comment = lr.title pos = old_pos @@ -1864,7 +2226,7 @@ class TokenHTML # Is the HTML valid? # Also take care of link and mailto shortcuts. - private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do + private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do # check for auto links var tmp = new FlatBuffer var pos = md.read_until(tmp, start + 1, ':', ' ', '>', '\n') @@ -1945,21 +2307,23 @@ class TokenEscape redef fun emit(v) do v.current_pos += 1 - v.addc v.current_text[v.current_pos] + v.addc v.current_text.as(not null)[v.current_pos] end end -# A markdown super token. -class TokenSuper +# A markdown strike token. +# +# Extended mode only (see `MarkdownProcessor::ext_mode`) +class TokenStrike super Token redef fun emit(v) do var tmp = v.push_buffer - var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self) + var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self) v.pop_buffer if b > 0 then - v.decorator.add_super(v, tmp) - v.current_pos = b + v.decorator.add_strike(v, tmp) + v.current_pos = b + 1 else v.addc char end @@ -1968,108 +2332,6 @@ end redef class Text - # Get the token kind at `pos`. 
- private fun token_at(pos: Int): Token do - var c0: Char - var c1: Char - var c2: Char - var c3: Char - - if pos > 0 then - c0 = self[pos - 1] - else - c0 = ' ' - end - var c = self[pos] - - if pos + 1 < length then - c1 = self[pos + 1] - else - c1 = ' ' - end - if pos + 2 < length then - c2 = self[pos + 2] - else - c2 = ' ' - end - if pos + 3 < length then - c3 = self[pos + 3] - else - c3 = ' ' - end - - if c == '*' then - if c1 == '*' then - if c0 != ' ' or c2 != ' ' then - return new TokenStrongStar(pos, c) - else - return new TokenEmStar(pos, c) - end - end - if c0 != ' ' or c1 != ' ' then - return new TokenEmStar(pos, c) - else - return new TokenNone(pos, c) - end - else if c == '_' then - if c1 == '_' then - if c0 != ' ' or c2 != ' 'then - return new TokenStrongUnderscore(pos, c) - else - return new TokenEmUnderscore(pos, c) - end - end - if c0 != ' ' or c1 != ' ' then - return new TokenEmUnderscore(pos, c) - else - return new TokenNone(pos, c) - end - else if c == '!' then - if c1 == '[' then return new TokenImage(pos, c) - return new TokenNone(pos, c) - else if c == '[' then - return new TokenLink(pos, c) - else if c == ']' then - return new TokenNone(pos, c) - else if c == '`' then - if c1 == '`' then - return new TokenCodeDouble(pos, c) - else - return new TokenCodeSingle(pos, c) - end - else if c == '\\' then - if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' 
or c1 == '`' or c1 == '~' or c1 == '^' then - return new TokenEscape(pos, c) - else - return new TokenNone(pos, c) - end - else if c == '<' then - return new TokenHTML(pos, c) - else if c == '&' then - return new TokenEntity(pos, c) - else if c == '^' then - if c0 == '^' or c1 == '^' then - return new TokenNone(pos, c) - else - return new TokenSuper(pos, c) - end - else - return new TokenNone(pos, c) - end - end - - # Find the position of a `token` in `self`. - private fun find_token(start: Int, token: Token): Int do - var pos = start - while pos < length do - if token_at(pos).is_same_type(token) then - return pos - end - pos += 1 - end - return -1 - end - # Get the position of the next non-space character. private fun skip_spaces(start: Int): Int do var pos = start @@ -2089,18 +2351,11 @@ redef class Text if c == '\\' and pos + 1 < length then pos = escape(out, self[pos + 1], pos) else - var end_reached = false - for n in nend do - if c == n then - end_reached = true - break - end - end - if end_reached then break + for n in nend do if c == n then break label out.add c end pos += 1 - end + end label if pos == length then return -1 return pos end @@ -2176,6 +2431,7 @@ redef class Text # Safe mode can be activated to limit reading to valid xml. 
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do var pos = 0 + var is_valid = true var is_close_tag = false if start + 1 >= length then return -1 if self[start + 1] == '/' then @@ -2193,7 +2449,11 @@ redef class Text pos = read_xml_until(tmp, pos, ' ', '/', '>') if pos == -1 then return -1 var tag = tmp.write_to_string.trim.to_lower - if tag.is_html_unsafe then + if not tag.is_valid_html_tag then + out.append "&lt;" + pos = -1 + else if tag.is_html_unsafe then + is_valid = false out.append "&lt;" if is_close_tag then out.add '/' out.append tmp @@ -2216,7 +2476,11 @@ redef class Text if pos == -1 then return -1 end if self[pos] == '>' then - out.add '>' + if is_valid then + out.add '>' + else + out.append "&gt;" + end return pos end return -1 @@ -2288,6 +2552,14 @@ redef class Text return tpl.write_to_string.to_lower end + private fun is_valid_html_tag: Bool do + if is_empty then return false + for c in self do + if not c.is_alpha then return false + end + return true + end + # Read and escape the markdown contained in `self`. private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or @@ -2301,6 +2573,17 @@ redef class Text return pos end + # Extract string found at end of fence opening. + private fun meta_from_fence: nullable Text do + for i in [0..chars.length[ do + var c = chars[i] + if c != ' ' and c != '`' and c != '~' then + return substring_from(i).trim + end + end + return null + end + # Is `self` an unsafe HTML element? private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string) @@ -2324,7 +2607,7 @@ redef class String # var md = "**Hello World!**" # var html = md.md_to_html # assert html == "

<p><strong>Hello World!</strong></p>

\n" - fun md_to_html: Streamable do + fun md_to_html: Writable do var processor = new MarkdownProcessor return processor.process(self) end