# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Markdown blocks parsing
#
# Introduces the parsers for the different Markdown blocks such as headings,
# lists, code blocks, etc.
module markdown_block_parsing

import markdown_inline_parsing

# Markdown parser
#
# Used to create the AST representation of a Markdown document.
class MdParser

	# Inline parser used to parse block content
	private var inline_parser = new MdInlineParser is lazy

	# Block parser factories
	#
	# `find_block_start` tries them in this order and keeps the first match.
	private var block_parser_factories: Collection[MdBlockParserFactory] do
		var res = new Array[MdBlockParserFactory]
		res.add(new MdBlockQuoteParserFactory)
		res.add(new MdHeadingParserFactory)
		res.add(new MdFencedCodeBlockParserFactory)
		res.add(new MdHtmlBlockParserFactory)
		res.add(new MdThematicBreakParserFactory)
		res.add(new MdListBlockParserFactory)
		res.add(new MdIndentedCodeBlockParserFactory)
		return res
	end

	# Active block parsers
	#
	# Used as a stack to parse nested blocks.
	private var active_block_parsers = new Array[MdBlockParser]

	# All block parsers activated since the last call to `initialize`
	private var all_block_parsers = new HashSet[MdBlockParser]

	# Return the active block parser
	#
	# This is the last entry of the `active_block_parsers` stack.
	private fun active_block_parser: MdBlockParser do
		return active_block_parsers.last
	end

	# Activate a `block_parser`
	#
	# Push the `block_parser` on the top of the `active_block_parsers` stack
	# and register it in `all_block_parsers`.
	private fun activate_block_parser(block_parser: MdBlockParser) do
		active_block_parsers.add(block_parser)
		all_block_parsers.add(block_parser)
	end

	# Deactivate the `active_block_parser`
	#
	# Pop the top of the `active_block_parsers` stack.
	private fun deactivate_block_parser do
		active_block_parsers.pop
	end

	# Deactivate the `active_block_parser` and forget it
	#
	# The parser is removed from `all_block_parsers` and its block is unlinked.
	private fun remove_active_block_parser do
		var removed = active_block_parser
		deactivate_block_parser
		all_block_parsers.remove(removed)
		removed.block.unlink
	end

	# Post-processors applied after the parsing of a document
	var post_processors = new Array[MdPostProcessor] is writable

	# Currently parsed line
	private var line_string: String is noinit

	# Current index (offset) in input `line_string` (starts at 0)
	private var index = 0

	# Current column in input `line_string` (starts at 0)
	#
	# Tab causes column to go to next 4-space tab stop.
	private var column = 0

	# Is the current column within a tab character (partially consumed tab)
	private var column_is_in_tab: Bool is noinit

	# Current line in input string (starts at 1)
	private var line = 1

	# Index of the next non-space character starting from `index`
	private var next_non_space_index = 0

	# Next non-space column
	private var next_non_space_column = 0

	# Current indent in columns
	#
	# Either by spaces or tab stop of 4, starting from `column`.
	private var indent = 0

	# Is the current `line` blank starting from `index`?
	private var is_blank: Bool is noinit

	# Does a node end with a blank line?
	private var last_line_blank = new HashMap[MdNode, Bool]

	# Reset the parser state before parsing a new input
	private fun initialize do
		# Forget the blocks of any previous run
		active_block_parsers.clear
		all_block_parsers.clear
		last_line_blank.clear

		# Rewind the cursor to the first line, first column
		line = 1
		index = 0
		column = 0
		column_is_in_tab = false
		next_non_space_index = 0
		next_non_space_column = 0
		indent = 0
		is_blank = false
	end

	# Parse the `input` string as a MdDocument
	#
	# `input` is cut at each `\r`, `\n` or `\r\n` and every line is given to
	# `incorporate_line`. The blocks still open at the end are then finalized
	# and the inline content of every registered block parser is parsed.
	fun parse(input: String): MdDocument do
		initialize

		var document_block_parser = new MdDocumentBlockParser(1, 1, 0)
		activate_block_parser(document_block_parser)

		var line_start = 0
		var line_break = find_line_break(input, line_start)
		while line_break != -1 do
			incorporate_line(input.substring(line_start, line_break - line_start))

			# A `\r\n` pair counts as a single line break
			if line_break + 1 < input.length and input.chars[line_break] == '\r' and input.chars[line_break + 1] == '\n' then
				line_start = line_break + 2
			else
				line_start = line_break + 1
			end

			line += 1
			column = 0
			line_break = find_line_break(input, line_start)
		end

		# Incorporate the pending line when `input` does not end with a line break
		if input.length > 0 and (line_start == 0 or line_start < input.length) then
			incorporate_line(input.substring(line_start, input.length - line_start))
		end
		finalize_blocks(active_block_parsers)

		# Walk through the registered block parsers, from the last one to the first,
		# parsing string content into inline content where appropriate.
		var parsers = all_block_parsers.to_a
		var i = parsers.length
		while i > 0 do
			i -= 1
			parsers[i].parse_inlines(inline_parser)
		end

		return document_block_parser.block
	end

	# Post-process the `document`
	#
	# Apply each processor of `post_processors`, in order.
	fun post_process(document: MdDocument) do
		for post_processor in post_processors do
			post_processor.post_process(self, document)
		end
	end

	# Analyze a line of text and update the document
	#
	# We parse Markdown text by calling this on each line of `input`.
private fun incorporate_line(input: String) do
		line_string = input
		index = 0
		column = 0
		column_is_in_tab = false

		# For each containing block, try to parse the associated line start.
		# The parser at position 0 is skipped and counted as matched.
		var matches = 1
		for i in [1 .. active_block_parsers.length[ do
			var candidate = active_block_parsers[i]
			find_next_non_space

			var result = candidate.try_continue(self)
			if result == null then break

			if result.is_finalize then
				# The line closes this block: nothing else to do with it
				candidate.finalize(self)
				return
			end

			if result.new_index != -1 then
				set_new_index(result.new_index)
			else if result.new_column != -1 then
				set_new_column(result.new_column)
			end
			matches += 1
		end

		var unmatched_block_parsers = active_block_parsers.subarray(matches, active_block_parsers.length - matches)
		var last_matched_block_parser = active_block_parsers[matches - 1]
		var block_parser = last_matched_block_parser
		var all_closed = unmatched_block_parsers.is_empty

		# Unless last matched container is a code block, try new container starts,
		# adding children to the last matched container.
		var try_block_starts = block_parser.block isa MdParagraph or block_parser.block.is_container
		while try_block_starts do
			find_next_non_space

			# Shortcut: a blank line, or a line starting with a letter at an
			# indent below 4, is not submitted to the block parser factories.
			if is_blank or (indent < 4 and line_string.chars[next_non_space_index].is_letter) then
				set_new_index(next_non_space_index)
				break
			end

			var block_start = find_block_start(block_parser)
			if block_start == null then
				set_new_index(next_non_space_index)
				break
			end

			# A new block starts: the unmatched blocks can not continue
			if not all_closed then
				finalize_blocks(unmatched_block_parsers)
				all_closed = true
			end

			if block_start.new_index != -1 then
				set_new_index(block_start.new_index)
			else if block_start.new_column != -1 then
				set_new_column(block_start.new_column)
			end

			if block_start.replace_active_block_parser then
				remove_active_block_parser
			end

			for started in block_start.block_parsers do
				add_child(started)
				block_parser = started
				try_block_starts = started.block.is_container
			end
		end

		# What remains at the offset is a text line.
		# Add the text to the appropriate block.

		# First check for a lazy paragraph continuation
		if not all_closed and not is_blank and active_block_parser isa MdParagraphParser then
			add_line
			return
		end

		# Finalize any blocks not matched
		if not all_closed then
			finalize_blocks(unmatched_block_parsers)
		end
		propagate_last_line_blank(block_parser, last_matched_block_parser)

		if not block_parser.block.is_container then
			add_line
		else if not is_blank then
			# Create a paragraph container for the line
			add_child(new MdParagraphParser(line, column + 1, block_parser.content_offset))
			add_line
		end
	end

	# Find what kind of block starts at the current position of the line
	#
	# Return the result of the first factory of `block_parser_factories` that
	# accepts to start a block, or `null` if none does.
	private fun find_block_start(block_parser: MdBlockParser): nullable MdBlockStart do
		for factory in block_parser_factories do
			var block_start = factory.try_start(self, block_parser)
			if block_start != null then return block_start
		end
		return null
	end

	# Add the block of `block_parser` as a child of the active block parser's block
	#
	# `block_parser` becomes the active block parser.
	private fun add_child(block_parser: MdBlockParser) do
		var child = block_parser.block

		# Finalize the active blocks that can not contain the new block
		while not active_block_parser.block.can_contain(child) do
			active_block_parser.finalize(self)
		end

		# Append the new block to its parent
		active_block_parser.block.append_child(child)
		activate_block_parser(block_parser)
	end

	# Add line content to the active block parser
	#
	# We assume it can accept lines.
	private fun add_line do
		if not column_is_in_tab then
			active_block_parser.add_line(line_string.substring(index, line_string.length - index))
			return
		end

		# Our column is in a partially consumed tab.
		# Expand the remaining columns to the next tab stop to spaces.
		var after_tab = index + 1
		var rest = line_string.substring(after_tab, line_string.length - after_tab)
		var spaces = column.columns_to_next_tab_stop
		var expanded = new Buffer
		for i in [0 .. spaces[ do
			expanded.add(' ')
		end
		expanded.append(rest)
		active_block_parser.add_line(expanded.write_to_string)
	end

	# Finalize `block_parsers`, from the last one to the first
	#
	# An index loop is used because `block_parsers` can be `active_block_parsers`
	# itself, which `MdBlockParser::finalize` may pop.
	private fun finalize_blocks(block_parsers: Sequence[MdBlockParser]) do
		var i = block_parsers.length
		while i > 0 do
			i -= 1
			block_parsers[i].finalize(self)
		end
	end

	# Advance the `index` position to the next character
	#
	# Also set the `column`.
	# If the consumed character is a tab, `column` moves to the next tab stop.
	private fun advance do
		if line_string.chars[index] == '\t' then
			column += column.columns_to_next_tab_stop
		else
			column += 1
		end
		index += 1
	end

	# Look for the next non-space character starting from `index`
	#
	# Set `next_non_space_index`, `next_non_space_column`, `is_blank` and `indent`.
	# `index` and `column` are left untouched.
	private fun find_next_non_space do
		var i = index
		var cols = column

		is_blank = true
		while i < line_string.length do
			var c = line_string.chars[i]
			if c == ' ' then
				cols += 1
			else if c == '\t' then
				# Jump to the next 4-columns tab stop
				cols += 4 - (cols % 4)
			else
				is_blank = false
				break
			end
			i += 1
		end

		next_non_space_index = i
		next_non_space_column = cols
		indent = next_non_space_column - column
	end

	# Return the position of the next line break
	#
	# We consider `\r` and `\n`.
	# Return `-1` if there is no line break from `start_index`.
	private fun find_line_break(input: String, start_index: Int): Int do
		var i = start_index
		while i < input.length do
			var c = input.chars[i]
			if c == '\r' or c == '\n' then return i
			i += 1
		end
		return -1
	end

	# Set the parser `index` at `new_index`
	#
	# Also set `column` and `column_is_in_tab`.
private fun set_new_index(new_index: Int) do
		if new_index >= next_non_space_index then
			# We can start from here, no need to calculate tab stops again
			index = next_non_space_index
			column = next_non_space_column
		end
		while index < new_index and index != line_string.length do
			advance
		end
		# If we're going to an index as opposed to a column, we're never within a tab
		column_is_in_tab = false
	end

	# Set the parser `column` at `new_column`
	#
	# Also set `index` and `column_is_in_tab`.
	private fun set_new_column(new_column: Int) do
		if new_column >= next_non_space_column then
			# We can start from here, no need to calculate tab stops again
			index = next_non_space_index
			column = next_non_space_column
		end
		while column < new_column and index != line_string.length do
			advance
		end

		# Last character was a tab and we overshot our target:
		# step back into the tab and remember we are in the middle of it.
		var overshot = column > new_column
		if overshot then
			index -= 1
			column = new_column
		end
		column_is_in_tab = overshot
	end

	# Does `block` end with a blank line?
	#
	# For lists and list items, the last children are checked too.
	private fun ends_with_blank_line(block: nullable MdNode): Bool do
		var node = block
		while node != null do
			if is_last_line_blank(node) then return true
			if node isa MdListBlock or node isa MdListItem then
				node = node.last_child
			else
				break
			end
		end
		return false
	end

	# Record whether the current line is blank for the block of `block_parser` and its parents
	private fun propagate_last_line_blank(block_parser: MdBlockParser, last_matched_block_parser: MdBlockParser) do
		var block = block_parser.block

		var last_child = block.last_child
		if is_blank and last_child != null then
			self.last_line_blank[last_child] = true
		end

		# Block quotes lines are never blank as they start with `>`.
		# We don't count blanks in fenced code for purposes of tight/loose lists.
		# We also don't set `last_line_blank` on an empty list item.
		var ends_blank = is_blank and not (block isa MdBlockQuote or block isa MdFencedCodeBlock or (block isa MdListItem and block.first_child == null and block_parser != last_matched_block_parser))

		# Propagate the flag up through parents
		var node: nullable MdNode = block
		while node != null do
			self.last_line_blank[node] = ends_blank
			node = node.parent
		end
	end

	# Is last line blank for `node`?
	#
	# Nodes without an entry in `last_line_blank` are considered not blank.
	private fun is_last_line_blank(node: MdNode): Bool do
		return last_line_blank.has_key(node) and last_line_blank[node]
	end
end

# Block parsing

# Parser for a specific block node
abstract class MdBlockParser

	# Kind of block under construction
	type BLOCK: MdBlock

	# MdBlock under construction
	fun block: BLOCK is abstract

	# Line start
	var line_start: Int

	# Column start
	var column_start: Int

	# Location at start
	#
	# The location end is initialized at `-1`; subclasses set it later,
	# in `finalize` or `parse_inlines`.
	var location: MdLocation is lazy do return new MdLocation(line_start, column_start, -1, -1)

	# Column where the content starts
	var content_offset: Int

	# Initialize the current `block`
	#
	# Do nothing by default.
	fun initialize(parser: MdParser) do end

	# Can `self` continue from the current `index` in `state`?
	#
	# Return a new `MdBlockContinue` if `self` can continue parsing.
	# Return null otherwise.
	fun try_continue(state: MdParser): nullable MdBlockContinue is abstract

	# Add `line` to the current `block`
	#
	# Do nothing by default.
	fun add_line(line: String) do end

	# Finalize the current `block`
	#
	# Deactivate `self` from `parser` if it is its active block parser.
	fun finalize(parser: MdParser) do
		if parser.active_block_parser == self then
			parser.deactivate_block_parser
		end
	end

	# Parse `block` lines
	#
	# Do nothing by default.
	fun parse_inlines(inline_parser: MdInlineParser) do end
end

# Result object for continuing parsing of a block
class MdBlockContinue

	# Index from which to continue parsing (`-1` if unset)
	var new_index: Int

	# Column from which to continue parsing (`-1` if unset)
	var new_column: Int

	# Is the block finalized?
var is_finalize: Bool

	# Continue from index
	init at_index(new_index: Int) do
		init(new_index, -1, false)
	end

	# Continue from column
	init at_column(new_column: Int) do
		init(-1, new_column, false)
	end

	# Block is finished
	init finished do
		init(-1, -1, true)
	end
end

# Block parser factory for a block node for determining when a block starts
abstract class MdBlockParserFactory

	# Can the associated block parser start at the current line in `parser`?
	#
	# Return a new `MdBlockStart` if the block parser can start.
	# Return null otherwise.
	fun try_start(parser: MdParser, matched_block_parser: MdBlockParser): nullable MdBlockStart is abstract
end

# Result object from starting parsing of a block
class MdBlockStart

	# Block parsers for this block start
	var block_parsers: Array[MdBlockParser]

	# Index where the parsing should start (`-1` if unset)
	var new_index = -1

	# Column where the parsing should start (`-1` if unset)
	var new_column = -1

	# Does the block starting with `self` terminate a previous block?
	var replace_active_block_parser = false

	# Start from `new_index`
	#
	# Return `self` so calls can be chained.
	fun at_index(new_index: Int): MdBlockStart do
		self.new_index = new_index
		return self
	end

	# Start from `new_column`
	#
	# Return `self` so calls can be chained.
	fun at_column(new_column: Int): MdBlockStart do
		self.new_column = new_column
		return self
	end

	# Start replacing the active block parser
	#
	# Return `self` so calls can be chained.
	fun replacing_active_block_parser: MdBlockStart do
		self.replace_active_block_parser = true
		return self
	end
end

# Parser for the whole document
class MdDocumentBlockParser
	super MdBlockParser

	redef type BLOCK: MdDocument
	redef var block = new MdDocument(location) is lazy

	# Always continue at current index
	redef fun try_continue(state) do return new MdBlockContinue.at_index(state.index)

	# Do nothing: finalizing the document does not deactivate its parser
	redef fun finalize(parser) do end

	# Set the end of `location` from the last child of the document, if any
	redef fun parse_inlines(inline_parser) do
		var last_child = block.last_child
		if last_child != null then
			location.line_end = last_child.location.line_end
			location.column_end = last_child.location.column_end
		end
	end
end

# Headings parser
class MdHeadingParser
	super MdBlockParser

	redef type BLOCK: MdHeading

	redef var block = new MdHeading(location, level, is_setext, has_atx_trailing) is lazy

	# The heading location is fully known at creation time
	redef var location = new MdLocation(line_start, column_start, line_end, column_end) is lazy

	# Line end
	var line_end: Int

	# Column end
	var column_end: Int

	# Heading level
	var level: Int

	# Heading content
	var content: String

	# Heading has ATX trailing
	var has_atx_trailing: Bool

	# Heading is setext format
	var is_setext: Bool

	# Never continue parsing as a heading is a one liner
	redef fun try_continue(state) do return null

	# Parse the heading content
	redef fun parse_inlines(inline_parser) do
		inline_parser.parse(content, content_offset, block)
	end
end

# Heading parser factory
class MdHeadingParserFactory
	super MdBlockParserFactory

	# Try to start an ATX heading, or a setext heading closing a paragraph
	#
	# The setext form is only tried when `matched_block_parser` is a paragraph
	# parser that still has content; the paragraph is then replaced by the heading.
	redef fun try_start(state, matched_block_parser) do
		if state.indent >= 4 then return null

		var next_non_space = state.next_non_space_index
		var line = state.line_string
		var paragraph = null
		if matched_block_parser isa MdParagraphParser then
			paragraph = matched_block_parser.content
		end

		var line_content = line.substring(next_non_space, line.length - next_non_space)
		var match = line_content.search(re_atx_heading)
		if match != null then
			# ATX heading
			var new_offset = next_non_space + match.subs.first.as(not null).length
			var level = match.subs.first.as(not null).to_s.trim.length
			# remove trailing ###s
			var after_leading = line.substring(new_offset, line.length - new_offset)
			var trailing = after_leading.search(re_atx_trailing)
			var has_trailing = trailing != null
			var trailing_length = if trailing != null then trailing.length else 0
			var content = after_leading.replace(re_atx_trailing, "")
			var atx_parser = new MdHeadingParser(
				state.line,
				next_non_space + 1,
				new_offset + 1,
				state.line,
				new_offset + content.length + trailing_length,
				level,
				content,
				has_trailing, false)
			return (new MdBlockStart([atx_parser])).at_index(line.length)
		end

		if paragraph == null then return null

		# Setext heading: the underline turns the paragraph into a heading
		match = line_content.search(re_setext_heading)
		if match == null then return null
		var level = 2
		if match.subs.first.as(not null).to_s.chars.first == '=' then level = 1
		var content = paragraph.to_s
		var setext_parser = new MdHeadingParser(
			state.line - 1,
			next_non_space + 1,
			0,
			state.line,
			state.column + match.length,
			level,
			content,
			false, true)
		return (new MdBlockStart([setext_parser])).at_index(line.length).replacing_active_block_parser
	end
end

# Blockquotes parser
class MdBlockQuoteParser
	super MdBlockParser

	redef type BLOCK: MdBlockQuote
	redef var block = new MdBlockQuote(location) is lazy

	# Continue if the line starts with `>` at an indent below 4
	redef fun try_continue(state) do
		var next_non_space = state.next_non_space_index
		var indent = state.indent
		var line = state.line_string

		if indent >= 4 then return null
		if next_non_space >= line.length then return null
		if line.chars[next_non_space] != '>' then return null

		var new_column = state.column + state.indent + 1
		# optional following space or tab
		if state.line_string.is_space_or_tab(next_non_space + 1) then
			new_column += 1
		end
		return new MdBlockContinue.at_column(new_column)
	end

	# Set the end of `location` from the last child of the block quote, if any
	redef fun parse_inlines(inline_parser) do
		var last_child = block.last_child
		if last_child != null then
			location.line_end = last_child.location.line_end
			location.column_end = last_child.location.column_end
		end
	end
end

# Blockquotes parser factory
class MdBlockQuoteParserFactory
	super MdBlockParserFactory

	# Start a block quote if the line starts with `>` at an indent below 4
	redef fun try_start(state, matched_block_parser) do
		var next_non_space = state.next_non_space_index
		var indent = state.indent
		var line = state.line_string

		if indent >= 4 then return null
		if next_non_space >= line.length then return null
		if line.chars[next_non_space] != '>' then return null

		var new_column = state.column + state.indent + 1
		# optional following space or tab
		if state.line_string.is_space_or_tab(next_non_space + 1) then
			new_column += 1
		end
		var quote_parser = new MdBlockQuoteParser(
			state.line,
			state.column + 1,
			new_column)
		return (new MdBlockStart([quote_parser])).at_column(new_column)
	end
end

# Indented code blocks parser
class MdIndentedCodeBlockParser
	super MdBlockParser

	redef type BLOCK: MdIndentedCodeBlock
	redef var block = new MdIndentedCodeBlock(location, use_tabs) is lazy

	# Indent is tab?
	var use_tabs: Bool

	# Block content
	var content = new Buffer

	# Continue on lines indented by 4 columns or more, and on blank lines
	redef fun try_continue(state) do
		if state.indent >= 4 then
			return new MdBlockContinue.at_column(state.column + 4)
		else if state.is_blank then
			return new MdBlockContinue.at_index(state.next_non_space_index)
		end
		return null
	end

	# Append `line` to `content`, separated from the previous line by `\n`
	redef fun add_line(line) do
		if not content.is_empty then
			content.add('\n')
		end
		content.append(line)
	end

	# Set the block literal and the end of `location`
	redef fun finalize(parser) do
		super

		# NOTE(review): the extra " " line presumably guarantees that
		# `re_trailing_blank_lines` has something to match - confirm.
		add_line(" ")
		var content = self.content.to_s
		var literal = content.replace_first(re_trailing_blank_lines, "\n")
		block.literal = literal

		var lines = literal.split("\n")
		location.line_end = location.line_start + lines.length - 2
		location.column_end = content_offset + lines[lines.length - 2].length + 4
	end
end

# Indented code blocks parser factory
class MdIndentedCodeBlockParserFactory
	super MdBlockParserFactory

	# Start an indented code block on a non-blank line indented by 4 columns or more
	#
	# An indented code block can not interrupt a paragraph.
	redef fun try_start(state, matched_block_parser) do
		if state.indent < 4 then return null
		if state.is_blank then return null
		if state.active_block_parser.block isa MdParagraph then return null

		var use_tabs = state.line_string.has_prefix("\t")
		var code_parser = new MdIndentedCodeBlockParser(
			state.line,
			state.column + 1,
			state.column,
			use_tabs)
		return (new MdBlockStart([code_parser])).at_column(state.column + 4)
	end
end

# Fenced code blocks parser
class MdFencedCodeBlockParser
	super MdBlockParser

	redef type BLOCK: MdFencedCodeBlock
	redef var block = new MdFencedCodeBlock(location, fence_char, fence_length, fence_indent) is lazy

	# Fence character
	var fence_char: Char

	# Fence length
	var fence_length: Int

	# Fence indent
	var fence_indent: Int

	# Fence first line
	var first_line: nullable String = null

	# Fence other lines
	var other_lines = new Buffer

	# Continue until a closing fence at least as long as the opening one is found
	redef fun try_continue(state) do
		var next_non_space = state.next_non_space_index
		var new_index = state.index
		var line = state.line_string

		if state.indent <= 3 and next_non_space < line.length and line.chars[next_non_space] == fence_char then
			var match = line.substring(next_non_space, line.length - next_non_space).search(re_closing_fence)
			if match != null and match.subs[0].as(not null).length >= fence_length then
				# closing fence - we're at end of line, so we can finalize now
				return new MdBlockContinue.finished
			end
		end

		# skip optional spaces of fence indent
		var i = fence_indent
		while i > 0 and new_index < line.length and line.chars[new_index] == ' ' do
			new_index += 1
			i -= 1
		end

		return new MdBlockContinue.at_index(new_index)
	end

	# Store the first line apart, append the other ones to `other_lines`
	redef fun add_line(line) do
		if first_line == null then
			first_line = line
		else
			other_lines.append(line)
			other_lines.add '\n'
		end
	end

	# Set the block info string, its literal and the end of `location`
	redef fun finalize(parser) do
		super

		# first line becomes the info string
		var first_line = self.first_line
		if first_line != null then
			var info = first_line.trim.unescape_string
			if not info.is_empty then block.info = info
		end

		var content = other_lines.to_s
		block.literal = content

		var lines = content.split("\n")
		location.line_end = location.line_start + lines.length
		location.column_end = content_offset + fence_indent + fence_length
	end
end

# Fenced code blocks parser factory
#
# NOTE(review): extends `MdBlockQuoteParserFactory` although `try_start` is
# fully redefined and nothing else is inherited from it; `MdBlockParserFactory`
# looks like the intended parent - confirm before changing the hierarchy.
class MdFencedCodeBlockParserFactory
	super MdBlockQuoteParserFactory

	# Start a fenced code block on an opening fence at an indent below 4
	#
	# The fence is refused when its own character appears again after it.
	redef fun try_start(state, matched_block_parser) do
		var next_non_space = state.next_non_space_index
		var line = state.line_string

		if state.indent >= 4 then return null

		var match = line.substring(next_non_space, line.length - next_non_space).search(re_opening_fence)
		if match == null then return null

		var fence_length
		var fence_char
		var sub0 = match.subs[0]
		if sub0 != null then
			fence_length = sub0.length
			fence_char = sub0.to_s.chars.first
		else
			fence_length = match.subs[2].as(not null).length
			fence_char = match.subs[2].as(not null).to_s.chars.first
		end

		# Only look for a repetition of the character used by this fence:
		# a backtick fence may have a `~` in the rest of its line.
		var repeated_fence = if fence_char == '`' then "[^`]+`" else "[^~]+~"
		if match.to_s.has(repeated_fence.to_re) then return null

		var fence_parser = new MdFencedCodeBlockParser(
			state.line,
			state.column + 1,
			state.column,
			fence_char,
			fence_length,
			state.indent)
		return (new MdBlockStart([fence_parser])).at_index(next_non_space + fence_length)
	end
end

# List blocks parser
class MdListBlockParser
	super MdBlockParser

	redef type BLOCK: MdListBlock

	redef var block is lazy do
		if is_ordered then
			return new MdOrderedList(location, digit.as(not null), delim.as(not null))
		else
			return new MdUnorderedList(location, bullet.as(not null))
		end
	end

	# Is this list ordered
	var is_ordered: Bool

	# List bullet if unordered
	var bullet: nullable Char

	# List digit if ordered
	var digit: nullable Int

	# List delimiter if ordered
	var delim: nullable Char

	# Always continue at current index
	redef fun try_continue(state) do return new MdBlockContinue.at_index(state.index)

	# Mark the list as loose if blank lines separate its items or their children
	redef fun finalize(parser) do
		super

		var item = block.first_child
		while item != null do
			# check for non-final list item ending with blank line
			if parser.ends_with_blank_line(item) and item.next != null then
				block.is_tight = false
				return
			end
			# look into the children of the list item to see if there are spaces between any of them
			var sub_item = item.first_child
			while sub_item != null do
				if parser.ends_with_blank_line(sub_item) and (item.next != null or sub_item.next != null) then
					block.is_tight = false
					# The list is known to be loose, no need to scan the remaining items
					return
				end
				sub_item = sub_item.next
			end
			item = item.next
		end
	end

	# Set the end of `location` from the last child of the list, if any
	redef fun parse_inlines(inline_parser) do
		var last_child = block.last_child
		if last_child != null then
			location.line_end = last_child.location.line_end
			location.column_end = last_child.location.column_end
		end
	end
end

# List blocks parser factory
#
# NOTE(review): extends `MdBlockQuoteParserFactory` although `try_start` is
# fully redefined; `MdBlockParserFactory` looks like the intended parent - confirm.
class MdListBlockParserFactory
	super MdBlockQuoteParserFactory

	# Start a list item, preceded by a new list block when needed
	#
	# A new list block is started unless `matched_block_parser` is a list block
	# parser whose list matches the marker found (see `lists_match`).
	redef fun try_start(state, matched_block_parser) do
		if state.indent >= 4 and not matched_block_parser isa MdListBlockParser then return null

		var marker_index = state.next_non_space_index
		var marker_column = state.column + state.indent

		var in_paragraph = matched_block_parser isa MdParagraphParser and matched_block_parser.content != null
		var list_data = parse_list_marker(state, state.line_string, marker_index, marker_column, in_paragraph)
		if list_data == null then return null

		var new_column = list_data.content_column
		var list_item_parser = new MdListItemParser(
			state.line,
			state.column + 1,
			new_column,
			new_column - state.column)

		# prepend the list block if needed
		if not matched_block_parser isa MdListBlockParser or not lists_match(matched_block_parser.block, list_data) then
			var list_block_parser = new MdListBlockParser(
				state.line,
				state.column + 1,
				new_column - state.column,
				list_data.is_ordered, list_data.bullet, list_data.digit, list_data.delim)
			list_block_parser.block.is_tight = true

			return (new MdBlockStart([list_block_parser, list_item_parser: MdBlockParser])).at_column(new_column)
		end
		return (new MdBlockStart([list_item_parser])).at_column(new_column)
	end

	# Parse the list marker starting at `marker_index` in `line`
	#
	# Return the parsed list data, or `null` if there is no list marker or if
	# the marker can not interrupt the current paragraph (`in_paragraph`).
	private fun parse_list_marker(state: MdParser, line: String, marker_index, marker_column: Int, in_paragraph: Bool): nullable MdListData do
		var rest = line.substring(marker_index, line.length - marker_index)
		var match = rest.search(re_list_marker)
		if match == null then return null

		var is_ordered
		var bullet = null
		var digit = null
		var delim = null

		var bullet_match = match.subs[0]
		if bullet_match != null then
			is_ordered = false
			bullet = bullet_match.to_s.chars[0]
		else
			is_ordered = true
			digit = match.subs[2].as(not null).to_s.to_i
			delim = match.subs[3].as(not null).to_s.chars[0]
		end

		# Do not count the space or tab matched after the marker
		var marker_length = match.length
		if match.to_s.has_suffix(" ") or match.to_s.has_suffix("\t") then
			marker_length -= 1
		end

		var index_after_marker = marker_index + marker_length

		# marker doesn't include tabs, so counting them as column directly is ok
		var column_after_marker = marker_column + marker_length
		# the column within the line where the content starts
		var content_column = column_after_marker

		# see at which column the content starts if there is content
		var has_content = false
		for i in [index_after_marker .. line.length[ do
			var c = line.chars[i]
			if c == '\t' then
				content_column += content_column.columns_to_next_tab_stop
			else if c == ' ' then
				content_column += 1
			else
				has_content = true
				break
			end
		end

		if in_paragraph then
			# if the list item is ordered, then start number must be 1 to interrupt a paragraph
			if is_ordered and digit != 1 then
				return null
			end
			# empty list item can not interrupt a paragraph
			if not has_content then
				return null
			end
		end

		if not has_content or (content_column - column_after_marker) > 4 then
			# if this line is blank or has a code block, default to 1 space after marker
			content_column = column_after_marker + 1
		end

		return new MdListData(is_ordered, bullet, digit, delim, content_column)
	end

	# Return true if the two list items are of the same type
	#
	# With the same delimiter and bullet character.
	# This is used in agglomerating list items into lists
	private fun lists_match(a: MdListBlock, b: MdListData): Bool do
		if a isa MdUnorderedList and not b.is_ordered then
			return a.bullet_marker == b.bullet
		else if a isa MdOrderedList and b.is_ordered then
			return a.delimiter == b.delim
		end
		return false
	end
end

# Parsed list data
private class MdListData

	# Is the list ordered?
	var is_ordered: Bool

	# Bullet character (set for unordered lists)
	var bullet: nullable Char

	# Start number (set for ordered lists)
	var digit: nullable Int

	# Delimiter character (set for ordered lists)
	var delim: nullable Char

	# Column the content starts at
	var content_column: Int
end

# List items parser
class MdListItemParser
	super MdBlockParser

	redef type BLOCK: MdListItem
	redef var block = new MdListItem(location) is lazy

	# List item content indent
	var content_indent: Int

	# Continue on blank lines (unless the item is empty) and on lines
	# indented by at least `content_indent`
	redef fun try_continue(state) do
		if state.is_blank then
			if block.first_child == null then
				# blank line after empty list item
				return null
			end
			return new MdBlockContinue.at_index(state.next_non_space_index)
		end
		if state.indent >= content_indent then
			return new MdBlockContinue.at_column(state.column + content_indent)
		end
		return null
	end

	# Set the end of `location` from the last child of the item, if any
	redef fun parse_inlines(inline_parser) do
		var last_child = block.last_child
		if last_child != null then
			location.line_end = last_child.location.line_end
			location.column_end = last_child.location.column_end
		end
	end
end

# Thematic breaks parser
class MdThematicBreakParser
	super MdBlockParser

	redef type BLOCK: MdThematicBreak
	redef var block = new MdThematicBreak(location, pattern) is lazy

	# Thematic break pattern
	var pattern: String

	# Never continue parsing as a thematic break is a one liner
	redef fun try_continue(state) do return null

	# Set the end of `location` from `pattern`
	redef fun finalize(parser) do
		super
		location.line_end = line_start
		location.column_end = column_start + pattern.length - 1
	end
end

# Thematic breaks parser factory
#
# NOTE(review): extends `MdBlockQuoteParserFactory` although `try_start` is
# fully redefined; `MdBlockParserFactory` looks like the intended parent - confirm.
class MdThematicBreakParserFactory
	super MdBlockQuoteParserFactory

	# Start a thematic break if the line matches `re_thematic_break` at an indent below 4
	redef fun try_start(state, matched_block_parser) do
		if state.indent >= 4 then return null

		var next_non_space = state.next_non_space_index
		var line = state.line_string
		var tbreak = line.substring(next_non_space, line.length - next_non_space).search(re_thematic_break)
		if tbreak != null then
			var break_parser = new MdThematicBreakParser(
				state.line,
				state.column + 1,
				next_non_space,
				tbreak.to_s)
			return (new MdBlockStart([break_parser])).at_index(line.length)
		end
		return null
	end
end

# Paragraphs parser
class MdParagraphParser
	super MdBlockParser

	redef type BLOCK: MdParagraph

	redef var block = new MdParagraph(location) is lazy

	# Paragraph content
	#
	# Set to `null` by `finalize` when the paragraph only held link reference definitions.
	var content: nullable Buffer = new Buffer

	# Continue on any non-blank line
	redef fun try_continue(state) do
		if state.is_blank then return null
		return new MdBlockContinue.at_index(state.index)
	end

	# Append `line` to `content`, separated from the previous line by `\n`
	redef fun add_line(line) do
		var content = self.content
		if content == null then return
		if not content.is_empty then
			content.add('\n')
		end
		content.append(line)
	end

	# Strip the leading link reference definitions from `content`
	#
	# If nothing but blanks remains after them, the paragraph is unlinked.
	redef fun finalize(parser) do
		super

		var inline_parser = parser.inline_parser
		var content = self.content
		if content == null then return

		var content_string = content.to_s
		var has_reference_defs = false

		var pos = inline_parser.parse_reference(content_string)
		# try parsing the beginning as link reference definitions
		while content_string.length > 3 and content_string.chars[0] == '[' and pos != 0 do
			content_string = content_string.substring(pos, content_string.length - pos)
			has_reference_defs = true
			pos = inline_parser.parse_reference(content_string)
		end

		if has_reference_defs and content_string.is_blank then
			block.unlink
			self.content = null
		else
			self.content = new Buffer.from_text(content_string)
		end
	end

	# Parse `content` as inlines and set the end of `location` from the last child
	redef fun parse_inlines(inline_parser) do
		var content = self.content
		if content == null then return

		inline_parser.parse(content.to_s, content_offset, block)

		var last_child = block.last_child
		if last_child != null then
			location.line_end = last_child.location.line_end
			location.column_end = last_child.location.column_end
		end
	end
end

# Html blocks parser
class MdHtmlBlockParser
	super MdBlockParser

	redef type BLOCK: MdHtmlBlock

	redef var block = new MdHtmlBlock(location) is lazy

	# Closing tag pattern
	#
	# Or null if the block is not closed
	var closing_pattern: nullable Pattern

	# Is the current block finished?
	var finished = false

	# Block content
	var content = new Buffer

	# Continue until the block is `finished` or, without `closing_pattern`, until a blank line
	redef fun try_continue(state) do
		if finished then return null

		# blank line ends type 6 and 7 blocks
		if state.is_blank and closing_pattern == null then return null

		return new MdBlockContinue.at_index(state.index)
	end

	# Append `line` to `content` and mark the block `finished` if `line` has the `closing_pattern`
	redef fun add_line(line) do
		if not content.is_empty then
			content.add('\n')
		end
		content.append(line)
		var closing_pattern = self.closing_pattern
		if closing_pattern != null and line.has(closing_pattern) then
			finished = true
		end
	end

	# Set the block literal and the end of `location`
	redef fun finalize(parser) do
		super

		var content = self.content.to_s
		block.literal = content

		var lines = content.split("\n")
		location.line_end = location.line_start + lines.length - 1
		location.column_end = lines.last.length
	end
end

# Html blocks parser factory
class MdHtmlBlockParserFactory
	super MdBlockParserFactory

	redef fun try_start(state, matched_block_parser) do
		var next_non_space = state.next_non_space_index
		var line = state.line_string

		if state.indent >= 4 or line.chars[next_non_space] != '<' then return null

		for block_type in [0..6] do
			# type 7 can not interrupt a paragraph
			if block_type == 6 and matched_block_parser.block isa
MdParagraph then continue var opener = re_html_blocks[block_type].first var closer = re_html_blocks[block_type].last if line.substring(next_non_space, line.length - next_non_space).has(opener.as(not null)) then return (new MdBlockStart( [new MdHtmlBlockParser( state.line, state.column + 1, next_non_space, closer)]) ).at_index(state.index) end end return null end end # Post Processing # Markdown post processor # # A Markdown AST visitor called after parsing from a MdParser abstract class MdPostProcessor super MdVisitor # Document behing processed # # Availlable only during a call to `post_process`. var document: nullable MdDocument = null # Post process the `document` parsed by `parser` fun post_process(parser: MdParser, document: MdDocument) do self.document = document enter_visit(document) self.document = null end # Call `MdNode::post_process` redef fun visit(node) do node.post_process(self) end redef class MdNode # Accept the visit of a `MdPostProcessor` fun post_process(v: MdPostProcessor) do visit_all(v) end # Utils redef class Sys # ATX headings matching private var re_atx_heading: Regex = "^(#\{1,6\})([ \t]+|$)".to_re # ATX trailings matching private var re_atx_trailing: Regex = "(^|[ \t]+)#+[ \t]*$".to_re # SeText headings matching private var re_setext_heading: Regex = "^(=+|-+)[ \t]*$".to_re # Blank lines matching var re_trailing_blank_lines: Regex = "(\n[ \t]*)+$".to_re # Opening fence matching var re_opening_fence: Regex = "^(`\{3,\})(.*)|^(~\{3,\})(.*)".to_re # Closing fence matching var re_closing_fence: Regex = "^(`\{3,\}|~\{3,\})( *$)".to_re # List marker matching var re_list_marker: Regex = "^([*+-])( |\t|$)|^([0-9]\{1,9\})([.)])( |\t|$)".to_re # Thematic break pattern var re_thematic_break: Regex = "^((\\*[ \t]*)\{3,\}|(_[ \t]*)\{3,\}|(-[ \t]*)\{3,\})[ \t]*$".to_re # HTML blocks patterns var re_html_blocks: Array[Array[nullable Regex]] do var blocks = new Array[Array[nullable Regex]] var re0_opening = "^<(script|pre|style)(\\s|>|$)".to_re 
re0_opening.ignore_case = true var re0_closing = "".to_re re0_closing.ignore_case = true blocks.add([re0_opening, re0_closing]) blocks.add([ "^".to_re ]) blocks.add([ "^<[?]".to_re, "\\?>".to_re ]) blocks.add([ "^".to_re ]) blocks.add([ "^".to_re ]) var re5_opening = "^]|$)".to_re re5_opening.ignore_case = true blocks.add([re5_opening, null]) var p_tagname = "[A-Za-z][A-Za-z0-9-]*" var p_attribute_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*" var p_uquoted_value = "[^\"'=<>`\\x00-\\x20]+" var p_squoted_value = "'[^']*'" var p_dquoted_value = "\"[^\"]*\"" var p_attribute_value = "({p_uquoted_value}|{p_squoted_value}|{p_dquoted_value})" var p_attribute_value_spec = "(\\s*=\\s*{p_attribute_value})" var p_attribute = "(\\s{p_attribute_name}{p_attribute_value_spec}?)" var p_opentag = "<{p_tagname}{p_attribute}*\\s*/?>" var p_closetag = "]" var re6_opening = "^({p_opentag}|{p_closetag})\\s*$".to_re re6_opening.ignore_case = true blocks.add([re6_opening, null]) return blocks end end redef class Int # Tab stop is 4 private fun columns_to_next_tab_stop: Int do return 4 - (self % 4) end redef class String # Is this string blank? # # i.e. contains only spacing characters. private fun is_blank: Bool do for i in [0 .. length[ do var c = chars[i] if c == ' ' or c == '\t' or c == '\n' or c == '\r' then continue else return false end end return true end # Is the character at `index` a space or a tab # # Return false if `index > self.length`. private fun is_space_or_tab(index: Int): Bool do if index >= length then return false var c = chars[index] return c == ' ' or c == '\t' end end