Used to create the AST representation of a Markdown document.
markdown2 :: MdParser :: defaultinit
markdown2 :: MdParser :: post_process
Post-process thedocument
markdown2 :: MdParser :: post_processors
Post-processors applied after the parsing of a documentmarkdown2 :: MdParser :: post_processors=
Post-processors applied after the parsing of a documentmarkdown2 :: MdParser :: wikilinks_mode=
Enable wikilinks modecore :: Object :: class_factory
Implementation used byget_class
to create the specific class.
markdown2 :: MdParser :: defaultinit
core :: Object :: defaultinit
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
core :: Object :: output_class_name
Display class name on stdout (debug only).markdown2 :: MdParser :: post_process
Post-process thedocument
markdown2 :: MdParser :: post_processors
Post-processors applied after the parsing of a documentmarkdown2 :: MdParser :: post_processors=
Post-processors applied after the parsing of a documentmarkdown2 :: MdParser :: wikilinks_mode=
Enable wikilinks mode
# Markdown parser
#
# Used to create the AST representation of a Markdown document.
class MdParser
# Inline parser used to parse block content
private var inline_parser = new MdInlineParser is lazy
# Block parsers factories
private var block_parser_factories: Collection[MdBlockParserFactory] do
var factories = new Array[MdBlockParserFactory]
factories.add new MdBlockQuoteParserFactory
factories.add new MdHeadingParserFactory
factories.add new MdFencedCodeBlockParserFactory
factories.add new MdHtmlBlockParserFactory
factories.add new MdThematicBreakParserFactory
factories.add new MdListBlockParserFactory
factories.add new MdIndentedCodeBlockParserFactory
return factories
end
# Active block parsers
#
# Used as a stack to parse nested blocks.
private var active_block_parsers = new Array[MdBlockParser]
# All active block parsers
private var all_block_parsers = new HashSet[MdBlockParser]
# Return the active block parser
#
# The last entry in the `active_block_parsers` stack.
private fun active_block_parser: MdBlockParser do
return active_block_parsers.last
end
# Activate a `block_parser`
#
# Add the `block_parser` on the top of the `active_block_parsers` stack.
# Also register it in `all_block_parsers`.
private fun activate_block_parser(block_parser: MdBlockParser) do
active_block_parsers.add block_parser
all_block_parsers.add block_parser
end
# Deactivate the `active_block_parser`
private fun deactivate_block_parser do
active_block_parsers.pop
end
# Deactivate and remove the `active_block_parser` from the `all_block_parsers` list
private fun remove_active_block_parser do
var old = active_block_parser
deactivate_block_parser
all_block_parsers.remove(old)
old.block.unlink
end
# Post-processors applied after the parsing of a document
var post_processors = new Array[MdPostProcessor] is writable
# Currently parsed line
private var line_string: String is noinit
# Current index (offset) in input `line_string` (starts at 0)
private var index = 0
# Current column in input `line_string` (starts at 0)
#
# Tab causes column to go to next 4-space tab stop.
private var column = 0
# Is the current column within a tab character (partially consumed tab)
private var column_is_in_tab: Bool is noinit
# Current line in input string (starts at 1)
private var line = 1
# Index of the next non-space character starting from `index`
private var next_non_space_index = 0
# Next non-space column
private var next_non_space_column = 0
# Current indent in columns
#
# Either by spaces or tab stop of 4, starting from `column`.
private var indent = 0
# Is the current `line` blank starting from `index`?
private var is_blank: Bool is noinit
# Does a node end with a blank line?
private var last_line_blank = new HashMap[MdNode, Bool]
# Initialize parser state
private fun initialize do
active_block_parsers.clear
all_block_parsers.clear
index = 0
column = 0
column_is_in_tab = false
line = 1
next_non_space_index = 0
next_non_space_column = 0
indent = 0
is_blank = false
last_line_blank.clear
end
# Parse the `input` string as a MdDocument
fun parse(input: String): MdDocument do
initialize
var document_block_parser = new MdDocumentBlockParser(1, 1, 0)
activate_block_parser(document_block_parser)
var line_start = 0
var line_break = find_line_break(input, line_start)
while line_break != -1 do
var line_string = input.substring(line_start, line_break - line_start)
incorporate_line(line_string)
if line_break + 1 < input.length and
input.chars[line_break] == '\r' and
input.chars[line_break + 1] == '\n' then
line_start = line_break + 2
else
line_start = line_break + 1
end
line_break = find_line_break(input, line_start)
line += 1
column = 0
end
# Finalize pending line
if input.length > 0 and (line_start == 0 or line_start < input.length) then
incorporate_line(input.substring(line_start, input.length - line_start))
end
finalize_blocks(active_block_parsers)
# Walk through a block and its chiildren revursively
# Parsing string content into inline content where appropriate.
var all_block_parsers = all_block_parsers.to_a
var i = all_block_parsers.length - 1
while i >= 0 do
var block_parser = all_block_parsers[i]
block_parser.parse_inlines(inline_parser)
i -= 1
end
var document = document_block_parser.block
return document
end
# Post-process the `document`
fun post_process(document: MdDocument) do
for processor in post_processors do
processor.post_process(self, document)
end
end
# Analyze a line of text and update the document
#
# We parse Markdown text by calling this on each line of `input`.
private fun incorporate_line(input: String) do
line_string = input
index = 0
column = 0
column_is_in_tab = false
# For each containing block, try to parse the associated line start.
var matches = 1
for i in [1 .. active_block_parsers.length[ do
var block_parser = active_block_parsers[i]
find_next_non_space
var result = block_parser.try_continue(self)
if result isa MdBlockContinue then
if result.is_finalize then
block_parser.finalize(self)
return
else
if result.new_index != -1 then
set_new_index result.new_index
else if result.new_column != -1 then
set_new_column result.new_column
end
end
matches += 1
else
break
end
end
var unmatched_block_parsers = active_block_parsers.subarray(
matches, active_block_parsers.length - matches)
var last_matched_block_parser = active_block_parsers[matches - 1]
var block_parser = last_matched_block_parser
var all_closed = unmatched_block_parsers.is_empty
# Unless last matched container is a code block, try new container starts,
# adding children to the last matched container.
var try_block_starts = block_parser.block isa MdParagraph or
block_parser.block.is_container
while try_block_starts do
find_next_non_space
# Optimize lookup
if is_blank or (indent < 4 and line_string.chars[next_non_space_index].is_letter) then
set_new_index next_non_space_index
break
end
var block_start = find_block_start(block_parser)
if block_start == null then
set_new_index next_non_space_index
break
end
if not all_closed then
finalize_blocks(unmatched_block_parsers)
all_closed = true
end
if block_start.new_index != -1 then
set_new_index block_start.new_index
else if block_start.new_column != -1 then
set_new_column block_start.new_column
end
if block_start.replace_active_block_parser then
remove_active_block_parser
end
for new_block_parser in block_start.block_parsers do
add_child(new_block_parser)
block_parser = new_block_parser
try_block_starts = new_block_parser.block.is_container
end
end
# What remains at the offset is a text line.
# Add the text to the appropriate block.
# First check for a lazy paragraph continuation
if not all_closed and not is_blank and active_block_parser isa MdParagraphParser then
add_line
else
# Finalize any blocks not matched
if not all_closed then
finalize_blocks(unmatched_block_parsers)
end
propagate_last_line_blank(block_parser, last_matched_block_parser)
if not block_parser.block.is_container then
add_line
else if not is_blank then
# Create a paragraph container for the line
add_child(new MdParagraphParser(line, column + 1, block_parser.content_offset))
add_line
end
end
end
# Find what kind of block starts at `index` in `input`
private fun find_block_start(block_parser: MdBlockParser): nullable MdBlockStart do
for block_parser_factory in block_parser_factories do
var result = block_parser_factory.try_start(self, block_parser)
if result != null then return result
end
return null
end
# Add a `block_parser` block's as child of the active block parser block
private fun add_child(block_parser: MdBlockParser) do
# Finalize non-parentable blocks
while not active_block_parser.block.can_contain(block_parser.block) do
active_block_parser.finalize(self)
end
# Append block block parser block to its parent
active_block_parser.block.append_child(block_parser.block)
activate_block_parser(block_parser)
end
# Add line content to the active block parser
#
# We assume it can accept lines.
private fun add_line do
var content = null
if column_is_in_tab then
# Out column is in a partially consumed tab.
# Expand the remaining columns to the next tab stop to spaces.
var after_tab = index + 1
var rest = line_string.substring(after_tab, line_string.length - after_tab)
var spaces = column.columns_to_next_tab_stop
var buffer = new Buffer
for i in [0 .. spaces[ do
buffer.add ' '
end
buffer.append(rest)
content = buffer.write_to_string
else
content = line_string.substring(index, line_string.length - index)
end
active_block_parser.add_line(content)
end
# Finalize blocks of previous line
private fun finalize_blocks(block_parsers: Sequence[MdBlockParser]) do
var i = block_parsers.length - 1
while i >= 0 do
var block_parser = block_parsers[i]
block_parser.finalize(self)
i -= 1
end
end
# Advance the `index` position to the next character
#
# Also set the `column`.
# If the next character is a tab, compute the new column accordingly.
private fun advance do
var c = line_string.chars[index]
if c == '\t' then
index += 1
column += column.columns_to_next_tab_stop
else
index += 1
column += 1
end
end
# Move `index` to the next non-space character index in the `input` string
#
# Also set `next_non_space_index`, `next_non_space_column`, `is_blank` and `indent`.
private fun find_next_non_space do
var i = index
var cols = column
is_blank = true
while i < line_string.length do
var c = line_string.chars[i]
if c == ' ' then
i += 1
cols += 1
continue
else if c == '\t' then
i += 1
cols += 4 - (cols % 4)
continue
end
is_blank = false
break
end
next_non_space_index = i
next_non_space_column = cols
indent = next_non_space_column - column
end
# Return the position of the next line break
#
# We consider `\r` and `\n`.
private fun find_line_break(input: String, start_index: Int): Int do
for i in [start_index .. input.length[ do
var char = input.chars[i]
if char == '\r' or char == '\n' then return i
end
return -1
end
# Set the parser `index` at `new_index`
#
# Also set `column` and `column_is_in_tab`.
private fun set_new_index(new_index: Int) do
if new_index >= next_non_space_index then
# We can start from here, no need to calculate tab stops again
index = next_non_space_index
column = next_non_space_column
end
while index < new_index and index != line_string.length do
advance
end
# If we're going to an index as opposed to a column, we're never within a tab
column_is_in_tab = false
end
# Set the parser `column` at `new_column`
#
# Also set `index` and `column_is_in_tab`.
private fun set_new_column(new_column: Int) do
if new_column >= next_non_space_column then
# We can start from here, no need to calculate tab stops again
index = next_non_space_index
column = next_non_space_column
end
while column < new_column and index != line_string.length do
advance
end
if column > new_column then
# Last character was a tab and we overshot our target
index -= 1
column = new_column
column_is_in_tab = true
else
column_is_in_tab = false
end
end
# Does `block` end with a blank line?
private fun ends_with_blank_line(block: nullable MdNode): Bool do
while block != null do
if is_last_line_blank(block) then return true
if block isa MdListBlock or block isa MdListItem then
block = block.last_child
else
break
end
end
return false
end
# Propagate a blank line to all block_parser blocl's parents
private fun propagate_last_line_blank(block_parser: MdBlockParser, last_matched_block_parser: MdBlockParser) do
var last_child = block_parser.block.last_child
if is_blank and last_child != null then
last_line_blank[last_child] = true
end
var block = block_parser.block
# Block quotes lines are never blank as they start with `>`.
# We don't count blanks in fenced code for purposes of thight/loose lists.
# We also don't set `last_line_blank` on an empty list item.
var last_line_blank = is_blank and
not (block isa MdBlockQuote or
block isa MdFencedCodeBlock or
(block isa MdListItem and block.first_child == null and
block_parser != last_matched_block_parser))
# Propagate `last_line_blank` up through parents
var node: nullable MdNode = block_parser.block
while node != null do
self.last_line_blank[node] = last_line_blank
node = node.parent
end
end
# Is last line blank for `node`?
private fun is_last_line_blank(node: MdNode): Bool do
if not last_line_blank.has_key(node) then return false
return last_line_blank[node]
end
end
lib/markdown2/markdown_block_parsing.nit:23,1--474,3
redef class MdParser
# Enable Github mode
var github_mode = false is writable
redef var inline_parser is lazy do
var parser = super
parser.github_mode = github_mode
return parser
end
end
lib/markdown2/markdown_github.nit:28,1--38,3
redef class MdParser
# Enable wikilinks mode
var wikilinks_mode = false is writable
redef var inline_parser is lazy do
var parser = super
parser.wikilinks_mode = wikilinks_mode
return parser
end
end
lib/markdown2/markdown_wikilinks.nit:23,1--33,3