X-Git-Url: http://nitlanguage.org diff --git a/lib/markdown/markdown.nit b/lib/markdown/markdown.nit index 7c91ecc..613e1a4 100644 --- a/lib/markdown/markdown.nit +++ b/lib/markdown/markdown.nit @@ -30,12 +30,113 @@ import template # SEE: `String::md_to_html` for a shortcut. class MarkdownProcessor - var emitter: MarkdownEmitter is noinit + # `MarkdownEmitter` used for output. + var emitter: MarkdownEmitter is noinit, protected writable + + # Work in extended mode (default). + # + # Behavior changes when using extended mode: + # + # * Lists and code blocks end a paragraph + # + # In normal markdown the following: + # + # ~~~md + # This is a paragraph + # * and this is not a list + # ~~~ + # + # Will produce: + # + # ~~~html + #
This is a paragraph + # * and this is not a list
+ # ~~~ + # + # When using extended mode this changes to: + # + # ~~~html + #This is a paragraph
+ #import markdown
+ #
+ # print "Hello World!".md_to_html
+ #
+ # ~~~
+ #
+ # * Underscores (Emphasis)
+ #
+ # Underscores in the middle of a word like:
+ #
+ # ~~~md
+ # Con_cat_this
+ # ~~~
+ #
+ # normally produces this:
+ #
+ # ~~~html
+ # Concatthis
+ # ~~~ + # + # With extended mode they don't result in emphasis. + # + # ~~~html + #Con_cat_this
+ # ~~~ + # + # * Strikethrough + # + # Like in [GFM](https://help.github.com/articles/github-flavored-markdown), + # a strikethrough span is marked with `~~`. + # + # ~~~md + # ~~Mistaken text.~~ + # ~~~ + # + # becomes + # + # ~~~html + #"
+ if block isa BlockFence and block.meta != null then
+ v.add ""
+ else
+ v.add ""
+ end
v.emit_in block
v.add "
\n"
end
@@ -543,10 +803,10 @@ class HTMLDecorator
v.add ""
end
- redef fun add_super(v, text) do
- v.add ""
+ redef fun add_strike(v, text) do
+ v.add ""
v.add text
- v.add ""
+ v.add ""
end
redef fun add_image(v, link, name, comment) do
@@ -658,9 +918,36 @@ class HTMLDecorator
private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
+# Location in a Markdown input.
+#
+# A location is a span described by a starting line/column pair and a
+# stopping line/column pair.
+class MDLocation
+
+ # Starting line number (starting from 1).
+ var line_start: Int
+
+ # Starting column number (starting from 1).
+ var column_start: Int
+
+ # Stopping line number (starting from 1).
+ var line_end: Int
+
+ # Stopping column number (starting from 1).
+ var column_end: Int
+
+ # Format `self` as `line_start,column_start--line_end,column_end`.
+ redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
+
+ # Return a copy of `self`.
+ #
+ # The copy is a new `MDLocation` built from the same four coordinates.
+ fun copy: MDLocation do
+ return new MDLocation(line_start, column_start, line_end, column_end)
+ end
+end
+
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
class MDBlock
+
+ # Position of `self` in the input.
+ var location: MDLocation
+
# Kind of block.
# See `Block`.
var kind: Block = new BlockNone(self) is writable
@@ -713,7 +1000,14 @@ class MDBlock
# Split `self`, creating a new sub-block having `line` as its `last_line`.
fun split(line: MDLine): MDBlock do
- var block = new MDBlock
+ # location for new block
+ var new_loc = new MDLocation(
+ first_line.location.line_start,
+ first_line.location.column_start,
+ line.location.line_end,
+ line.location.column_end)
+ # create block
+ var block = new MDBlock(new_loc)
block.first_line = first_line
block.last_line = line
first_line = line.next
@@ -722,6 +1016,9 @@ class MDBlock
last_line = null
else
first_line.prev = null
+ # update current block loc
+ location.line_start = first_line.location.line_start
+ location.column_start = first_line.location.column_start
end
if first_block == null then
first_block = block
@@ -870,10 +1167,32 @@ abstract class Block
fun emit_blocks(v: MarkdownEmitter) do
var block = self.block.first_block
while block != null do
+ v.push_loc(block.location)
block.kind.emit(v)
+ v.pop_loc
block = block.next
end
end
+
+ # The raw content of the block as a multi-line string.
+ #
+ # Walks the lines of `block` starting at `first_line` and appends a "\n"
+ # after each of them; empty lines only contribute that newline.
+ # Outside of a `BlockFence`, lines matching the `has_prefix` test are
+ # appended from character 4 on; every other line is appended unchanged.
+ fun raw_content: String do
+ var infence = self isa BlockFence
+ var text = new FlatBuffer
+ var line = self.block.first_line
+ while line != null do
+ if not line.is_empty then
+ var str = line.value
+ # NOTE(review): the prefix literal below looks whitespace-collapsed in
+ # this rendering; presumably it is the 4-space code indent -- confirm.
+ if not infence and str.has_prefix(" ") then
+ # NOTE(review): assuming `substring` takes (from, count), the count
+ # below is not reduced by the 4 skipped chars -- confirm that the
+ # trailing whitespace is really meant to be dropped here.
+ text.append str.substring(4, str.length - line.trailing)
+ else
+ text.append str
+ end
+ end
+ text.append "\n"
+ line = line.next
+ end
+ return text.write_to_string
+ end
end
# A block without any markdown specificities.
@@ -914,13 +1233,21 @@ end
class BlockCode
super Block
+ # Any string found after fence token.
+ var meta: nullable Text
+
+ # Number of chars to skip at the beginning of the line.
+ #
+ # Block code lines start at 4 spaces.
+ protected var line_start = 4
+
redef fun emit(v) do v.decorator.add_code(v, self)
redef fun emit_lines(v) do
var line = block.first_line
while line != null do
if not line.is_empty then
- v.decorator.append_code(v, line.value, 4, line.value.length)
+ v.decorator.append_code(v, line.value, line_start, line.value.length)
end
v.addn
line = line.next
@@ -934,13 +1261,24 @@ end
# this class is only used for typing purposes.
class BlockFence
super BlockCode
+
+ # Fence code lines start at 0 spaces.
+ redef var line_start = 0
end
# A markdown headline.
class BlockHeadline
super Block
- redef fun emit(v) do v.decorator.add_headline(v, self)
+ redef fun emit(v) do
+ var loc = block.location.copy
+ loc.column_start += start
+ v.push_loc(loc)
+ v.decorator.add_headline(v, self)
+ v.pop_loc
+ end
+
+ private var start = 0
# Depth of the headline used to determine the headline level.
var depth = 0
@@ -969,6 +1307,7 @@ class BlockHeadline
line.leading = 0
line.trailing = 0
end
+ self.start = start
depth = level.min(6)
end
end
@@ -1083,6 +1422,9 @@ end
# A markdown line.
class MDLine
+ # Location of `self` in the original input.
+ var location: MDLocation
+
# Text contained in this line.
var value: String is writable
@@ -1102,8 +1444,8 @@ class MDLine
# Is the next line empty?
var next_empty: Bool = false is writable
- init(value: String) do
- self.value = value
+ # Initialize a new MDLine from its string value
+ init do
self.leading = process_leading
if leading != value.length then
self.is_empty = false
@@ -1309,10 +1651,10 @@ class LineOther
var was_empty = line.prev_empty
while line != null and not line.is_empty do
var t = v.line_kind(line)
- if v.in_list and t isa LineList then
+ if (v.in_list or v.ext_mode) and t isa LineList then
break
end
- if t isa LineCode or t isa LineFence then
+ if v.ext_mode and (t isa LineCode or t isa LineFence) then
break
end
if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
@@ -1322,7 +1664,6 @@ class LineOther
line = line.next
end
# build block
- var bk: Block
if line != null and not line.is_empty then
var block = v.current_block.split(line.prev.as(not null))
if v.in_list and not was_empty then
@@ -1452,7 +1793,9 @@ class LineFence
else
block = v.current_block.split(v.current_block.last_line.as(not null))
end
- block.kind = new BlockFence(block)
+ block.remove_surrounding_empty_lines
+ var meta = block.first_line.value.meta_from_fence
+ block.kind = new BlockFence(block, meta)
block.first_line.clear
var last = block.last_line
if last != null and v.line_kind(last) isa LineFence then
@@ -1520,7 +1863,7 @@ end
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
-class LineList
+abstract class LineList
super Line
redef fun process(v) do
@@ -1560,6 +1903,7 @@ class LineList
# Create a new block kind based on this line.
protected fun block_kind(block: MDBlock): BlockList is abstract
+ # Extract string value from `MDLine`.
protected fun extract_value(line: MDLine): String is abstract
end
@@ -1589,14 +1933,17 @@ end
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token
- # Position of `self` in markdown input.
+ # Location of `self` in the original input.
+ var location: MDLocation
+
+ # Position of `self` in input, independent of lines.
var pos: Int
# Character found at `pos` in the markdown input.
var char: Char
# Output that token using `MarkdownEmitter::decorator`.
- fun emit(v: MarkdownEmitter) do v.addc char
+ fun emit(v: MarkdownEmitter) do v.decorator.add_char(v, char)
end
# A token without a specific meaning.
@@ -1665,7 +2012,7 @@ abstract class TokenCode
redef fun emit(v) do
var a = pos + next_pos + 1
- var b = v.current_text.find_token(a, self)
+ var b = v.processor.find_token(v.current_text.as(not null), a, self)
if b > 0 then
v.current_pos = b + next_pos
while a < b and v.current_text[a] == ' ' do a += 1
@@ -1780,6 +2127,7 @@ abstract class TokenLinkOrImage
if pos == -1 then return -1
end
end
+ if pos < start then return -1
if md[pos] != ')' then return -1
else if md[pos] == '[' then
pos += 1
@@ -1799,7 +2147,7 @@ abstract class TokenLinkOrImage
comment = lr.title
end
else
- var tid = name.write_to_string.replace("\n", " ").to_lower
+ var tid = name.write_to_string.replace("\n", " ").to_lower
if v.processor.link_refs.has_key(tid) then
var lr = v.processor.link_refs[tid]
link = lr.link
@@ -1938,17 +2286,19 @@ class TokenEscape
end
end
-# A markdown super token.
-class TokenSuper
+# A markdown strike token.
+#
+# Extended mode only (see `MarkdownProcessor::ext_mode`)
+class TokenStrike
super Token
redef fun emit(v) do
var tmp = v.push_buffer
- var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
+ var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
v.pop_buffer
if b > 0 then
- v.decorator.add_super(v, tmp)
- v.current_pos = b
+ v.decorator.add_strike(v, tmp)
+ v.current_pos = b + 1
else
v.addc char
end
@@ -1957,108 +2307,6 @@ end
redef class Text
- # Get the token kind at `pos`.
- private fun token_at(pos: Int): Token do
- var c0: Char
- var c1: Char
- var c2: Char
- var c3: Char
-
- if pos > 0 then
- c0 = self[pos - 1]
- else
- c0 = ' '
- end
- var c = self[pos]
-
- if pos + 1 < length then
- c1 = self[pos + 1]
- else
- c1 = ' '
- end
- if pos + 2 < length then
- c2 = self[pos + 2]
- else
- c2 = ' '
- end
- if pos + 3 < length then
- c3 = self[pos + 3]
- else
- c3 = ' '
- end
-
- if c == '*' then
- if c1 == '*' then
- if c0 != ' ' or c2 != ' ' then
- return new TokenStrongStar(pos, c)
- else
- return new TokenEmStar(pos, c)
- end
- end
- if c0 != ' ' or c1 != ' ' then
- return new TokenEmStar(pos, c)
- else
- return new TokenNone(pos, c)
- end
- else if c == '_' then
- if c1 == '_' then
- if c0 != ' ' or c2 != ' 'then
- return new TokenStrongUnderscore(pos, c)
- else
- return new TokenEmUnderscore(pos, c)
- end
- end
- if c0 != ' ' or c1 != ' ' then
- return new TokenEmUnderscore(pos, c)
- else
- return new TokenNone(pos, c)
- end
- else if c == '!' then
- if c1 == '[' then return new TokenImage(pos, c)
- return new TokenNone(pos, c)
- else if c == '[' then
- return new TokenLink(pos, c)
- else if c == ']' then
- return new TokenNone(pos, c)
- else if c == '`' then
- if c1 == '`' then
- return new TokenCodeDouble(pos, c)
- else
- return new TokenCodeSingle(pos, c)
- end
- else if c == '\\' then
- if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
- return new TokenEscape(pos, c)
- else
- return new TokenNone(pos, c)
- end
- else if c == '<' then
- return new TokenHTML(pos, c)
- else if c == '&' then
- return new TokenEntity(pos, c)
- else if c == '^' then
- if c0 == '^' or c1 == '^' then
- return new TokenNone(pos, c)
- else
- return new TokenSuper(pos, c)
- end
- else
- return new TokenNone(pos, c)
- end
- end
-
- # Find the position of a `token` in `self`.
- private fun find_token(start: Int, token: Token): Int do
- var pos = start
- while pos < length do
- if token_at(pos).is_same_type(token) then
- return pos
- end
- pos += 1
- end
- return -1
- end
-
# Get the position of the next non-space character.
private fun skip_spaces(start: Int): Int do
var pos = start
@@ -2165,6 +2413,7 @@ redef class Text
# Safe mode can be activated to limit reading to valid xml.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
var pos = 0
+ var is_valid = true
var is_close_tag = false
if start + 1 >= length then return -1
if self[start + 1] == '/' then
@@ -2182,7 +2431,11 @@ redef class Text
pos = read_xml_until(tmp, pos, ' ', '/', '>')
if pos == -1 then return -1
var tag = tmp.write_to_string.trim.to_lower
- if tag.is_html_unsafe then
+ if not tag.is_valid_html_tag then
+ out.append "<"
+ pos = -1
+ else if tag.is_html_unsafe then
+ is_valid = false
out.append "<"
if is_close_tag then out.add '/'
out.append tmp
@@ -2205,7 +2458,11 @@ redef class Text
if pos == -1 then return -1
end
if self[pos] == '>' then
- out.add '>'
+ if is_valid then
+ out.add '>'
+ else
+ out.append ">"
+ end
return pos
end
return -1
@@ -2277,6 +2534,14 @@ redef class Text
return tpl.write_to_string.to_lower
end
+ # Is `self` a valid HTML tag name?
+ #
+ # Returns `false` when `self` is empty or as soon as one of its chars
+ # fails `is_alpha`; returns `true` otherwise.
+ #
+ # NOTE(review): tag names containing digits (e.g. `h1`) would presumably
+ # be rejected by `is_alpha` -- confirm this is intended.
+ private fun is_valid_html_tag: Bool do
+ if is_empty then return false
+ for c in self do
+ if not c.is_alpha then return false
+ end
+ return true
+ end
+
# Read and escape the markdown contained in `self`.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
@@ -2290,6 +2555,17 @@ redef class Text
return pos
end
+ # Extract string found at end of fence opening.
+ private fun meta_from_fence: nullable Text do
+ for i in [0..chars.length[ do
+ var c = chars[i]
+ if c != ' ' and c != '`' and c != '~' then
+ return substring_from(i).trim
+ end
+ end
+ return null
+ end
+
# Is `self` an unsafe HTML element?
private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
@@ -2313,7 +2589,7 @@ redef class String
# var md = "**Hello World!**"
# var html = md.md_to_html
# assert html == "Hello World!
\n"
- fun md_to_html: Streamable do
+ fun md_to_html: Writable do
var processor = new MarkdownProcessor
return processor.process(self)
end