Merge: Markdown Extended
authorJean Privat <jean@pryen.org>
Tue, 25 Nov 2014 01:04:59 +0000 (20:04 -0500)
committerJean Privat <jean@pryen.org>
Tue, 25 Nov 2014 01:04:59 +0000 (20:04 -0500)
Added extended mode to the markdown parser.

Features:
* fences and meta
* super span
* strike span

Pull-Request: #784
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>

lib/markdown/markdown.nit
lib/markdown/test_markdown.nit

index add43e1..2f675d6 100644 (file)
@@ -33,6 +33,88 @@ class MarkdownProcessor
        # `MarkdownEmitter` used for ouput.
        var emitter: MarkdownEmitter is noinit
 
+       # Work in extended mode (default).
+       #
+       # Behavior changes when using extended mode:
+       #
+       # * Lists and code blocks end a paragraph
+       #
+       #   In normal markdown the following:
+       #
+       #               This is a paragraph
+       #               * and this is not a list
+       #
+       #   Will produce:
+       #
+       #               <p>This is a paragraph
+       #               * and this is not a list</p>
+       #
+       #       When using extended mode this changes to:
+       #
+       #               <p>This is a paragraph</p>
+       #               <ul>
+       #               <li>and this is not a list</li>
+       #               </ul>
+       #
+       # * Fenced code blocks
+       #
+       #   If you don't want to indent all your code with 4 spaces,
+       #   you can wrap your code in `` ``` `` or `~~~`.
+       #
+       #       Here's an example:
+       #
+       #               ```
+       #               fun test do
+       #                       print "Hello World!"
+       #               end
+       #               ```
+       #
+       # * Code blocks meta
+       #
+       #   If you want to use syntax highlighting tools, most of them need to know what kind
+       #   of language they are highlighting.
+       #   You can add an optional language identifier after the fence declaration to output
+       #   it in the HTML render.
+       #
+       #               ```nit
+       #               import markdown
+       #
+       #               print "# Hello World!".md_to_html
+       #               ```
+       #
+       #   Becomes
+       #
+       #               <pre class="nit"><code>import markdown
+       #
+       #               print "# Hello World!".md_to_html
+       #               </code></pre>
+       #
+       # * Underscores (Emphasis)
+       #
+       #   Underscores in the middle of a word like:
+       #
+       #               Con_cat_this
+       #
+       #       normally produces this:
+       #
+       #               <p>Con<em>cat</em>this</p>
+       #
+       #   With extended mode they don't result in emphasis.
+       #
+       #               <p>Con_cat_this</p>
+       #
+       # * Strikethrough
+       #
+       #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
+       #   strikethrough span is marked with `~~`.
+       #
+       #               ~~Mistaken text.~~
+       #
+       #   becomes
+       #
+       #               <del>Mistaken text.</del>
+       var ext_mode = true
+
        init do self.emitter = new MarkdownEmitter(self)
 
        # Process the mardown `input` string and return the processed output.
@@ -219,12 +301,14 @@ class MarkdownProcessor
                if value[leading] == '#' then return new LineHeadline
                if value[leading] == '>' then return new LineBlockquote
 
-               if value.length - leading - trailing > 2 then
-                       if value[leading] == '`' and md.count_chars_start('`') >= 3 then
-                               return new LineFence
-                       end
-                       if value[leading] == '~' and md.count_chars_start('~') >= 3 then
-                               return new LineFence
+               if ext_mode then
+                       if value.length - leading - trailing > 2 then
+                               if value[leading] == '`' and md.count_chars_start('`') >= 3 then
+                                       return new LineFence
+                               end
+                               if value[leading] == '~' and md.count_chars_start('~') >= 3 then
+                                       return new LineFence
+                               end
                        end
                end
 
@@ -307,6 +391,14 @@ class MarkdownProcessor
                                        return new TokenEmUnderscore(pos, c)
                                end
                        end
+                       if ext_mode then
+                               if (c0.is_letter or c0.is_digit) and c0 != '_' and
+                                  (c1.is_letter or c1.is_digit) then
+                                       return new TokenNone(pos, c)
+                               else
+                                       return new TokenEmUnderscore(pos, c)
+                               end
+                       end
                        if c0 != ' ' or c1 != ' ' then
                                return new TokenEmUnderscore(pos, c)
                        else
@@ -335,13 +427,12 @@ class MarkdownProcessor
                        return new TokenHTML(pos, c)
                else if c == '&' then
                        return new TokenEntity(pos, c)
-               else if c == '^' then
-                       if c0 == '^' or c1 == '^' then
-                               return new TokenNone(pos, c)
-                       else
-                               return new TokenSuper(pos, c)
-                       end
                else
+                       if ext_mode then
+                               if c == '~' and c1 == '~' then
+                                       return new TokenStrike(pos, c)
+                               end
+                       end
                        return new TokenNone(pos, c)
                end
        end
@@ -519,8 +610,10 @@ interface Decorator
        # Render a strong text.
        fun add_strong(v: MarkdownEmitter, text: Text) is abstract
 
-       # Render a super text.
-       fun add_super(v: MarkdownEmitter, text: Text) is abstract
+       # Render a strike text.
+       #
+       # Extended mode only (see `MarkdownProcessor::ext_mode`)
+       fun add_strike(v: MarkdownEmitter, text: Text) is abstract
 
        # Render a link.
        fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
@@ -594,7 +687,11 @@ class HTMLDecorator
        end
 
        redef fun add_code(v, block) do
-               v.add "<pre><code>"
+               if block isa BlockFence and block.meta != null then
+                       v.add "<pre class=\"{block.meta.to_s}\"><code>"
+               else
+                       v.add "<pre><code>"
+               end
                v.emit_in block
                v.add "</code></pre>\n"
        end
@@ -635,10 +732,10 @@ class HTMLDecorator
                v.add "</strong>"
        end
 
-       redef fun add_super(v, text) do
-               v.add "<sup>"
+       redef fun add_strike(v, text) do
+               v.add "<del>"
                v.add text
-               v.add "</sup>"
+               v.add "</del>"
        end
 
        redef fun add_image(v, link, name, comment) do
@@ -1032,6 +1129,9 @@ end
 class BlockFence
        super BlockCode
 
+       # Any string found after fence token.
+       var meta: nullable Text
+
        # Fence code lines start at 0 spaces.
        redef var line_start = 0
 end
@@ -1409,10 +1509,10 @@ class LineOther
                var was_empty = line.prev_empty
                while line != null and not line.is_empty do
                        var t = v.line_kind(line)
-                       if v.in_list and t isa LineList then
+                       if (v.in_list or v.ext_mode) and t isa LineList then
                                break
                        end
-                       if t isa LineCode or t isa LineFence then
+                       if v.ext_mode and (t isa LineCode or t isa LineFence) then
                                break
                        end
                        if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
@@ -1551,7 +1651,8 @@ class LineFence
                else
                        block = v.current_block.split(v.current_block.last_line.as(not null))
                end
-               block.kind = new BlockFence(block)
+               var meta = block.first_line.value.meta_from_fence
+               block.kind = new BlockFence(block, meta)
                block.first_line.clear
                var last = block.last_line
                if last != null and v.line_kind(last) isa LineFence then
@@ -2038,17 +2139,19 @@ class TokenEscape
        end
 end
 
-# A markdown super token.
-class TokenSuper
+# A markdown strike token.
+#
+# Extended mode only (see `MarkdownProcessor::ext_mode`)
+class TokenStrike
        super Token
 
        redef fun emit(v) do
                var tmp = v.push_buffer
-               var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
+               var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if b > 0 then
-                       v.decorator.add_super(v, tmp)
-                       v.current_pos = b
+                       v.decorator.add_strike(v, tmp)
+                       v.current_pos = b + 1
                else
                        v.addc char
                end
@@ -2288,6 +2391,18 @@ redef class Text
                return pos
        end
 
+       # Extract the string found after the fence markers (`null` if there is none).
+       private fun meta_from_fence: nullable Text do
+               for i in [0..chars.length[ do
+                       var c = chars[i]
+                       # Skip the fence markers and the spaces around them.
+                       if c != ' ' and c != '`' and c != '~' then
+                               return substring_from(i).trim
+                       end
+               end
+               return null
+       end
+
        # Is `self` an unsafe HTML element?
        private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
 
index dd7fd58..a8d3b9f 100644 (file)
@@ -407,6 +407,36 @@ sit amet, consectetuer adipiscing elit.</p>
                assert res == exp
        end
 
+       fun test_process_list11 do
+               var test = """
+This is a paragraph
+* and this is not a list
+"""
+               var exp = """
+<p>This is a paragraph
+* and this is not a list</p>
+"""
+               var proc = new MarkdownProcessor
+               proc.ext_mode = false
+               var res = proc.process(test).write_to_string
+               assert res == exp
+       end
+
+       fun test_process_list_ext do
+               var test = """
+This is a paragraph
+* and this is not a list
+"""
+               var exp = """
+<p>This is a paragraph</p>
+<ul>
+<li>and this is not a list</li>
+</ul>
+"""
+               var res = test.md_to_html.write_to_string
+               assert res == exp
+       end
+
        fun test_process_code1 do
                var test = """
 This is a normal paragraph:
@@ -448,7 +478,7 @@ end tell
                assert res == exp
        end
 
-       fun test_process_code3 do
+       fun test_process_code_ext1 do
                var test = """
 Here is an example of AppleScript:
 ~~~
@@ -476,7 +506,7 @@ end tell
                assert res == exp
        end
 
-       fun test_process_code4 do
+       fun test_process_code_ext2 do
                var test = """
 Here is an example of AppleScript:
 ```
@@ -504,6 +534,49 @@ end tell
                assert res == exp
        end
 
+       fun test_process_code_ext3 do
+               var proc = new MarkdownProcessor
+               proc.ext_mode = false
+
+               var test = """
+Here is an example of AppleScript:
+    beep
+"""
+               var exp = """
+<p>Here is an example of AppleScript:
+beep</p>
+"""
+               var res = proc.process(test).write_to_string
+               assert res == exp
+       end
+
+       fun test_process_code_ext4 do
+               var test = """
+Here is an example of AppleScript:
+    beep
+"""
+               var exp = """
+<p>Here is an example of AppleScript:</p>
+<pre><code>beep
+</code></pre>
+"""
+               var res = test.md_to_html.write_to_string
+               assert res == exp
+       end
+
+       fun test_process_code_ext5 do
+               var test = """
+```nit
+print "Hello World!"
+```
+"""
+               var exp = """
+<pre class="nit"><code>print "Hello World!"
+</code></pre>
+"""
+               var res = test.md_to_html.write_to_string
+               assert res == exp
+       end
 
        fun test_process_nesting1 do
                var test = """
@@ -641,6 +714,22 @@ __double underscores__
                assert res == exp
        end
 
+       fun test_process_emph3 do
+               var proc = new MarkdownProcessor
+               proc.ext_mode = false
+               var test = "Con_cat_this"
+               var exp = "<p>Con<em>cat</em>this</p>\n"
+               var res = proc.process(test).write_to_string
+               assert res == exp
+       end
+
+       fun test_process_emph_ext do
+               var test = "Con_cat_this"
+               var exp = "<p>Con_cat_this</p>\n"
+               var res = test.md_to_html.write_to_string
+               assert res == exp
+       end
+
        fun test_process_xml1 do
                var test = """
 This is a regular paragraph.
@@ -908,6 +997,23 @@ break</a> with a line-ending space.</p>
                assert res == exp
        end
 
+       fun test_process_strike do
+               var proc = new MarkdownProcessor
+               proc.ext_mode = false
+               var test = "This is how you ~~strike text~~"
+               var exp = "<p>This is how you ~~strike text~~</p>\n"
+               var res = proc.process(test).write_to_string
+               assert exp == res
+       end
+
+       fun test_process_strike_ext do
+               var test = "This is how you ~~strike text~~"
+               var exp = "<p>This is how you <del>strike text</del></p>\n"
+               var res = test.md_to_html.write_to_string
+               assert exp == res
+       end
+
+
        fun test_daring_encoding do
                var test = """
 AT&T has an ampersand in their name.
@@ -1275,6 +1381,9 @@ Here's how you put `` `backticks` `` in a code span.
        end
 
        fun test_daring_pars do
+               var proc = new MarkdownProcessor
+               proc.ext_mode = false
+
                var test = """
 In Markdown 1.0.0 and earlier. Version
 8. This line turns into a list item.
@@ -1295,7 +1404,7 @@ list item.</p>
 <p>Here's one with a bullet.
 * criminey.</p>
 """
-               var res = test.md_to_html.write_to_string
+               var res = proc.process(test).write_to_string
                assert res == exp
        end
 
@@ -2462,10 +2571,6 @@ class TestLine
                assert v.line_kind(subject) isa LineHeadline
                subject = new MDLine("    code")
                assert v.line_kind(subject) isa LineCode
-               subject = new MDLine("  ~~~")
-               assert v.line_kind(subject) isa LineFence
-               subject = new MDLine("  ```")
-               assert v.line_kind(subject) isa LineFence
                subject = new MDLine("   Title  ")
                subject.next = new MDLine("== ")
                assert v.line_kind(subject) isa LineHeadline1
@@ -2498,6 +2603,14 @@ class TestLine
                assert v.line_kind(subject) isa LineOList
        end
 
+       fun test_line_type_ext do
+               var v = new MarkdownProcessor
+               subject = new MDLine("  ~~~")
+               assert v.line_kind(subject) isa LineFence
+               subject = new MDLine("  ```")
+               assert v.line_kind(subject) isa LineFence
+       end
+
        fun test_count_chars do
                subject = new MDLine("")
                assert subject.count_chars('*') == 0