Merge: markdown: decorator generate headline ids and save them for further use in...
author: Jean Privat <jean@pryen.org>
Tue, 23 Sep 2014 05:01:52 +0000 (01:01 -0400)
committer: Jean Privat <jean@pryen.org>
Tue, 23 Sep 2014 05:01:52 +0000 (01:01 -0400)
Markdown decorator now remembers outputted headlines.

This will be useful to reconstruct a headline summary from parsed markdown.

Also added a fix for an out-of-bounds exception.

Signed-off-by: Alexandre Terrasa <alexandre@moz-code.org>

Pull-Request: #764
Reviewed-by: Jean Privat <jean@pryen.org>

lib/markdown/markdown.nit
lib/markdown/test_markdown.nit

index 336fc3d..4efde74 100644 (file)
@@ -398,18 +398,46 @@ interface Decorator
 
        # Render a line break
        fun add_line_break(v: MarkdownEmitter) is abstract
+
+       # Generate a new html valid id from a `String`.
+       fun strip_id(txt: String): String is abstract
+
+       # Found headlines during the processing labeled by their ids.
+       fun headlines: ArrayMap[String, HeadLine] is abstract
+end
+
+# Class representing a markdown headline.
+class HeadLine
+       # Unique identifier of this headline.
+       var id: String
+
+       # Text of the headline.
+       var title: String
+
+       # Level of this headline.
+       #
+       # According to the markdown specification, level must be in `[1..6]`.
+       var level: Int
 end
 
 # `Decorator` that outputs HTML.
 class HTMLDecorator
        super Decorator
 
+       redef var headlines = new ArrayMap[String, HeadLine]
+
        redef fun add_ruler(v, block) do v.add "<hr/>\n"
 
        redef fun add_headline(v, block) do
-               v.add "<h{block.depth}>"
+               # save headline
+               var txt = block.block.first_line.value
+               var id = strip_id(txt)
+               var lvl = block.depth
+               headlines[id] = new HeadLine(id, txt, lvl)
+               # output it
+               v.add "<h{lvl} id=\"{id}\">"
                v.emit_in block
-               v.add "</h{block.depth}>\n"
+               v.add "</h{lvl}>\n"
        end
 
        redef fun add_paragraph(v, block) do
@@ -544,6 +572,35 @@ class HTMLDecorator
                        end
                end
        end
+
+       redef fun strip_id(txt) do
+               # strip id
+               var b = new FlatBuffer
+               for c in txt do
+                       if c == ' ' then
+                               b.add '_'
+                       else
+                               if not c.is_letter and
+                                  not c.is_digit and
+                                  not allowed_id_chars.has(c) then continue
+                               b.add c
+                       end
+               end
+               var res = b.to_s
+               var key = res
+               # check for multiple id definitions
+               if headlines.has_key(key) then
+                       var i = 1
+                       key = "{res}_{i}"
+                       while headlines.has_key(key) do
+                               i += 1
+                               key = "{res}_{i}"
+                       end
+               end
+               return key
+       end
+
+       private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 end
 
 # A block of markdown lines.
@@ -1148,7 +1205,7 @@ class MDLine
                                        pos += 1
                                end
                                if pos >= line.value.length then
-                                       if line.value[pos - 2] == '/' then
+                                       if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
@@ -1412,8 +1469,6 @@ class LineHeadline
                var block = v.current_block.split(line.as(not null))
                var kind = new BlockHeadline(block)
                block.kind = kind
-               # TODO block ID
-               # block.id = block.first_line.strip_id
                kind.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
@@ -1432,8 +1487,6 @@ class LineHeadline1
                var block = v.current_block.split(line.as(not null))
                var kind = new BlockHeadline(block)
                kind.depth = 1
-               # TODO block ID
-               # block.id = block.first_line.strip_id
                kind.transform_headline(block)
                block.kind = kind
                v.current_block.remove_leading_empty_lines
@@ -1453,8 +1506,6 @@ class LineHeadline2
                var block = v.current_block.split(line.as(not null))
                var kind = new BlockHeadline(block)
                kind.depth = 2
-               # TODO block ID
-               # block.id = block.first_line.strip_id
                kind.transform_headline(block)
                block.kind = kind
                v.current_block.remove_leading_empty_lines
index 6dba442..f3c1252 100644 (file)
@@ -93,7 +93,10 @@ This is a H1
 This is a H2
 -------------
 """
-               var exp = "<h1>This is a H1</h1>\n<h2>This is a H2</h2>\n"
+               var exp = """
+<h1 id="This_is_a_H1">This is a H1</h1>
+<h2 id="This_is_a_H2">This is a H2</h2>
+"""
                var res = test.md_to_html.write_to_string
                assert res == exp
        end
@@ -105,7 +108,11 @@ This is a H2
 ## This is a H2
 ###### This is a H6
 """
-               var exp = "<h1>This is a H1</h1>\n<h2>This is a H2</h2>\n<h6>This is a H6</h6>\n"
+               var exp = """
+<h1 id="This_is_a_H1">This is a H1</h1>
+<h2 id="This_is_a_H2">This is a H2</h2>
+<h6 id="This_is_a_H6">This is a H6</h6>
+"""
                var res = test.md_to_html.write_to_string
                assert res == exp
        end
@@ -118,7 +125,11 @@ This is a H2
 
 ### This is a H3 ######
 """
-               var exp = "<h1>This is a H1</h1>\n<h2>This is a H2</h2>\n<h3>This is a H3</h3>\n"
+               var exp = """
+<h1 id="This_is_a_H1">This is a H1</h1>
+<h2 id="This_is_a_H2">This is a H2</h2>
+<h3 id="This_is_a_H3">This is a H3</h3>
+"""
                var res = test.md_to_html.write_to_string
                assert res == exp
        end
@@ -450,7 +461,7 @@ end tell
 """
                var exp = """
 <blockquote>
-<h2>This is a header.</h2>
+<h2 id="This_is_a_header.">This is a header.</h2>
 <ol>
 <li>This is the first list item.</li>
 <li>This is the second list item.</li>
@@ -1951,7 +1962,7 @@ Same thing but with paragraphs:
 """
 
                var exp = """
-<h2>Unordered</h2>
+<h2 id="Unordered">Unordered</h2>
 <p>Asterisks tight:</p>
 <ul>
 <li>asterisk 1</li>
@@ -1999,7 +2010,7 @@ Same thing but with paragraphs:
 <li><p>Minus 3</p>
 </li>
 </ul>
-<h2>Ordered</h2>
+<h2 id="Ordered">Ordered</h2>
 <p>Tight:</p>
 <ol>
 <li>First</li>
@@ -2041,7 +2052,7 @@ back.</p>
 <li><p>Item 3.</p>
 </li>
 </ol>
-<h2>Nested</h2>
+<h2 id="Nested">Nested</h2>
 <ul>
 <li>Tab<ul>
 <li>Tab<ul>
@@ -2456,3 +2467,44 @@ class TestLine
                assert subject.count_chars_start('*') == 0
        end
 end
+
+class TestHTMLDecorator
+       super TestSuite
+
+       fun test_headlines do
+               var test = """
+# **a**
+## a.a
+### a.a.b
+### a.a.b
+## a.b
+# [b](test)
+## b.a
+### b.a.c
+## b.b
+## b.c
+# c
+"""
+               var proc = new MarkdownProcessor
+               var decorator = proc.emitter.decorator.as(HTMLDecorator)
+               proc.process(test)
+               var res = ""
+               for id, headline in decorator.headlines do
+                       res += "{headline.title}:{id}\n"
+               end
+               var exp = """
+**a**:a
+a.a:a.a
+a.a.b:a.a.b
+a.a.b:a.a.b_1
+a.b:a.b
+[b](test):btest
+b.a:b.a
+b.a.c:b.a.c
+b.b:b.b
+b.c:b.c
+c:c
+"""
+               assert res == exp
+       end
+end