lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
   22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for ouput.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         #               This is a paragraph
  45         #               * and this is not a list
  46         #
  47         #   Will produce:
  48         #
  49         #               <p>This is a paragraph
  50         #               * and this is not a list</p>
  51         #
  52         #       When using extended mode this changes to:
  53         #
  54         #               <p>This is a paragraph</p>
  55         #               <ul>
  56         #               <li>and this is not a list</li>
  57         #               </ul>
  58         #
  59         # * Fences code blocks
  60         #
  61         #   If you don't want to indent your all your code with 4 spaces,
  62         #   you can wrap your code in ``` ``` ``` or `~~~`.
  63         #
  64         #       Here's an example:
  65         #
  66         #               ```
  67         #               fun test do
  68         #                       print "Hello World!"
  69         #               end
  70         #               ```
  71         #
  72         # * Code blocks meta
  73         #
  74         #   If you want to use syntax highlighting tools, most of them need to know what kind
  75         #   of language they are highlighting.
  76         #   You can add an optional language identifier after the fence declaration to output
  77         #   it in the HTML render.
  78         #
  79         #               ```nit
  80         #               import markdown
  81         #
  82         #               print "# Hello World!".md_to_html
  83         #               ```
  84         #
  85         #   Becomes
  86         #
  87         #               <pre class="nit"><code>import markdown
  88         #
  89         #               print "Hello World!".md_to_html
  90         #               </code></pre>
  91         #
  92         # * Underscores (Emphasis)
  93         #
  94         #   Underscores in the middle of a word like:
  95         #
  96         #               Con_cat_this
  97         #
  98         #       normally produces this:
  99         #
 100         #               <p>Con<em>cat</em>this</p>
 101         #
 102         #   With extended mode they don't result in emphasis.
 103         #
 104         #               <p>Con_cat_this</p>
 105         #
 106         # * Strikethrough
 107         #
 108         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 109         #   strikethrought span is marked with `~~`.
 110         #
 111         #               ~~Mistaken text.~~
 112         #
 113         #   becomes
 114         #
 115         #               <del>Mistaken text.</del>
 116         var ext_mode = true
 117
 118         init do self.emitter = new MarkdownEmitter(self)
 119
 120         # Process the mardown `input` string and return the processed output.
 121         fun process(input: String): Writable do
 122                 # init processor
 123                 link_refs.clear
 124                 last_link_ref = null
 125                 current_line = null
 126                 current_block = null
 127                 # parse markdown
 128                 var parent = read_lines(input)
 129                 parent.remove_surrounding_empty_lines
 130                 recurse(parent, false)
 131                 # output processed text
 132                 return emitter.emit(parent.kind)
 133         end
 134
 135         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 136         private fun read_lines(input: String): MDBlock do
 137                 var block = new MDBlock(new MDLocation(1, 1, 1, 1))
 138                 var value = new FlatBuffer
 139                 var i = 0
 140
 141                 var line_pos = 0
 142                 var col_pos = 0
 143
 144                 while i < input.length do
 145                         value.clear
 146                         var pos = 0
 147                         var eol = false
 148                         while not eol and i < input.length do
 149                                 col_pos += 1
 150                                 var c = input[i]
 151                                 if c == '\n' then
 152                                         eol = true
 153                                 else if c == '\t' then
 154                                         var np = pos + (4 - (pos.bin_and(3)))
 155                                         while pos < np do
 156                                                 value.add ' '
 157                                                 pos += 1
 158                                         end
 159                                 else
 160                                         pos += 1
 161                                         value.add c
 162                                 end
 163                                 i += 1
 164                         end
 165                         line_pos += 1
 166
 167                         var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
 168                         var line = new MDLine(loc, value.write_to_string)
 169                         var is_link_ref = check_link_ref(line)
 170                         # Skip link refs
 171                         if not is_link_ref then block.add_line line
 172                         col_pos = 0
 173                 end
 174                 return block
 175         end
 176
 177         # Check if line is a block link definition.
 178         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 179         private fun check_link_ref(line: MDLine): Bool do
 180                 var md = line.value
 181                 var is_link_ref = false
 182                 var id = new FlatBuffer
 183                 var link = new FlatBuffer
 184                 var comment = new FlatBuffer
 185                 var pos = -1
 186                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 187                         pos = line.leading + 1
 188                         pos = md.read_until(id, pos, ']')
 189                         if not id.is_empty and pos + 2 < line.value.length then
 190                                 if line.value[pos + 1] == ':' then
 191                                         pos += 2
 192                                         pos = md.skip_spaces(pos)
 193                                         if line.value[pos] == '<' then
 194                                                 pos += 1
 195                                                 pos = md.read_until(link, pos, '>')
 196                                                 pos += 1
 197                                         else
 198                                                 pos = md.read_until(link, pos, ' ', '\n')
 199                                         end
 200                                         if not link.is_empty then
 201                                                 pos = md.skip_spaces(pos)
 202                                                 if pos > 0 and pos < line.value.length then
 203                                                         var c = line.value[pos]
 204                                                         if c == '\"' or c == '\'' or c == '(' then
 205                                                                 pos += 1
 206                                                                 if c == '(' then
 207                                                                         pos = md.read_until(comment, pos, ')')
 208                                                                 else
 209                                                                         pos = md.read_until(comment, pos, c)
 210                                                                 end
 211                                                                 if pos > 0 then is_link_ref = true
 212                                                         end
 213                                                 else
 214                                                         is_link_ref = true
 215                                                 end
 216                                         end
 217                                 end
 218                         end
 219                 end
 220                 if is_link_ref and not id.is_empty and not link.is_empty then
 221                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 222                         add_link_ref(id.write_to_string, lr)
 223                         if comment.is_empty then last_link_ref = lr
 224                         return true
 225                 else
 226                         comment = new FlatBuffer
 227                         if not line.is_empty and last_link_ref != null then
 228                                 pos = line.leading
 229                                 var c = line.value[pos]
 230                                 if c == '\"' or c == '\'' or c ==  '(' then
 231                                         pos += 1
 232                                         if c == '(' then
 233                                                 pos = md.read_until(comment, pos, ')')
 234                                         else
 235                                                 pos = md.read_until(comment, pos, c)
 236                                         end
 237                                 end
 238                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 239                         end
 240                         if comment.is_empty then return false
 241                         return true
 242                 end
 243         end
 244
 245         # Known link refs
 246         # This list will be needed during output to expand links.
 247         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 248
 249         # Last encountered link ref (for multiline definitions)
 250         #
 251         # Markdown allows link refs to be defined over two lines:
 252         #
 253         #       [id]: http://example.com/longish/path/to/resource/here
 254         #               "Optional Title Here"
 255         #
 256         private var last_link_ref: nullable LinkRef = null
 257
 258         # Add a link ref to the list
 259         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 260
 261         # Recursively split a `block`.
 262         #
 263         # The block is splitted according to the type of lines it contains.
 264         # Some blocks can be splited again recursively like lists.
 265         # The `in_list` mode is used to recurse on list and build
 266         # nested paragraphs or code blocks.
 267         fun recurse(root: MDBlock, in_list: Bool) do
 268                 var old_mode = self.in_list
 269                 var old_root = self.current_block
 270                 self.in_list = in_list
 271
 272                 var line = root.first_line
 273                 while line != null and line.is_empty do
 274                         line = line.next
 275                         if line == null then return
 276                 end
 277
 278                 current_line = line
 279                 current_block = root
 280                 while current_line != null do
 281                         line_kind(current_line.as(not null)).process(self)
 282                 end
 283                 self.in_list = old_mode
 284                 self.current_block = old_root
 285         end
 286
 287         # Currently processed line.
 288         # Used when visiting blocks with `recurse`.
 289         var current_line: nullable MDLine = null is writable
 290
 291         # Currently processed block.
 292         # Used when visiting blocks with `recurse`.
 293         var current_block: nullable MDBlock = null is writable
 294
 295         # Is the current recursion in list mode?
 296         # Used when visiting blocks with `recurse`
 297         private var in_list = false
 298
 299         # The type of line.
 300         # see: `md_line_*`
 301         fun line_kind(md: MDLine): Line do
 302                 var value = md.value
 303                 var leading = md.leading
 304                 var trailing = md.trailing
 305                 if md.is_empty then return new LineEmpty
 306                 if md.leading > 3 then return new LineCode
 307                 if value[leading] == '#' then return new LineHeadline
 308                 if value[leading] == '>' then return new LineBlockquote
 309
 310                 if ext_mode then
 311                         if value.length - leading - trailing > 2 then
 312                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 313                                         return new LineFence
 314                                 end
 315                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 316                                         return new LineFence
 317                                 end
 318                         end
 319                 end
 320
 321                 if value.length - leading - trailing > 2 and
 322                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 323                    if md.count_chars(value[leading]) >= 3 then
 324                                 return new LineHR
 325                    end
 326                 end
 327
 328                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 329                         var c = value[leading]
 330                         if c == '*' or c == '-' or c == '+' then return new LineUList
 331                 end
 332
 333                 if value.length - leading >= 3 and value[leading].is_digit then
 334                         var i = leading + 1
 335                         while i < value.length and value[i].is_digit do i += 1
 336                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 337                                 return new LineOList
 338                         end
 339                 end
 340
 341                 if value[leading] == '<' and md.check_html then return new LineXML
 342
 343                 var next = md.next
 344                 if next != null and not next.is_empty then
 345                         if next.count_chars('=') > 0 then
 346                                 return new LineHeadline1
 347                         end
 348                         if next.count_chars('-') > 0 then
 349                                 return new LineHeadline2
 350                         end
 351                 end
 352                 return new LineOther
 353         end
 354
 355         # Get the token kind at `pos`.
 356         fun token_at(text: Text, pos: Int): Token do
 357                 var c0: Char
 358                 var c1: Char
 359                 var c2: Char
 360
 361                 if pos > 0 then
 362                         c0 = text[pos - 1]
 363                 else
 364                         c0 = ' '
 365                 end
 366                 var c = text[pos]
 367
 368                 if pos + 1 < text.length then
 369                         c1 = text[pos + 1]
 370                 else
 371                         c1 = ' '
 372                 end
 373                 if pos + 2 < text.length then
 374                         c2 = text[pos + 2]
 375                 else
 376                         c2 = ' '
 377                 end
 378
 379                 var loc = text.pos_to_loc(pos)
 380
 381                 if c == '*' then
 382                         if c1 == '*' then
 383                                 if c0 != ' ' or c2 != ' ' then
 384                                         return new TokenStrongStar(loc, pos, c)
 385                                 else
 386                                         return new TokenEmStar(loc, pos, c)
 387                                 end
 388                         end
 389                         if c0 != ' ' or c1 != ' ' then
 390                                 return new TokenEmStar(loc, pos, c)
 391                         else
 392                                 return new TokenNone(loc, pos, c)
 393                         end
 394                 else if c == '_' then
 395                         if c1 == '_' then
 396                                 if c0 != ' ' or c2 != ' 'then
 397                                         return new TokenStrongUnderscore(loc, pos, c)
 398                                 else
 399                                         return new TokenEmUnderscore(loc, pos, c)
 400                                 end
 401                         end
 402                         if ext_mode then
 403                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 404                                    (c1.is_letter or c1.is_digit) then
 405                                         return new TokenNone(loc, pos, c)
 406                                 else
 407                                         return new TokenEmUnderscore(loc, pos, c)
 408                                 end
 409                         end
 410                         if c0 != ' ' or c1 != ' ' then
 411                                 return new TokenEmUnderscore(loc, pos, c)
 412                         else
 413                                 return new TokenNone(loc, pos, c)
 414                         end
 415                 else if c == '!' then
 416                         if c1 == '[' then return new TokenImage(loc, pos, c)
 417                         return new TokenNone(loc, pos, c)
 418                 else if c == '[' then
 419                         return new TokenLink(loc, pos, c)
 420                 else if c == ']' then
 421                         return new TokenNone(loc, pos, c)
 422                 else if c == '`' then
 423                         if c1 == '`' then
 424                                 return new TokenCodeDouble(loc, pos, c)
 425                         else
 426                                 return new TokenCodeSingle(loc, pos, c)
 427                         end
 428                 else if c == '\\' then
 429                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 430                                 return new TokenEscape(loc, pos, c)
 431                         else
 432                                 return new TokenNone(loc, pos, c)
 433                         end
 434                 else if c == '<' then
 435                         return new TokenHTML(loc, pos, c)
 436                 else if c == '&' then
 437                         return new TokenEntity(loc, pos, c)
 438                 else
 439                         if ext_mode then
 440                                 if c == '~' and c1 == '~' then
 441                                         return new TokenStrike(loc, pos, c)
 442                                 end
 443                         end
 444                         return new TokenNone(loc, pos, c)
 445                 end
 446         end
 447
 448         # Find the position of a `token` in `self`.
 449         fun find_token(text: Text, start: Int, token: Token): Int do
 450                 var pos = start
 451                 while pos < text.length do
 452                         if token_at(text, pos).is_same_type(token) then
 453                                 return pos
 454                         end
 455                         pos += 1
 456                 end
 457                 return -1
 458         end
 459 end
 460
 461 # Emit output corresponding to blocks content.
 462 #
 463 # Blocks are created by a previous pass in `MarkdownProcessor`.
 464 # The emitter use a `Decorator` to select the output format.
 465 class MarkdownEmitter
 466
 467         # Kind of processor used for parsing.
 468         type PROCESSOR: MarkdownProcessor
 469
 470         # Processor containing link refs.
 471         var processor: PROCESSOR
 472
 473         # Kind of decorator used for decoration.
 474         type DECORATOR: Decorator
 475
 476         # Decorator used for output.
 477         # Default is `HTMLDecorator`
 478         var decorator: DECORATOR is writable, lazy do
 479                 return new HTMLDecorator
 480         end
 481
 482         # Create a new `MarkdownEmitter` using a custom `decorator`.
 483         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
 484                 init processor
 485                 self.decorator = decorator
 486         end
 487
 488         # Output `block` using `decorator` in the current buffer.
 489         fun emit(block: Block): Text do
 490                 var buffer = push_buffer
 491                 block.emit(self)
 492                 pop_buffer
 493                 return buffer
 494         end
 495
 496         # Output the content of `block`.
 497         fun emit_in(block: Block) do block.emit_in(self)
 498
 499         # Transform and emit mardown text
 500         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 501
 502         # Transform and emit mardown text starting at `from` and
 503         # until a token with the same type as `token` is found.
 504         # Go until the end of text if `token` is null.
 505         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 506                 var old_text = current_text
 507                 var old_pos = current_pos
 508                 current_text = text
 509                 current_pos = start
 510                 while current_pos < text.length do
 511                         var mt = processor.token_at(text, current_pos)
 512                         if (token != null and not token isa TokenNone) and
 513                         (mt.is_same_type(token) or
 514                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 515                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 516                                 return current_pos
 517                         end
 518                         mt.emit(self)
 519                         current_pos += 1
 520                 end
 521                 current_text = old_text
 522                 current_pos = old_pos
 523                 return -1
 524         end
 525
 526         # Currently processed position in `current_text`.
 527         # Used when visiting inline production with `emit_text_until`.
 528         private var current_pos: Int = -1
 529
 530         # Currently processed text.
 531         # Used when visiting inline production with `emit_text_until`.
 532         private var current_text: nullable Text = null
 533
 534         # Stacked buffers.
 535         private var buffer_stack = new List[FlatBuffer]
 536
 537         # Push a new buffer on the stack.
 538         private fun push_buffer: FlatBuffer do
 539                 var buffer = new FlatBuffer
 540                 buffer_stack.add buffer
 541                 return buffer
 542         end
 543
 544         # Pop the last buffer.
 545         private fun pop_buffer do buffer_stack.pop
 546
 547         # Current output buffer.
 548         private fun current_buffer: FlatBuffer do
 549                 assert not buffer_stack.is_empty
 550                 return buffer_stack.last
 551         end
 552
 553         # Append `e` to current buffer.
 554         fun add(e: Writable) do
 555                 if e isa Text then
 556                         current_buffer.append e
 557                 else
 558                         current_buffer.append e.write_to_string
 559                 end
 560         end
 561
 562         # Append `c` to current buffer.
 563         fun addc(c: Char) do add c.to_s
 564
 565         # Append a "\n" line break.
 566         fun addn do add "\n"
 567 end
 568
 569 # A Link Reference.
 570 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 571 #
 572 # ~~~raw
 573 # [1]: http://example.com/ "Optional title"
 574 # ~~~
 575 class LinkRef
 576
 577         # Link href
 578         var link: String
 579
 580         # Optional link title
 581         var title: nullable String = null
 582
 583         # Is the link an abreviation?
 584         var is_abbrev = false
 585
 586         # Create a link with a title.
 587         init with_title(link: String, title: nullable String) do
 588                 self.link = link
 589                 self.title = title
 590         end
 591 end
 592
 593 # A `Decorator` is used to emit mardown into a specific format.
 594 # Default decorator used is `HTMLDecorator`.
 595 interface Decorator
 596
 597         # Kind of emitter used for decoration.
 598         type EMITTER: MarkdownEmitter
 599
 600         # Render a ruler block.
 601         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
 602
 603         # Render a headline block with corresponding level.
 604         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
 605
 606         # Render a paragraph block.
 607         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
 608
 609         # Render a code or fence block.
 610         fun add_code(v: EMITTER, block: BlockCode) is abstract
 611
 612         # Render a blockquote.
 613         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
 614
 615         # Render an unordered list.
 616         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
 617
 618         # Render an ordered list.
 619         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
 620
 621         # Render a list item.
 622         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
 623
 624         # Render an emphasis text.
 625         fun add_em(v: EMITTER, text: Text) is abstract
 626
 627         # Render a strong text.
 628         fun add_strong(v: EMITTER, text: Text) is abstract
 629
 630         # Render a strike text.
 631         #
 632         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 633         fun add_strike(v: EMITTER, text: Text) is abstract
 634
 635         # Render a link.
 636         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 637
 638         # Render an image.
 639         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 640
 641         # Render an abbreviation.
 642         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
 643
 644         # Render a code span reading from a buffer.
 645         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 646
 647         # Render a text and escape it.
 648         fun append_value(v: EMITTER, value: Text) is abstract
 649
 650         # Render code text from buffer and escape it.
 651         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 652
 653         # Render a character escape.
 654         fun escape_char(v: EMITTER, char: Char) is abstract
 655
 656         # Render a line break
 657         fun add_line_break(v: EMITTER) is abstract
 658
 659         # Generate a new html valid id from a `String`.
 660         fun strip_id(txt: String): String is abstract
 661
 662         # Found headlines during the processing labeled by their ids.
 663         fun headlines: ArrayMap[String, HeadLine] is abstract
 664 end
 665
 666 # Class representing a markdown headline.
 667 class HeadLine
 668         # Unique identifier of this headline.
 669         var id: String
 670
 671         # Text of the headline.
 672         var title: String
 673
 674         # Level of this headline.
 675         #
 676         # According toe the markdown specification, level must be in `[1..6]`.
 677         var level: Int
 678 end
 679
 680 # `Decorator` that outputs HTML.
     #
     # Block-level methods wrap what `v.emit_in` produces in the matching element.
     # Link, image, abbreviation and code-fence attributes are written through
     # `append_value`, which escapes every character with `escape_char`.
 681 class HTMLDecorator
 682         super Decorator
 683
 684         redef var headlines = new ArrayMap[String, HeadLine]
 685
 686         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 687
 688         redef fun add_headline(v, block) do
 689                 # save headline
 690                 var txt = block.block.first_line.value
 691                 var id = strip_id(txt)
 692                 var lvl = block.depth
 693                 headlines[id] = new HeadLine(id, txt, lvl)
 694                 # output it
 695                 v.add "<h{lvl} id=\"{id}\">"
 696                 v.emit_in block
 697                 v.add "</h{lvl}>\n"
 698         end
 699
 700         redef fun add_paragraph(v, block) do
 701                 v.add "<p>"
 702                 v.emit_in block
 703                 v.add "</p>\n"
 704         end
 705
             # Output a code block; a `BlockFence` with a `meta` gets it as `class`.
 706         redef fun add_code(v, block) do
 707                 if block isa BlockFence and block.meta != null then
                             # `meta` is raw text taken from the document: escape it so
                             # it cannot close the attribute and inject markup.
 708                         v.add "<pre class=\""
                             append_value(v, block.meta.to_s)
                             v.add "\"><code>"
 709                 else
 710                         v.add "<pre><code>"
 711                 end
 712                 v.emit_in block
 713                 v.add "</code></pre>\n"
 714         end
 715
 716         redef fun add_blockquote(v, block) do
 717                 v.add "<blockquote>\n"
 718                 v.emit_in block
 719                 v.add "</blockquote>\n"
 720         end
 721
 722         redef fun add_unorderedlist(v, block) do
 723                 v.add "<ul>\n"
 724                 v.emit_in block
 725                 v.add "</ul>\n"
 726         end
 727
 728         redef fun add_orderedlist(v, block) do
 729                 v.add "<ol>\n"
 730                 v.emit_in block
 731                 v.add "</ol>\n"
 732         end
 733
 734         redef fun add_listitem(v, block) do
 735                 v.add "<li>"
 736                 v.emit_in block
 737                 v.add "</li>\n"
 738         end
 739
 740         redef fun add_em(v, text) do
 741                 v.add "<em>"
 742                 v.add text
 743                 v.add "</em>"
 744         end
 745
 746         redef fun add_strong(v, text) do
 747                 v.add "<strong>"
 748                 v.add text
 749                 v.add "</strong>"
 750         end
 751
 752         redef fun add_strike(v, text) do
 753                 v.add "<del>"
 754                 v.add text
 755                 v.add "</del>"
 756         end
 757
             # Output an `<img>`; the `title` attribute is skipped when `comment` is
             # null or empty.
 758         redef fun add_image(v, link, name, comment) do
 759                 v.add "<img src=\""
 760                 append_value(v, link)
 761                 v.add "\" alt=\""
 762                 append_value(v, name)
 763                 v.add "\""
 764                 if comment != null and not comment.is_empty then
 765                         v.add " title=\""
 766                         append_value(v, comment)
 767                         v.add "\""
 768                 end
 769                 v.add "/>"
 770         end
 771
             # Output an `<a>`; the `title` attribute is skipped when `comment` is
             # null or empty. The link text `name` is emitted with `v.emit_text`.
 772         redef fun add_link(v, link, name, comment) do
 773                 v.add "<a href=\""
 774                 append_value(v, link)
 775                 v.add "\""
 776                 if comment != null and not comment.is_empty then
 777                         v.add " title=\""
 778                         append_value(v, comment)
 779                         v.add "\""
 780                 end
 781                 v.add ">"
 782                 v.emit_text(name)
 783                 v.add "</a>"
 784         end
 785
 786         redef fun add_abbr(v, name, comment) do
 787                 v.add "<abbr title=\""
 788                 append_value(v, comment)
 789                 v.add "\">"
 790                 v.emit_text(name)
 791                 v.add "</abbr>"
 792         end
 793
 794         redef fun add_span_code(v, text, from, to) do
 795                 v.add "<code>"
 796                 append_code(v, text, from, to)
 797                 v.add "</code>"
 798         end
 799
 800         redef fun add_line_break(v) do
 801                 v.add "<br/>"
 802         end
 803
             # Write `text` escaping each character with `escape_char`.
 804         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 805
             # Write `c`, replacing `&`, `<`, `>`, `"` and `'` by their entity.
 806         redef fun escape_char(v, c) do
 807                 if c == '&' then
 808                         v.add "&amp;"
 809                 else if c == '<' then
 810                         v.add "&lt;"
 811                 else if c == '>' then
 812                         v.add "&gt;"
 813                 else if c == '"' then
 814                         v.add "&quot;"
 815                 else if c == '\'' then
 816                         v.add "&apos;"
 817                 else
 818                         v.addc c
 819                 end
 820         end
 821
             # Write `buffer[from..to[` escaping only `&`, `<` and `>`
             # (quotes are kept as is).
 822         redef fun append_code(v, buffer, from, to) do
 823                 for i in [from..to[ do
 824                         var c = buffer[i]
 825                         if c == '&' then
 826                                 v.add "&amp;"
 827                         else if c == '<' then
 828                                 v.add "&lt;"
 829                         else if c == '>' then
 830                                 v.add "&gt;"
 831                         else
 832                                 v.addc c
 833                         end
 834                 end
 835         end
 836
             # Build an id from `txt`: spaces become `_`; characters that are neither
             # letters, digits nor in `allowed_id_chars` are dropped.
             # If the id is already a key of `headlines`, `_1`, `_2`... is appended
             # until an unused key is found.
 837         redef fun strip_id(txt) do
 838                 # strip id
 839                 var b = new FlatBuffer
 840                 for c in txt do
 841                         if c == ' ' then
 842                                 b.add '_'
 843                         else
 844                                 if not c.is_letter and
 845                                    not c.is_digit and
 846                                    not allowed_id_chars.has(c) then continue
 847                                 b.add c
 848                         end
 849                 end
 850                 var res = b.to_s
 851                 var key = res
 852                 # check for multiple id definitions
 853                 if headlines.has_key(key) then
 854                         var i = 1
 855                         key = "{res}_{i}"
 856                         while headlines.has_key(key) do
 857                                 i += 1
 858                                 key = "{res}_{i}"
 859                         end
 860                 end
 861                 return key
 862         end
 863
             # Non alphanumeric characters kept by `strip_id`.
 864         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 865 end
 866
 867 # Location in a Markdown input.
     #
     # Holds a starting position and a stopping position, each as a line and a column.
 868 class MDLocation
 869
 870         # Starting line number (starting from 1).
 871         var line_start: Int
 872
 873         # Starting column number (starting from 1).
 874         var column_start: Int
 875
 876         # Stopping line number (starting from 1).
 877         var line_end: Int
 878
 879         # Stopping column number (starting from 1).
 880         var column_end: Int
 881
             # Formatted as `line_start,column_start--line_end,column_end`.
 882         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 883 end
 884
 885 # A block of markdown lines.
 886 # A `MDBlock` can contain lines and/or sub-blocks.
     #
     # Lines and sub-blocks are both kept as linked lists
     # (`first_line`..`last_line` and `first_block`..`last_block`).
 887 class MDBlock
 888
 889         # Position of `self` in the input.
 890         var location: MDLocation
 891
 892         # Kind of block.
 893         # See `Block`.
 894         var kind: Block = new BlockNone(self) is writable
 895
 896         # First line if any.
 897         var first_line: nullable MDLine = null is writable
 898
 899         # Last line if any.
 900         var last_line: nullable MDLine = null is writable
 901
 902         # First sub-block if any.
 903         var first_block: nullable MDBlock = null is writable
 904
 905         # Last sub-block if any.
 906         var last_block: nullable MDBlock = null is writable
 907
 908         # Previous block if any.
 909         var prev: nullable MDBlock = null is writable
 910
 911         # Next block if any.
 912         var next: nullable MDBlock = null is writable
 913
 914         # Does this block contain subblocks?
 915         fun has_blocks: Bool do return first_block != null
 916
 917         # Count sub-blocks (walks the whole sub-block list).
 918         fun count_blocks: Int do
 919                 var count = 0
 920                 var block = first_block
 921                 while block != null do
 922                         count += 1
 923                         block = block.next
 924                 end
 925                 return count
 926         end
 927
 928         # Does this block contain lines?
 929         fun has_lines: Bool do return first_line != null
 930
 931         # Count block lines (walks the whole line list).
 932         fun count_lines: Int do
 933                 var count = 0
 934                 var line = first_line
 935                 while line != null do
 936                         count += 1
 937                         line = line.next
 938                 end
 939                 return count
 940         end
 941
 942         # Split `self` creating a new sub-block having `line` as `last_line`.
             #
             # Lines from `first_line` up to `line` are moved to the new block, which is
             # appended to the sub-blocks of `self` and returned. The remaining lines
             # (if any) stay in `self` and `location` is updated to start at them.
             #
             # NOTE(review): `first_line` is dereferenced without a null check, so
             # `self` presumably must contain `line` - confirm against callers.
             # NOTE(review): the `prev` of the new block is never set here, even when
             # it is appended after an existing `last_block` - confirm this is intended.
 943         fun split(line: MDLine): MDBlock do
 944                 # location for new block
 945                 var new_loc = new MDLocation(
 946                         first_line.location.line_start,
 947                         first_line.location.column_start,
 948                         line.location.line_end,
 949                         line.location.column_end)
 950                 # create block
 951                 var block = new MDBlock(new_loc)
 952                 block.first_line = first_line
 953                 block.last_line = line
 954                 first_line = line.next
 955                 line.next = null
 956                 if first_line == null then
 957                         last_line = null
 958                 else
 959                         first_line.prev = null
 960                         # update current block loc
 961                         location.line_start = first_line.location.line_start
 962                         location.column_start = first_line.location.column_start
 963                 end
 964                 if first_block == null then
 965                         first_block = block
 966                         last_block = block
 967                 else
 968                         last_block.next = block
 969                         last_block = block
 970                 end
 971                 return block
 972         end
 973
 974         # Add a `line` at the end of this block.
             #
             # The `prev_empty`/`next_empty` flags of `line` and of the former last line
             # are updated from their `is_empty` status.
 975         fun add_line(line: MDLine) do
 976                 if last_line == null then
 977                         first_line = line
 978                         last_line = line
 979                 else
 980                         last_line.next_empty = line.is_empty
 981                         line.prev_empty = last_line.is_empty
 982                         line.prev = last_line
 983                         last_line.next = line
 984                         last_line = line
 985                 end
 986         end
 987
 988         # Remove `line` from this block.
             #
             # Neighbours are relinked and `line.prev`/`line.next` are reset to null.
             # The `prev_empty`/`next_empty` flags of the neighbours are not touched.
 989         fun remove_line(line: MDLine) do
 990                 if line.prev == null then
 991                         first_line = line.next
 992                 else
 993                         line.prev.next = line.next
 994                 end
 995                 if line.next == null then
 996                         last_line = line.prev
 997                 else
 998                         line.next.prev = line.prev
 999                 end
1000                 line.prev = null
1001                 line.next = null
1002         end
1003
1004         # Remove leading empty lines.
             # Return true if at least one line was removed.
1005         fun remove_leading_empty_lines: Bool do
1006                 var was_empty = false
1007                 var line = first_line
1008                 while line != null and line.is_empty do
1009                         remove_line line
1010                         line = first_line
1011                         was_empty = true
1012                 end
1013                 return was_empty
1014         end
1015
1016         # Remove trailing empty lines.
             # Return true if at least one line was removed.
1017         fun remove_trailing_empty_lines: Bool do
1018                 var was_empty = false
1019                 var line = last_line
1020                 while line != null and line.is_empty do
1021                         remove_line line
1022                         line = last_line
1023                         was_empty = true
1024                 end
1025                 return was_empty
1026         end
1027
1028         # Remove leading and trailing empty lines.
             # Return true if at least one line was removed on either side.
1029         fun remove_surrounding_empty_lines: Bool do
1030                 var was_empty = false
1031                 if remove_leading_empty_lines then was_empty = true
1032                 if remove_trailing_empty_lines then was_empty = true
1033                 return was_empty
1034         end
1035
1036         # Remove list markers and up to 4 leading spaces.
1037         # Used to clean nested lists.
             #
             # Empty lines are left untouched; `leading` is recomputed for the others.
1038         fun remove_list_indent(v: MarkdownProcessor) do
1039                 var line = first_line
1040                 while line != null do
1041                         if not line.is_empty then
1042                                 var kind = v.line_kind(line)
1043                                 if kind isa LineList then
1044                                         line.value = kind.extract_value(line)
1045                                 else
1046                                         line.value = line.value.substring_from(line.leading.min(4))
1047                                 end
1048                                 line.leading = line.process_leading
1049                         end
1050                         line = line.next
1051                 end
1052         end
1053
1054         # Collect block line text.
             #
             # Each line contributes its `text` followed by a newline; empty lines
             # contribute only the newline.
1055         fun text: String do
1056                 var text = new FlatBuffer
1057                 var line = first_line
1058                 while line != null do
1059                         if not line.is_empty then
1060                                 text.append line.text
1061                         end
1062                         text.append "\n"
1063                         line = line.next
1064                 end
1065                 return text.write_to_string
1066         end
1067 end
1068
1069 # Representation of a markdown block in the AST.
1070 # Each `Block` is linked to a `MDBlock` that contains markdown code.
1071 abstract class Block
1072
1073         # The markdown block `self` is related to.
1074         var block: MDBlock
1075
1076         # Output `self` using `v.decorator`.
             #
             # The default implementation only emits the content of `self`.
1077         fun emit(v: MarkdownEmitter) do v.emit_in(self)
1078
1079         # Emit the contents of `self`, lines or blocks.
             #
             # Surrounding empty lines are removed first. If lines remain they are
             # emitted and sub-blocks are ignored, otherwise sub-blocks are emitted.
1080         fun emit_in(v: MarkdownEmitter) do
1081                 block.remove_surrounding_empty_lines
1082                 if block.has_lines then
1083                         emit_lines(v)
1084                 else
1085                         emit_blocks(v)
1086                 end
1087         end
1088
1089         # Emit lines contained in `block`.
             #
             # Lines are first collected in a temporary buffer, without their leading
             # and trailing spaces, then the whole buffer is passed to `v.emit_text`.
             # A line ending with 2 or more spaces is followed by a line break.
1090         fun emit_lines(v: MarkdownEmitter) do
1091                 var tpl = v.push_buffer
1092                 var line = block.first_line
1093                 while line != null do
1094                         if not line.is_empty then
                                     # `substring` takes a count, not an end index: both the
                                     # leading and the trailing spaces must be subtracted,
                                     # or trailing spaces leak when `leading > 0`.
1095                                 v.add line.value.substring(line.leading, line.value.length - line.leading - line.trailing)
1096                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
1097                         end
1098                         if line.next != null then
1099                                 v.addn
1100                         end
1101                         line = line.next
1102                 end
1103                 v.pop_buffer
1104                 v.emit_text(tpl)
1105         end
1106
1107         # Emit sub-blocks contained in `block`.
1108         fun emit_blocks(v: MarkdownEmitter) do
1109                 var block = self.block.first_block
1110                 while block != null do
1111                         block.kind.emit(v)
1112                         block = block.next
1113                 end
1114         end
1115 end
1116
1117 # A block without any markdown specificities.
1118 #
1119 # Inherits the default implementation of `Block` unchanged,
1120 # this class is only used for typing purposes.
1121 class BlockNone
1122         super Block
1123 end
1124
1125 # A markdown blockquote.
1126 class BlockQuote
1127         super Block
1128
1129         redef fun emit(v) do v.decorator.add_blockquote(v, self)
1130
1131         # Remove blockquote markers.
             #
             # For each non-empty line of `block` whose first non-space character is
             # `>`, drop everything up to and including the `>` and one optional
             # following space, then recompute `leading`.
1132         private fun remove_block_quote_prefix(block: MDBlock) do
1133                 var line = block.first_line
1134                 while line != null do
1135                         if not line.is_empty then
1136                                 if line.value[line.leading] == '>' then
1137                                         var rem = line.leading + 1
1138                                         if line.leading + 1 < line.value.length and
1139                                            line.value[line.leading + 1] == ' ' then
1140                                                 rem += 1
1141                                         end
1142                                         line.value = line.value.substring_from(rem)
1143                                         line.leading = line.process_leading
1144                                 end
1145                         end
1146                         line = line.next
1147                 end
1148         end
1149 end
1150
1151 # A markdown code block.
1152 class BlockCode
1153         super Block
1154
1155         # Number of chars to skip at the beginning of the line.
1156         #
1157         # Block code lines start at 4 spaces.
1158         protected var line_start = 4
1159
1160         redef fun emit(v) do v.decorator.add_code(v, self)
1161
             # Emit each line from `line_start` to its end through
             # `Decorator::append_code`, followed by a newline.
             # Empty lines only produce the newline.
1162         redef fun emit_lines(v) do
1163                 var line = block.first_line
1164                 while line != null do
1165                         if not line.is_empty then
1166                                 v.decorator.append_code(v, line.value, line_start, line.value.length)
1167                         end
1168                         v.addn
1169                         line = line.next
1170                 end
1171         end
1172 end
1173
1174 # A markdown code-fence block.
1175 #
1176 # Uses the same implementation as `BlockCode` (only `line_start` differs),
1177 # this class is mainly used for typing purposes.
1178 class BlockFence
1179         super BlockCode
1180
1181         # Any string found after fence token.
1182         var meta: nullable Text
1183
1184         # Fence code lines start at 0 spaces.
1185         redef var line_start = 0
1186 end
1187
1188 # A markdown headline.
1189 class BlockHeadline
1190         super Block
1191
1192         redef fun emit(v) do v.decorator.add_headline(v, self)
1193
1194         # Depth of the headline used to determine the headline level.
1195         var depth = 0
1196
1197         # Remove headline marks from lines contained in `self`.
             #
             # Only the first line of `block` is processed: leading `#` (counted to
             # compute `depth`, capped to 6), the spaces after them, and closing `#`
             # with the spaces before them are stripped from its `value`.
             # Does nothing if `depth` is already set, or if `block` has no line or
             # starts with an empty line.
             # A line left with no text (e.g. `#` or `# #`) is flagged as empty.
1198         private fun transform_headline(block: MDBlock) do
1199                 if depth > 0 then return
1200                 var level = 0
1201                 var line = block.first_line
1202                 if line == null or line.is_empty then return
1203                 var start = line.leading
1204                 while start < line.value.length and line.value[start] == '#' do
1205                         level += 1
1206                         start += 1
1207                 end
1208                 while start < line.value.length and line.value[start] == ' ' do
1209                         start += 1
1210                 end
1211                 if start >= line.value.length then
1212                         line.is_empty = true
1213                 else
1214                         var nend = line.value.length - line.trailing - 1
                             # Never scan back past `start`: with only marks left
                             # (e.g. `# #`) `nend` would end up before `start` and
                             # `substring` would get a negative count.
1215                         while nend >= start and line.value[nend] == '#' do nend -= 1
1216                         while nend >= start and line.value[nend] == ' ' do nend -= 1
                             if nend < start then
                                     line.is_empty = true
                             else
1217                                 line.value = line.value.substring(start, nend - start + 1)
1218                                 line.leading = 0
1219                                 line.trailing = 0
                             end
1220                 end
1221                 depth = level.min(6)
1222         end
1223 end
1224
1225 # A markdown list item block.
     # Emitted through `Decorator::add_listitem`.
1226 class BlockListItem
1227         super Block
1228
1229         redef fun emit(v) do v.decorator.add_listitem(v, self)
1230 end
1231
1232 # A markdown list block.
1233 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
1234 abstract class BlockList
1235         super Block
1236
1237         # Split list block into list items sub-blocks.
             #
             # Starting from the second line, a new item begins at each `LineList`
             # line, or at a non-empty, non-indented line that follows an empty line:
             # the lines before it are split into a `BlockListItem`.
             # The remaining lines form the last item.
             #
             # NOTE(review): `block.first_line` is dereferenced without a null check,
             # so `block` presumably always has at least one line here - confirm.
1238         private fun init_block(v: MarkdownProcessor) do
1239                 var line = block.first_line
1240                 line = line.next
1241                 while line != null do
1242                         var t = v.line_kind(line)
1243                         if t isa LineList or
1244                            (not line.is_empty and (line.prev_empty and line.leading == 0 and
1245                            not (t isa LineList))) then
1246                                    var sblock = block.split(line.prev.as(not null))
1247                                    sblock.kind = new BlockListItem(sblock)
1248                         end
1249                         line = line.next
1250                 end
1251                 var sblock = block.split(block.last_line.as(not null))
1252                 sblock.kind = new BlockListItem(sblock)
1253         end
1254
1255         # Expand list items as paragraphs if needed.
             #
             # If at least one `BlockListItem` of `block` contains a `BlockParagraph`,
             # every `BlockNone` found directly in the list items is turned into a
             # `BlockParagraph`.
1256         private fun expand_paragraphs(block: MDBlock) do
1257                 var outer = block.first_block
1258                 var inner: nullable MDBlock
1259                 var has_paragraph = false
                     # first pass: look for a paragraph in any item
1260                 while outer != null and not has_paragraph do
1261                         if outer.kind isa BlockListItem then
1262                                 inner = outer.first_block
1263                                 while inner != null and not has_paragraph do
1264                                         if inner.kind isa BlockParagraph then
1265                                                 has_paragraph = true
1266                                         end
1267                                         inner = inner.next
1268                                 end
1269                         end
1270                         outer = outer.next
1271                 end
                     # second pass: promote plain blocks to paragraphs
1272                 if has_paragraph then
1273                         outer = block.first_block
1274                         while outer != null do
1275                                 if outer.kind isa BlockListItem then
1276                                         inner = outer.first_block
1277                                         while inner != null do
1278                                                 if inner.kind isa BlockNone then
1279                                                         inner.kind = new BlockParagraph(inner)
1280                                                 end
1281                                                 inner = inner.next
1282                                         end
1283                                 end
1284                                 outer = outer.next
1285                         end
1286                 end
1287         end
1288 end
1289
1290 # A markdown ordered list.
     # Emitted through `Decorator::add_orderedlist`.
1291 class BlockOrderedList
1292         super BlockList
1293
1294         redef fun emit(v) do v.decorator.add_orderedlist(v, self)
1295 end
1296
1297 # A markdown unordered list.
     # Emitted through `Decorator::add_unorderedlist`.
1298 class BlockUnorderedList
1299         super BlockList
1300
1301         redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
1302 end
1303
1304 # A markdown paragraph block.
     # Emitted through `Decorator::add_paragraph`.
1305 class BlockParagraph
1306         super Block
1307
1308         redef fun emit(v) do v.decorator.add_paragraph(v, self)
1309 end
1310
1311 # A markdown ruler.
     # Emitted through `Decorator::add_ruler`.
1312 class BlockRuler
1313         super Block
1314
1315         redef fun emit(v) do v.decorator.add_ruler(v, self)
1316 end
1317
1318 # XML blocks that can be found in markdown markup.
1319 class BlockXML
1320         super Block
1321
             # Emit each line `value` as is (no escaping, no trimming), followed by a
             # newline. Empty lines only produce the newline.
1322         redef fun emit_lines(v) do
1323                 var line = block.first_line
1324                 while line != null do
1325                         if not line.is_empty then v.add line.value
1326                         v.addn
1327                         line = line.next
1328                 end
1329         end
1330 end
1331
1332 # A markdown line.
     #
     # Lines are chained through `prev` and `next` inside a `MDBlock`.
     # `leading` and `trailing` cache the number of spaces around the text of `value`.
1333 class MDLine
1334
1335         # Location of `self` in the original input.
1336         var location: MDLocation
1337
1338         # Text contained in this line.
1339         var value: String is writable
1340
1341         # Is this line empty?
1342         # Lines containing only spaces are considered empty.
1343         var is_empty: Bool = true is writable
1344
1345         # Previous line in `MDBlock` or null if first line.
1346         var prev: nullable MDLine = null is writable
1347
1348         # Next line in `MDBlock` or null if last line.
1349         var next: nullable MDLine = null is writable
1350
1351         # Is the previous line empty?
1352         var prev_empty: Bool = false is writable
1353
1354         # Is the next line empty?
1355         var next_empty: Bool = false is writable
1356
1357         # Initialize a new MDLine from its string value
             #
             # Computes `leading`; if the line is not made only of spaces, it is
             # flagged as non-empty and `trailing` is computed too.
1358         init do
1359                 self.leading = process_leading
1360                 if leading != value.length then
1361                         self.is_empty = false
1362                         self.trailing = process_trailing
1363                 end
1364         end
1365
1366         # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
             #
             # The `next_empty`/`prev_empty` flags of the neighbours are updated too.
1367         fun clear do
1368                 value = ""
1369                 leading = 0
1370                 trailing = 0
1371                 is_empty = true
1372                 if prev != null then prev.next_empty = true
1373                 if next != null then next.prev_empty = true
1374         end
1375
1376         # Number of leading spaces on this line.
1377         var leading: Int = 0 is writable
1378
1379         # Compute `leading` depending on `value`.
             #
             # If `value` is empty or only made of spaces, the line is reset with
             # `clear` and 0 is returned, so `value`, `leading` and `is_empty` stay
             # consistent. Otherwise return the number of leading spaces.
1380         fun process_leading: Int do
1381                 var count = 0
1382                 var value = self.value
1383                 while count < value.length and value[count] == ' ' do count += 1
                     # Test the freshly computed `count`: the `leading` attribute still
                     # describes the previous `value` at this point, and comparing it
                     # could wipe a line with text (e.g. `   >abc` once unquoted).
1384                 if count == value.length then
                             clear
                             return 0
                     end
1385                 return count
1386         end
1387
1388         # Number of trailing spaces on this line.
1389         var trailing: Int = 0 is writable
1390
1391         # Compute `trailing` depending on `value`.
             #
             # Return `value.length` if `value` is empty or only made of spaces.
1392         fun process_trailing: Int do
1393                 var count = 0
1394                 var value = self.value
                     # bound the scan so it never reads before the first character
1395                 while count < value.length and value[value.length - count - 1] == ' ' do
1396                         count += 1
1397                 end
1398                 return count
1399         end
1400
1401         # Count the amount of `ch` in this line.
1402         # Return a value > 0 if this line only consists of `ch` and spaces.
1403         fun count_chars(ch: Char): Int do
1404                 var count = 0
1405                 for c in value do
1406                         if c == ' ' then
1407                                 continue
1408                         end
1409                         if c == ch then
1410                                 count += 1
1411                                 continue
1412                         end
1413                         count = 0
1414                         break
1415                 end
1416                 return count
1417         end
1418
1419         # Count the amount of `ch` at the start of this line ignoring spaces.
             #
             # Spaces are skipped wherever they appear; counting stops at the first
             # character that is neither a space nor `ch`.
1420         fun count_chars_start(ch: Char): Int do
1421                 var count = 0
1422                 for c in value do
1423                         if c == ' ' then
1424                                 continue
1425                         end
1426                         if c == ch then
1427                                 count += 1
1428                         else
1429                                 break
1430                         end
1431                 end
1432                 return count
1433         end
1434
1435         # Last XML line if any.
             # Set by `check_html` and `read_xml_comment`.
1436         private var xml_end_line: nullable MDLine = null
1437
1438         # Does `value` contain valid XML markup?
             #
             # Starting at `leading`, accepts either an XML comment or an HTML block
             # tag (`is_html_block`) whose matching closing tag is found on this line
             # or on one of the following lines. On success `xml_end_line` is set to
             # the line where the markup ends.
1439         private fun check_html: Bool do
1440                 var tags = new Array[String]
1441                 var tmp = new FlatBuffer
1442                 var pos = leading
1443                 if pos + 1 < value.length and value[pos + 1] == '!' then
1444                         if read_xml_comment(self, pos) > 0 then return true
1445                 end
1446                 pos = value.read_xml(tmp, pos, false)
1447                 var tag: String
1448                 if pos > -1 then
1449                         tag = tmp.xml_tag
1450                         if not tag.is_html_block then
1451                                 return false
1452                         end
1453                         if tag == "hr" then
1454                                 xml_end_line = self
1455                                 return true
1456                         end
1457                         tags.add tag
1458                         var line: nullable MDLine = self
1459                         while line != null do
1460                                 while pos < line.value.length and line.value[pos] != '<' do
1461                                         pos += 1
1462                                 end
1463                                 if pos >= line.value.length then
1464                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1465                                                 tags.pop
1466                                                 if tags.is_empty then
1467                                                         xml_end_line = line
1468                                                         break
1469                                                 end
1470                                         end
1471                                         line = line.next
1472                                         pos = 0
1473                                 else
1474                                         tmp = new FlatBuffer
1475                                         var new_pos = line.value.read_xml(tmp, pos, false)
1476                                         if new_pos > 0 then
1477                                                 tag = tmp.xml_tag
1478                                                 if tag.is_html_block and not tag == "hr" then
1479                                                         if tmp[1] == '/' then
1480                                                                 if tags.last != tag then
1481                                                                         return false
1482                                                                 end
1483                                                                 tags.pop
1484                                                         else
1485                                                                 tags.add tag
1486                                                         end
1487                                                 end
1488                                                 if tags.is_empty then
1489                                                         xml_end_line = line
1490                                                         break
1491                                                 end
1492                                                 pos = new_pos
1493                                         else
1494                                                 pos += 1
1495                                         end
1496                                 end
1497                         end
1498                         return tags.is_empty
1499                 end
1500                 return false
1501         end
1502
1503         # Read a XML comment.
1504         # Used by `check_html`.
             #
             # `start` is the position of the `<` of `<!--` in `first_line`.
             # Return the position just after the closing `-->` (which may be on a
             # following line, then stored in `first_line.xml_end_line`) or -1 if
             # there is no comment or it is never closed.
1505         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1506                 var line: nullable MDLine = first_line
1507                 if start + 3 < line.value.length then
                             # the dashes are relative to `start`, not to the line start,
                             # so that an indented comment is recognized too
1508                         if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
1509                                 var pos = start + 4
1510                                 while line != null do
1511                                         while pos < line.value.length and line.value[pos] != '-' do
1512                                                 pos += 1
1513                                         end
1514                                         if pos == line.value.length then
1515                                                 line = line.next
1516                                                 pos = 0
1517                                         else
1518                                                 if pos + 2 < line.value.length then
1519                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1520                                                                 first_line.xml_end_line = line
1521                                                                 return pos + 3
1522                                                         end
1523                                                 end
1524                                                 pos += 1
1525                                         end
1526                                 end
1527                         end
1528                 end
1529                 return -1
1530         end
1531
1532         # Extract the text of `self` without leading and trailing.
             #
             # `substring` takes a count: both `leading` and `trailing` are subtracted
             # from the length so trailing spaces are dropped even when `leading > 0`.
1533         fun text: String do return value.substring(leading, value.length - leading - trailing)
1534 end
1535
# A kind of markdown line.
#
# Each kind implements `process` to handle the lines it is responsible for.
interface Line

        # Process the current line of `v`.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1543
# A blank markdown line.
class LineEmpty
        super Line

        # Nothing to build: the processor just moves on to the following line.
        redef fun process(v) do
                var blank = v.current_line
                v.current_line = blank.next
        end
end
1552
# A line with no specific markdown meaning.
# It is mostly consumed as a piece of a larger construct such as a paragraph or a list.
class LineOther
        super Line

        # Gather the run of lines starting at the current one into a new block.
        #
        # The run stops at the first empty line, at the end of the lines or on a
        # line that opens another construct.
        # The new block is either a `BlockNone` or a `BlockParagraph`.
        redef fun process(v) do
                var cursor = v.current_line
                var after_blank = cursor.prev_empty
                # Look for the line that ends the run.
                while cursor != null and not cursor.is_empty do
                        var kind = v.line_kind(cursor)
                        # A list item interrupts the run inside a list and in extended mode.
                        if kind isa LineList and (v.in_list or v.ext_mode) then break
                        # Code and fences interrupt it in extended mode only.
                        if v.ext_mode and (kind isa LineCode or kind isa LineFence) then break
                        # `LineHeadline1` and `LineHeadline2` are specializations of `LineHeadline`.
                        if kind isa LineHeadline or kind isa LineHR or
                           kind isa LineBlockquote or kind isa LineXML then break
                        cursor = cursor.next
                end
                var ends_on_blank = cursor != null and cursor.is_empty
                # Pick the line at which the current block is split.
                var last: MDLine
                if cursor == null then
                        # No line left: split at the last line of the current block.
                        last = v.current_block.last_line.as(not null)
                else if ends_on_blank then
                        # Split on the empty line itself.
                        last = cursor
                else
                        # Stopped on another construct: split just before it.
                        last = cursor.prev.as(not null)
                end
                var block = v.current_block.split(last)
                if v.in_list and not after_blank and not ends_on_blank then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1602
# A markdown code line.
class LineCode
        super Line

        # Turn the run of code lines (and of the empty lines among them) into a `BlockCode`.
        redef fun process(v) do
                # Advance while the line is empty or is another code line.
                var cursor = v.current_line
                while cursor != null do
                        if not cursor.is_empty and not v.line_kind(cursor) isa LineCode then break
                        cursor = cursor.next
                end
                # Split just before the line that ended the run, or at the very end.
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var block = v.current_block.split(last)
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1625
# A raw XML line.
class LineXML
        super Line

        # Isolate the lines from the current one up to its `xml_end_line` in a `BlockXML`.
        redef fun process(v) do
                var first = v.current_line
                # Split off what precedes the XML, if anything.
                var before = first.prev
                if before != null then v.current_block.split(before)
                var last = first.xml_end_line.as(not null)
                var block = v.current_block.split(last)
                block.kind = new BlockXML(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1640
# A blockquote line.
class LineBlockquote
        super Line

        # Build a `BlockQuote` from the quoted lines then process its content recursively.
        redef fun process(v) do
                var cursor = v.current_line
                # The quote goes on until a non-empty, non-indented line that follows
                # an empty line and that is not itself a blockquote line.
                while cursor != null do
                        if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 then
                                if not v.line_kind(cursor) isa LineBlockquote then break
                        end
                        cursor = cursor.next
                end
                # Split just before the line that ended the quote, or at the very end.
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var block = v.current_block.split(last)
                var quote = new BlockQuote(block)
                block.kind = quote
                block.remove_surrounding_empty_lines
                quote.remove_block_quote_prefix(block)
                v.current_line = cursor
                # Process the content of the quote.
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1670
# A horizontal ruler line.
class LineHR
        super Line

        # Isolate the current line in a `BlockRuler`.
        redef fun process(v) do
                var ruler = v.current_line
                # Split off what precedes the ruler, if anything.
                var before = ruler.prev
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(ruler.as(not null))
                block.kind = new BlockRuler(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1684
# A fence line, delimiting a fenced code block.
class LineFence
        super Line

        # Build a `BlockFence` from the current fence line up to the next fence line.
        #
        # If there is no other fence line, the block extends to the last line of the current block.
        redef fun process(v) do
                # Look for the next fence line, starting right after the current one.
                var cursor = v.current_line.next
                while cursor != null and not v.line_kind(cursor) isa LineFence do
                        cursor = cursor.next
                end
                # Step past the fence that was found so that it is included in the split.
                if cursor != null then cursor = cursor.next
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var block = v.current_block.split(last)
                # The meta is read from the first line before that line is cleared.
                var opening = block.first_line
                var meta = opening.value.meta_from_fence
                block.kind = new BlockFence(block, meta)
                opening.clear
                # The last line is only cleared when it is a fence line.
                var closing = block.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                block.remove_surrounding_empty_lines
                v.current_line = cursor
        end
end
1717
# A headline line.
class LineHeadline
        super Line

        # Isolate the current line in a `BlockHeadline`.
        redef fun process(v) do
                var title = v.current_line
                # Split off what precedes the headline, if anything.
                var before = title.prev
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1734
# A headline line of level 1.
class LineHeadline1
        super LineHeadline

        # Isolate the current line in a `BlockHeadline` of depth 1.
        redef fun process(v) do
                var title = v.current_line
                # Split off what precedes the headline, if anything.
                var before = title.prev
                if before != null then v.current_block.split(before)
                # The line after the title is cleared (presumably the underline marker).
                var marker = title.next
                marker.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1753
# A headline line of level 2.
class LineHeadline2
        super LineHeadline

        # Isolate the current line in a `BlockHeadline` of depth 2.
        redef fun process(v) do
                var title = v.current_line
                # Split off what precedes the headline, if anything.
                var before = title.prev
                if before != null then v.current_block.split(before)
                # The line after the title is cleared (presumably the underline marker).
                var marker = title.next
                marker.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1772
# A list line.
# Factorizes what ordered and unordered lists have in common.
class LineList
        super Line

        # Build the list block then process each of its sub-blocks recursively.
        redef fun process(v) do
                var cursor = v.current_line
                # The list goes on until a non-empty, non-indented line that follows
                # an empty line and that is not a list line.
                while cursor != null do
                        var kind = v.line_kind(cursor)
                        if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 then
                                if not kind isa LineList then break
                        end
                        cursor = cursor.next
                end
                # Split just before the line that ended the list, or at the very end.
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var list = v.current_block.split(last)
                var kind = block_kind(list)
                list.kind = kind
                # The boundary flags are reset before and after the empty lines removal.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # Process every sub-block of the list.
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = cursor
        end

        # Create the kind of list block matching this kind of line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract the string value of `line`.
        protected fun extract_value(line: MDLine): String is abstract
end
1818
# A line of an ordered list.
class LineOList
        super LineList

        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var dot = line.value.index_of('.')
                return line.value.substring_from(dot + 2)
        end
end
1829
# A line of an unordered list.
class LineUList
        super LineList

        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading part of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1840
# A token represents a character of the markdown input.
# Some tokens have a specific markup behaviour that is implemented by their `emit`.
abstract class Token

        # Location of `self` in the original input.
        var location: MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output this token with `v`.
        #
        # By default, `char` is added as is.
        fun emit(v: MarkdownEmitter) do
                v.addc char
        end
end
1857
# A token with no specific meaning.
# It keeps the default `emit` of `Token`.
class TokenNone
        super Token
end
1862
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        # If there is no match, `char` is added as is.
        redef fun emit(v) do
                # The inner text is emitted in a temporary buffer.
                var inner = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, inner)
                v.current_pos = closing
        end
end
1879
# An emphasis token of the star flavour.
class TokenEmStar
        super TokenEm
end
1884
# An emphasis token of the underscore flavour.
class TokenEmUnderscore
        super TokenEm
end
1889
# A strong emphasis token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        # If there is no match, `char` is added as is.
        redef fun emit(v) do
                # The inner text starts two characters after `pos`.
                var inner = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, inner)
                v.current_pos = closing + 1
        end
end
1906
# A strong token of the star flavour.
class TokenStrongStar
        super TokenStrong
end
1911
# A strong token of the underscore flavour.
class TokenStrongUnderscore
        super TokenStrong
end
1916
# A span code token.
# Factorizes the work shared by single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span code.
        # If there is no match, `char` is added as is.
        redef fun emit(v) do
                var from = pos + next_pos + 1
                var text = v.current_text.as(not null)
                var upto = v.processor.find_token(text, from, self)
                if upto <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = upto + next_pos
                # Drop the spaces surrounding the code.
                while from < upto and text[from] == ' ' do from += 1
                # Nothing but spaces: there is nothing to add.
                if from >= upto then return
                while text[upto - 1] == ' ' do upto -= 1
                v.decorator.add_span_code(v, text, from, upto)
        end

        # Number of extra characters of the delimiter, in addition to the first one.
        private fun next_pos: Int is abstract
end
1939
# A single quoted span code token.
class TokenCodeSingle
        super TokenCode

        redef fun next_pos do
                return 0
        end
end
1946
# A double quoted span code token.
class TokenCodeDouble
        super TokenCode

        redef fun next_pos do
                return 1
        end
end
1953
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the construct with `emit_hyper` if `check_link` accepts it.
        # Otherwise, `char` is added as is.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # Parse the construct starting at `start` in the current text of `v` and
        # fill `name`, `link`, `comment` and `is_abbrev` accordingly.
        # Three forms are handled: `[name](link "comment")`, `[name][id]` and
        # `[name]` alone, the last two being resolved with `v.processor.link_refs`.
        #
        # Return the position of the last character of the construct,
        # or `-1` if the construct is not a valid link.
        # `out` is not used.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                # Skip the opening delimiter: one character for a link, two otherwise.
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # Nothing follows the name: it must be a known reference.
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline form: `(link "comment")`
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        link = tmp.write_to_string
                        # The `>` may be the last character of the text:
                        # there is no `)` left to read then.
                        if pos >= md.length then return -1
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # Only spaces were left: the construct is never closed.
                                if pos < start then return -1
                                if md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference form: `[id]`, an empty id stands for the name.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Name followed by something else: it must be a known reference.
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
2072
# A link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the link is one and has a title, a regular link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                else
                        v.decorator.add_link(v, link.as(not null), name.as(not null), title)
                end
        end
end
2085
# An image token.
class TokenImage
        super TokenLinkOrImage

        redef fun emit_hyper(v) do
                var source = link.as(not null)
                var alt = name.as(not null)
                v.decorator.add_image(v, source, alt, comment)
        end
end
2094
# An HTML/XML token.
class TokenHTML
        super Token

        # Add the HTML found at the current position.
        # If `check_html` rejects it, `char` is escaped instead.
        redef fun emit(v) do
                var html = new FlatBuffer
                var next = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if next <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = next
        end

        # Is the HTML valid?
        # Also take care of link shortcuts.
        #
        # A link shortcut is emitted directly through the decorator of `v`,
        # while inline HTML is appended to `out`.
        # Return the new position in `md`, or `-1` if nothing valid was found.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # Look for an auto link first.
                var candidate = new FlatBuffer
                var pos = md.read_until(candidate, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and candidate.is_link_prefix then
                        pos = md.read_until(candidate, pos, '>')
                        if pos != -1 then
                                var link = candidate.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # Then look for inline HTML.
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2132
# An HTML entity token.
class TokenEntity
        super Token

        # Add the entity found at `pos` as is.
        # If `check_entity` rejects it, `char` is escaped instead.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` into `out` then check that
        # what was read is a numeric entity (`&#123;` or `&#x1F;`) or a named one
        # made of letters and digits only.
        #
        # On success, the `;` is appended to `out` and its position is returned.
        # Return `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # Hexadecimal entity: at least one digit is needed after the `x`.
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                # Decimal entity
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        # Named entity
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check that the name is a known entity
                end
                out.add ';'
                return pos
        end
end
2186
# An escape token.
class TokenEscape
        super Token

        # Move to the following character and add it as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
2196
# A strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        # Emit the text up to the matching token as a strike.
        # If there is no match, `char` is added as is.
        redef fun emit(v) do
                # The inner text starts two characters after `pos`.
                var inner = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, inner)
                v.current_pos = closing + 1
        end
end
2215
2216 redef class Text
2217
2218         # Get the position of the next non-space character.
2219         private fun skip_spaces(start: Int): Int do
2220                 var pos = start
2221                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2222                         pos += 1
2223                 end
2224                 if pos < length then return pos
2225                 return -1
2226         end
2227
2228         # Read `self` until `nend` and append it to the `out` buffer.
2229         # Escape markdown special chars.
2230         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2231                 var pos = start
2232                 while pos < length do
2233                         var c = self[pos]
2234                         if c == '\\' and pos + 1 < length then
2235                                 pos = escape(out, self[pos + 1], pos)
2236                         else
2237                                 var end_reached = false
2238                                 for n in nend do
2239                                         if c == n then
2240                                                 end_reached = true
2241                                                 break
2242                                         end
2243                                 end
2244                                 if end_reached then break
2245                                 out.add c
2246                         end
2247                         pos += 1
2248                 end
2249                 if pos == length then return -1
2250                 return pos
2251         end
2252
2253         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2254         # No escape is made.
2255         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2256                 var pos = start
2257                 while pos < length do
2258                         var c = self[pos]
2259                         var end_reached = false
2260                         for n in nend do
2261                                 if c == n then
2262                                         end_reached = true
2263                                         break
2264                                 end
2265                         end
2266                         if end_reached then break
2267                         out.add c
2268                         pos += 1
2269                 end
2270                 if pos == length then return -1
2271                 return pos
2272         end
2273
# Copy `self` from `from` into `out` until one of the `to` chars is met
# outside of a quoted string.
#
# Single and double quoted strings are copied verbatim: a delimiter found
# inside a string does not stop the reading, and a backslash protects the
# char that follows it. No HTML escaping is done here.
#
# Return the position of the delimiter (which is not appended to `out`),
# or -1 if the end of `self` is reached first.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
        var pos = from
        var in_str = false
        var str_char: nullable Char = null
        while pos < length do
                var c = self[pos]
                if in_str then
                        if c == '\\' then
                                # Copy the backslash then the char it protects, so that
                                # an escaped quote neither closes the string nor gets lost.
                                out.add c
                                pos += 1
                                if pos < length then
                                        out.add self[pos]
                                        pos += 1
                                end
                                continue
                        end
                        # Only the quote that opened the string can close it;
                        # the other kind of quote is plain content.
                        if c == str_char then in_str = false
                        out.add c
                        pos += 1
                        continue
                end
                if c == '"' or c == '\'' then
                        # A quote opens a string even when it is also listed in `to`.
                        in_str = true
                        str_char = c
                else if to.has(c) then
                        break
                end
                out.add c
                pos += 1
        end
        if pos == length then return -1
        return pos
end
2319
# Copy the XML tag whose `<` is at `start` into the `out` buffer.
#
# Return the position of the closing `>`, or -1 when no complete tag can
# be read (some content may already have been appended to `out`).
# A `<!` prefix is copied as is and the position of the `!` is returned.
#
# With `safe_mode`, the tag name is checked before being copied:
# a name refused by `is_valid_html_tag` makes the reading fail after
# `&lt;` is appended, and a name accepted by `is_html_unsafe` gets its
# angle brackets written as `&lt;` and `&gt;`.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
        var marker_pos = start + 1
        if marker_pos >= length then return -1
        var marker = self[marker_pos]
        if marker == '!' then
                out.append "<!"
                return marker_pos
        end
        var is_close_tag = marker == '/'
        # The tag name starts right after `<` or `</`.
        var pos = marker_pos
        if is_close_tag then pos += 1
        var is_valid = true
        if not safe_mode then
                out.add '<'
                if is_close_tag then out.add '/'
                pos = read_xml_until(out, pos, ' ', '/', '>')
                if pos == -1 then return -1
        else
                # Read the name aside so it can be checked before being emitted.
                var name_buf = new FlatBuffer
                pos = read_xml_until(name_buf, pos, ' ', '/', '>')
                if pos == -1 then return -1
                var tag = name_buf.write_to_string.trim.to_lower
                if not tag.is_valid_html_tag then
                        out.append "&lt;"
                        return -1
                end
                if tag.is_html_unsafe then
                        is_valid = false
                        out.append "&lt;"
                else
                        out.append "<"
                end
                if is_close_tag then out.add '/'
                out.append name_buf
        end
        # Copy what follows the name up to the end of the tag.
        pos = read_xml_until(out, pos, '/', '>')
        if pos == -1 then return -1
        if self[pos] == '/' then
                out.append " /"
                pos = self.read_xml_until(out, pos + 1, '>')
                if pos == -1 then return -1
        end
        if self[pos] != '>' then return -1
        if is_valid then
                out.add '>'
        else
                out.append "&gt;"
        end
        return pos
end
2378
# Copy a markdown link address, starting at `start`, into the `out` buffer.
#
# Parentheses are counted from a depth of 1. Reading stops on the `)` that
# brings the depth back to 0, or on a space met while the depth is 1.
# A backslash followed by another char is handled by `escape`.
#
# Return the position of the stopping char, or -1 if `self` ends first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
        var depth = 1
        var pos = start
        while pos < length do
                var c = self[pos]
                if c == '\\' and pos + 1 < length then
                        # `escape` tells where the escape sequence ends.
                        pos = escape(out, self[pos + 1], pos) + 1
                        continue
                end
                if c == '(' then
                        depth += 1
                else if c == ')' then
                        depth -= 1
                        if depth == 0 then return pos
                else if c == ' ' and depth == 1 then
                        return pos
                end
                out.add c
                pos += 1
        end
        if pos == length then return -1
        return pos
end
2405
# Copy a bracketed markdown link text, starting at `start`, into the `out` buffer.
#
# Brackets are counted from a depth of 1: nested `[`...`]` pairs are copied,
# and reading stops on the `]` that brings the depth back to 0.
#
# Return the position of that `]`, or -1 if `self` ends first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
        var depth = 1
        var pos = start
        while pos < length do
                var c = self[pos]
                if c == '[' then
                        depth += 1
                else if c == ']' then
                        depth -= 1
                        # The closing bracket itself is not copied.
                        if depth == 0 then return pos
                end
                out.add c
                pos += 1
        end
        if pos == length then return -1
        return pos
end
2432
# Lowercased name of the XML tag held in `self`.
#
# The first char is skipped, as well as a `/` found right after it.
# The name is made of the letters and digits that follow; the last char
# of `self` is never part of it.
private fun xml_tag: String do
        var first = 1
        if length > 1 and self[1] == '/' then first = 2
        var name = new FlatBuffer
        for i in [first..length - 1[ do
                var c = self[i]
                if not (c.is_digit or c.is_letter) then break
                name.add c
        end
        return name.write_to_string.to_lower
end
2444
# Is `self` a well formed HTML tag name?
#
# A name must start with a letter and contain only letters and digits,
# so that names like `h1` (listed in `html_block_tags`) are accepted.
# The empty string is not a valid name.
private fun is_valid_html_tag: Bool do
        if is_empty then return false
        # A leading digit is refused.
        if not self[0].is_alpha then return false
        for c in self do
                if not (c.is_alpha or c.is_digit) then return false
        end
        return true
end
2452
# Handle the markdown escape sequence made of a backslash at `pos` followed by `c`.
#
# If `c` can be escaped in markdown, it is appended to `out` without the
# backslash and `pos + 1` is returned.
# Otherwise only the backslash is appended and `pos` is returned.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
        var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.',
                '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
        if not escapable.has(c) then
                out.add '\\'
                return pos
        end
        out.add c
        return pos + 1
end
2465
# Text found after the opening marks of a fence, trimmed.
#
# Leading spaces, backquotes and tildes are skipped.
# Return `null` when `self` contains nothing else.
private fun meta_from_fence: nullable Text do
        var i = 0
        var n = chars.length
        while i < n do
                var c = chars[i]
                if c == ' ' or c == '`' or c == '~' then
                        i += 1
                        continue
                end
                return substring_from(i).trim
        end
        return null
end
2476
# Init a `MDLocation` instance at `pos` in `self`.
#
# Lines are counted from 1. The column is the number of chars read since
# the last newline, the char at `pos` included; a newline at `pos` gives
# column 0 of the next line.
# `pos` may be equal to `length`: the location is then one column after
# the last char.
private fun pos_to_loc(pos: Int): MDLocation do
        assert pos <= length
        var line = 1
        var col = 0
        var i = 0
        while i <= pos do
                col += 1
                # No char can be read at `length`, which the assert above allows.
                if i < length and self[i] == '\n' then
                        line += 1
                        col = 0
                end
                i += 1
        end
        return new MDLocation(line, col, line, col)
end
2494
# Is `self` listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
        var name = self.write_to_string
        return html_unsafe_tags.has(name)
end
2497
# Is `self` an HTML block element, i.e. listed in `html_block_tags`?
private fun is_html_block: Bool do
        var name = self.write_to_string
        return html_block_tags.has(name)
end
2500
# Is `self` a link prefix, i.e. listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
        var prefix = self.write_to_string
        return html_link_prefixes.has(prefix)
end
2503
# Names of the HTML elements looked up by `is_html_unsafe`.
#
# The same array is returned by every call.
private fun html_unsafe_tags: Array[String] do
        var tags = once ["applet", "head", "body", "frame", "frameset", "iframe",
                "script", "object"]
        return tags
end
2505
# Names of the HTML elements looked up by `is_html_block`.
#
# The same array is returned by every call.
private fun html_block_tags: Array[String] do
        var tags = once ["address", "article", "aside", "audio", "blockquote",
                "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure",
                "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header",
                "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section",
                "table", "tfoot", "ul", "video"]
        return tags
end
2507
# Prefixes looked up by `is_link_prefix`.
#
# The same array is returned by every call.
private fun html_link_prefixes: Array[String] do
        var prefixes = once ["http", "https", "ftp", "ftps"]
        return prefixes
end
2509 end
2510
redef class String

        # Parse `self` as markdown and return its HTML representation.
        #
        # The rendering is done by a new `MarkdownProcessor`.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end