lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # Work in extended mode (default).
  34         #
  35         # Behavior changes when using extended mode:
  36         #
  37         # * Lists and code blocks end a paragraph
  38         #
  39         #   In normal markdown the following:
  40         #
  41         # ~~~md
  42         # This is a paragraph
  43         # * and this is not a list
  44         # ~~~
  45         #
  46         #   Will produce:
  47         #
  48         # ~~~html
  49         # <p>This is a paragraph
  50         # * and this is not a list</p>
  51         # ~~~
  52         #
  53         #   When using extended mode this changes to:
  54         #
  55         # ~~~html
  56         # <p>This is a paragraph</p>
  57         # <ul>
  58         # <li>and this is not a list</li>
  59         # </ul>
  60         # ~~~
  61         #
  62         # * Fenced code blocks
  63         #
  64         #   If you don't want to indent all your code with 4 spaces,
  65         #   you can wrap your code in ``` ``` ``` or `~~~`.
  66         #
  67         #   Here's an example:
  68         #
  69         # ~~~md
  70         # fun test do
  71         #    print "Hello World!"
  72         # end
  73         # ~~~
  74         #
  75         # * Code blocks meta
  76         #
  77         #   If you want to use syntax highlighting tools, most of them need to know what kind
  78         #   of language they are highlighting.
  79         #   You can add an optional language identifier after the fence declaration to output
  80         #   it in the HTML render.
  81         #
  82         # ```nit
  83         # import markdown
  84         #
  85         # print "# Hello World!".md_to_html
  86         # ```
  87         #
  88         #   Becomes
  89         #
  90         # ~~~html
  91         # <pre class="nit"><code>import markdown
  92         #
  93         # print "Hello World!".md_to_html
  94         # </code></pre>
  95         # ~~~
  96         #
  97         # * Underscores (Emphasis)
  98         #
  99         #   Underscores in the middle of a word like:
 100         #
 101         # ~~~md
 102         # Con_cat_this
 103         # ~~~
 104         #
 105         #   normally produces this:
 106         #
 107         # ~~~html
 108         # <p>Con<em>cat</em>this</p>
 109         # ~~~
 110         #
 111         #   With extended mode they don't result in emphasis.
 112         #
 113         # ~~~html
 114         # <p>Con_cat_this</p>
 115         # ~~~
 116         #
 117         # * Strikethrough
 118         #
 119         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 120         #   a strikethrough span is marked with `~~`.
 121         #
 122         # ~~~md
 123         # ~~Mistaken text.~~
 124         # ~~~
 125         #
 126         #   becomes
 127         #
 128         # ~~~html
 129         # <del>Mistaken text.</del>
 130         # ~~~
 131         var ext_mode = true
 132
 133         # Disable attaching MDLocation to Tokens
 134         #
 135         # Locations are useful for some tools but they may
 136         # cause an important time and space overhead.
 137         #
 138         # Default = `false`
 139         var no_location = false is writable
 140
 141         # Process the mardown `input` string and return the processed output.
 142         fun process(input: String): Writable do
 143                 # init processor
 144                 link_refs.clear
 145                 last_link_ref = null
 146                 current_line = null
 147                 current_block = null
 148                 # parse markdown
 149                 var parent = read_lines(input)
 150                 parent.remove_surrounding_empty_lines
 151                 recurse(parent, false)
 152                 # output processed text
 153                 return emit(parent.kind)
 154         end
 155
 156         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 157         private fun read_lines(input: String): MDBlock do
 158                 var block = new MDBlock(new MDLocation(1, 1, 1, 1))
 159                 var value = new FlatBuffer
 160                 var i = 0
 161
 162                 var line_pos = 0
 163                 var col_pos = 0
 164
 165                 while i < input.length do
 166                         value.clear
 167                         var pos = 0
 168                         var eol = false
 169                         while not eol and i < input.length do
 170                                 col_pos += 1
 171                                 var c = input[i]
 172                                 if c == '\n' then
 173                                         eol = true
 174                                 else if c == '\r' then
 175                                 else if c == '\t' then
 176                                         var np = pos + (4 - (pos & 3))
 177                                         while pos < np do
 178                                                 value.add ' '
 179                                                 pos += 1
 180                                         end
 181                                 else
 182                                         pos += 1
 183                                         value.add c
 184                                 end
 185                                 i += 1
 186                         end
 187                         line_pos += 1
 188
 189                         var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
 190                         var line = new MDLine(loc, value.write_to_string)
 191                         var is_link_ref = check_link_ref(line)
 192                         # Skip link refs
 193                         if not is_link_ref then block.add_line line
 194                         col_pos = 0
 195                 end
 196                 return block
 197         end
 198
 199         # Check if line is a block link definition.
 200         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 201         private fun check_link_ref(line: MDLine): Bool do
 202                 var md = line.value
 203                 var is_link_ref = false
 204                 var id = new FlatBuffer
 205                 var link = new FlatBuffer
 206                 var comment = new FlatBuffer
 207                 var pos = -1
 208                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 209                         pos = line.leading + 1
 210                         pos = md.read_until(id, pos, ']')
 211                         if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
 212                                 if line.value[pos + 1] == ':' then
 213                                         pos += 2
 214                                         pos = md.skip_spaces(pos)
 215                                         if pos >= 0 and line.value[pos] == '<' then
 216                                                 pos += 1
 217                                                 pos = md.read_until(link, pos, '>')
 218                                                 pos += 1
 219                                         else if pos >= 0 then
 220                                                 pos = md.read_until(link, pos, ' ', '\n')
 221                                         end
 222                                         if not link.is_empty then
 223                                                 pos = md.skip_spaces(pos)
 224                                                 if pos > 0 and pos < line.value.length then
 225                                                         var c = line.value[pos]
 226                                                         if c == '\"' or c == '\'' or c == '(' then
 227                                                                 pos += 1
 228                                                                 if c == '(' then
 229                                                                         pos = md.read_until(comment, pos, ')')
 230                                                                 else
 231                                                                         pos = md.read_until(comment, pos, c)
 232                                                                 end
 233                                                                 if pos > 0 then is_link_ref = true
 234                                                         end
 235                                                 else
 236                                                         is_link_ref = true
 237                                                 end
 238                                         end
 239                                 end
 240                         end
 241                 end
 242                 if is_link_ref and not id.is_empty and not link.is_empty then
 243                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 244                         add_link_ref(id.write_to_string, lr)
 245                         if comment.is_empty then last_link_ref = lr
 246                         return true
 247                 else
 248                         comment = new FlatBuffer
 249                         if not line.is_empty and last_link_ref != null then
 250                                 pos = line.leading
 251                                 var c = line.value[pos]
 252                                 if c == '\"' or c == '\'' or c ==  '(' then
 253                                         pos += 1
 254                                         if c == '(' then
 255                                                 pos = md.read_until(comment, pos, ')')
 256                                         else
 257                                                 pos = md.read_until(comment, pos, c)
 258                                         end
 259                                 end
 260                                 var last_link_ref = self.last_link_ref
 261                                 if not comment.is_empty and last_link_ref != null then
 262                                         last_link_ref.title = comment.write_to_string
 263                                 end
 264                         end
 265                         if comment.is_empty then return false
 266                         return true
 267                 end
 268         end
 269
 270         # Known link refs
 271         # This list will be needed during output to expand links.
 272         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 273
 274         # Last encountered link ref (for multiline definitions)
 275         #
 276         # Markdown allows link refs to be defined over two lines:
 277         #
 278         # ~~~md
 279         # [id]: http://example.com/longish/path/to/resource/here
 280         #       "Optional Title Here"
 281         # ~~~
 282         #
 283         private var last_link_ref: nullable LinkRef = null
 284
 285         # Add a link ref to the list
 286         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 287
 288         # Recursively split a `block`.
 289         #
 290         # The block is splitted according to the type of lines it contains.
 291         # Some blocks can be splited again recursively like lists.
 292         # The `in_list` mode is used to recurse on list and build
 293         # nested paragraphs or code blocks.
 294         fun recurse(root: MDBlock, in_list: Bool) do
 295                 var old_mode = self.in_list
 296                 var old_root = self.current_block
 297                 self.in_list = in_list
 298
 299                 var line = root.first_line
 300                 while line != null and line.is_empty do
 301                         line = line.next
 302                         if line == null then return
 303                 end
 304
 305                 current_line = line
 306                 current_block = root
 307                 while current_line != null do
 308                         line_kind(current_line.as(not null)).process(self)
 309                 end
 310                 self.in_list = old_mode
 311                 self.current_block = old_root
 312         end
 313
 314         # Currently processed line.
 315         # Used when visiting blocks with `recurse`.
 316         var current_line: nullable MDLine = null is writable
 317
 318         # Currently processed block.
 319         # Used when visiting blocks with `recurse`.
 320         var current_block: nullable MDBlock = null is writable
 321
 322         # Is the current recursion in list mode?
 323         # Used when visiting blocks with `recurse`
 324         private var in_list = false
 325
 326         # The type of line.
 327         # see: `md_line_*`
 328         fun line_kind(md: MDLine): Line do
 329                 var value = md.value
 330                 var leading = md.leading
 331                 var trailing = md.trailing
 332                 if md.is_empty then return new LineEmpty
 333                 if md.leading > 3 then return new LineCode
 334                 if value[leading] == '#' then return new LineHeadline
 335                 if value[leading] == '>' then return new LineBlockquote
 336
 337                 if ext_mode then
 338                         if value.length - leading - trailing > 2 then
 339                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 340                                         return new LineFence
 341                                 end
 342                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 343                                         return new LineFence
 344                                 end
 345                         end
 346                 end
 347
 348                 if value.length - leading - trailing > 2 and
 349                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 350                    if md.count_chars(value[leading]) >= 3 then
 351                                 return new LineHR
 352                    end
 353                 end
 354
 355                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 356                         var c = value[leading]
 357                         if c == '*' or c == '-' or c == '+' then return new LineUList
 358                 end
 359
 360                 if value.length - leading >= 3 and value[leading].is_digit then
 361                         var i = leading + 1
 362                         while i < value.length and value[i].is_digit do i += 1
 363                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 364                                 return new LineOList
 365                         end
 366                 end
 367
 368                 if value[leading] == '<' and md.check_html then return new LineXML
 369
 370                 var next = md.next
 371                 if next != null and not next.is_empty then
 372                         if next.count_chars('=') > 0 then
 373                                 return new LineHeadline1
 374                         end
 375                         if next.count_chars('-') > 0 then
 376                                 return new LineHeadline2
 377                         end
 378                 end
 379                 return new LineOther
 380         end
 381
 382         # Get the token kind at `pos`.
 383         fun token_at(text: Text, pos: Int): Token do
 384                 var c0: Char
 385                 var c1: Char
 386                 var c2: Char
 387
 388                 if pos > 0 then
 389                         c0 = text[pos - 1]
 390                 else
 391                         c0 = ' '
 392                 end
 393                 var c = text[pos]
 394
 395                 if pos + 1 < text.length then
 396                         c1 = text[pos + 1]
 397                 else
 398                         c1 = ' '
 399                 end
 400                 if pos + 2 < text.length then
 401                         c2 = text[pos + 2]
 402                 else
 403                         c2 = ' '
 404                 end
 405
 406                 var loc
 407                 if no_location then
 408                         loc = null
 409                 else
 410                         loc = new MDLocation(
 411                                 current_loc.line_start,
 412                                 current_loc.column_start + pos,
 413                                 current_loc.line_start,
 414                                 current_loc.column_start + pos)
 415                 end
 416
 417                 if c == '*' then
 418                         if c1 == '*' then
 419                                 if c0 != ' ' or c2 != ' ' then
 420                                         return new TokenStrongStar(loc, pos, c)
 421                                 else
 422                                         return new TokenEmStar(loc, pos, c)
 423                                 end
 424                         end
 425                         if c0 != ' ' or c1 != ' ' then
 426                                 return new TokenEmStar(loc, pos, c)
 427                         else
 428                                 return new TokenNone(loc, pos, c)
 429                         end
 430                 else if c == '_' then
 431                         if c1 == '_' then
 432                                 if c0 != ' ' or c2 != ' ' then
 433                                         return new TokenStrongUnderscore(loc, pos, c)
 434                                 else
 435                                         return new TokenEmUnderscore(loc, pos, c)
 436                                 end
 437                         end
 438                         if ext_mode then
 439                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 440                                    (c1.is_letter or c1.is_digit) then
 441                                         return new TokenNone(loc, pos, c)
 442                                 else
 443                                         return new TokenEmUnderscore(loc, pos, c)
 444                                 end
 445                         end
 446                         if c0 != ' ' or c1 != ' ' then
 447                                 return new TokenEmUnderscore(loc, pos, c)
 448                         else
 449                                 return new TokenNone(loc, pos, c)
 450                         end
 451                 else if c == '!' then
 452                         if c1 == '[' then return new TokenImage(loc, pos, c)
 453                         return new TokenNone(loc, pos, c)
 454                 else if c == '[' then
 455                         return new TokenLink(loc, pos, c)
 456                 else if c == ']' then
 457                         return new TokenNone(loc, pos, c)
 458                 else if c == '`' then
 459                         if c1 == '`' then
 460                                 return new TokenCodeDouble(loc, pos, c)
 461                         else
 462                                 return new TokenCodeSingle(loc, pos, c)
 463                         end
 464                 else if c == '\\' then
 465                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 466                                 return new TokenEscape(loc, pos, c)
 467                         else
 468                                 return new TokenNone(loc, pos, c)
 469                         end
 470                 else if c == '<' then
 471                         return new TokenHTML(loc, pos, c)
 472                 else if c == '&' then
 473                         return new TokenEntity(loc, pos, c)
 474                 else
 475                         if ext_mode then
 476                                 if c == '~' and c1 == '~' then
 477                                         return new TokenStrike(loc, pos, c)
 478                                 end
 479                         end
 480                         return new TokenNone(loc, pos, c)
 481                 end
 482         end
 483
 484         # Find the position of a `token` in `self`.
 485         fun find_token(text: Text, start: Int, token: Token): Int do
 486                 var pos = start
 487                 while pos < text.length do
 488                         if token_at(text, pos).is_same_type(token) then
 489                                 return pos
 490                         end
 491                         pos += 1
 492                 end
 493                 return -1
 494         end
 495
 496         # Kind of decorator used for decoration.
 497         type DECORATOR: Decorator
 498
 499         # Decorator used for output.
 500         # Default is `HTMLDecorator`
 501         var decorator: DECORATOR is writable, lazy do
 502                 return new HTMLDecorator
 503         end
 504
 505         # Create a new `MarkdownEmitter` using a custom `decorator`.
 506         init with_decorator(decorator: DECORATOR) do
 507                 self.decorator = decorator
 508         end
 509
 510         # Output `block` using `decorator` in the current buffer.
 511         fun emit(block: Block): Text do
 512                 var buffer = push_buffer
 513                 block.emit(self)
 514                 pop_buffer
 515                 return buffer
 516         end
 517
 518         # Output the content of `block`.
 519         fun emit_in(block: Block) do block.emit_in(self)
 520
 521         # Transform and emit mardown text
 522         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 523
 524         # Transform and emit mardown text starting at `start` and
 525         # until a token with the same type as `token` is found.
 526         # Go until the end of `text` if `token` is null.
 527         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 528                 var old_text = current_text
 529                 var old_pos = current_pos
 530                 current_text = text
 531                 current_pos = start
 532                 while current_pos < text.length do
 533                         if text[current_pos] == '\n' then
 534                                 current_loc.line_start += 1
 535                                 current_loc.column_start = -current_pos
 536                         end
 537                         var mt = token_at(text, current_pos)
 538                         if (token != null and not token isa TokenNone) and
 539                         (mt.is_same_type(token) or
 540                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 541                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 542                                 return current_pos
 543                         end
 544                         mt.emit(self)
 545                         current_pos += 1
 546                 end
 547                 current_text = old_text
 548                 current_pos = old_pos
 549                 return -1
 550         end
 551
 552         # Currently processed position in `current_text`.
 553         # Used when visiting inline production with `emit_text_until`.
 554         private var current_pos: Int = -1
 555
 556         # Currently processed text.
 557         # Used when visiting inline production with `emit_text_until`.
 558         private var current_text: nullable Text = null
 559
 560         # Stacked buffers.
 561         private var buffer_stack = new List[FlatBuffer]
 562
 563         # Push a new buffer on the stack.
 564         private fun push_buffer: FlatBuffer do
 565                 var buffer = new FlatBuffer
 566                 buffer_stack.add buffer
 567                 return buffer
 568         end
 569
 570         # Pop the last buffer.
 571         private fun pop_buffer do buffer_stack.pop
 572
 573         # Current output buffer.
 574         private fun current_buffer: FlatBuffer do
 575                 assert not buffer_stack.is_empty
 576                 return buffer_stack.last
 577         end
 578
 579         # Stacked locations.
 580         private var loc_stack = new List[MDLocation]
 581
 582         # Push a new MDLocation on the stack.
 583         private fun push_loc(location: MDLocation) do loc_stack.add location
 584
 585         # Pop the last buffer.
 586         private fun pop_loc: MDLocation do return loc_stack.pop
 587
 588         # Current output buffer.
 589         private fun current_loc: MDLocation do
 590                 assert not loc_stack.is_empty
 591                 return loc_stack.last
 592         end
 593
 594         # Append `e` to current buffer.
 595         fun add(e: Writable) do
 596                 if e isa Text then
 597                         current_buffer.append e
 598                 else
 599                         current_buffer.append e.write_to_string
 600                 end
 601         end
 602
 603         # Append `c` to current buffer.
 604         fun addc(c: Char) do
 605                 current_buffer.add c
 606         end
 607
 608         # Append a "\n" line break.
 609         fun addn do addc '\n'
 610 end
 611
 612 # A Link Reference.
 613 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 614 #
 615 # ~~~raw
 616 # [1]: http://example.com/ "Optional title"
 617 # ~~~
 618 class LinkRef
 619
 620         # Link href
 621         var link: String
 622
 623         # Optional link title
 624         var title: nullable String = null
 625
 626         # Is the link an abreviation?
 627         var is_abbrev = false
 628
 629         # Create a link with a title.
 630         init with_title(link: String, title: nullable String) do
 631                 init(link)
 632                 self.title = title
 633         end
 634 end
 635
 636 # A `Decorator` is used to emit mardown into a specific format.
 637 # Default decorator used is `HTMLDecorator`.
 638 interface Decorator
 639
 640         # Kind of processor used
 641         type PROCESSOR: MarkdownProcessor
 642
 643         # Render a single plain char.
 644         #
 645         # Redefine this method to add special escaping for plain text.
 646         fun add_char(v: PROCESSOR, c: Char) do v.addc c
 647
 648         # Render a ruler block.
 649         fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract
 650
 651         # Render a headline block with corresponding level.
 652         fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract
 653
 654         # Render a paragraph block.
 655         fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract
 656
 657         # Render a code or fence block.
 658         fun add_code(v: PROCESSOR, block: BlockCode) is abstract
 659
 660         # Render a blockquote.
 661         fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract
 662
 663         # Render an unordered list.
 664         fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract
 665
 666         # Render an ordered list.
 667         fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract
 668
 669         # Render a list item.
 670         fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract
 671
 672         # Render an emphasis text.
 673         fun add_em(v: PROCESSOR, text: Text) is abstract
 674
 675         # Render a strong text.
 676         fun add_strong(v: PROCESSOR, text: Text) is abstract
 677
 678         # Render a strike text.
 679         #
 680         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 681         fun add_strike(v: PROCESSOR, text: Text) is abstract
 682
 683         # Render a link.
 684         fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract
 685
 686         # Render an image.
 687         fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract
 688
 689         # Render an abbreviation.
 690         fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract
 691
 692         # Render a code span reading from a buffer.
 693         fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract
 694
 695         # Render a text and escape it.
 696         fun append_value(v: PROCESSOR, value: Text) is abstract
 697
 698         # Render code text from buffer and escape it.
 699         fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract
 700
 701         # Render a character escape.
 702         fun escape_char(v: PROCESSOR, char: Char) is abstract
 703
 704         # Render a line break
 705         fun add_line_break(v: PROCESSOR) is abstract
 706
 707         # Generate a new html valid id from a `String`.
 708         fun strip_id(txt: String): String is abstract
 709
 710         # Found headlines during the processing labeled by their ids.
 711         fun headlines: ArrayMap[String, HeadLine] is abstract
 712 end
 713
 714 # A headline found in a markdown document.
 715 class HeadLine
 716         # Unique identifier of this headline.
 717         var id: String
 718
 719         # Text of the headline.
 720         var title: String
 721
 722         # Level of this headline.
 723         #
 724         # According to the markdown specification, level must be in `[1..6]`.
 725         var level: Int
 726 end
 727
 728 # `Decorator` that outputs HTML.
 729 class HTMLDecorator
 730         super Decorator
 731
 732         redef var headlines = new ArrayMap[String, HeadLine]
 733
 734         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 735
 736         redef fun add_headline(v, block) do
 737                 # save headline
 738                 var line = block.block.first_line
 739                 if line == null then return
 740                 var txt = line.value
 741                 var id = strip_id(txt)
 742                 var lvl = block.depth
 743                 headlines[id] = new HeadLine(id, txt, lvl)
 744                 # output it
 745                 v.add "<h{lvl} id=\"{id}\">"
 746                 v.emit_in block
 747                 v.add "</h{lvl}>\n"
 748         end
 749
 750         redef fun add_paragraph(v, block) do
 751                 v.add "<p>"
 752                 v.emit_in block
 753                 v.add "</p>\n"
 754         end
 755
 756         redef fun add_code(v, block) do
 757                 var meta = block.meta
 758                 if meta != null then
 759                         v.add "<pre class=\""
 760                         append_value(v, meta)
 761                         v.add "\"><code>"
 762                 else
 763                         v.add "<pre><code>"
 764                 end
 765                 v.emit_in block
 766                 v.add "</code></pre>\n"
 767         end
 768
 769         redef fun add_blockquote(v, block) do
 770                 v.add "<blockquote>\n"
 771                 v.emit_in block
 772                 v.add "</blockquote>\n"
 773         end
 774
 775         redef fun add_unorderedlist(v, block) do
 776                 v.add "<ul>\n"
 777                 v.emit_in block
 778                 v.add "</ul>\n"
 779         end
 780
 781         redef fun add_orderedlist(v, block) do
 782                 v.add "<ol>\n"
 783                 v.emit_in block
 784                 v.add "</ol>\n"
 785         end
 786
 787         redef fun add_listitem(v, block) do
 788                 v.add "<li>"
 789                 v.emit_in block
 790                 v.add "</li>\n"
 791         end
 792
 793         redef fun add_em(v, text) do
 794                 v.add "<em>"
 795                 v.add text
 796                 v.add "</em>"
 797         end
 798
 799         redef fun add_strong(v, text) do
 800                 v.add "<strong>"
 801                 v.add text
 802                 v.add "</strong>"
 803         end
 804
 805         redef fun add_strike(v, text) do
 806                 v.add "<del>"
 807                 v.add text
 808                 v.add "</del>"
 809         end
 810
 811         redef fun add_image(v, link, name, comment) do
 812                 v.add "<img src=\""
 813                 append_value(v, link)
 814                 v.add "\" alt=\""
 815                 append_value(v, name)
 816                 v.add "\""
 817                 if comment != null and not comment.is_empty then
 818                         v.add " title=\""
 819                         append_value(v, comment)
 820                         v.add "\""
 821                 end
 822                 v.add "/>"
 823         end
 824
 825         redef fun add_link(v, link, name, comment) do
 826                 v.add "<a href=\""
 827                 append_value(v, link)
 828                 v.add "\""
 829                 if comment != null and not comment.is_empty then
 830                         v.add " title=\""
 831                         append_value(v, comment)
 832                         v.add "\""
 833                 end
 834                 v.add ">"
 835                 v.emit_text(name)
 836                 v.add "</a>"
 837         end
 838
 839         redef fun add_abbr(v, name, comment) do
 840                 v.add "<abbr title=\""
 841                 append_value(v, comment)
 842                 v.add "\">"
 843                 v.emit_text(name)
 844                 v.add "</abbr>"
 845         end
 846
 847         redef fun add_span_code(v, text, from, to) do
 848                 v.add "<code>"
 849                 append_code(v, text, from, to)
 850                 v.add "</code>"
 851         end
 852
 853         redef fun add_line_break(v) do
 854                 v.add "<br/>"
 855         end
 856
 857         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 858
 859         redef fun escape_char(v, c) do
 860                 if c == '&' then
 861                         v.add "&amp;"
 862                 else if c == '<' then
 863                         v.add "&lt;"
 864                 else if c == '>' then
 865                         v.add "&gt;"
 866                 else if c == '"' then
 867                         v.add "&quot;"
 868                 else if c == '\'' then
 869                         v.add "&apos;"
 870                 else
 871                         v.addc c
 872                 end
 873         end
 874
 875         redef fun append_code(v, buffer, from, to) do
 876                 for i in [from..to[ do
 877                         var c = buffer[i]
 878                         if c == '&' then
 879                                 v.add "&amp;"
 880                         else if c == '<' then
 881                                 v.add "&lt;"
 882                         else if c == '>' then
 883                                 v.add "&gt;"
 884                         else
 885                                 v.addc c
 886                         end
 887                 end
 888         end
 889
 890         redef fun strip_id(txt) do
 891                 # strip id
 892                 var b = new FlatBuffer
 893                 for c in txt do
 894                         if c == ' ' then
 895                                 b.add '_'
 896                         else
 897                                 if not c.is_letter and
 898                                    not c.is_digit and
 899                                    not allowed_id_chars.has(c) then continue
 900                                 b.add c
 901                         end
 902                 end
 903                 var res = b.to_s
 904                 var key = res
 905                 # check for multiple id definitions
 906                 if headlines.has_key(key) then
 907                         var i = 1
 908                         key = "{res}_{i}"
 909                         while headlines.has_key(key) do
 910                                 i += 1
 911                                 key = "{res}_{i}"
 912                         end
 913                 end
 914                 return key
 915         end
 916
 917         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 918 end
 919
 920 # Location in a Markdown input.
 921 class MDLocation
 922
 923         # Starting line number (starting from 1).
 924         var line_start: Int
 925
 926         # Starting column number (starting from 1).
 927         var column_start: Int
 928
 929         # Stopping line number (starting from 1).
 930         var line_end: Int
 931
 932         # Stopping column number (starting from 1).
 933         var column_end: Int
 934
 935         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 936
 937         # Return a copy of `self`.
 938         fun copy: MDLocation do
 939                 return new MDLocation(line_start, column_start, line_end, column_end)
 940         end
 941 end
 942
 943 # A block of markdown lines.
 944 # A `MDBlock` can contains lines and/or sub-blocks.
 945 class MDBlock
 946
 947         # Position of `self` in the input.
 948         var location: MDLocation
 949
 950         # Kind of block.
 951         # See `Block`.
 952         var kind: Block = new BlockNone(self) is writable
 953
 954         # First line if any.
 955         var first_line: nullable MDLine = null is writable
 956
 957         # Last line if any.
 958         var last_line: nullable MDLine = null is writable
 959
 960         # First sub-block if any.
 961         var first_block: nullable MDBlock = null is writable
 962
 963         # Last sub-block if any.
 964         var last_block: nullable MDBlock = null is writable
 965
 966         # Previous block if any.
 967         var prev: nullable MDBlock = null is writable
 968
 969         # Next block if any.
 970         var next: nullable MDBlock = null is writable
 971
 972         # Does this block contain subblocks?
 973         fun has_blocks: Bool do return first_block != null
 974
 975         # Count sub-blocks.
 976         fun count_blocks: Int do
 977                 var count = 0
 978                 var block = first_block
 979                 while block != null do
 980                         count += 1
 981                         block = block.next
 982                 end
 983                 return count
 984         end
 985
 986         # Does this block contain lines?
 987         fun has_lines: Bool do return first_line != null
 988
 989         # Count block lines.
 990         fun count_lines: Int do
 991                 var count = 0
 992                 var line = first_line
 993                 while line != null do
 994                         count += 1
 995                         line = line.next
 996                 end
 997                 return count
 998         end
 999
1000         # Split `self` creating a new sub-block having `line` has `last_line`.
1001         fun split(line: MDLine): MDBlock do
1002                 # location for new block
1003                 var new_loc = new MDLocation(
1004                         first_line.as(not null).location.line_start,
1005                         first_line.as(not null).location.column_start,
1006                         line.location.line_end,
1007                         line.location.column_end)
1008                 # create block
1009                 var block = new MDBlock(new_loc)
1010                 block.first_line = first_line
1011                 block.last_line = line
1012                 first_line = line.next
1013                 line.next = null
1014                 if first_line == null then
1015                         last_line = null
1016                 else
1017                         first_line.as(not null).prev = null
1018                         # update current block loc
1019                         location.line_start = first_line.as(not null).location.line_start
1020                         location.column_start = first_line.as(not null).location.column_start
1021                 end
1022                 if first_block == null then
1023                         first_block = block
1024                         last_block = block
1025                 else
1026                         last_block.as(not null).next = block
1027                         last_block = block
1028                 end
1029                 return block
1030         end
1031
1032         # Add a `line` to this block.
1033         fun add_line(line: MDLine) do
1034                 if last_line == null then
1035                         first_line = line
1036                         last_line = line
1037                 else
1038                         last_line.as(not null).next_empty = line.is_empty
1039                         line.prev_empty = last_line.as(not null).is_empty
1040                         line.prev = last_line
1041                         last_line.as(not null).next = line
1042                         last_line = line
1043                 end
1044         end
1045
1046         # Remove `line` from this block.
1047         fun remove_line(line: MDLine) do
1048                 if line.prev == null then
1049                         first_line = line.next
1050                 else
1051                         line.prev.as(not null).next = line.next
1052                 end
1053                 if line.next == null then
1054                         last_line = line.prev
1055                 else
1056                         line.next.as(not null).prev = line.prev
1057                 end
1058                 line.prev = null
1059                 line.next = null
1060         end
1061
1062         # Remove leading empty lines.
1063         fun remove_leading_empty_lines: Bool do
1064                 var was_empty = false
1065                 var line = first_line
1066                 while line != null and line.is_empty do
1067                         remove_line line
1068                         line = first_line
1069                         was_empty = true
1070                 end
1071                 return was_empty
1072         end
1073
1074         # Remove trailing empty lines.
1075         fun remove_trailing_empty_lines: Bool do
1076                 var was_empty = false
1077                 var line = last_line
1078                 while line != null and line.is_empty do
1079                         remove_line line
1080                         line = last_line
1081                         was_empty = true
1082                 end
1083                 return was_empty
1084         end
1085
1086         # Remove leading and trailing empty lines.
1087         fun remove_surrounding_empty_lines: Bool do
1088                 var was_empty = false
1089                 if remove_leading_empty_lines then was_empty = true
1090                 if remove_trailing_empty_lines then was_empty = true
1091                 return was_empty
1092         end
1093
1094         # Remove list markers and up to 4 leading spaces.
1095         # Used to clean nested lists.
1096         fun remove_list_indent(v: MarkdownProcessor) do
1097                 var line = first_line
1098                 while line != null do
1099                         if not line.is_empty then
1100                                 var kind = v.line_kind(line)
1101                                 if kind isa LineList then
1102                                         line.value = kind.extract_value(line)
1103                                 else
1104                                         line.value = line.value.substring_from(line.leading.min(4))
1105                                 end
1106                                 line.leading = line.process_leading
1107                         end
1108                         line = line.next
1109                 end
1110         end
1111
1112         # Collect block line text.
1113         fun text: String do
1114                 var text = new FlatBuffer
1115                 var line = first_line
1116                 while line != null do
1117                         if not line.is_empty then
1118                                 text.append line.text
1119                         end
1120                         text.append "\n"
1121                         line = line.next
1122                 end
1123                 var block = first_block
1124                 while block != null do
1125                         text.append block.text
1126                         text.append "\n"
1127                         block = block.next
1128                 end
1129                 return text.write_to_string
1130         end
1131 end
1132
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v.decorator`.
        fun emit(v: MarkdownProcessor) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed first; sub-blocks are only
        # emitted when no line remains.
        fun emit_in(v: MarkdownProcessor) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are first collected, stripped of their leading and trailing
        # spaces, in a temporary buffer that is then passed to `v.emit_text`.
        # A line break is added after lines ending with 2 spaces or more.
        fun emit_lines(v: MarkdownProcessor) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a length, not an end index:
                                # the leading spaces must be subtracted too or
                                # trailing spaces leak into the output.
                                var len = line.value.length - line.leading - line.trailing
                                if len > 0 then v.add line.value.substring(line.leading, len)
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while it is emitted.
        fun emit_blocks(v: MarkdownProcessor) do
                var block = self.block.first_block
                while block != null do
                        v.push_loc(block.location)
                        block.kind.emit(v)
                        v.pop_loc
                        block = block.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Every line is followed by a newline.
        # Outside of a `BlockFence`, a 4 spaces prefix is removed from lines.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        # NOTE(review): `substring` takes a length, so this
                                        # only drops trailing spaces beyond the first 4.
                                        # Confirm whether trailing spaces should be kept
                                        # (as in the other branch) or fully removed.
                                        text.append str.substring(4, str.length - line.trailing)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1202
# A block without any markdown specificities.
#
# It inherits the default implementation of `Block` unchanged,
# this class is only used for typing purposes
# (it is the default `MDBlock::kind`).
class BlockNone
        super Block
end
1210
# A markdown blockquote.
class BlockQuote
        super Block

        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` starting (after its leading spaces)
        # with `>`, drop everything up to the marker and one following space if
        # any, then recompute `leading`.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        var text = line.value
                        var marker = line.leading
                        if not line.is_empty and text[marker] == '>' then
                                var skip = marker + 1
                                if skip < text.length and text[skip] == ' ' then skip += 1
                                line.value = text.substring_from(skip)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1236
# A markdown code block.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Number of char to skip at the beginning of the line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line escaped as code, skipping its first `line_start` chars.
        #
        # Every line, empty or not, is followed by a newline.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        var code = line.value
                        if not line.is_empty then v.decorator.append_code(v, code, line_start, code.length)
                        v.addn
                        line = line.next
                end
        end
end
1262
# A markdown code-fence block.
#
# Actually uses the same implementation as `BlockCode`,
# this class is only used for typing purposes
# (`Block::raw_content` checks for it) and to change `line_start`.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1273
# A markdown headline.
class BlockHeadline
        super Block

        # Emit the headline with a location starting at the headline text.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Offset of the headline text in its original line.
        #
        # Set by `transform_headline`, used by `emit` to shift the location.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from lines contained in `self`.
        #
        # Only the first line of `block` is processed: the opening `#` run and
        # the spaces after it are removed, as well as the closing `#` run and
        # the spaces before it.
        # `depth` is set to the number of opening `#`, capped at 6.
        # Does nothing if `depth` is already set or if there is no usable line.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var line = block.first_line
                if line == null then return
                if line.is_empty then return
                var value = line.value
                var level = 0
                var start = line.leading
                while start < value.length and value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < value.length and value[start] == ' ' do start += 1
                if start >= value.length then
                        line.is_empty = true
                else
                        var nend = value.length - line.trailing - 1
                        # Never scan back past `start`: with an input like `# #`
                        # the closing marks reach the opening ones and the
                        # length given to `substring` would be negative.
                        while nend >= start and value[nend] == '#' do nend -= 1
                        while nend >= start and value[nend] == ' ' do nend -= 1
                        if nend < start then
                                # Only marks and spaces: the headline has no text.
                                line.clear
                        else
                                line.value = value.substring(start, nend - start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                self.start = start
                depth = level.min(6)
        end
end
1320
# An item of a markdown list.
class BlockListItem
        super Block

        # Delegate the rendering to `Decorator::add_listitem`.
        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1327
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins at each line that is
        # a `LineList`, or that is non-empty, not indented and preceded by an
        # empty line. The remaining lines form the last item.
        private fun init_block(v: MarkdownProcessor) do
                var line = block.first_line
                if line == null then return
                line = line.next
                while line != null do
                        var kind = v.line_kind(line)
                        var starts_item = kind isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                # Everything before `line` becomes one item.
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last = block.split(block.last_line.as(not null))
                last.kind = new BlockListItem(last)
        end

        # Expand list items as paragraphs if needed.
        #
        # If one list item of `block` directly contains a `BlockParagraph`,
        # every `BlockNone` directly contained in a list item becomes a
        # `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                # First pass: look for a paragraph inside a list item.
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null do
                                        if child.kind isa BlockParagraph then
                                                found = true
                                                break
                                        end
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # Second pass: promote plain blocks to paragraphs.
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var sub = item.first_block
                                while sub != null do
                                        if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
                                        sub = sub.next
                                end
                        end
                        item = item.next
                end
        end
end
1386
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        # Delegate the rendering to `Decorator::add_orderedlist`.
        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1393
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Delegate the rendering to `Decorator::add_unorderedlist`.
        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1400
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Delegate the rendering to `Decorator::add_paragraph`.
        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1407
# A markdown ruler.
class BlockRuler
        super Block

        # Delegate the rendering to `Decorator::add_ruler`.
        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1414
# Xml blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the lines verbatim, without escaping nor inline processing.
        #
        # Every line, empty or not, is followed by a newline.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                v.add line.value
                        end
                        v.addn
                        line = line.next
                end
        end
end
1428
1429 # A markdown line.
1430 class MDLine
1431
        # Location of `self` in the original input.
        var location: MDLocation

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        #
        # Defaults to `true`; `init` sets it to `false` when `value`
        # is not made of spaces only.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        #
        # Maintained by `MDBlock::add_line` and `clear`.
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        #
        # Maintained by `MDBlock::add_line` and `clear`.
        var next_empty: Bool = false is writable
1453
1454         # Initialize a new MDLine from its string value
1455         init do
1456                 self.leading = process_leading
1457                 if leading != value.length then
1458                         self.is_empty = false
1459                         self.trailing = process_trailing
1460                 end
1461         end
1462
1463         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1464         fun clear do
1465                 value = ""
1466                 leading = 0
1467                 trailing = 0
1468                 is_empty = true
1469                 if prev != null then prev.as(not null).next_empty = true
1470                 if next != null then next.as(not null).prev_empty = true
1471         end
1472
        # Number of leading spaces on this line.
        #
        # See `process_leading`.
        var leading: Int = 0 is writable
1475
1476         # Compute `leading` depending on `value`.
1477         fun process_leading: Int do
1478                 var count = 0
1479                 var value = self.value
1480                 while count < value.length and value[count] == ' ' do count += 1
1481                 if leading == value.length then clear
1482                 return count
1483         end
1484
        # Number of trailing spaces on this line.
        #
        # `init` only computes it (with `process_trailing`) for non-empty lines,
        # it is 0 otherwise.
        var trailing: Int = 0 is writable
1487
1488         # Compute `trailing` depending on `value`.
1489         fun process_trailing: Int do
1490                 var count = 0
1491                 var value = self.value
1492                 while value[value.length - count - 1] == ' ' do
1493                         count += 1
1494                 end
1495                 return count
1496         end
1497
1498         # Count the amount of `ch` in this line.
1499         # Return A value > 0 if this line only consists of `ch` end spaces.
1500         fun count_chars(ch: Char): Int do
1501                 var count = 0
1502                 for c in value do
1503                         if c == ' ' then
1504                                 continue
1505                         end
1506                         if c == ch then
1507                                 count += 1
1508                                 continue
1509                         end
1510                         count = 0
1511                         break
1512                 end
1513                 return count
1514         end
1515
1516         # Count the amount of `ch` at the start of this line ignoring spaces.
1517         fun count_chars_start(ch: Char): Int do
1518                 var count = 0
1519                 for c in value do
1520                         if c == ' ' then
1521                                 continue
1522                         end
1523                         if c == ch then
1524                                 count += 1
1525                         else
1526                                 break
1527                         end
1528                 end
1529                 return count
1530         end
1531
        # Last XML line if any.
        #
        # Set by `check_html` to the line where the XML block starting at `self` ends.
        private var xml_end_line: nullable MDLine = null
1534
        # Does `value` contain valid XML markup?
        #
        # Checks whether a comment or a block-level HTML element starts at `leading`
        # and is closed on this line or on one of the following lines.
        # On success, `xml_end_line` is set to the line where the markup ends.
        private fun check_html: Bool do
                # stack of the block-level tags opened and not closed yet
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                # a `!` right after the first character may start a comment
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                # read the first tag; a negative position means nothing could be read
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        # `hr` is accepted as markup ending on its own line
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        # walk the following text, line after line, until all tags are closed
                        var line: nullable MDLine = self
                        while line != null do
                                # move to the next `<` of the current line
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # end of line reached without another tag:
                                        # a `/` two characters before the end closes the last opened tag
                                        # NOTE(review): presumably handles a line ending with `/>`; confirm
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        # continue at the beginning of the next line
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                # only block-level tags (except `hr`) are tracked in the stack
                                                if tag.is_html_block and not tag == "hr" then
                                                        if tmp[1] == '/' then
                                                                # closing tag: it must match the last opened one
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                # everything is closed: the markup ends on this line
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                # not a readable tag, skip the `<`
                                                pos += 1
                                        end
                                end
                        end
                        # false when the lines ran out with tags still open
                        return tags.is_empty
                end
                return false
        end
1599
1600         # Read a XML comment.
1601         # Used by `check_html`.
1602         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1603                 var line: nullable MDLine = first_line
1604                 if start + 3 < line.as(not null).value.length then
1605                         if line.as(not null).value[2] == '-' and line.as(not null).value[3] == '-' then
1606                                 var pos = start + 4
1607                                 while line != null do
1608                                         while pos < line.value.length and line.value[pos] != '-' do
1609                                                 pos += 1
1610                                         end
1611                                         if pos == line.value.length then
1612                                                 line = line.next
1613                                                 pos = 0
1614                                         else
1615                                                 if pos + 2 < line.value.length then
1616                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1617                                                                 first_line.xml_end_line = line
1618                                                                 return pos + 3
1619                                                         end
1620                                                 end
1621                                                 pos += 1
1622                                         end
1623                                 end
1624                         end
1625                 end
1626                 return -1
1627         end
1628
1629         # Extract the text of `self` without leading and trailing.
1630         fun text: String do return value.substring(leading, value.length - trailing)
1631 end
1632
# A markdown line.
#
# Each kind of line implements `process` to consume the lines found at the
# current position of a `MarkdownProcessor`.
interface Line

        # Parse the line found at the current position of `v`.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1640
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the empty line: only move `v.current_line` to the following line.
        redef fun process(v) do
                var current = v.current_line.as(not null)
                v.current_line = current.next
        end
end
1649
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Group the lines starting at `v.current_line` into a `BlockParagraph`
        # or, for some lines found inside a list, into a `BlockNone`.
        redef fun process(v) do
                var start = v.current_line.as(not null)
                var was_empty = start.prev_empty
                # walk to the first line that does not belong to this block
                var cursor: nullable MDLine = start
                while cursor != null and not cursor.is_empty do
                        var t = v.line_kind(cursor)
                        # lists stop the block in lists or in extended mode,
                        # code and fences only in extended mode,
                        # headlines, rulers, quotes and XML always
                        var interrupts = ((v.in_list or v.ext_mode) and t isa LineList) or
                                (v.ext_mode and (t isa LineCode or t isa LineFence)) or
                                t isa LineHeadline or t isa LineHR or
                                t isa LineBlockquote or t isa LineXML
                        if interrupts then break
                        cursor = cursor.next
                end
                # detach the lines from the current block
                var current_block = v.current_block.as(not null)
                var block: MDBlock
                if cursor == null then
                        # no more lines: take everything up to the last line
                        block = current_block.split(current_block.last_line.as(not null))
                else if cursor.is_empty then
                        # stopped on an empty line: it is taken with the block
                        block = current_block.split(cursor)
                else
                        # stopped on another construct: it is left in the current block
                        block = current_block.split(cursor.prev.as(not null))
                end
                # in a list, text neither preceded nor ended by an empty line is not a paragraph
                var bare = v.in_list and (cursor == null or not cursor.is_empty) and not was_empty
                if bare then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1700
# A line of markdown code.
class LineCode
        super Line

        # Group the code lines (and the empty lines between them) starting at
        # `v.current_line` into a `BlockCode`.
        redef fun process(v) do
                # look for the first line that is neither empty nor code
                var after = v.current_line
                loop
                        if after == null then break
                        if not after.is_empty and not v.line_kind(after) isa LineCode then break
                        after = after.next
                end
                # the block ends just before that line, or at the last line if there is none
                var current_block = v.current_block.as(not null)
                var last_of_code: nullable MDLine
                if after == null then
                        last_of_code = current_block.last_line
                else
                        last_of_code = after.prev
                end
                var code = current_block.split(last_of_code.as(not null))
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = current_block.first_line
        end
end
1724
# A line of raw XML.
class LineXML
        super Line

        # Isolate the lines from `v.current_line` to its `xml_end_line` into a `BlockXML`.
        #
        # Does nothing if there is no current line.
        redef fun process(v) do
                var first = v.current_line
                if first == null then return
                var current_block = v.current_block.as(not null)
                # split off what precedes the XML, if anything
                var before = first.prev
                if before != null then current_block.split(before)
                # then split off the XML itself
                var xml = current_block.split(first.xml_end_line.as(not null))
                xml.kind = new BlockXML(xml)
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1741
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Group the quoted lines starting at `v.current_line` into a `BlockQuote`,
        # then process the content of the quote recursively.
        redef fun process(v) do
                var current_block = v.current_block.as(not null)
                # The quote ends on the first non-empty and non-indented line that
                # follows an empty line and that is not a blockquote line itself.
                var after = v.current_line
                while after != null do
                        var leaves_quote = not after.is_empty and after.prev_empty and
                                after.leading == 0 and not v.line_kind(after) isa LineBlockquote
                        if leaves_quote then break
                        after = after.next
                end
                # detach the quote from the current block
                var last_of_quote: nullable MDLine
                if after == null then
                        last_of_quote = current_block.last_line
                else
                        last_of_quote = after.prev
                end
                var quote = current_block.split(last_of_quote.as(not null))
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                # process the content of the quote, then resume in the current block
                v.current_line = after
                v.recurse(quote, false)
                v.current_line = current_block.first_line
        end
end
1772
# A markdown ruler line.
class LineHR
        super Line

        # Isolate `v.current_line` into a `BlockRuler`.
        #
        # Does nothing if there is no current line.
        redef fun process(v) do
                var ruler_line = v.current_line
                if ruler_line == null then return
                var current_block = v.current_block.as(not null)
                # split off what precedes the ruler, if anything
                var before = ruler_line.prev
                if before != null then current_block.split(before)
                var ruler = current_block.split(ruler_line)
                ruler.kind = new BlockRuler(ruler)
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1788
# A markdown fence code line.
class LineFence
        super Line

        # Group the lines from the fence at `v.current_line` to the next fence line
        # (or to the end of the block if there is none) into a `BlockFence`.
        redef fun process(v) do
                # look for the next fence line, starting after the current one
                var after = v.current_line.as(not null).next
                var current_block = v.current_block.as(not null)
                while after != null and not v.line_kind(after) isa LineFence do
                        after = after.next
                end
                # the block includes that fence line: continue after it
                if after != null then after = after.next
                # detach the fence from the current block
                var last_of_fence: nullable MDLine
                if after == null then
                        last_of_fence = current_block.last_line
                else
                        last_of_fence = after.prev
                end
                var fence = current_block.split(last_of_fence.as(not null))
                fence.remove_surrounding_empty_lines
                # the meta is read from the first line before it is erased
                var meta = fence.first_line.as(not null).value.meta_from_fence
                fence.kind = new BlockFence(fence, meta)
                fence.first_line.as(not null).clear
                # the last line is erased only if it is a fence line too
                var last = fence.last_line
                if last != null and v.line_kind(last) isa LineFence then
                        last.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = after
        end
end
1823
# A markdown headline.
class LineHeadline
        super Line

        # Isolate `v.current_line` into a `BlockHeadline`.
        #
        # Does nothing if there is no current line.
        redef fun process(v) do
                var head = v.current_line
                if head == null then return
                var current_block = v.current_block.as(not null)
                # split off what precedes the headline, if anything
                var before = head.prev
                if before != null then current_block.split(before)
                var block = current_block.split(head)
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1842
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Isolate `v.current_line` into a `BlockHeadline` of depth 1.
        #
        # The line following the headline is erased; it must exist.
        # Does nothing if there is no current line.
        redef fun process(v) do
                var head = v.current_line
                if head == null then return
                var current_block = v.current_block.as(not null)
                # split off what precedes the headline, if anything
                var before = head.prev
                if before != null then current_block.split(before)
                # erase the line under the headline
                var under = head.next.as(not null)
                under.clear
                var block = current_block.split(head)
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1863
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Isolate `v.current_line` into a `BlockHeadline` of depth 2.
        #
        # The line following the headline is erased; it must exist.
        # Does nothing if there is no current line.
        redef fun process(v) do
                var head = v.current_line
                if head == null then return
                var current_block = v.current_block.as(not null)
                # split off what precedes the headline, if anything
                var before = head.prev
                if before != null then current_block.split(before)
                # erase the line under the headline
                var under = head.next.as(not null)
                under.clear
                var block = current_block.split(head)
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1884
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Group the lines of the list starting at `v.current_line` into a list block
        # (see `block_kind`), then process each sub-block of the list recursively.
        redef fun process(v) do
                # The list ends on the first non-empty and non-indented line that
                # follows an empty line and that is not a list line itself.
                var after = v.current_line
                while after != null do
                        var t = v.line_kind(after)
                        var leaves_list = not after.is_empty and after.prev_empty and
                                after.leading == 0 and not t isa LineList
                        if leaves_list then break
                        after = after.next
                end
                # detach the list from the current block
                var current_block = v.current_block.as(not null)
                var last_of_list: nullable MDLine
                if after == null then
                        last_of_list = current_block.last_line
                else
                        last_of_list = after.prev
                end
                var list = current_block.split(last_of_list.as(not null))
                var kind = block_kind(list)
                list.kind = kind
                # the flags at both ends are reset before and after the empty lines are removed
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                kind.init_block(v)
                # process each sub-block of the list
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = after
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1931
# An ordered list line.
class LineOList
        super LineList

        # Ordered list lines produce a `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var content = line.value
                var dot = content.index_of('.')
                return content.substring_from(dot + 2)
        end
end
1942
# An unordered list line.
class LineUList
        super LineList

        # Unordered list lines produce a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading spaces of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1953
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Location of `self` in the original input.
        var location: nullable MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token using the decorator of `v`.
        #
        # By default, `char` is simply added to the output.
        fun emit(v: MarkdownProcessor) do
                var decorator = v.decorator
                decorator.add_char(v, char)
        end
end
1970
# A token without a specific meaning.
#
# Nothing is redefined: the token is emitted with the default `Token::emit`.
class TokenNone
        super Token
end
1975
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text found up to the matching token as an emphasis.
        #
        # If no matching token is found, `char` is output as is.
        redef fun emit(v) do
                # the emphasized text is rendered in its own buffer
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, inner)
                v.current_pos = stop
        end
end
1992
# An emphasis star token.
#
# Nothing is redefined: the behaviour is inherited from `TokenEm`.
class TokenEmStar
        super TokenEm
end
1997
# An emphasis underscore token.
#
# Nothing is redefined: the behaviour is inherited from `TokenEm`.
class TokenEmUnderscore
        super TokenEm
end
2002
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text found up to the matching token as a strong emphasis.
        #
        # The content is looked for two characters after `pos`.
        # If no matching token is found, `char` is output as is.
        redef fun emit(v) do
                # the strong text is rendered in its own buffer
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, inner)
                v.current_pos = stop + 1
        end
end
2019
# A strong star token.
#
# Nothing is redefined: the behaviour is inherited from `TokenStrong`.
class TokenStrongStar
        super TokenStrong
end
2024
# A strong underscore token.
#
# Nothing is redefined: the behaviour is inherited from `TokenStrong`.
class TokenStrongUnderscore
        super TokenStrong
end
2029
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text found up to the matching token as a span of code.
        #
        # Spaces at both ends of the span are ignored; a span made only of spaces outputs nothing.
        # If no matching token is found, `char` is output as is.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var extra = next_pos
                var first = pos + extra + 1
                var limit = v.find_token(text, first, self)
                if limit <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = limit + extra
                # ignore the spaces at the beginning of the span
                while first < limit and text[first] == ' ' do first += 1
                if first >= limit then return
                # ignore the spaces at the end of the span
                while text[limit - 1] == ' ' do limit -= 1
                v.decorator.add_span_code(v, text, first, limit)
        end

        # Offset added to `pos` to find the content and to the matching token to move past it.
        private fun next_pos: Int is abstract
end
2053
# A span code token.
class TokenCodeSingle
        super TokenCode

        # No additional offset: `TokenCode::emit` looks for the content right after `pos`.
        redef fun next_pos do return 0
end
2060
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One additional offset: `TokenCode::emit` looks for the content two characters after `pos`.
        redef fun next_pos do return 1
end
2067
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image if `check_link` accepts it, else output `char` as is.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownProcessor) is abstract

        # Check if the link is a valid link.
        #
        # On success `name`, `link` and `comment` (and `is_abbrev` in one case) are set
        # and the position to continue from is returned.
        # Returns `-1` if the construct is not a valid link.
        #
        # `out` is not used by this implementation.
        private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                if md == null then return -1
                # the name starts one character after `start` for links, two for the other tokens
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                # a position before `start` is handled as a failure
                if pos < start then return -1
                name = tmp
                # remember where the name ends, in case nothing usable follows it
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # nothing follows the name: it must be a known reference
                        var tid = name.as(not null).write_to_string.to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # address given between parentheses, with an optional title
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        # the address may be enclosed in `<` and `>`
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        # move past the closing `>`
                        if use_lt then pos += 1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # optional title between double quotes
                                if pos > start and md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        # the construct must end with the closing parenthesis
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # reference given between brackets
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        # an empty reference means that the name is the reference
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.as(not null).write_to_string.to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # something else follows the name: the name itself must be a known reference
                        # (line breaks in the name are replaced by spaces for the lookup)
                        var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                # no address found (e.g. unknown reference between brackets)
                if link == null then return -1
                return pos
        end
end
2188
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the link is one and has a title, else a regular link.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                else
                        v.decorator.add_link(v, link.as(not null), name.as(not null), title)
                end
        end
end
2201
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with its address, its name and its optional title.
        redef fun emit_hyper(v) do
                var decorator = v.decorator
                decorator.add_image(v, link.as(not null), name.as(not null), comment)
        end
end
2210
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the markup read by `check_html` if any, else output `char` escaped.
        redef fun emit(v) do
                var markup = new FlatBuffer
                var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add markup
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # Auto links are emitted directly with the decorator of `v`,
        # inline markup is read into `out`.
        # Returns the position reached in `md`, or `-1` if nothing was recognized.
        private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var url = new FlatBuffer
                var stop = md.read_until(url, start + 1, ':', ' ', '>', '\n')
                if stop != -1 and md[stop] == ':' and url.is_link_prefix then
                        # read the rest of the address in the same buffer
                        stop = md.read_until(url, stop, '>')
                        if stop != -1 then
                                var link = url.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return stop
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2248
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity if `check_entity` accepts it, else output `char` escaped.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Reads `md` from `start` up to the next `;` into `out`.
        # Three forms are accepted after the first character:
        #
        # * `#x` or `#X` followed by at least one hexadecimal digit
        # * `#` followed by decimal digits
        # * letters and digits only (the name itself is not checked)
        #
        # On success, `;` is appended to `out` and the position of the `;` in `md` is returned.
        # Returns `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                # at least two characters are needed after the first one
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # hexadecimal form: at least one digit is required
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        # A character is rejected when it belongs to none of the three ranges
                                        # (the ranges must be combined with `or`, not `and`).
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                # decimal form
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        # named form
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
2302
# A markdown escape token.
class TokenEscape
        super Token

        # Move to the character following the current position and output it as is.
        redef fun emit(v) do
                var escaped_at = v.current_pos + 1
                v.current_pos = escaped_at
                v.addc v.current_text.as(not null)[escaped_at]
        end
end
2312
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        redef fun emit(v) do
                # Render the struck text into a dedicated buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var stop = v.emit_text_until(text, pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        # No usable end position was returned: output the token char itself.
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                v.current_pos = stop + 1
        end
end
2331
2332 redef class Text
2333
2334         # Get the position of the next non-space character.
2335         private fun skip_spaces(start: Int): Int do
2336                 var pos = start
2337                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2338                         pos += 1
2339                 end
2340                 if pos < length then return pos
2341                 return -1
2342         end
2343
2344         # Read `self` until `nend` and append it to the `out` buffer.
2345         # Escape markdown special chars.
2346         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2347                 var pos = start
2348                 while pos < length do
2349                         var c = self[pos]
2350                         if c == '\\' and pos + 1 < length then
2351                                 pos = escape(out, self[pos + 1], pos)
2352                         else
2353                                 for n in nend do if c == n then break label
2354                                 out.add c
2355                         end
2356                         pos += 1
2357                 end label
2358                 if pos == length then return -1
2359                 return pos
2360         end
2361
2362         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2363         # No escape is made.
2364         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2365                 var pos = start
2366                 while pos < length do
2367                         var c = self[pos]
2368                         var end_reached = false
2369                         for n in nend do
2370                                 if c == n then
2371                                         end_reached = true
2372                                         break
2373                                 end
2374                         end
2375                         if end_reached then break
2376                         out.add c
2377                         pos += 1
2378                 end
2379                 if pos == length then return -1
2380                 return pos
2381         end
2382
2383         # Read `self` as XML until `to` and append it to the `out` buffer.
2384         # Escape HTML special chars.
2385         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2386                 var pos = from
2387                 var in_str = false
2388                 var str_char: nullable Char = null
2389                 while pos < length do
2390                         var c = self[pos]
2391                         if in_str then
2392                                 if c == '\\' then
2393                                         out.add c
2394                                         pos += 1
2395                                         if pos < length then
2396                                                 out.add c
2397                                                 pos += 1
2398                                         end
2399                                         continue
2400                                 end
2401                                 if c == str_char then
2402                                         in_str = false
2403                                         out.add c
2404                                         pos += 1
2405                                         continue
2406                                 end
2407                         end
2408                         if c == '"' or c == '\'' then
2409                                 in_str = true
2410                                 str_char = c
2411                         end
2412                         if not in_str then
2413                                 var end_reached = false
2414                                 for n in [0..to.length[ do
2415                                         if c == to[n] then
2416                                                 end_reached = true
2417                                                 break
2418                                         end
2419                                 end
2420                                 if end_reached then break
2421                         end
2422                         out.add c
2423                         pos += 1
2424                 end
2425                 if pos == length then return -1
2426                 return pos
2427         end
2428
2429         # Read `self` as XML and append it to the `out` buffer.
2430         # Safe mode can be activated to limit reading to valid xml.
2431         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2432                 var pos = 0
2433                 var is_valid = true
2434                 var is_close_tag = false
2435                 if start + 1 >= length then return -1
2436                 if self[start + 1] == '/' then
2437                         is_close_tag = true
2438                         pos = start + 2
2439                 else if self[start + 1] == '!' then
2440                         out.append "<!"
2441                         return start + 1
2442                 else
2443                         is_close_tag = false
2444                         pos = start + 1
2445                 end
2446                 if safe_mode then
2447                         var tmp = new FlatBuffer
2448                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2449                         if pos == -1 then return -1
2450                         var tag = tmp.write_to_string.trim.to_lower
2451                         if not tag.is_valid_html_tag then
2452                                 out.append "&lt;"
2453                                 pos = -1
2454                         else if tag.is_html_unsafe then
2455                                 is_valid = false
2456                                 out.append "&lt;"
2457                                 if is_close_tag then out.add '/'
2458                                 out.append tmp
2459                         else
2460                                 out.append "<"
2461                                 if is_close_tag then out.add '/'
2462                                 out.append tmp
2463                         end
2464                 else
2465                         out.add '<'
2466                         if is_close_tag then out.add '/'
2467                         pos = read_xml_until(out, pos, ' ', '/', '>')
2468                 end
2469                 if pos == -1 then return -1
2470                 pos = read_xml_until(out, pos, '/', '>')
2471                 if pos == -1 then return -1
2472                 if self[pos] == '/' then
2473                         out.append " /"
2474                         pos = self.read_xml_until(out, pos + 1, '>')
2475                         if pos == -1 then return -1
2476                 end
2477                 if self[pos] == '>' then
2478                         if is_valid then
2479                                 out.add '>'
2480                         else
2481                                 out.append "&gt;"
2482                         end
2483                         return pos
2484                 end
2485                 return -1
2486         end
2487
2488         # Read a markdown link address and append it to the `out` buffer.
2489         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2490                 var pos = start
2491                 var counter = 1
2492                 while pos < length do
2493                         var c = self[pos]
2494                         if c == '\\' and pos + 1 < length then
2495                                 pos = escape(out, self[pos + 1], pos)
2496                         else
2497                                 var end_reached = false
2498                                 if c == '(' then
2499                                         counter += 1
2500                                 else if c == ' ' then
2501                                         if counter == 1 then end_reached = true
2502                                 else if c == ')' then
2503                                         counter -= 1
2504                                         if counter == 0 then end_reached = true
2505                                 end
2506                                 if end_reached then break
2507                                 out.add c
2508                         end
2509                         pos += 1
2510                 end
2511                 if pos == length then return -1
2512                 return pos
2513         end
2514
2515         # Read a markdown link text and append it to the `out` buffer.
2516         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2517                 var pos = start
2518                 var counter = 1
2519                 while pos < length do
2520                         var c = self[pos]
2521                         var end_reached = false
2522                         if c == '[' then
2523                                 counter += 1
2524                                 out.add c
2525                         else if c == ']' then
2526                                 counter -= 1
2527                                 if counter == 0 then
2528                                         end_reached = true
2529                                 else
2530                                         out.add c
2531                                 end
2532                         else
2533                                 out.add c
2534                         end
2535                         if end_reached then break
2536                         pos += 1
2537                 end
2538                 if pos == length then return -1
2539                 return pos
2540         end
2541
2542         # Extract the XML tag name from a XML tag.
2543         private fun xml_tag: String do
2544                 var tpl = new FlatBuffer
2545                 var pos = 1
2546                 if pos < length and self[1] == '/' then pos += 1
2547                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2548                         tpl.add self[pos]
2549                         pos += 1
2550                 end
2551                 return tpl.write_to_string.to_lower
2552         end
2553
2554         private fun is_valid_html_tag: Bool do
2555                 if is_empty then return false
2556                 for c in self do
2557                         if not c.is_alpha then return false
2558                 end
2559                 return true
2560         end
2561
2562         # Read and escape the markdown contained in `self`.
2563         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2564                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2565                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2566                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2567                    c == '`' or c == '~' or c == '^' then
2568                         out.add c
2569                         return pos + 1
2570                 end
2571                 out.add '\\'
2572                 return pos
2573         end
2574
2575         # Extract string found at end of fence opening.
2576         private fun meta_from_fence: nullable Text do
2577                 for i in [0..chars.length[ do
2578                         var c = chars[i]
2579                         if c != ' ' and c != '`' and c != '~' then
2580                                 return substring_from(i).trim
2581                         end
2582                 end
2583                 return null
2584         end
2585
2586         # Is `self` an unsafe HTML element?
2587         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2588
2589         # Is `self` a HRML block element?
2590         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2591
2592         # Is `self` a link prefix?
2593         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2594
2595         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2596
2597         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2598
2599         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2600 end
2601
redef class String

        # Parse `self` as markdown and return the HTML representation.
        #
        # A new `MarkdownProcessor` is created for each call.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do
                return (new MarkdownProcessor).process(self)
        end
end