lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
   22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # Work in extended mode (default).
  34         #
  35         # Behavior changes when using extended mode:
  36         #
  37         # * Lists and code blocks end a paragraph
  38         #
  39         #   In normal markdown the following:
  40         #
  41         # ~~~md
  42         # This is a paragraph
  43         # * and this is not a list
  44         # ~~~
  45         #
  46         #   Will produce:
  47         #
  48         # ~~~html
  49         # <p>This is a paragraph
  50         # * and this is not a list</p>
  51         # ~~~
  52         #
  53         #   When using extended mode this changes to:
  54         #
  55         # ~~~html
  56         # <p>This is a paragraph</p>
  57         # <ul>
  58         # <li>and this is not a list</li>
  59         # </ul>
  60         # ~~~
  61         #
   62         # * Fenced code blocks
   63         #
   64         #   If you don't want to indent all your code with 4 spaces,
  65         #   you can wrap your code in ``` ``` ``` or `~~~`.
  66         #
  67         #   Here's an example:
  68         #
  69         # ~~~md
  70         # fun test do
  71         #    print "Hello World!"
  72         # end
  73         # ~~~
  74         #
  75         # * Code blocks meta
  76         #
  77         #   If you want to use syntax highlighting tools, most of them need to know what kind
  78         #   of language they are highlighting.
  79         #   You can add an optional language identifier after the fence declaration to output
  80         #   it in the HTML render.
  81         #
  82         # ```nit
  83         # import markdown
  84         #
  85         # print "# Hello World!".md_to_html
  86         # ```
  87         #
  88         #   Becomes
  89         #
  90         # ~~~html
  91         # <pre class="nit"><code>import markdown
  92         #
   93         # print "# Hello World!".md_to_html
  94         # </code></pre>
  95         # ~~~
  96         #
  97         # * Underscores (Emphasis)
  98         #
  99         #   Underscores in the middle of a word like:
 100         #
 101         # ~~~md
 102         # Con_cat_this
 103         # ~~~
 104         #
 105         #   normally produces this:
 106         #
 107         # ~~~html
 108         # <p>Con<em>cat</em>this</p>
 109         # ~~~
 110         #
 111         #   With extended mode they don't result in emphasis.
 112         #
 113         # ~~~html
 114         # <p>Con_cat_this</p>
 115         # ~~~
 116         #
 117         # * Strikethrough
 118         #
 119         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
  120         #   a strikethrough span is marked with `~~`.
 121         #
 122         # ~~~md
 123         # ~~Mistaken text.~~
 124         # ~~~
 125         #
 126         #   becomes
 127         #
 128         # ~~~html
 129         # <del>Mistaken text.</del>
 130         # ~~~
 131         var ext_mode = true
 132
 133         # Disable attaching MDLocation to Tokens
 134         #
 135         # Locations are useful for some tools but they may
  136         # cause a significant time and space overhead.
 137         #
 138         # Default = `false`
 139         var no_location = false is writable
 140
 141         # Process the mardown `input` string and return the processed output.
 142         fun process(input: String): Writable do
 143                 # init processor
 144                 link_refs.clear
 145                 last_link_ref = null
 146                 current_line = null
 147                 current_block = null
 148                 # parse markdown
 149                 var parent = read_lines(input)
 150                 parent.remove_surrounding_empty_lines
 151                 recurse(parent, false)
 152                 # output processed text
 153                 decorator.headlines.clear
 154                 return emit(parent.kind)
 155         end
 156
 157         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 158         private fun read_lines(input: String): MDBlock do
 159                 var block = new MDBlock(new MDLocation(1, 1, 1, 1))
 160                 var value = new FlatBuffer
 161                 var i = 0
 162
 163                 var line_pos = 0
 164                 var col_pos = 0
 165
 166                 while i < input.length do
 167                         value.clear
 168                         var pos = 0
 169                         var eol = false
 170                         while not eol and i < input.length do
 171                                 col_pos += 1
 172                                 var c = input[i]
 173                                 if c == '\n' then
 174                                         eol = true
 175                                 else if c == '\r' then
 176                                 else if c == '\t' then
 177                                         var np = pos + (4 - (pos & 3))
 178                                         while pos < np do
 179                                                 value.add ' '
 180                                                 pos += 1
 181                                         end
 182                                 else
 183                                         pos += 1
 184                                         value.add c
 185                                 end
 186                                 i += 1
 187                         end
 188                         line_pos += 1
 189
 190                         var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
 191                         var line = new MDLine(loc, value.write_to_string)
 192                         var is_link_ref = check_link_ref(line)
 193                         # Skip link refs
 194                         if not is_link_ref then block.add_line line
 195                         col_pos = 0
 196                 end
 197                 return block
 198         end
 199
 200         # Check if line is a block link definition.
 201         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 202         private fun check_link_ref(line: MDLine): Bool do
 203                 var md = line.value
 204                 var is_link_ref = false
 205                 var id = new FlatBuffer
 206                 var link = new FlatBuffer
 207                 var comment = new FlatBuffer
 208                 var pos = -1
 209                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 210                         pos = line.leading + 1
 211                         pos = md.read_until(id, pos, ']')
 212                         if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
 213                                 if line.value[pos + 1] == ':' then
 214                                         pos += 2
 215                                         pos = md.skip_spaces(pos)
 216                                         if pos >= 0 and line.value[pos] == '<' then
 217                                                 pos += 1
 218                                                 pos = md.read_until(link, pos, '>')
 219                                                 pos += 1
 220                                         else if pos >= 0 then
 221                                                 pos = md.read_until(link, pos, ' ', '\n')
 222                                         end
 223                                         if not link.is_empty then
 224                                                 pos = md.skip_spaces(pos)
 225                                                 if pos > 0 and pos < line.value.length then
 226                                                         var c = line.value[pos]
 227                                                         if c == '\"' or c == '\'' or c == '(' then
 228                                                                 pos += 1
 229                                                                 if c == '(' then
 230                                                                         pos = md.read_until(comment, pos, ')')
 231                                                                 else
 232                                                                         pos = md.read_until(comment, pos, c)
 233                                                                 end
 234                                                                 if pos > 0 then is_link_ref = true
 235                                                         end
 236                                                 else
 237                                                         is_link_ref = true
 238                                                 end
 239                                         end
 240                                 end
 241                         end
 242                 end
 243                 if is_link_ref and not id.is_empty and not link.is_empty then
 244                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 245                         add_link_ref(id.write_to_string, lr)
 246                         if comment.is_empty then last_link_ref = lr
 247                         return true
 248                 else
 249                         comment = new FlatBuffer
 250                         if not line.is_empty and last_link_ref != null then
 251                                 pos = line.leading
 252                                 var c = line.value[pos]
 253                                 if c == '\"' or c == '\'' or c ==  '(' then
 254                                         pos += 1
 255                                         if c == '(' then
 256                                                 pos = md.read_until(comment, pos, ')')
 257                                         else
 258                                                 pos = md.read_until(comment, pos, c)
 259                                         end
 260                                 end
 261                                 var last_link_ref = self.last_link_ref
 262                                 if not comment.is_empty and last_link_ref != null then
 263                                         last_link_ref.title = comment.write_to_string
 264                                 end
 265                         end
 266                         if comment.is_empty then return false
 267                         return true
 268                 end
 269         end
 270
 271         # Known link refs
 272         # This list will be needed during output to expand links.
 273         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 274
 275         # Last encountered link ref (for multiline definitions)
 276         #
 277         # Markdown allows link refs to be defined over two lines:
 278         #
 279         # ~~~md
 280         # [id]: http://example.com/longish/path/to/resource/here
 281         #       "Optional Title Here"
 282         # ~~~
 283         #
 284         private var last_link_ref: nullable LinkRef = null
 285
 286         # Add a link ref to the list
 287         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 288
 289         # Recursively split a `block`.
 290         #
 291         # The block is splitted according to the type of lines it contains.
 292         # Some blocks can be splited again recursively like lists.
 293         # The `in_list` mode is used to recurse on list and build
 294         # nested paragraphs or code blocks.
 295         fun recurse(root: MDBlock, in_list: Bool) do
 296                 var old_mode = self.in_list
 297                 var old_root = self.current_block
 298                 self.in_list = in_list
 299
 300                 var line = root.first_line
 301                 while line != null and line.is_empty do
 302                         line = line.next
 303                         if line == null then return
 304                 end
 305
 306                 current_line = line
 307                 current_block = root
 308                 while current_line != null do
 309                         line_kind(current_line.as(not null)).process(self)
 310                 end
 311                 self.in_list = old_mode
 312                 self.current_block = old_root
 313         end
 314
 315         # Currently processed line.
 316         # Used when visiting blocks with `recurse`.
 317         var current_line: nullable MDLine = null is writable
 318
 319         # Currently processed block.
 320         # Used when visiting blocks with `recurse`.
 321         var current_block: nullable MDBlock = null is writable
 322
 323         # Is the current recursion in list mode?
 324         # Used when visiting blocks with `recurse`
 325         private var in_list = false
 326
 327         # The type of line.
 328         # see: `md_line_*`
 329         fun line_kind(md: MDLine): Line do
 330                 var value = md.value
 331                 var leading = md.leading
 332                 var trailing = md.trailing
 333                 if md.is_empty then return new LineEmpty
 334                 if md.leading > 3 then return new LineCode
 335                 if value[leading] == '#' then return new LineHeadline
 336                 if value[leading] == '>' then return new LineBlockquote
 337
 338                 if ext_mode then
 339                         if value.length - leading - trailing > 2 then
 340                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 341                                         return new LineFence
 342                                 end
 343                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 344                                         return new LineFence
 345                                 end
 346                         end
 347                 end
 348
 349                 if value.length - leading - trailing > 2 and
 350                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 351                    if md.count_chars(value[leading]) >= 3 then
 352                                 return new LineHR
 353                    end
 354                 end
 355
 356                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 357                         var c = value[leading]
 358                         if c == '*' or c == '-' or c == '+' then return new LineUList
 359                 end
 360
 361                 if value.length - leading >= 3 and value[leading].is_digit then
 362                         var i = leading + 1
 363                         while i < value.length and value[i].is_digit do i += 1
 364                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 365                                 return new LineOList
 366                         end
 367                 end
 368
 369                 if value[leading] == '<' and md.check_html then return new LineXML
 370
 371                 var next = md.next
 372                 if next != null and not next.is_empty then
 373                         if next.count_chars('=') > 0 then
 374                                 return new LineHeadline1
 375                         end
 376                         if next.count_chars('-') > 0 then
 377                                 return new LineHeadline2
 378                         end
 379                 end
 380                 return new LineOther
 381         end
 382
 383         # Get the token kind at `pos`.
 384         fun token_at(text: Text, pos: Int): Token do
 385                 var c0: Char
 386                 var c1: Char
 387                 var c2: Char
 388
 389                 if pos > 0 then
 390                         c0 = text[pos - 1]
 391                 else
 392                         c0 = ' '
 393                 end
 394                 var c = text[pos]
 395
 396                 if pos + 1 < text.length then
 397                         c1 = text[pos + 1]
 398                 else
 399                         c1 = ' '
 400                 end
 401                 if pos + 2 < text.length then
 402                         c2 = text[pos + 2]
 403                 else
 404                         c2 = ' '
 405                 end
 406
 407                 var loc
 408                 if no_location then
 409                         loc = null
 410                 else
 411                         loc = new MDLocation(
 412                                 current_loc.line_start,
 413                                 current_loc.column_start + pos,
 414                                 current_loc.line_start,
 415                                 current_loc.column_start + pos)
 416                 end
 417
 418                 if c == '*' then
 419                         if c1 == '*' then
 420                                 if c0 != ' ' or c2 != ' ' then
 421                                         return new TokenStrongStar(loc, pos, c)
 422                                 else
 423                                         return new TokenEmStar(loc, pos, c)
 424                                 end
 425                         end
 426                         if c0 != ' ' or c1 != ' ' then
 427                                 return new TokenEmStar(loc, pos, c)
 428                         else
 429                                 return new TokenNone(loc, pos, c)
 430                         end
 431                 else if c == '_' then
 432                         if c1 == '_' then
 433                                 if c0 != ' ' or c2 != ' ' then
 434                                         return new TokenStrongUnderscore(loc, pos, c)
 435                                 else
 436                                         return new TokenEmUnderscore(loc, pos, c)
 437                                 end
 438                         end
 439                         if ext_mode then
 440                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 441                                    (c1.is_letter or c1.is_digit) then
 442                                         return new TokenNone(loc, pos, c)
 443                                 else
 444                                         return new TokenEmUnderscore(loc, pos, c)
 445                                 end
 446                         end
 447                         if c0 != ' ' or c1 != ' ' then
 448                                 return new TokenEmUnderscore(loc, pos, c)
 449                         else
 450                                 return new TokenNone(loc, pos, c)
 451                         end
 452                 else if c == '!' then
 453                         if c1 == '[' then return new TokenImage(loc, pos, c)
 454                         return new TokenNone(loc, pos, c)
 455                 else if c == '[' then
 456                         return new TokenLink(loc, pos, c)
 457                 else if c == ']' then
 458                         return new TokenNone(loc, pos, c)
 459                 else if c == '`' then
 460                         if c1 == '`' then
 461                                 return new TokenCodeDouble(loc, pos, c)
 462                         else
 463                                 return new TokenCodeSingle(loc, pos, c)
 464                         end
 465                 else if c == '\\' then
 466                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 467                                 return new TokenEscape(loc, pos, c)
 468                         else
 469                                 return new TokenNone(loc, pos, c)
 470                         end
 471                 else if c == '<' then
 472                         return new TokenHTML(loc, pos, c)
 473                 else if c == '&' then
 474                         return new TokenEntity(loc, pos, c)
 475                 else
 476                         if ext_mode then
 477                                 if c == '~' and c1 == '~' then
 478                                         return new TokenStrike(loc, pos, c)
 479                                 end
 480                         end
 481                         return new TokenNone(loc, pos, c)
 482                 end
 483         end
 484
 485         # Find the position of a `token` in `self`.
 486         fun find_token(text: Text, start: Int, token: Token): Int do
 487                 var pos = start
 488                 while pos < text.length do
 489                         if token_at(text, pos).is_same_type(token) then
 490                                 return pos
 491                         end
 492                         pos += 1
 493                 end
 494                 return -1
 495         end
 496
 497         # Kind of decorator used for decoration.
 498         type DECORATOR: Decorator
 499
 500         # Decorator used for output.
 501         # Default is `HTMLDecorator`
 502         var decorator: DECORATOR is writable, lazy do
 503                 return new HTMLDecorator
 504         end
 505
 506         # Create a new `MarkdownEmitter` using a custom `decorator`.
 507         init with_decorator(decorator: DECORATOR) do
 508                 self.decorator = decorator
 509         end
 510
 511         # Output `block` using `decorator` in the current buffer.
 512         fun emit(block: Block): Text do
 513                 var buffer = push_buffer
 514                 block.emit(self)
 515                 pop_buffer
 516                 return buffer
 517         end
 518
 519         # Output the content of `block`.
 520         fun emit_in(block: Block) do block.emit_in(self)
 521
 522         # Transform and emit mardown text
 523         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 524
 525         # Transform and emit mardown text starting at `start` and
 526         # until a token with the same type as `token` is found.
 527         # Go until the end of `text` if `token` is null.
 528         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 529                 var old_text = current_text
 530                 var old_pos = current_pos
 531                 current_text = text
 532                 current_pos = start
 533                 while current_pos < text.length do
 534                         if text[current_pos] == '\n' then
 535                                 current_loc.line_start += 1
 536                                 current_loc.column_start = -current_pos
 537                         end
 538                         var mt = token_at(text, current_pos)
 539                         if (token != null and not token isa TokenNone) and
 540                         (mt.is_same_type(token) or
 541                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 542                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 543                                 return current_pos
 544                         end
 545                         mt.emit(self)
 546                         current_pos += 1
 547                 end
 548                 current_text = old_text
 549                 current_pos = old_pos
 550                 return -1
 551         end
 552
 553         # Currently processed position in `current_text`.
 554         # Used when visiting inline production with `emit_text_until`.
 555         private var current_pos: Int = -1
 556
 557         # Currently processed text.
 558         # Used when visiting inline production with `emit_text_until`.
 559         private var current_text: nullable Text = null
 560
 561         # Stacked buffers.
 562         private var buffer_stack = new List[FlatBuffer]
 563
 564         # Push a new buffer on the stack.
 565         private fun push_buffer: FlatBuffer do
 566                 var buffer = new FlatBuffer
 567                 buffer_stack.add buffer
 568                 return buffer
 569         end
 570
 571         # Pop the last buffer.
 572         private fun pop_buffer do buffer_stack.pop
 573
 574         # Current output buffer.
 575         private fun current_buffer: FlatBuffer do
 576                 assert not buffer_stack.is_empty
 577                 return buffer_stack.last
 578         end
 579
 580         # Stacked locations.
 581         private var loc_stack = new List[MDLocation]
 582
 583         # Push a new MDLocation on the stack.
 584         private fun push_loc(location: MDLocation) do loc_stack.add location
 585
 586         # Pop the last buffer.
 587         private fun pop_loc: MDLocation do return loc_stack.pop
 588
 589         # Current output buffer.
 590         private fun current_loc: MDLocation do
 591                 assert not loc_stack.is_empty
 592                 return loc_stack.last
 593         end
 594
 595         # Append `e` to current buffer.
 596         fun add(e: Writable) do
 597                 if e isa Text then
 598                         current_buffer.append e
 599                 else
 600                         current_buffer.append e.write_to_string
 601                 end
 602         end
 603
 604         # Append `c` to current buffer.
 605         fun addc(c: Char) do
 606                 current_buffer.add c
 607         end
 608
 609         # Append a "\n" line break.
 610         fun addn do addc '\n'
 611 end
 612
  613 # A Link Reference.
  614 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
  615 #
  616 # ~~~raw
  617 # [1]: http://example.com/ "Optional title"
  618 # ~~~
  619 class LinkRef
  620
  621         # Link href
  622         var link: String
  623
  624         # Optional link title
  625         var title: nullable String = null
  626
  627         # Is the link an abbreviation?
  628         var is_abbrev = false
  629
  630         # Create a link to `link` with the given `title`.
  631         init with_title(link: String, title: nullable String) do
  632                 init(link)
  633                 self.title = title
  634         end
  635 end
 636
  637 # A `Decorator` is used to emit markdown into a specific format.
  638 # The default decorator is `HTMLDecorator`.
  639 interface Decorator
  640
  641         # Kind of processor used
  642         type PROCESSOR: MarkdownProcessor
  643
  644         # Render a single plain char.
  645         #
  646         # Redefine this method to add special escaping for plain text.
  647         fun add_char(v: PROCESSOR, c: Char) do v.addc c
  648
  649         # Render a ruler block.
  650         fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract
  651
  652         # Render a headline block with corresponding level.
  653         fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract
  654
  655         # Render a paragraph block.
  656         fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract
  657
  658         # Render a code or fence block.
  659         fun add_code(v: PROCESSOR, block: BlockCode) is abstract
  660
  661         # Render a blockquote.
  662         fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract
  663
  664         # Render an unordered list.
  665         fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract
  666
  667         # Render an ordered list.
  668         fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract
  669
  670         # Render a list item.
  671         fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract
  672
  673         # Render an emphasis text.
  674         fun add_em(v: PROCESSOR, text: Text) is abstract
  675
  676         # Render a strong text.
  677         fun add_strong(v: PROCESSOR, text: Text) is abstract
  678
  679         # Render a strike text.
  680         #
  681         # Extended mode only (see `MarkdownProcessor::ext_mode`)
  682         fun add_strike(v: PROCESSOR, text: Text) is abstract
  683
  684         # Render a link.
  685         fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract
  686
  687         # Render an image.
  688         fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract
  689
  690         # Render an abbreviation.
  691         fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract
  692
  693         # Render a code span reading from a buffer.
  694         fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract
  695
  696         # Render a text and escape it.
  697         fun append_value(v: PROCESSOR, value: Text) is abstract
  698
  699         # Render code text from buffer and escape it.
  700         fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract
  701
  702         # Render a character escape.
  703         fun escape_char(v: PROCESSOR, char: Char) is abstract
  704
  705         # Render a line break
  706         fun add_line_break(v: PROCESSOR) is abstract
  707
  708         # Generate a new valid HTML id from a `String`.
  709         fun strip_id(txt: String): String is abstract
  710
  711         # Headlines found during the processing, labeled by their ids.
  712         fun headlines: ArrayMap[String, HeadLine] is abstract
  713 end
 714
  715 # Class representing a markdown headline.
  716 class HeadLine
  717         # Unique identifier of this headline.
  718         var id: String
  719
  720         # Text of the headline.
  721         var title: String
  722
  723         # Level of this headline.
  724         #
  725         # According to the markdown specification, level must be in `[1..6]`.
  726         var level: Int
  727 end
 728
 729 # `Decorator` that outputs HTML.
 730 class HTMLDecorator
 731         super Decorator
 732
 733         redef var headlines = new ArrayMap[String, HeadLine]
 734
 735         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 736
 737         redef fun add_headline(v, block) do
 738                 # save headline
 739                 var line = block.block.first_line
 740                 if line == null then return
 741                 var txt = line.value
 742                 var id = strip_id(txt)
 743                 var lvl = block.depth
 744                 headlines[id] = new HeadLine(id, txt, lvl)
 745                 # output it
 746                 v.add "<h{lvl} id=\"{id}\">"
 747                 v.emit_in block
 748                 v.add "</h{lvl}>\n"
 749         end
 750
 751         redef fun add_paragraph(v, block) do
 752                 v.add "<p>"
 753                 v.emit_in block
 754                 v.add "</p>\n"
 755         end
 756
 757         redef fun add_code(v, block) do
 758                 var meta = block.meta
 759                 if meta != null then
 760                         v.add "<pre class=\""
 761                         append_value(v, meta)
 762                         v.add "\"><code>"
 763                 else
 764                         v.add "<pre><code>"
 765                 end
 766                 v.emit_in block
 767                 v.add "</code></pre>\n"
 768         end
 769
 770         redef fun add_blockquote(v, block) do
 771                 v.add "<blockquote>\n"
 772                 v.emit_in block
 773                 v.add "</blockquote>\n"
 774         end
 775
 776         redef fun add_unorderedlist(v, block) do
 777                 v.add "<ul>\n"
 778                 v.emit_in block
 779                 v.add "</ul>\n"
 780         end
 781
 782         redef fun add_orderedlist(v, block) do
 783                 v.add "<ol>\n"
 784                 v.emit_in block
 785                 v.add "</ol>\n"
 786         end
 787
 788         redef fun add_listitem(v, block) do
 789                 v.add "<li>"
 790                 v.emit_in block
 791                 v.add "</li>\n"
 792         end
 793
 794         redef fun add_em(v, text) do
 795                 v.add "<em>"
 796                 v.add text
 797                 v.add "</em>"
 798         end
 799
 800         redef fun add_strong(v, text) do
 801                 v.add "<strong>"
 802                 v.add text
 803                 v.add "</strong>"
 804         end
 805
 806         redef fun add_strike(v, text) do
 807                 v.add "<del>"
 808                 v.add text
 809                 v.add "</del>"
 810         end
 811
 812         redef fun add_image(v, link, name, comment) do
 813                 v.add "<img src=\""
 814                 append_value(v, link)
 815                 v.add "\" alt=\""
 816                 append_value(v, name)
 817                 v.add "\""
 818                 if comment != null and not comment.is_empty then
 819                         v.add " title=\""
 820                         append_value(v, comment)
 821                         v.add "\""
 822                 end
 823                 v.add "/>"
 824         end
 825
 826         redef fun add_link(v, link, name, comment) do
 827                 v.add "<a href=\""
 828                 append_value(v, link)
 829                 v.add "\""
 830                 if comment != null and not comment.is_empty then
 831                         v.add " title=\""
 832                         append_value(v, comment)
 833                         v.add "\""
 834                 end
 835                 v.add ">"
 836                 v.emit_text(name)
 837                 v.add "</a>"
 838         end
 839
 840         redef fun add_abbr(v, name, comment) do
 841                 v.add "<abbr title=\""
 842                 append_value(v, comment)
 843                 v.add "\">"
 844                 v.emit_text(name)
 845                 v.add "</abbr>"
 846         end
 847
 848         redef fun add_span_code(v, text, from, to) do
 849                 v.add "<code>"
 850                 append_code(v, text, from, to)
 851                 v.add "</code>"
 852         end
 853
 854         redef fun add_line_break(v) do
 855                 v.add "<br/>"
 856         end
 857
 858         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 859
 860         redef fun escape_char(v, c) do
 861                 if c == '&' then
 862                         v.add "&amp;"
 863                 else if c == '<' then
 864                         v.add "&lt;"
 865                 else if c == '>' then
 866                         v.add "&gt;"
 867                 else if c == '"' then
 868                         v.add "&quot;"
 869                 else if c == '\'' then
 870                         v.add "&apos;"
 871                 else
 872                         v.addc c
 873                 end
 874         end
 875
 876         redef fun append_code(v, buffer, from, to) do
 877                 for i in [from..to[ do
 878                         var c = buffer[i]
 879                         if c == '&' then
 880                                 v.add "&amp;"
 881                         else if c == '<' then
 882                                 v.add "&lt;"
 883                         else if c == '>' then
 884                                 v.add "&gt;"
 885                         else
 886                                 v.addc c
 887                         end
 888                 end
 889         end
 890
 891         redef fun strip_id(txt) do
 892                 # strip id
 893                 var b = new FlatBuffer
 894                 for c in txt do
 895                         if c == ' ' then
 896                                 b.add '_'
 897                         else
 898                                 if not c.is_letter and
 899                                    not c.is_digit and
 900                                    not allowed_id_chars.has(c) then continue
 901                                 b.add c
 902                         end
 903                 end
 904                 var res = b.to_s
 905                 var key = res
 906                 # check for multiple id definitions
 907                 if headlines.has_key(key) then
 908                         var i = 1
 909                         key = "{res}_{i}"
 910                         while headlines.has_key(key) do
 911                                 i += 1
 912                                 key = "{res}_{i}"
 913                         end
 914                 end
 915                 return key
 916         end
 917
 918         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 919 end
 920
 921 # Location in a Markdown input.
 922 class MDLocation
 923
 924         # Starting line number (starting from 1).
 925         var line_start: Int
 926
 927         # Starting column number (starting from 1).
 928         var column_start: Int
 929
 930         # Stopping line number (starting from 1).
 931         var line_end: Int
 932
 933         # Stopping column number (starting from 1).
 934         var column_end: Int
 935
 936         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 937
 938         # Return a copy of `self`.
 939         fun copy: MDLocation do
 940                 return new MDLocation(line_start, column_start, line_end, column_end)
 941         end
 942 end
 943
 944 # A block of markdown lines.
 945 # A `MDBlock` can contains lines and/or sub-blocks.
 946 class MDBlock
 947
 948         # Position of `self` in the input.
 949         var location: MDLocation
 950
 951         # Kind of block.
 952         # See `Block`.
 953         var kind: Block = new BlockNone(self) is writable
 954
 955         # First line if any.
 956         var first_line: nullable MDLine = null is writable
 957
 958         # Last line if any.
 959         var last_line: nullable MDLine = null is writable
 960
 961         # First sub-block if any.
 962         var first_block: nullable MDBlock = null is writable
 963
 964         # Last sub-block if any.
 965         var last_block: nullable MDBlock = null is writable
 966
 967         # Previous block if any.
 968         var prev: nullable MDBlock = null is writable
 969
 970         # Next block if any.
 971         var next: nullable MDBlock = null is writable
 972
 973         # Does this block contain subblocks?
 974         fun has_blocks: Bool do return first_block != null
 975
 976         # Count sub-blocks.
 977         fun count_blocks: Int do
 978                 var count = 0
 979                 var block = first_block
 980                 while block != null do
 981                         count += 1
 982                         block = block.next
 983                 end
 984                 return count
 985         end
 986
 987         # Does this block contain lines?
 988         fun has_lines: Bool do return first_line != null
 989
 990         # Count block lines.
 991         fun count_lines: Int do
 992                 var count = 0
 993                 var line = first_line
 994                 while line != null do
 995                         count += 1
 996                         line = line.next
 997                 end
 998                 return count
 999         end
1000
1001         # Split `self` creating a new sub-block having `line` has `last_line`.
1002         fun split(line: MDLine): MDBlock do
1003                 # location for new block
1004                 var new_loc = new MDLocation(
1005                         first_line.as(not null).location.line_start,
1006                         first_line.as(not null).location.column_start,
1007                         line.location.line_end,
1008                         line.location.column_end)
1009                 # create block
1010                 var block = new MDBlock(new_loc)
1011                 block.first_line = first_line
1012                 block.last_line = line
1013                 first_line = line.next
1014                 line.next = null
1015                 if first_line == null then
1016                         last_line = null
1017                 else
1018                         first_line.as(not null).prev = null
1019                         # update current block loc
1020                         location.line_start = first_line.as(not null).location.line_start
1021                         location.column_start = first_line.as(not null).location.column_start
1022                 end
1023                 if first_block == null then
1024                         first_block = block
1025                         last_block = block
1026                 else
1027                         last_block.as(not null).next = block
1028                         last_block = block
1029                 end
1030                 return block
1031         end
1032
1033         # Add a `line` to this block.
1034         fun add_line(line: MDLine) do
1035                 if last_line == null then
1036                         first_line = line
1037                         last_line = line
1038                 else
1039                         last_line.as(not null).next_empty = line.is_empty
1040                         line.prev_empty = last_line.as(not null).is_empty
1041                         line.prev = last_line
1042                         last_line.as(not null).next = line
1043                         last_line = line
1044                 end
1045         end
1046
1047         # Remove `line` from this block.
1048         fun remove_line(line: MDLine) do
1049                 if line.prev == null then
1050                         first_line = line.next
1051                 else
1052                         line.prev.as(not null).next = line.next
1053                 end
1054                 if line.next == null then
1055                         last_line = line.prev
1056                 else
1057                         line.next.as(not null).prev = line.prev
1058                 end
1059                 line.prev = null
1060                 line.next = null
1061         end
1062
1063         # Remove leading empty lines.
1064         fun remove_leading_empty_lines: Bool do
1065                 var was_empty = false
1066                 var line = first_line
1067                 while line != null and line.is_empty do
1068                         remove_line line
1069                         line = first_line
1070                         was_empty = true
1071                 end
1072                 return was_empty
1073         end
1074
1075         # Remove trailing empty lines.
1076         fun remove_trailing_empty_lines: Bool do
1077                 var was_empty = false
1078                 var line = last_line
1079                 while line != null and line.is_empty do
1080                         remove_line line
1081                         line = last_line
1082                         was_empty = true
1083                 end
1084                 return was_empty
1085         end
1086
1087         # Remove leading and trailing empty lines.
1088         fun remove_surrounding_empty_lines: Bool do
1089                 var was_empty = false
1090                 if remove_leading_empty_lines then was_empty = true
1091                 if remove_trailing_empty_lines then was_empty = true
1092                 return was_empty
1093         end
1094
1095         # Remove list markers and up to 4 leading spaces.
1096         # Used to clean nested lists.
1097         fun remove_list_indent(v: MarkdownProcessor) do
1098                 var line = first_line
1099                 while line != null do
1100                         if not line.is_empty then
1101                                 var kind = v.line_kind(line)
1102                                 if kind isa LineList then
1103                                         line.value = kind.extract_value(line)
1104                                 else
1105                                         line.value = line.value.substring_from(line.leading.min(4))
1106                                 end
1107                                 line.leading = line.process_leading
1108                         end
1109                         line = line.next
1110                 end
1111         end
1112
1113         # Collect block line text.
1114         fun text: String do
1115                 var text = new FlatBuffer
1116                 var line = first_line
1117                 while line != null do
1118                         if not line.is_empty then
1119                                 text.append line.text
1120                         end
1121                         text.append "\n"
1122                         line = line.next
1123                 end
1124                 var block = first_block
1125                 while block != null do
1126                         text.append block.text
1127                         text.append "\n"
1128                         block = block.next
1129                 end
1130                 return text.write_to_string
1131         end
1132 end
1133
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v.decorator`.
        #
        # By default, `self` is directly given to `v.emit_in`.
        fun emit(v: MarkdownProcessor) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines of `block` are removed first.
        # Sub-blocks are emitted only if `block` has no line left.
        fun emit_in(v: MarkdownProcessor) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # The lines, without their leading and trailing spaces, are collected
        # between `v.push_buffer` and `v.pop_buffer` then given to `v.emit_text`.
        # A line break is added after each line ending with two spaces or more.
        fun emit_lines(v: MarkdownProcessor) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a length, not an end index:
                                # the leading spaces must be subtracted too, else
                                # indented lines keep part of their trailing spaces.
                                # `max(0)` guards against stale `leading`/`trailing`.
                                var count = (line.value.length - line.leading - line.trailing).max(0)
                                v.add line.value.substring(line.leading, count)
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while it is emitted.
        fun emit_blocks(v: MarkdownProcessor) do
                var block = self.block.first_block
                while block != null do
                        v.push_loc(block.location)
                        block.kind.emit(v)
                        v.pop_loc
                        block = block.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Each line of `block` gives one line of output; empty lines are kept
        # as empty. Outside of a `BlockFence`, a prefix of 4 spaces is removed.
        #
        # NOTE(review): `substring(4, str.length - line.trailing)` uses an end
        # index as a length, so trailing spaces are only partially removed.
        # Kept as is since the intended result is unclear; confirm whether
        # trailing spaces should be kept in raw content.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        text.append str.substring(4, str.length - line.trailing)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1203
# A block without any markdown specificities.
#
# Uses the default implementation of `Block` without any redefinition,
# this class is only used for typing purposes.
class BlockNone
        super Block
end
1211
# A markdown blockquote.
class BlockQuote
        super Block

        # Delegate the rendering to `Decorator::add_blockquote`.
        redef fun emit(v) do v.decorator.add_blockquote(v, self)

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` whose first non-space character
        # is `>`, the marker and one optional following space are removed,
        # then `leading` is recomputed.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty and line.value[line.leading] == '>' then
                                # skip the marker, and the space after it if any
                                var cut = line.leading + 1
                                if cut < line.value.length and line.value[cut] == ' ' then
                                        cut += 1
                                end
                                line.value = line.value.substring_from(cut)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1237
# A markdown code block.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Number of char to skip at the beginning of the line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        # Delegate the rendering to `Decorator::add_code`.
        redef fun emit(v) do v.decorator.add_code(v, self)

        # Emit each line through `Decorator::append_code`, skipping the
        # first `line_start` characters, and end each line with a new line.
        #
        # Empty lines only produce the new line.
        redef fun emit_lines(v) do
                var cursor = block.first_line
                while cursor != null do
                        if not cursor.is_empty then
                                var code = cursor.value
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        cursor = cursor.next
                end
        end
end
1263
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode` except for `line_start`,
# this class is mainly used for typing purposes.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1274
# A markdown headline.
class BlockHeadline
        super Block

        # Emit `self` with `Decorator::add_headline`.
        #
        # A copy of the block location, shifted by `start` columns, is pushed
        # on `v` during the emission.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Index of the first character of the title in the original line.
        #
        # Set by `transform_headline`.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from lines contained in `self`.
        #
        # Does nothing if `depth` is already set, or if the first line of
        # `block` is missing or empty.
        # Else the opening `#` and the spaces after them are skipped, the
        # closing `#` and the spaces before them are dropped, and `depth` is
        # set to the number of opening `#` (6 at most).
        # A line without any title text is marked as empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var line = block.first_line
                if line == null then return
                if line.is_empty then return
                var value = line.value
                # opening marks
                var level = 0
                var start = line.leading
                while start < value.length and value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < value.length and value[start] == ' ' do
                        start += 1
                end
                # closing marks: never scan before `start`, else headlines like
                # `# #` end with `nend < start` and a negative substring length
                var nend = value.length - line.trailing - 1
                while nend >= start and value[nend] == '#' do nend -= 1
                while nend >= start and value[nend] == ' ' do nend -= 1
                if nend < start then
                        # only marks and spaces: no title text
                        line.is_empty = true
                else
                        line.value = value.substring(start, nend - start + 1)
                        line.leading = 0
                        line.trailing = 0
                end
                self.start = start
                depth = level.min(6)
        end
end
1321
# A markdown list item block.
class BlockListItem
        super Block

        # Delegate the rendering to `Decorator::add_listitem`.
        redef fun emit(v) do v.decorator.add_listitem(v, self)
end
1328
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins at each line that
        # is a `LineList`, or that is non-empty, not indented and preceded by
        # an empty line. The remaining lines form the last item.
        private fun init_block(v: MarkdownProcessor) do
                var line = block.first_line
                if line == null then return
                line = line.next
                while line != null do
                        var kind = v.line_kind(line)
                        var starts_item = kind isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                # everything before `line` belongs to the previous item
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # If a sub-block of any list item of `block` is a `BlockParagraph`,
        # then every `BlockNone` sub-block of the list items becomes a
        # `BlockParagraph` too.
        private fun expand_paragraphs(block: MDBlock) do
                # first pass: look for a paragraph in the items
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var sub = item.first_block
                                while sub != null and not found do
                                        found = sub.kind isa BlockParagraph
                                        sub = sub.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # second pass: promote plain blocks to paragraphs
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var sub = item.first_block
                                while sub != null do
                                        if sub.kind isa BlockNone then
                                                sub.kind = new BlockParagraph(sub)
                                        end
                                        sub = sub.next
                                end
                        end
                        item = item.next
                end
        end
end
1387
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        # Delegate the rendering to `Decorator::add_orderedlist`.
        redef fun emit(v) do v.decorator.add_orderedlist(v, self)
end
1394
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Delegate the rendering to `Decorator::add_unorderedlist`.
        redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
end
1401
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Delegate the rendering to `Decorator::add_paragraph`.
        redef fun emit(v) do v.decorator.add_paragraph(v, self)
end
1408
# A markdown ruler.
class BlockRuler
        super Block

        # Delegate the rendering to `Decorator::add_ruler`.
        redef fun emit(v) do v.decorator.add_ruler(v, self)
end
1415
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the value of each line as is, followed by a new line.
        #
        # Empty lines only produce the new line.
        redef fun emit_lines(v) do
                var cursor = block.first_line
                while cursor != null do
                        if not cursor.is_empty then
                                v.add cursor.value
                        end
                        v.addn
                        cursor = cursor.next
                end
        end
end
1429
1430 # A markdown line.
1431 class MDLine
1432
1433         # Location of `self` in the original input.
1434         var location: MDLocation
1435
1436         # Text contained in this line.
1437         var value: String is writable
1438
1439         # Is this line empty?
1440         # Lines containing only spaces are considered empty.
1441         var is_empty: Bool = true is writable
1442
1443         # Previous line in `MDBlock` or null if first line.
1444         var prev: nullable MDLine = null is writable
1445
1446         # Next line in `MDBlock` or null if last line.
1447         var next: nullable MDLine = null is writable
1448
1449         # Is the previous line empty?
1450         var prev_empty: Bool = false is writable
1451
1452         # Is the next line empty?
1453         var next_empty: Bool = false is writable
1454
1455         # Initialize a new MDLine from its string value
1456         init do
1457                 self.leading = process_leading
1458                 if leading != value.length then
1459                         self.is_empty = false
1460                         self.trailing = process_trailing
1461                 end
1462         end
1463
1464         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1465         fun clear do
1466                 value = ""
1467                 leading = 0
1468                 trailing = 0
1469                 is_empty = true
1470                 if prev != null then prev.as(not null).next_empty = true
1471                 if next != null then next.as(not null).prev_empty = true
1472         end
1473
        # Number of leading spaces on this line.
1475         var leading: Int = 0 is writable
1476
1477         # Compute `leading` depending on `value`.
1478         fun process_leading: Int do
1479                 var count = 0
1480                 var value = self.value
1481                 while count < value.length and value[count] == ' ' do count += 1
1482                 if leading == value.length then clear
1483                 return count
1484         end
1485
1486         # Number of trailing spaces on this line.
1487         var trailing: Int = 0 is writable
1488
1489         # Compute `trailing` depending on `value`.
1490         fun process_trailing: Int do
1491                 var count = 0
1492                 var value = self.value
1493                 while value[value.length - count - 1] == ' ' do
1494                         count += 1
1495                 end
1496                 return count
1497         end
1498
1499         # Count the amount of `ch` in this line.
1500         # Return A value > 0 if this line only consists of `ch` end spaces.
1501         fun count_chars(ch: Char): Int do
1502                 var count = 0
1503                 for c in value do
1504                         if c == ' ' then
1505                                 continue
1506                         end
1507                         if c == ch then
1508                                 count += 1
1509                                 continue
1510                         end
1511                         count = 0
1512                         break
1513                 end
1514                 return count
1515         end
1516
1517         # Count the amount of `ch` at the start of this line ignoring spaces.
1518         fun count_chars_start(ch: Char): Int do
1519                 var count = 0
1520                 for c in value do
1521                         if c == ' ' then
1522                                 continue
1523                         end
1524                         if c == ch then
1525                                 count += 1
1526                         else
1527                                 break
1528                         end
1529                 end
1530                 return count
1531         end
1532
1533         # Last XML line if any.
1534         private var xml_end_line: nullable MDLine = null
1535
1536         # Does `value` contains valid XML markup?
1537         private fun check_html: Bool do
1538                 var tags = new Array[String]
1539                 var tmp = new FlatBuffer
1540                 var pos = leading
1541                 if pos + 1 < value.length and value[pos + 1] == '!' then
1542                         if read_xml_comment(self, pos) > 0 then return true
1543                 end
1544                 pos = value.read_xml(tmp, pos, false)
1545                 var tag: String
1546                 if pos > -1 then
1547                         tag = tmp.xml_tag
1548                         if not tag.is_html_block then
1549                                 return false
1550                         end
1551                         if tag == "hr" then
1552                                 xml_end_line = self
1553                                 return true
1554                         end
1555                         tags.add tag
1556                         var line: nullable MDLine = self
1557                         while line != null do
1558                                 while pos < line.value.length and line.value[pos] != '<' do
1559                                         pos += 1
1560                                 end
1561                                 if pos >= line.value.length then
1562                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1563                                                 tags.pop
1564                                                 if tags.is_empty then
1565                                                         xml_end_line = line
1566                                                         break
1567                                                 end
1568                                         end
1569                                         line = line.next
1570                                         pos = 0
1571                                 else
1572                                         tmp = new FlatBuffer
1573                                         var new_pos = line.value.read_xml(tmp, pos, false)
1574                                         if new_pos > 0 then
1575                                                 tag = tmp.xml_tag
1576                                                 if tag.is_html_block and not tag == "hr" then
1577                                                         if tmp[1] == '/' then
1578                                                                 if tags.last != tag then
1579                                                                         return false
1580                                                                 end
1581                                                                 tags.pop
1582                                                         else
1583                                                                 tags.add tag
1584                                                         end
1585                                                 end
1586                                                 if tags.is_empty then
1587                                                         xml_end_line = line
1588                                                         break
1589                                                 end
1590                                                 pos = new_pos
1591                                         else
1592                                                 pos += 1
1593                                         end
1594                                 end
1595                         end
1596                         return tags.is_empty
1597                 end
1598                 return false
1599         end
1600
1601         # Read a XML comment.
1602         # Used by `check_html`.
1603         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1604                 var line: nullable MDLine = first_line
1605                 if start + 3 < line.as(not null).value.length then
1606                         if line.as(not null).value[2] == '-' and line.as(not null).value[3] == '-' then
1607                                 var pos = start + 4
1608                                 while line != null do
1609                                         while pos < line.value.length and line.value[pos] != '-' do
1610                                                 pos += 1
1611                                         end
1612                                         if pos == line.value.length then
1613                                                 line = line.next
1614                                                 pos = 0
1615                                         else
1616                                                 if pos + 2 < line.value.length then
1617                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1618                                                                 first_line.xml_end_line = line
1619                                                                 return pos + 3
1620                                                         end
1621                                                 end
1622                                                 pos += 1
1623                                         end
1624                                 end
1625                         end
1626                 end
1627                 return -1
1628         end
1629
1630         # Extract the text of `self` without leading and trailing.
1631         fun text: String do return value.substring(leading, value.length - trailing)
1632 end
1633
# A kind of markdown line.
#
# Each kind knows how to process the lines found at the current position
# of a `MarkdownProcessor`.
interface Line

        # Process the current line of `v`.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1641
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the current line: `v.current_line` moves to the next one.
        redef fun process(v) do
                var current = v.current_line.as(not null)
                v.current_line = current.next
        end
end
1650
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines starting at `v.current_line` into a paragraph block.
        #
        # The block stops at the first empty line, at the end of the input or
        # before a line that starts another construct.
        # Inside a list, a block that is neither preceded nor ended by an empty
        # line gets the kind `BlockNone` instead of `BlockParagraph`.
        redef fun process(v) do
                var first = v.current_line.as(not null)
                var was_empty = first.prev_empty
                # go to block end
                var line: nullable MDLine = first
                while line != null and not line.is_empty do
                        var kind = v.line_kind(line)
                        if (v.in_list or v.ext_mode) and kind isa LineList then break
                        if v.ext_mode and (kind isa LineCode or kind isa LineFence) then break
                        # `LineHeadline` also covers `LineHeadline1` and `LineHeadline2`.
                        if kind isa LineHeadline or kind isa LineHR or
                           kind isa LineBlockquote or kind isa LineXML then break
                        line = line.next
                end
                # find the last line of the block
                var current_block = v.current_block.as(not null)
                var ends_on_empty = false
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else if line.is_empty then
                        # The empty line is taken along with the block.
                        last = line
                        ends_on_empty = true
                else
                        last = line.prev.as(not null)
                end
                # build block
                var block = current_block.split(last)
                if v.in_list and not was_empty and not ends_on_empty then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1701
# A line of markdown code.
class LineCode
        super Line

        # Gather the code lines starting at `v.current_line` into a `BlockCode`.
        #
        # Empty lines do not end the block; surrounding empty lines are removed
        # from it afterwards.
        redef fun process(v) do
                # lookup block end
                var line = v.current_line
                loop
                        if line == null then break
                        if not line.is_empty and not v.line_kind(line) isa LineCode then break
                        line = line.next
                end
                # split at block end line
                var current_block = v.current_block.as(not null)
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var code = current_block.split(last)
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = current_block.first_line
        end
end
1725
# A line of raw XML.
class LineXML
        super Line

        # Isolate the XML markup starting at `v.current_line` into a `BlockXML`.
        #
        # The block runs up to the `xml_end_line` of the current line, which
        # must have been set beforehand.
        # Does nothing if `v` has no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var current_block = v.current_block.as(not null)
                # Lines before the markup are split off first.
                var before = line.prev
                if before != null then
                        current_block.split(before)
                end
                var xml = current_block.split(line.xml_end_line.as(not null))
                xml.kind = new BlockXML(xml)
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1742
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the quoted lines into a `BlockQuote` and process its content.
        #
        # The quote ends before the first non-empty line that follows an empty
        # line, has no leading space and is not itself a blockquote line.
        redef fun process(v) do
                var current_block = v.current_block.as(not null)
                # go to bquote end
                var line = v.current_line
                loop
                        if line == null then break
                        if not line.is_empty and line.prev_empty and line.leading == 0 then
                                if not v.line_kind(line) isa LineBlockquote then break
                        end
                        line = line.next
                end
                # build sub block
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var quote = current_block.split(last)
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                # process the content of the quote, then resume after it
                v.current_line = line
                v.recurse(quote, false)
                v.current_line = current_block.first_line
        end
end
1773
# A markdown ruler line.
class LineHR
        super Line

        # Isolate the current line into its own `BlockRuler`.
        #
        # Lines before the ruler, if any, are split off the current block first.
        # Does nothing if `v` has no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var current_block = v.current_block.as(not null)
                var before = line.prev
                if before != null then
                        current_block.split(before)
                end
                var ruler = current_block.split(line)
                ruler.kind = new BlockRuler(ruler)
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1789
# A markdown fence code line.
class LineFence
        super Line

        # Gather the lines from the opening fence to the closing one into a `BlockFence`.
        #
        # If no closing fence is found, the block runs to the end of the current block.
        # The fence lines themselves are cleared; the meta is read from the
        # opening fence with `meta_from_fence`.
        redef fun process(v) do
                # go to fence end
                var line = v.current_line.as(not null).next
                var current_block = v.current_block.as(not null)
                while line != null and not v.line_kind(line) isa LineFence do
                        line = line.next
                end
                # step over the closing fence so that it belongs to the block
                if line != null then
                        line = line.next
                end
                # build fence block
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var fence = current_block.split(last)
                fence.remove_surrounding_empty_lines
                var opening = fence.first_line.as(not null)
                var meta = opening.value.meta_from_fence
                fence.kind = new BlockFence(fence, meta)
                fence.first_line.as(not null).clear
                # the last line is cleared only if it really is a fence
                var closing = fence.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = line
        end
end
1824
# A markdown headline.
class LineHeadline
        super Line

        # Isolate the current line into its own `BlockHeadline`.
        #
        # Lines before the headline, if any, are split off the current block first.
        # Does nothing if `v` has no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var current_block = v.current_block.as(not null)
                var before = line.prev
                if before != null then
                        current_block.split(before)
                end
                var headline = current_block.split(line)
                var kind = new BlockHeadline(headline)
                headline.kind = kind
                kind.transform_headline(headline)
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1843
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Isolate the current line into a `BlockHeadline` of depth 1.
        #
        # The line that follows the headline text is cleared; it must exist.
        # Does nothing if `v` has no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var current_block = v.current_block.as(not null)
                var before = line.prev
                if before != null then
                        current_block.split(before)
                end
                var following = line.next.as(not null)
                following.clear
                var headline = current_block.split(line)
                var kind = new BlockHeadline(headline)
                kind.depth = 1
                kind.transform_headline(headline)
                headline.kind = kind
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1864
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Isolate the current line into a `BlockHeadline` of depth 2.
        #
        # The line that follows the headline text is cleared; it must exist.
        # Does nothing if `v` has no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var current_block = v.current_block.as(not null)
                var before = line.prev
                if before != null then
                        current_block.split(before)
                end
                var following = line.next.as(not null)
                following.clear
                var headline = current_block.split(line)
                var kind = new BlockHeadline(headline)
                kind.depth = 2
                kind.transform_headline(headline)
                headline.kind = kind
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1885
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Gather the lines of the list into a block and process each of its items.
        #
        # The list ends before the first non-empty line that follows an empty
        # line, has no leading space and is not a list line.
        redef fun process(v) do
                # go to list end
                var line = v.current_line
                loop
                        if line == null then break
                        # The kind is looked up for every line, empty ones included.
                        var line_kind = v.line_kind(line)
                        if not line.is_empty and line.prev_empty and line.leading == 0 and
                           not line_kind isa LineList then break
                        line = line.next
                end
                # build list block
                var current_block = v.current_block.as(not null)
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var list = current_block.split(last)
                var kind = block_kind(list)
                list.kind = kind
                # the flags are reset before and after the removal of the
                # surrounding empty lines since the first and last lines may change
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                kind.init_block(v)
                # process each sub block of the list
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = line
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1932
# An ordered list line.
class LineOList
        super LineList

        # Ordered lists are held by a `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var content = line.value
                var dot = content.index_of('.')
                return content.substring_from(dot + 2)
        end
end
1943
# An unordered list line.
class LineUList
        super LineList

        # Unordered lists are held by a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading part of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1954
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Location of `self` in the original input.
        var location: nullable MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token using `MarkdownEmitter::decorator`.
        #
        # By default `char` is added as is with `add_char`.
        fun emit(v: MarkdownProcessor) do
                v.decorator.add_char(v, char)
        end
end
1971
# A token without a specific meaning.
#
# It keeps the default behaviour of `Token::emit`.
class TokenNone
        super Token
end
1976
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        #
        # If `emit_text_until` finds no match, only `char` is added.
        redef fun emit(v) do
                # The emphasized text is rendered in its own buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var close = v.emit_text_until(text, pos + 1, self)
                v.pop_buffer
                if close <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = close
        end
end
1993
# An emphasis token written with a star.
class TokenEmStar
        super TokenEm
end
1998
# An emphasis token written with an underscore.
class TokenEmUnderscore
        super TokenEm
end
2003
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        #
        # The token is two characters wide: the text starts at `pos + 2` and
        # the current position ends one character after the match.
        # If `emit_text_until` finds no match, only `char` is added.
        redef fun emit(v) do
                # The strong text is rendered in its own buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var close = v.emit_text_until(text, pos + 2, self)
                v.pop_buffer
                if close <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = close + 1
        end
end
2020
# A strong token written with stars.
class TokenStrongStar
        super TokenStrong
end
2025
# A strong token written with underscores.
class TokenStrongUnderscore
        super TokenStrong
end
2030
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span of code.
        #
        # Spaces at both ends of the span are left out; a span made only of
        # spaces produces nothing.
        # If `find_token` finds no match, only `char` is added.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var from = pos + next_pos + 1
                var to = v.find_token(text, from, self)
                if to <= 0 then
                        v.addc char
                        return
                end
                # The position is set before the span gets trimmed.
                v.current_pos = to + next_pos
                while from < to and text[from] == ' ' do from += 1
                if from >= to then return
                # `text[from]` is not a space, so this loop stops at `from` at the latest.
                while text[to - 1] == ' ' do to -= 1
                v.decorator.add_span_code(v, text, from, to)
        end

        # Number of extra characters of the token after the first one.
        private fun next_pos: Int is abstract
end
2054
# A span code token.
class TokenCodeSingle
        super TokenCode

        # The token has no extra character.
        redef fun next_pos do
                return 0
        end
end
2061
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # The token has one extra character.
        redef fun next_pos do
                return 1
        end
end
2068
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image if `check_link` accepts it, only `char` otherwise.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownProcessor) is abstract

        # Check if the link is a valid link.
        #
        # `start` is the position of the token in `v.current_text`.
        # The text is skipped over one character for a `TokenLink` and two for
        # any other token, then the following forms are recognized:
        #
        # * `name` alone: `name` must be a key of `v.link_refs`
        # * `name` followed by `(link)`, `(<link>)` or `(link "title")`
        # * `name` followed by `[id]`: `id` (or `name` if `id` is empty)
        #   is looked up in `v.link_refs`
        #
        # On success `name`, `link`, `comment` and `is_abbrev` are filled and
        # the position of the last character of the construct is returned.
        # Returns `-1` if the construct is not a valid link.
        #
        # `out` is not used.
        private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                if md == null then return -1
                var pos: Int
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                # Position to go back to when nothing follows the name.
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # End of the text: the name alone must be a known reference.
                        var tid = name.as(not null).write_to_string.to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline link: `(link)`, `(<link>)` or `(link "title")`.
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # The text may stop right after the address (`[a](<b>`):
                        # without a closing `)` this is not a link.
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                if pos > start and md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference link: `[id]`, an empty id means `name`.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.as(not null).write_to_string.to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Something else follows: the name alone must be a known reference.
                        var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                # Same as at the end of the text: an abbreviation
                                # stays an abbreviation when followed by other text.
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
2189
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when `is_abbrev` is set and a `comment` exists,
        # a link otherwise.
        redef fun emit_hyper(v) do
                var text = name.as(not null)
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, text, title)
                        return
                end
                v.decorator.add_link(v, link.as(not null), text, title)
        end
end
2202
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with `link` as address, `name` as text and `comment` as title.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, address, text, comment)
        end
end
2211
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the markup found at the current position of `v`.
        #
        # If `check_html` rejects it, `char` is escaped instead.
        redef fun emit(v) do
                var markup = new FlatBuffer
                var text = v.current_text.as(not null)
                var stop = check_html(v, markup, text, v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add markup
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # An auto link is emitted directly through the decorator of `v` and
        # leaves `out` untouched. Inline HTML is read into `out` with `read_xml`.
        # Returns the position reached in `md` or `-1` if nothing is recognized.
        private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var candidate = new FlatBuffer
                var pos = md.read_until(candidate, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and candidate.is_link_prefix then
                        # read the rest of the address, up to the closing `>`
                        pos = md.read_until(candidate, pos, '>')
                        if pos != -1 then
                                var address = candidate.write_to_string
                                v.decorator.add_link(v, address, address, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2249
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity found at `pos` as is.
        #
        # If `check_entity` rejects it, `char` is escaped instead.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # The text of `md` from `start` up to the next `;` is read into `out`.
        # Three forms are accepted:
        #
        # * `&#x` followed by at least one hexadecimal digit
        # * `&#` followed by decimal digits
        # * `&` followed by letters and digits
        #
        # On success the `;` is appended to `out` and the position returned
        # by `read_until` is returned.
        # Returns `-1` if the entity is not valid.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then
                        return -1
                end
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        # A character must be in one of the three
                                        # ranges to be an hexadecimal digit.
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check that the name is a known entity
                end
                out.add ';'
                return pos
        end
end
2303
# A markdown escape token.
class TokenEscape
        super Token

        # Skip the escape character and add the character that follows it as is.
        redef fun emit(v) do
                var escaped_pos = v.current_pos + 1
                v.current_pos = escaped_pos
                var text = v.current_text.as(not null)
                v.addc text[escaped_pos]
        end
end
2313
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        redef fun emit(v) do
                # Render the struck text in a temporary buffer.
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        # `emit_text_until` gave no usable position: output the raw char.
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                v.current_pos = stop + 1
        end
end
2332
2333 redef class Text
2334
2335         # Get the position of the next non-space character.
2336         private fun skip_spaces(start: Int): Int do
2337                 var pos = start
2338                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2339                         pos += 1
2340                 end
2341                 if pos < length then return pos
2342                 return -1
2343         end
2344
2345         # Read `self` until `nend` and append it to the `out` buffer.
2346         # Escape markdown special chars.
2347         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2348                 var pos = start
2349                 while pos < length do
2350                         var c = self[pos]
2351                         if c == '\\' and pos + 1 < length then
2352                                 pos = escape(out, self[pos + 1], pos)
2353                         else
2354                                 for n in nend do if c == n then break label
2355                                 out.add c
2356                         end
2357                         pos += 1
2358                 end label
2359                 if pos == length then return -1
2360                 return pos
2361         end
2362
2363         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2364         # No escape is made.
2365         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2366                 var pos = start
2367                 while pos < length do
2368                         var c = self[pos]
2369                         var end_reached = false
2370                         for n in nend do
2371                                 if c == n then
2372                                         end_reached = true
2373                                         break
2374                                 end
2375                         end
2376                         if end_reached then break
2377                         out.add c
2378                         pos += 1
2379                 end
2380                 if pos == length then return -1
2381                 return pos
2382         end
2383
2384         # Read `self` as XML until `to` and append it to the `out` buffer.
2385         # Escape HTML special chars.
2386         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2387                 var pos = from
2388                 var in_str = false
2389                 var str_char: nullable Char = null
2390                 while pos < length do
2391                         var c = self[pos]
2392                         if in_str then
2393                                 if c == '\\' then
2394                                         out.add c
2395                                         pos += 1
2396                                         if pos < length then
2397                                                 out.add c
2398                                                 pos += 1
2399                                         end
2400                                         continue
2401                                 end
2402                                 if c == str_char then
2403                                         in_str = false
2404                                         out.add c
2405                                         pos += 1
2406                                         continue
2407                                 end
2408                         end
2409                         if c == '"' or c == '\'' then
2410                                 in_str = true
2411                                 str_char = c
2412                         end
2413                         if not in_str then
2414                                 var end_reached = false
2415                                 for n in [0..to.length[ do
2416                                         if c == to[n] then
2417                                                 end_reached = true
2418                                                 break
2419                                         end
2420                                 end
2421                                 if end_reached then break
2422                         end
2423                         out.add c
2424                         pos += 1
2425                 end
2426                 if pos == length then return -1
2427                 return pos
2428         end
2429
2430         # Read `self` as XML and append it to the `out` buffer.
2431         # Safe mode can be activated to limit reading to valid xml.
2432         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2433                 var pos = 0
2434                 var is_valid = true
2435                 var is_close_tag = false
2436                 if start + 1 >= length then return -1
2437                 if self[start + 1] == '/' then
2438                         is_close_tag = true
2439                         pos = start + 2
2440                 else if self[start + 1] == '!' then
2441                         out.append "<!"
2442                         return start + 1
2443                 else
2444                         is_close_tag = false
2445                         pos = start + 1
2446                 end
2447                 if safe_mode then
2448                         var tmp = new FlatBuffer
2449                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2450                         if pos == -1 then return -1
2451                         var tag = tmp.write_to_string.trim.to_lower
2452                         if not tag.is_valid_html_tag then
2453                                 out.append "&lt;"
2454                                 pos = -1
2455                         else if tag.is_html_unsafe then
2456                                 is_valid = false
2457                                 out.append "&lt;"
2458                                 if is_close_tag then out.add '/'
2459                                 out.append tmp
2460                         else
2461                                 out.append "<"
2462                                 if is_close_tag then out.add '/'
2463                                 out.append tmp
2464                         end
2465                 else
2466                         out.add '<'
2467                         if is_close_tag then out.add '/'
2468                         pos = read_xml_until(out, pos, ' ', '/', '>')
2469                 end
2470                 if pos == -1 then return -1
2471                 pos = read_xml_until(out, pos, '/', '>')
2472                 if pos == -1 then return -1
2473                 if self[pos] == '/' then
2474                         out.append " /"
2475                         pos = self.read_xml_until(out, pos + 1, '>')
2476                         if pos == -1 then return -1
2477                 end
2478                 if self[pos] == '>' then
2479                         if is_valid then
2480                                 out.add '>'
2481                         else
2482                                 out.append "&gt;"
2483                         end
2484                         return pos
2485                 end
2486                 return -1
2487         end
2488
2489         # Read a markdown link address and append it to the `out` buffer.
2490         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2491                 var pos = start
2492                 var counter = 1
2493                 while pos < length do
2494                         var c = self[pos]
2495                         if c == '\\' and pos + 1 < length then
2496                                 pos = escape(out, self[pos + 1], pos)
2497                         else
2498                                 var end_reached = false
2499                                 if c == '(' then
2500                                         counter += 1
2501                                 else if c == ' ' then
2502                                         if counter == 1 then end_reached = true
2503                                 else if c == ')' then
2504                                         counter -= 1
2505                                         if counter == 0 then end_reached = true
2506                                 end
2507                                 if end_reached then break
2508                                 out.add c
2509                         end
2510                         pos += 1
2511                 end
2512                 if pos == length then return -1
2513                 return pos
2514         end
2515
2516         # Read a markdown link text and append it to the `out` buffer.
2517         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2518                 var pos = start
2519                 var counter = 1
2520                 while pos < length do
2521                         var c = self[pos]
2522                         var end_reached = false
2523                         if c == '[' then
2524                                 counter += 1
2525                                 out.add c
2526                         else if c == ']' then
2527                                 counter -= 1
2528                                 if counter == 0 then
2529                                         end_reached = true
2530                                 else
2531                                         out.add c
2532                                 end
2533                         else
2534                                 out.add c
2535                         end
2536                         if end_reached then break
2537                         pos += 1
2538                 end
2539                 if pos == length then return -1
2540                 return pos
2541         end
2542
2543         # Extract the XML tag name from a XML tag.
2544         private fun xml_tag: String do
2545                 var tpl = new FlatBuffer
2546                 var pos = 1
2547                 if pos < length and self[1] == '/' then pos += 1
2548                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2549                         tpl.add self[pos]
2550                         pos += 1
2551                 end
2552                 return tpl.write_to_string.to_lower
2553         end
2554
2555         private fun is_valid_html_tag: Bool do
2556                 if is_empty then return false
2557                 for c in self do
2558                         if not c.is_alpha then return false
2559                 end
2560                 return true
2561         end
2562
2563         # Read and escape the markdown contained in `self`.
2564         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2565                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2566                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2567                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2568                    c == '`' or c == '~' or c == '^' then
2569                         out.add c
2570                         return pos + 1
2571                 end
2572                 out.add '\\'
2573                 return pos
2574         end
2575
2576         # Extract string found at end of fence opening.
2577         private fun meta_from_fence: nullable Text do
2578                 for i in [0..chars.length[ do
2579                         var c = chars[i]
2580                         if c != ' ' and c != '`' and c != '~' then
2581                                 return substring_from(i).trim
2582                         end
2583                 end
2584                 return null
2585         end
2586
2587         # Is `self` an unsafe HTML element?
2588         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2589
2590         # Is `self` a HRML block element?
2591         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2592
2593         # Is `self` a link prefix?
2594         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2595
2596         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2597
2598         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2599
2600         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2601 end
2602
redef class String

        # Parse `self` as markdown and return the HTML representation.
        #
        # The work is done by a new `MarkdownProcessor`.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do
                return (new MarkdownProcessor).process(self)
        end
end