lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for output.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         # ~~~md
  45         # This is a paragraph
  46         # * and this is not a list
  47         # ~~~
  48         #
  49         #   Will produce:
  50         #
  51         # ~~~html
  52         # <p>This is a paragraph
  53         # * and this is not a list</p>
  54         # ~~~
  55         #
  56         #   When using extended mode this changes to:
  57         #
  58         # ~~~html
  59         # <p>This is a paragraph</p>
  60         # <ul>
  61         # <li>and this is not a list</li>
  62         # </ul>
  63         # ~~~
  64         #
  65         # * Fenced code blocks
  66         #
  67         #   If you don't want to indent all your code with 4 spaces,
  68         #   you can wrap your code in ``` ``` ``` or `~~~`.
  69         #
  70         #   Here's an example:
  71         #
  72         # ~~~md
  73         # fun test do
  74         #    print "Hello World!"
  75         # end
  76         # ~~~
  77         #
  78         # * Code blocks meta
  79         #
  80         #   If you want to use syntax highlighting tools, most of them need to know what kind
  81         #   of language they are highlighting.
  82         #   You can add an optional language identifier after the fence declaration to output
  83         #   it in the HTML render.
  84         #
  85         # ```nit
  86         # import markdown
  87         #
  88         # print "# Hello World!".md_to_html
  89         # ```
  90         #
  91         #   Becomes
  92         #
  93         # ~~~html
  94         # <pre class="nit"><code>import markdown
  95         #
  96         # print "# Hello World!".md_to_html
  97         # </code></pre>
  98         # ~~~
  99         #
 100         # * Underscores (Emphasis)
 101         #
 102         #   Underscores in the middle of a word like:
 103         #
 104         # ~~~md
 105         # Con_cat_this
 106         # ~~~
 107         #
 108         #   normally produces this:
 109         #
 110         # ~~~html
 111         # <p>Con<em>cat</em>this</p>
 112         # ~~~
 113         #
 114         #   With extended mode they don't result in emphasis.
 115         #
 116         # ~~~html
 117         # <p>Con_cat_this</p>
 118         # ~~~
 119         #
 120         # * Strikethrough
 121         #
 122         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 123         #   a strikethrough span is marked with `~~`.
 124         #
 125         # ~~~md
 126         # ~~Mistaken text.~~
 127         # ~~~
 128         #
 129         #   becomes
 130         #
 131         # ~~~html
 132         # <del>Mistaken text.</del>
 133         # ~~~
 134         var ext_mode = true
 135
 136         # Disable attaching MDLocation to Tokens
 137         #
 138         # Locations are useful for some tools but they may
 139         # cause an important time and space overhead.
 140         #
 141         # Default = `false`
 142         var no_location = false is writable
 143
 144         init do self.emitter = new MarkdownEmitter(self)
 145
 146         # Convert the markdown `input` string and return the rendered output.
 147         fun process(input: String): Writable do
 148                 # Drop whatever state a previous run left behind.
 149                 current_block = null
 150                 current_line = null
 151                 last_link_ref = null
 152                 link_refs.clear
 153                 # Build the block tree out of the input lines.
 154                 var root = read_lines(input)
 155                 root.remove_surrounding_empty_lines
 156                 recurse(root, false)
 157                 # Let the emitter render the tree.
 158                 return emitter.emit(root.kind)
 159         end
 160
 161         # Cut `input` into `MDLine`s and gather them under a new root `MDBlock`.
 162         # Tabs are expanded to 4-column stops and `\r` characters are dropped.
 163         private fun read_lines(input: String): MDBlock do
 164                 var root = new MDBlock(new MDLocation(1, 1, 1, 1))
 165                 var text = new FlatBuffer
 166                 var cursor = 0
 167                 var line_no = 0
 168                 var column = 0
 169
 170                 while cursor < input.length do
 171                         text.clear
 172                         var width = 0
 173                         var at_eol = false
 174                         while not at_eol and cursor < input.length do
 175                                 var c = input[cursor]
 176                                 cursor += 1
 177                                 column += 1
 178                                 if c == '\n' then
 179                                         at_eol = true
 180                                 else if c == '\t' then
 181                                         # Pad with blanks up to the next tab stop.
 182                                         var stop = width + (4 - (width & 3))
 183                                         while width < stop do
 184                                                 text.add ' '
 185                                                 width += 1
 186                                         end
 187                                 else if c != '\r' then
 188                                         # Regular character: copy it as is.
 189                                         text.add c
 190                                         width += 1
 191                                 end
 192                         end
 193                         line_no += 1
 194
 195                         var loc = new MDLocation(line_no, 1, line_no, column)
 196                         var line = new MDLine(loc, text.write_to_string)
 197                         column = 0
 198                         # Lines consumed by a link reference are not kept in the block.
 199                         if not check_link_ref(line) then root.add_line line
 200                 end
 201                 return root
 202         end
 203
 204         # Check if `line` is, or completes, a block link definition.
 205         # Return `true` when `line` is consumed: it holds a valid link ref (saved into
 206         # `link_refs`) or the title of the title-less ref defined on the line just before.
 207         private fun check_link_ref(line: MDLine): Bool do
 208                 var md = line.value
 209                 var is_link_ref = false
 210                 var id = new FlatBuffer
 211                 var link = new FlatBuffer
 212                 var comment = new FlatBuffer
 213                 var pos = -1
 214                 if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
 215                         pos = line.leading + 1
 216                         pos = md.read_until(id, pos, ']')
 217                         if not id.is_empty and pos >= 0 and pos + 2 < md.length and md[pos + 1] == ':' then
 218                                 pos += 2
 219                                 pos = md.skip_spaces(pos)
 220                                 if pos >= 0 and md[pos] == '<' then
 221                                         pos += 1
 222                                         pos = md.read_until(link, pos, '>')
 223                                         pos += 1
 224                                 else if pos >= 0 then
 225                                         pos = md.read_until(link, pos, ' ', '\n')
 226                                 end
 227                                 if not link.is_empty then
 228                                         pos = md.skip_spaces(pos)
 229                                         if pos > 0 and pos < md.length then
 230                                                 var c = md[pos]
 231                                                 if c == '\"' or c == '\'' or c == '(' then
 232                                                         pos += 1
 233                                                         if c == '(' then
 234                                                                 pos = md.read_until(comment, pos, ')')
 235                                                         else
 236                                                                 pos = md.read_until(comment, pos, c)
 237                                                         end
 238                                                         if pos > 0 then is_link_ref = true
 239                                                 end
 240                                         else
 241                                                 is_link_ref = true
 242                                         end
 243                                 end
 244                         end
 245                 end
 246                 # The title continuation is only valid on the line right after a title-less
 247                 # ref, so the pending ref is cleared whatever this line turns out to be.
 248                 var pending = last_link_ref
 249                 last_link_ref = null
 250                 if is_link_ref and not id.is_empty and not link.is_empty then
 251                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 252                         add_link_ref(id.write_to_string, lr)
 253                         # A ref without title may still get one from the next line.
 254                         if comment.is_empty then last_link_ref = lr
 255                         return true
 256                 end
 257                 if pending == null then return false
 258                 if line.is_empty then return false
 259                 # Not a definition: look for a `"..."`, `'...'` or `(...)` title.
 260                 comment = new FlatBuffer
 261                 pos = line.leading
 262                 var quote = md[pos]
 263                 if quote == '\"' or quote == '\'' or quote == '(' then
 264                         if quote == '(' then quote = ')'
 265                         pos = md.read_until(comment, pos + 1, quote)
 266                 end
 267                 if comment.is_empty then return false
 268                 pending.title = comment.write_to_string
 269                 return true
 270         end
 271
 272         # Known link refs
 273         # This list will be needed during output to expand links.
 274         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 275
 276         # Last encountered link ref (for multiline definitions)
 277         #
 278         # Markdown allows link refs to be defined over two lines:
 279         #
 280         # ~~~md
 281         # [id]: http://example.com/longish/path/to/resource/here
 282         #       "Optional Title Here"
 283         # ~~~
 284         #
 285         private var last_link_ref: nullable LinkRef = null
 286
 287         # Register `ref` in `link_refs` under the lower-cased `key`.
 288         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 289
 290         # Recursively split the lines of `root` into sub-blocks.
 291         #
 292         # Each line is classified with `line_kind` and processed in turn; some kinds
 293         # (lists for instance) recurse again. `in_list` is the mode to apply while
 294         # visiting `root`; the caller's `in_list` mode and `current_block` are put
 295         # back on exit, even when `root` contains nothing but empty lines.
 296         fun recurse(root: MDBlock, in_list: Bool) do
 297                 var line = root.first_line
 298                 while line != null and line.is_empty do
 299                         line = line.next
 300                         # Only empty lines: leave before any processor state is touched.
 301                         if line == null then return
 302                 end
 303
 304                 var old_mode = self.in_list
 305                 var old_root = self.current_block
 306                 self.in_list = in_list
 307                 current_line = line
 308                 current_block = root
 309                 while current_line != null do
 310                         line_kind(current_line.as(not null)).process(self)
 311                 end
 312                 self.in_list = old_mode
 313                 self.current_block = old_root
 314         end
 315
 316         # Currently processed line.
 317         # Used when visiting blocks with `recurse`.
 318         var current_line: nullable MDLine = null is writable
 319
 320         # Currently processed block.
 321         # Used when visiting blocks with `recurse`.
 322         var current_block: nullable MDBlock = null is writable
 323
 324         # Is the current recursion in list mode?
 325         # Used when visiting blocks with `recurse`
 326         private var in_list = false
 327
 328         # Classify `md` and return the matching kind of `Line`.
 329         #
 330         # Rules are tried in order and the first match wins; a line that matches
 331         # nothing is a `LineOther`.
 332         fun line_kind(md: MDLine): Line do
 333                 var value = md.value
 334                 var leading = md.leading
 335                 var trailing = md.trailing
 336                 if md.is_empty then return new LineEmpty
 337                 if leading > 3 then return new LineCode
 338                 var first = value[leading]
 339                 if first == '#' then return new LineHeadline
 340                 if first == '>' then return new LineBlockquote
 341
 342                 # Lengths measured from the first non-blank character.
 343                 var span = value.length - leading - trailing
 344                 var rest = value.length - leading
 345
 346                 # Fences are only recognized in extended mode.
 347                 if ext_mode and span > 2 then
 348                         if first == '`' or first == '~' then
 349                                 if md.count_chars_start(first) >= 3 then return new LineFence
 350                         end
 351                 end
 352
 353                 # Rulers are introduced by `*`, `-` or `_`.
 354                 if span > 2 and (first == '*' or first == '-' or first == '_') then
 355                         if md.count_chars(first) >= 3 then return new LineHR
 356                 end
 357
 358                 # Unordered list items: a bullet followed by a space.
 359                 if rest >= 2 and value[leading + 1] == ' ' then
 360                         if first == '*' or first == '-' or first == '+' then return new LineUList
 361                 end
 362
 363                 # Ordered list items: digits, then a dot, then a space.
 364                 if rest >= 3 and first.is_digit then
 365                         var i = leading + 1
 366                         while i < value.length and value[i].is_digit do i += 1
 367                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 368                                 return new LineOList
 369                         end
 370                 end
 371
 372                 # Raw markup, when `check_html` accepts the line.
 373                 if first == '<' and md.check_html then return new LineXML
 374
 375                 # Underlined headlines are decided by the content of the next line.
 376                 var below = md.next
 377                 if below != null and not below.is_empty then
 378                         if below.count_chars('=') > 0 then return new LineHeadline1
 379                         if below.count_chars('-') > 0 then return new LineHeadline2
 380                 end
 381                 return new LineOther
 382         end
 383
 384         # Build the `Token` matching the character found at `pos` in `text`.
 385         #
 386         # The kind depends on the character itself and on its direct neighbours.
 387         # Unless `no_location` is set, the token gets a one-character location
 388         # derived from `current_loc`.
 389         fun token_at(text: Text, pos: Int): Token do
 390                 # Neighbours of `c`; a blank stands for what lies outside of `text`.
 391                 var c0 = ' '
 392                 if pos > 0 then c0 = text[pos - 1]
 393                 var c = text[pos]
 394                 var c1 = ' '
 395                 if pos + 1 < text.length then c1 = text[pos + 1]
 396                 var c2 = ' '
 397                 if pos + 2 < text.length then c2 = text[pos + 2]
 398
 399                 var loc: nullable MDLocation = null
 400                 if not no_location then
 401                         var here = current_loc
 402                         var line = here.line_start
 403                         var column = here.column_start + pos
 404                         loc = new MDLocation(line, column, line, column)
 405                 end
 406
 407                 if c == '*' then
 408                         if c1 == '*' then
 409                                 if c0 == ' ' and c2 == ' ' then return new TokenEmStar(loc, pos, c)
 410                                 return new TokenStrongStar(loc, pos, c)
 411                         end
 412                         if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
 413                         return new TokenEmStar(loc, pos, c)
 414                 end
 415
 416                 if c == '_' then
 417                         if c1 == '_' then
 418                                 if c0 == ' ' and c2 == ' ' then return new TokenEmUnderscore(loc, pos, c)
 419                                 return new TokenStrongUnderscore(loc, pos, c)
 420                         end
 421                         if ext_mode then
 422                                 # An underscore between two letters or digits stays plain text.
 423                                 var in_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
 424                                         (c1.is_letter or c1.is_digit)
 425                                 if in_word then return new TokenNone(loc, pos, c)
 426                                 return new TokenEmUnderscore(loc, pos, c)
 427                         end
 428                         if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
 429                         return new TokenEmUnderscore(loc, pos, c)
 430                 end
 431
 432                 if c == '!' then
 433                         if c1 == '[' then return new TokenImage(loc, pos, c)
 434                         return new TokenNone(loc, pos, c)
 435                 end
 436                 if c == '[' then return new TokenLink(loc, pos, c)
 437                 if c == ']' then return new TokenNone(loc, pos, c)
 438                 if c == '`' then
 439                         if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
 440                         return new TokenCodeSingle(loc, pos, c)
 441                 end
 442
 443                 if c == '\\' then
 444                         # A backslash only escapes the characters listed here.
 445                         var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or
 446                                 c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or
 447                                 c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or
 448                                 c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
 449                                 c1 == '~' or c1 == '^'
 450                         if escapable then return new TokenEscape(loc, pos, c)
 451                         return new TokenNone(loc, pos, c)
 452                 end
 453
 454                 if c == '<' then return new TokenHTML(loc, pos, c)
 455                 if c == '&' then return new TokenEntity(loc, pos, c)
 456                 # `~~` is the strikethrough marker of the extended mode.
 457                 if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)
 458                 return new TokenNone(loc, pos, c)
 459         end
 485
 486         # Look for a token of the same type as `token` in `text`.
 487         # The search starts at `start`; return the position of the first match
 488         # or -1 when the end of `text` is reached without any match.
 489         fun find_token(text: Text, start: Int, token: Token): Int do
 490                 for i in [start..text.length[ do
 491                         var found = token_at(text, i)
 492                         if found.is_same_type(token) then return i
 493                 end
 494                 return -1
 495         end
 497
 498         # Location used for next parsed token.
 499         #
 500         # This location can be changed by the emitter to adjust with `\n` found
 501         # in the input.
 502         private fun current_loc: MDLocation do return emitter.current_loc
 503 end
 504
 505 # Emit output corresponding to blocks content.
 506 #
 507 # Blocks are created by a previous pass in `MarkdownProcessor`.
 508 # The emitter uses a `Decorator` to select the output format.
 509 class MarkdownEmitter
 510
 511         # Kind of processor used for parsing.
 512         type PROCESSOR: MarkdownProcessor
 513
 514         # Processor containing link refs.
 515         var processor: PROCESSOR
 516
 517         # Kind of decorator used for decoration.
 518         type DECORATOR: Decorator
 519
 520         # Decorator used for output.
 521         # Default is `HTMLDecorator`
 522         var decorator: DECORATOR is writable, lazy do
 523                 return new HTMLDecorator
 524         end
 525
 526         # Create a new `MarkdownEmitter` using a custom `decorator`.
 527         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
 528                 init processor
 529                 self.decorator = decorator
 530         end
 531
 532         # Render `block` into a buffer of its own and return that buffer.
 533         fun emit(block: Block): Text do
 534                 var rendered = push_buffer
 535                 block.emit(self)
 536                 pop_buffer
 537                 return rendered
 538         end
 539
 540         # Output the content of `block`.
 541         fun emit_in(block: Block) do block.emit_in(self)
 542
 543         # Transform and emit markdown text
 544         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 545
 546         # Transform and emit markdown text starting at `start` and
 547         # until a token with the same type as `token` is found (a `TokenNone` `token` never stops the scan).
 548         # Go until the end of `text` if `token` is null. Return the position of the stopping token, or -1 when the end of `text` is reached.
 549         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 550                 var old_text = current_text
 551                 var old_pos = current_pos
 552                 current_text = text
 553                 current_pos = start
 554                 while current_pos < text.length do
 555                         if text[current_pos] == '\n' then # a line break moves the current location to the next line
 556                                 current_loc.line_start += 1
 557                                 current_loc.column_start = -current_pos # `token_at` adds `pos` to it, so columns restart from this break
 558                         end
 559                         var mt = processor.token_at(text, current_pos)
 560                         if (token != null and not token isa TokenNone) and
 561                         (mt.is_same_type(token) or
 562                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 563                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 564                                 return current_pos # NOTE(review): `current_text`/`current_pos` are not restored on this path; confirm callers rely on it
 565                         end
 566                         mt.emit(self)
 567                         current_pos += 1
 568                 end
 569                 current_text = old_text
 570                 current_pos = old_pos
 571                 return -1 # end of `text` reached without meeting `token`
 572         end
 573
 574         # Currently processed position in `current_text`.
 575         # Used when visiting inline production with `emit_text_until`.
 576         private var current_pos: Int = -1
 577
 578         # Currently processed text.
 579         # Used when visiting inline production with `emit_text_until`.
 580         private var current_text: nullable Text = null
 581
 582         # Stacked buffers.
 583         private var buffer_stack = new List[FlatBuffer]
 584
 585         # Push a new buffer on the stack.
 586         private fun push_buffer: FlatBuffer do
 587                 var buffer = new FlatBuffer
 588                 buffer_stack.add buffer
 589                 return buffer
 590         end
 591
 592         # Pop the last buffer.
 593         private fun pop_buffer do buffer_stack.pop
 594
 595         # Current output buffer.
 596         private fun current_buffer: FlatBuffer do
 597                 assert not buffer_stack.is_empty
 598                 return buffer_stack.last
 599         end
 600
 601         # Stacked locations.
 602         private var loc_stack = new List[MDLocation]
 603
 604         # Push a new MDLocation on the stack.
 605         private fun push_loc(location: MDLocation) do loc_stack.add location
 606
 607         # Pop the last location.
 608         private fun pop_loc: MDLocation do return loc_stack.pop
 609
 610         # Current location (last one pushed on the location stack).
 611         private fun current_loc: MDLocation do
 612                 assert not loc_stack.is_empty
 613                 return loc_stack.last
 614         end
 615
 616         # Append `e` to the current buffer: a `Text` goes in as is,
 617         # anything else is first converted with `write_to_string`.
 618         fun add(e: Writable) do
 619                 if not e isa Text then
 620                         current_buffer.append e.write_to_string
 621                 else
 622                         current_buffer.append e
 623                 end
 624         end
 624
 625         # Append `c` to current buffer.
 626         fun addc(c: Char) do
 627                 current_buffer.add c
 628         end
 629
 630         # Append a "\n" line break.
 631         fun addn do addc '\n'
 632 end
 633
 634 # A Link Reference.
 635 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
 636 #
 637 # ~~~raw
 638 # [1]: http://example.com/ "Optional title"
 639 # ~~~
 640 class LinkRef
 641
 642         # Link href
 643         var link: String
 644
 645         # Optional link title (`null` unless one is given)
 646         var title: nullable String = null
 647
 648         # Is the link an abbreviation? (`false` by default)
 649         var is_abbrev = false
 650
 651         # Create a link to `link` and set its `title` in one step.
 652         init with_title(link: String, title: nullable String) do
 653                 init(link)
 654                 self.title = title
 655         end
 656 end
 657
 658 # A `Decorator` is used to emit markdown into a specific format.
 659 # Default decorator used is `HTMLDecorator`.
 660 interface Decorator
 661
 662         # Kind of emitter used for decoration.
 663         type EMITTER: MarkdownEmitter
 664
 665         # Render a single plain char (by default, `c` is passed unchanged to `v.addc`).
 666         #
 667         # Redefine this method to add special escaping for plain text.
 668         fun add_char(v: EMITTER, c: Char) do v.addc c
 669
 670         # Render a ruler block.
 671         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
 672
 673         # Render a headline block with corresponding level.
 674         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
 675
 676         # Render a paragraph block.
 677         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
 678
 679         # Render a code or fence block.
 680         fun add_code(v: EMITTER, block: BlockCode) is abstract
 681
 682         # Render a blockquote.
 683         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
 684
 685         # Render an unordered list.
 686         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
 687
 688         # Render an ordered list.
 689         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
 690
 691         # Render a list item.
 692         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
 693
 694         # Render an emphasis text.
 695         fun add_em(v: EMITTER, text: Text) is abstract
 696
 697         # Render a strong text.
 698         fun add_strong(v: EMITTER, text: Text) is abstract
 699
 700         # Render a strike text.
 701         #
 702         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 703         fun add_strike(v: EMITTER, text: Text) is abstract
 704
 705         # Render a link.
 706         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 707
 708         # Render an image.
 709         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 710
 711         # Render an abbreviation.
 712         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
 713
 714         # Render a code span reading from a buffer.
 715         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 716
 717         # Render a text and escape it.
 718         fun append_value(v: EMITTER, value: Text) is abstract
 719
 720         # Render code text from buffer and escape it.
 721         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 722
 723         # Render a character escape.
 724         fun escape_char(v: EMITTER, char: Char) is abstract
 725
 726         # Render a line break
 727         fun add_line_break(v: EMITTER) is abstract
 728
 729         # Generate a new html valid id from a `String`.
 730         fun strip_id(txt: String): String is abstract
 731
 732         # Found headlines during the processing labeled by their ids.
 733         fun headlines: ArrayMap[String, HeadLine] is abstract
 734 end
 735
 736 # Class representing a markdown headline.
 737 class HeadLine
 738         # Unique identifier of this headline.
 739         var id: String
 740
 741         # Text of the headline.
 742         var title: String
 743
 744         # Level of this headline.
 745         #
 746         # According to the markdown specification, level must be in `[1..6]`.
 747         var level: Int
 748 end
 749
 750 # `Decorator` that outputs HTML.
 751 class HTMLDecorator
 752         super Decorator
 753
 754         redef var headlines = new ArrayMap[String, HeadLine]
 755
 756         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 757
 758         redef fun add_headline(v, block) do
 759                 # save headline
 760                 var txt = block.block.first_line.value
 761                 var id = strip_id(txt)
 762                 var lvl = block.depth
 763                 headlines[id] = new HeadLine(id, txt, lvl)
 764                 # output it
 765                 v.add "<h{lvl} id=\"{id}\">"
 766                 v.emit_in block
 767                 v.add "</h{lvl}>\n"
 768         end
 769
 770         redef fun add_paragraph(v, block) do
 771                 v.add "<p>"
 772                 v.emit_in block
 773                 v.add "</p>\n"
 774         end
 775
 776         redef fun add_code(v, block) do
 777                 var meta = block.meta
 778                 if meta != null then
 779                         v.add "<pre class=\""
 780                         append_value(v, meta)
 781                         v.add "\"><code>"
 782                 else
 783                         v.add "<pre><code>"
 784                 end
 785                 v.emit_in block
 786                 v.add "</code></pre>\n"
 787         end
 788
 789         redef fun add_blockquote(v, block) do
 790                 v.add "<blockquote>\n"
 791                 v.emit_in block
 792                 v.add "</blockquote>\n"
 793         end
 794
 795         redef fun add_unorderedlist(v, block) do
 796                 v.add "<ul>\n"
 797                 v.emit_in block
 798                 v.add "</ul>\n"
 799         end
 800
 801         redef fun add_orderedlist(v, block) do
 802                 v.add "<ol>\n"
 803                 v.emit_in block
 804                 v.add "</ol>\n"
 805         end
 806
 807         redef fun add_listitem(v, block) do
 808                 v.add "<li>"
 809                 v.emit_in block
 810                 v.add "</li>\n"
 811         end
 812
 813         redef fun add_em(v, text) do
 814                 v.add "<em>"
 815                 v.add text
 816                 v.add "</em>"
 817         end
 818
 819         redef fun add_strong(v, text) do
 820                 v.add "<strong>"
 821                 v.add text
 822                 v.add "</strong>"
 823         end
 824
 825         redef fun add_strike(v, text) do
 826                 v.add "<del>"
 827                 v.add text
 828                 v.add "</del>"
 829         end
 830
 831         redef fun add_image(v, link, name, comment) do
 832                 v.add "<img src=\""
 833                 append_value(v, link)
 834                 v.add "\" alt=\""
 835                 append_value(v, name)
 836                 v.add "\""
 837                 if comment != null and not comment.is_empty then
 838                         v.add " title=\""
 839                         append_value(v, comment)
 840                         v.add "\""
 841                 end
 842                 v.add "/>"
 843         end
 844
 845         redef fun add_link(v, link, name, comment) do
 846                 v.add "<a href=\""
 847                 append_value(v, link)
 848                 v.add "\""
 849                 if comment != null and not comment.is_empty then
 850                         v.add " title=\""
 851                         append_value(v, comment)
 852                         v.add "\""
 853                 end
 854                 v.add ">"
 855                 v.emit_text(name)
 856                 v.add "</a>"
 857         end
 858
 859         redef fun add_abbr(v, name, comment) do
 860                 v.add "<abbr title=\""
 861                 append_value(v, comment)
 862                 v.add "\">"
 863                 v.emit_text(name)
 864                 v.add "</abbr>"
 865         end
 866
 867         redef fun add_span_code(v, text, from, to) do
 868                 v.add "<code>"
 869                 append_code(v, text, from, to)
 870                 v.add "</code>"
 871         end
 872
 873         redef fun add_line_break(v) do
 874                 v.add "<br/>"
 875         end
 876
 877         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 878
 879         redef fun escape_char(v, c) do
 880                 if c == '&' then
 881                         v.add "&amp;"
 882                 else if c == '<' then
 883                         v.add "&lt;"
 884                 else if c == '>' then
 885                         v.add "&gt;"
 886                 else if c == '"' then
 887                         v.add "&quot;"
 888                 else if c == '\'' then
 889                         v.add "&apos;"
 890                 else
 891                         v.addc c
 892                 end
 893         end
 894
 895         redef fun append_code(v, buffer, from, to) do
 896                 for i in [from..to[ do
 897                         var c = buffer[i]
 898                         if c == '&' then
 899                                 v.add "&amp;"
 900                         else if c == '<' then
 901                                 v.add "&lt;"
 902                         else if c == '>' then
 903                                 v.add "&gt;"
 904                         else
 905                                 v.addc c
 906                         end
 907                 end
 908         end
 909
 910         redef fun strip_id(txt) do
 911                 # strip id
 912                 var b = new FlatBuffer
 913                 for c in txt do
 914                         if c == ' ' then
 915                                 b.add '_'
 916                         else
 917                                 if not c.is_letter and
 918                                    not c.is_digit and
 919                                    not allowed_id_chars.has(c) then continue
 920                                 b.add c
 921                         end
 922                 end
 923                 var res = b.to_s
 924                 var key = res
 925                 # check for multiple id definitions
 926                 if headlines.has_key(key) then
 927                         var i = 1
 928                         key = "{res}_{i}"
 929                         while headlines.has_key(key) do
 930                                 i += 1
 931                                 key = "{res}_{i}"
 932                         end
 933                 end
 934                 return key
 935         end
 936
 937         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 938 end
 939
 940 # Location in a Markdown input.
 941 class MDLocation
 942
 943         # Starting line number (starting from 1).
 944         var line_start: Int
 945
 946         # Starting column number (starting from 1).
 947         var column_start: Int
 948
 949         # Stopping line number (starting from 1).
 950         var line_end: Int
 951
 952         # Stopping column number (starting from 1).
 953         var column_end: Int
 954
 955         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 956
 957         # Return a copy of `self`.
 958         fun copy: MDLocation do
 959                 return new MDLocation(line_start, column_start, line_end, column_end)
 960         end
 961 end
 962
 963 # A block of markdown lines.
 964 # A `MDBlock` can contains lines and/or sub-blocks.
 965 class MDBlock
 966
 967         # Position of `self` in the input.
 968         var location: MDLocation
 969
 970         # Kind of block.
 971         # See `Block`.
 972         var kind: Block = new BlockNone(self) is writable
 973
 974         # First line if any.
 975         var first_line: nullable MDLine = null is writable
 976
 977         # Last line if any.
 978         var last_line: nullable MDLine = null is writable
 979
 980         # First sub-block if any.
 981         var first_block: nullable MDBlock = null is writable
 982
 983         # Last sub-block if any.
 984         var last_block: nullable MDBlock = null is writable
 985
 986         # Previous block if any.
 987         var prev: nullable MDBlock = null is writable
 988
 989         # Next block if any.
 990         var next: nullable MDBlock = null is writable
 991
 992         # Does this block contain subblocks?
 993         fun has_blocks: Bool do return first_block != null
 994
 995         # Count sub-blocks.
 996         fun count_blocks: Int do
 997                 var count = 0
 998                 var block = first_block
 999                 while block != null do
1000                         count += 1
1001                         block = block.next
1002                 end
1003                 return count
1004         end
1005
1006         # Does this block contain lines?
1007         fun has_lines: Bool do return first_line != null
1008
1009         # Count block lines.
1010         fun count_lines: Int do
1011                 var count = 0
1012                 var line = first_line
1013                 while line != null do
1014                         count += 1
1015                         line = line.next
1016                 end
1017                 return count
1018         end
1019
1020         # Split `self` creating a new sub-block having `line` has `last_line`.
1021         fun split(line: MDLine): MDBlock do
1022                 # location for new block
1023                 var new_loc = new MDLocation(
1024                         first_line.location.line_start,
1025                         first_line.location.column_start,
1026                         line.location.line_end,
1027                         line.location.column_end)
1028                 # create block
1029                 var block = new MDBlock(new_loc)
1030                 block.first_line = first_line
1031                 block.last_line = line
1032                 first_line = line.next
1033                 line.next = null
1034                 if first_line == null then
1035                         last_line = null
1036                 else
1037                         first_line.prev = null
1038                         # update current block loc
1039                         location.line_start = first_line.location.line_start
1040                         location.column_start = first_line.location.column_start
1041                 end
1042                 if first_block == null then
1043                         first_block = block
1044                         last_block = block
1045                 else
1046                         last_block.next = block
1047                         last_block = block
1048                 end
1049                 return block
1050         end
1051
1052         # Add a `line` to this block.
1053         fun add_line(line: MDLine) do
1054                 if last_line == null then
1055                         first_line = line
1056                         last_line = line
1057                 else
1058                         last_line.next_empty = line.is_empty
1059                         line.prev_empty = last_line.is_empty
1060                         line.prev = last_line
1061                         last_line.next = line
1062                         last_line = line
1063                 end
1064         end
1065
1066         # Remove `line` from this block.
1067         fun remove_line(line: MDLine) do
1068                 if line.prev == null then
1069                         first_line = line.next
1070                 else
1071                         line.prev.next = line.next
1072                 end
1073                 if line.next == null then
1074                         last_line = line.prev
1075                 else
1076                         line.next.prev = line.prev
1077                 end
1078                 line.prev = null
1079                 line.next = null
1080         end
1081
1082         # Remove leading empty lines.
1083         fun remove_leading_empty_lines: Bool do
1084                 var was_empty = false
1085                 var line = first_line
1086                 while line != null and line.is_empty do
1087                         remove_line line
1088                         line = first_line
1089                         was_empty = true
1090                 end
1091                 return was_empty
1092         end
1093
1094         # Remove trailing empty lines.
1095         fun remove_trailing_empty_lines: Bool do
1096                 var was_empty = false
1097                 var line = last_line
1098                 while line != null and line.is_empty do
1099                         remove_line line
1100                         line = last_line
1101                         was_empty = true
1102                 end
1103                 return was_empty
1104         end
1105
1106         # Remove leading and trailing empty lines.
1107         fun remove_surrounding_empty_lines: Bool do
1108                 var was_empty = false
1109                 if remove_leading_empty_lines then was_empty = true
1110                 if remove_trailing_empty_lines then was_empty = true
1111                 return was_empty
1112         end
1113
1114         # Remove list markers and up to 4 leading spaces.
1115         # Used to clean nested lists.
1116         fun remove_list_indent(v: MarkdownProcessor) do
1117                 var line = first_line
1118                 while line != null do
1119                         if not line.is_empty then
1120                                 var kind = v.line_kind(line)
1121                                 if kind isa LineList then
1122                                         line.value = kind.extract_value(line)
1123                                 else
1124                                         line.value = line.value.substring_from(line.leading.min(4))
1125                                 end
1126                                 line.leading = line.process_leading
1127                         end
1128                         line = line.next
1129                 end
1130         end
1131
1132         # Collect block line text.
1133         fun text: String do
1134                 var text = new FlatBuffer
1135                 var line = first_line
1136                 while line != null do
1137                         if not line.is_empty then
1138                                 text.append line.text
1139                         end
1140                         text.append "\n"
1141                         line = line.next
1142                 end
1143                 return text.write_to_string
1144         end
1145 end
1146
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v.decorator`.
        #
        # By default the content is emitted without any decoration.
        fun emit(v: MarkdownEmitter) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed from `block` first.
        # Lines are emitted when there are some, sub-blocks otherwise.
        fun emit_in(v: MarkdownEmitter) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are first collected in a temporary buffer, without their leading
        # and trailing spaces, then the whole buffer is rendered by `v.emit_text`.
        # A line ending with two or more spaces is followed by a line break.
        fun emit_lines(v: MarkdownEmitter) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a length, not an end index:
                                # remove both the leading and the trailing spaces from it.
                                var len = (line.value.length - line.leading - line.trailing).max(0)
                                v.add line.value.substring(line.leading, len)
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while it is emitted.
        fun emit_blocks(v: MarkdownEmitter) do
                var block = self.block.first_block
                while block != null do
                        v.push_loc(block.location)
                        block.kind.emit(v)
                        v.pop_loc
                        block = block.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Outside of a `BlockFence`, lines starting with 4 spaces lose this
        # indentation and their trailing spaces.
        # Other lines are kept as is.
        # Empty lines are output as a single newline.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        # `substring` expects a length: skip the 4 spaces
                                        # of indentation and drop the trailing spaces.
                                        var len = (str.length - 4 - line.trailing).max(0)
                                        text.append str.substring(4, len)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1216
# A block without any markdown specificities.
#
# Nothing is redefined here, the behavior is the one inherited from `Block`.
# This class is only used for typing purposes: it is the default
# `MDBlock::kind` of a new `MDBlock`.
class BlockNone
        super Block
end
1224
# A markdown blockquote.
class BlockQuote
        super Block

        # Delegate the rendering to `Decorator::add_blockquote`.
        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` whose first non-space character is
        # `>`, remove everything up to this marker and one optional following
        # space, then compute `leading` again.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty and line.value[line.leading] == '>' then
                                # Index of the first character to keep
                                var rem = line.leading + 1
                                var has_space = rem < line.value.length and line.value[rem] == ' '
                                if has_space then rem += 1
                                line.value = line.value.substring_from(rem)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1250
# A markdown code block.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Number of char to skip at the beginning of the line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        # Delegate the rendering to `Decorator::add_code`.
        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line as escaped code, skipping its `line_start` first characters.
        #
        # Every line, empty or not, is followed by a newline.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        var code = line.value
                        if not line.is_empty then v.decorator.append_code(v, code, line_start, code.length)
                        v.addn
                        line = line.next
                end
        end
end
1276
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`; only `line_start` differs.
# `Block::raw_content` also checks for this class to keep lines unindented.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1287
# A markdown headline.
class BlockHeadline
        super Block

        # Render `self` with `Decorator::add_headline`.
        #
        # The location pushed on `v` is the one of the block, with its starting
        # column shifted by `start` to skip the headline marks.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Index of the first character of the headline text in the original line.
        #
        # Set by `transform_headline`.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from lines contained in `self`.
        #
        # Does nothing if `depth` is already set or if the first line of `block`
        # is missing or empty.
        # Otherwise the leading `#` marks give the `depth` (at most 6) and the
        # first line is reduced to the text found between the opening marks and
        # the optional closing ones.
        # A line holding only marks and spaces is flagged as empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null then return
                if line.is_empty then return
                # skip the opening marks then the spaces that follow them
                var start = line.leading
                while start < line.value.length and line.value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < line.value.length and line.value[start] == ' ' do
                        start += 1
                end
                if start >= line.value.length then
                        line.is_empty = true
                else
                        # skip the closing marks then the spaces that precede them,
                        # never going before `start`
                        var nend = line.value.length - line.trailing - 1
                        while nend >= start and line.value[nend] == '#' do nend -= 1
                        while nend >= start and line.value[nend] == ' ' do nend -= 1
                        if nend < start then
                                # only closing marks were left (as in `# #`): no text
                                line.is_empty = true
                        else
                                line.value = line.value.substring(start, nend - start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                self.start = start
                depth = level.min(6)
        end
end
1333
# A markdown list item block.
class BlockListItem
        super Block

        # Delegate the rendering to `Decorator::add_listitem`.
        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1340
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins at each line that
        # is a list line or that is a non-empty, non-indented line following
        # an empty one. The lines found before it are split into a
        # `BlockListItem`. The remaining lines form the last item.
        private fun init_block(v: MarkdownProcessor) do
                var line = block.first_line
                line = line.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last = block.split(block.last_line.as(not null))
                last.kind = new BlockListItem(last)
        end

        # Expand list items as paragraphs if needed.
        #
        # If any list item of `block` contains a `BlockParagraph`, then every
        # `BlockNone` found directly in a list item becomes a `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                var inner: nullable MDBlock
                # First pass: look for a paragraph in any list item
                var has_paragraph = false
                var outer = block.first_block
                while outer != null and not has_paragraph do
                        if outer.kind isa BlockListItem then
                                inner = outer.first_block
                                while inner != null do
                                        if inner.kind isa BlockParagraph then
                                                has_paragraph = true
                                                break
                                        end
                                        inner = inner.next
                                end
                        end
                        outer = outer.next
                end
                if not has_paragraph then return
                # Second pass: promote the untyped blocks of each list item
                outer = block.first_block
                while outer != null do
                        if outer.kind isa BlockListItem then
                                inner = outer.first_block
                                while inner != null do
                                        if inner.kind isa BlockNone then inner.kind = new BlockParagraph(inner)
                                        inner = inner.next
                                end
                        end
                        outer = outer.next
                end
        end
end
1398
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        # Delegate the rendering to `Decorator::add_orderedlist`.
        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1405
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Delegate the rendering to `Decorator::add_unorderedlist`.
        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1412
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Delegate the rendering to `Decorator::add_paragraph`.
        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1419
# A markdown ruler.
class BlockRuler
        super Block

        # Delegate the rendering to `Decorator::add_ruler`.
        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1426
# Xml blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit each line as is, without any escaping.
        #
        # Every line, empty or not, is followed by a newline.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                v.add line.value
                        end
                        v.addn
                        line = line.next
                end
        end
end
1440
1441 # A markdown line.
1442 class MDLine
1443
1444         # Location of `self` in the original input.
1445         var location: MDLocation
1446
1447         # Text contained in this line.
1448         var value: String is writable
1449
1450         # Is this line empty?
1451         # Lines containing only spaces are considered empty.
1452         var is_empty: Bool = true is writable
1453
1454         # Previous line in `MDBlock` or null if first line.
1455         var prev: nullable MDLine = null is writable
1456
1457         # Next line in `MDBlock` or null if last line.
1458         var next: nullable MDLine = null is writable
1459
1460         # Is the previous line empty?
1461         var prev_empty: Bool = false is writable
1462
1463         # Is the next line empty?
1464         var next_empty: Bool = false is writable
1465
1466         # Initialize a new MDLine from its string value
1467         init do
1468                 self.leading = process_leading
1469                 if leading != value.length then
1470                         self.is_empty = false
1471                         self.trailing = process_trailing
1472                 end
1473         end
1474
1475         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1476         fun clear do
1477                 value = ""
1478                 leading = 0
1479                 trailing = 0
1480                 is_empty = true
1481                 if prev != null then prev.next_empty = true
1482                 if next != null then next.prev_empty = true
1483         end
1484
        # Number of leading spaces on this line.
1486         var leading: Int = 0 is writable
1487
1488         # Compute `leading` depending on `value`.
1489         fun process_leading: Int do
1490                 var count = 0
1491                 var value = self.value
1492                 while count < value.length and value[count] == ' ' do count += 1
1493                 if leading == value.length then clear
1494                 return count
1495         end
1496
1497         # Number of trailing spaces on this line.
1498         var trailing: Int = 0 is writable
1499
1500         # Compute `trailing` depending on `value`.
1501         fun process_trailing: Int do
1502                 var count = 0
1503                 var value = self.value
1504                 while value[value.length - count - 1] == ' ' do
1505                         count += 1
1506                 end
1507                 return count
1508         end
1509
1510         # Count the amount of `ch` in this line.
1511         # Return A value > 0 if this line only consists of `ch` end spaces.
1512         fun count_chars(ch: Char): Int do
1513                 var count = 0
1514                 for c in value do
1515                         if c == ' ' then
1516                                 continue
1517                         end
1518                         if c == ch then
1519                                 count += 1
1520                                 continue
1521                         end
1522                         count = 0
1523                         break
1524                 end
1525                 return count
1526         end
1527
1528         # Count the amount of `ch` at the start of this line ignoring spaces.
1529         fun count_chars_start(ch: Char): Int do
1530                 var count = 0
1531                 for c in value do
1532                         if c == ' ' then
1533                                 continue
1534                         end
1535                         if c == ch then
1536                                 count += 1
1537                         else
1538                                 break
1539                         end
1540                 end
1541                 return count
1542         end
1543
1544         # Last XML line if any.
1545         private var xml_end_line: nullable MDLine = null
1546
        # Does `value` contain valid XML markup?
        #
        # The scan starts at `leading` and may continue over the following
        # lines (`next`). Opened block-level tags are kept on a stack; the
        # markup is accepted once that stack is empty again.
        #
        # On success `xml_end_line` is set to the line where the markup ends
        # and `true` is returned. `false` is returned if the first tag is not
        # an HTML block tag, if a closing tag does not match the last opened
        # one, or if the lines run out while tags are still open.
        private fun check_html: Bool do
                # Stack of the block-level tags currently opened.
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                # `<!` may open a comment: delegate to `read_xml_comment`,
                # which sets `xml_end_line` itself on success.
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                # Read the first tag into `tmp`.
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        # `hr` is complete on its own line: no closing tag is looked for.
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        var line: nullable MDLine = self
                        while line != null do
                                # Move to the next `<` on the current line.
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # End of line reached. A `/` two characters before `pos`
                                        # closes the last opened tag
                                        # (presumably a line ending with `/>` -- TODO confirm).
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        # Continue at the beginning of the next line.
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                # Only block-level tags other than `hr` touch the stack.
                                                if tag.is_html_block and not tag == "hr" then
                                                        # `tmp[1] == '/'` marks a closing tag: it must
                                                        # match the last opened tag.
                                                        if tmp[1] == '/' then
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                # Nothing readable at this `<`: skip it.
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end
1611
1612         # Read a XML comment.
1613         # Used by `check_html`.
1614         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1615                 var line: nullable MDLine = first_line
1616                 if start + 3 < line.value.length then
1617                         if line.value[2] == '-' and line.value[3] == '-' then
1618                                 var pos = start + 4
1619                                 while line != null do
1620                                         while pos < line.value.length and line.value[pos] != '-' do
1621                                                 pos += 1
1622                                         end
1623                                         if pos == line.value.length then
1624                                                 line = line.next
1625                                                 pos = 0
1626                                         else
1627                                                 if pos + 2 < line.value.length then
1628                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1629                                                                 first_line.xml_end_line = line
1630                                                                 return pos + 3
1631                                                         end
1632                                                 end
1633                                                 pos += 1
1634                                         end
1635                                 end
1636                         end
1637                 end
1638                 return -1
1639         end
1640
        # Extract the text of `self` without leading and trailing.
        #
        # The result starts at `leading`.
        # NOTE(review): the second argument is `value.length - trailing`; if
        # `substring` takes a character count rather than an end index, the
        # trailing part is only removed when `leading` is 0 -- TODO confirm.
        fun text: String do return value.substring(leading, value.length - trailing)
1643 end
1644
# A markdown line.
#
# Implementations decide, in `process`, how the lines found at the current
# position of a `MarkdownProcessor` are handled.
interface Line

        # Parse the line.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1652
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the empty line: only move `v.current_line` to the next line.
        redef fun process(v) do
                var empty = v.current_line
                v.current_line = empty.next
        end
end
1661
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines up to the end of the block and split them into a
        # new block, typed as `BlockParagraph` or `BlockNone`.
        redef fun process(v) do
                var cursor = v.current_line
                var was_empty = cursor.prev_empty
                # Look for the line ending the block: an empty line or the
                # beginning of another construct.
                while cursor != null and not cursor.is_empty do
                        var t = v.line_kind(cursor)
                        if t isa LineList and (v.in_list or v.ext_mode) then break
                        if v.ext_mode and (t isa LineCode or t isa LineFence) then break
                        if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 then break
                        if t isa LineHR or t isa LineBlockquote or t isa LineXML then break
                        cursor = cursor.next
                end
                # Split the block. Inside a list, and unless an empty line
                # precedes the block or ends it, the block gets no paragraph.
                var block: MDBlock
                var without_paragraph: Bool
                if cursor == null then
                        # No more lines: the block runs to the end of the current block.
                        block = v.current_block.split(v.current_block.last_line.as(not null))
                        without_paragraph = v.in_list and not was_empty
                else if cursor.is_empty then
                        # Stopped on an empty line: it goes with the block.
                        block = v.current_block.split(cursor)
                        without_paragraph = false
                else
                        # Stopped on another construct: split just before it.
                        block = v.current_block.split(cursor.prev.as(not null))
                        without_paragraph = v.in_list and not was_empty
                end
                if without_paragraph then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1711
# A line of markdown code.
class LineCode
        super Line

        # Gather the following code lines (and the empty lines between them)
        # into a `BlockCode`.
        redef fun process(v) do
                # Stop on the first non-empty line that is not code.
                var cursor = v.current_line
                while cursor != null do
                        if not cursor.is_empty and not v.line_kind(cursor) isa LineCode then break
                        cursor = cursor.next
                end
                # The block ends just before that line, or with the current
                # block if there is no such line.
                var code: MDBlock
                if cursor == null then
                        code = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        code = v.current_block.split(cursor.prev.as(not null))
                end
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1734
# A line of raw XML.
class LineXML
        super Line

        # Isolate the lines from the current one to its `xml_end_line` in a
        # `BlockXML`.
        redef fun process(v) do
                var first = v.current_line
                # Anything before the XML goes in its own block.
                var before = first.prev
                if before != null then v.current_block.split(before)
                var xml = v.current_block.split(first.xml_end_line.as(not null))
                xml.kind = new BlockXML(xml)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1749
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the quoted lines into a `BlockQuote`, strip the quote prefix
        # and process the content of the quote recursively.
        redef fun process(v) do
                # The quote ends before the first non-empty, unindented line
                # that follows an empty line and is not itself a blockquote.
                var cursor = v.current_line
                while cursor != null do
                        if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 and
                           not v.line_kind(cursor) isa LineBlockquote then break
                        cursor = cursor.next
                end
                # Split the quote out of the current block.
                var quote: MDBlock
                if cursor == null then
                        quote = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        quote = v.current_block.split(cursor.prev.as(not null))
                end
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                v.current_line = cursor
                # Process the content of the quote as markdown.
                v.recurse(quote, false)
                v.current_line = v.current_block.first_line
        end
end
1779
# A markdown ruler line.
class LineHR
        super Line

        # Isolate the current line in a `BlockRuler`.
        redef fun process(v) do
                var ruler_line = v.current_line
                # Anything before the ruler goes in its own block.
                var before = ruler_line.prev
                if before != null then v.current_block.split(before)
                var ruler = v.current_block.split(ruler_line.as(not null))
                ruler.kind = new BlockRuler(ruler)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1793
# A markdown fence code line.
class LineFence
        super Line

        # Gather the lines from the current fence to the next one (or to the
        # end of the current block) into a `BlockFence`.
        redef fun process(v) do
                # Look for the next fence line after the current one.
                var closing = v.current_line.next
                while closing != null and not v.line_kind(closing) isa LineFence do
                        closing = closing.next
                end
                # First line after the fenced block, if any.
                var after: nullable MDLine = null
                if closing != null then after = closing.next
                # Split the fenced lines out of the current block.
                var fence: MDBlock
                if after == null then
                        fence = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        fence = v.current_block.split(after.prev.as(not null))
                end
                fence.remove_surrounding_empty_lines
                # The meta is read from the first line before it is cleared.
                var meta = fence.first_line.value.meta_from_fence
                fence.kind = new BlockFence(fence, meta)
                fence.first_line.clear
                # Clear the last line only if it is a fence.
                var last = fence.last_line
                if last != null and v.line_kind(last) isa LineFence then
                        fence.last_line.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = after
        end
end
1827
# A markdown headline.
class LineHeadline
        super Line

        # Isolate the current line in a `BlockHeadline`.
        redef fun process(v) do
                var title_line = v.current_line
                # Anything before the headline goes in its own block.
                var before = title_line.prev
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(title_line.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1844
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Isolate the current line in a `BlockHeadline` of depth 1.
        #
        # The line that follows is cleared
        # (presumably the underline of the title -- TODO confirm).
        redef fun process(v) do
                var title_line = v.current_line
                # Anything before the headline goes in its own block.
                var before = title_line.prev
                if before != null then v.current_block.split(before)
                title_line.next.clear
                var block = v.current_block.split(title_line.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1863
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Isolate the current line in a `BlockHeadline` of depth 2.
        #
        # The line that follows is cleared
        # (presumably the underline of the title -- TODO confirm).
        redef fun process(v) do
                var title_line = v.current_line
                # Anything before the headline goes in its own block.
                var before = title_line.prev
                if before != null then v.current_block.split(before)
                title_line.next.clear
                var block = v.current_block.split(title_line.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1882
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Gather the lines of the list into a list block (see `block_kind`),
        # then process each of its sub-blocks recursively.
        redef fun process(v) do
                # The list ends before the first non-empty, unindented line
                # that follows an empty line and is not itself a list line.
                var cursor = v.current_line
                while cursor != null do
                        var t = v.line_kind(cursor)
                        if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 and
                           not t isa LineList then break
                        cursor = cursor.next
                end
                # Split the list out of the current block.
                var list: MDBlock
                if cursor == null then
                        list = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        list = v.current_block.split(cursor.prev.as(not null))
                end
                var kind = block_kind(list)
                list.kind = kind
                # The flags are reset before and after the trimming, as the
                # first and last lines may not be the same afterwards.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # Process each sub-block of the list.
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = cursor
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1928
# An ordered list line.
class LineOList
        super LineList

        # Lines of this kind are gathered in a `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var dot = line.value.index_of('.')
                return line.value.substring_from(dot + 2)
        end
end
1939
# An unordered list line.
class LineUList
        super LineList

        # Lines of this kind are gathered in a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading part of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1950
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Location of `self` in the original input.
        var location: nullable MDLocation

        # Position of `self` in input independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token using `MarkdownEmitter::decorator`.
        #
        # By default `char` is added as is; subclasses redefine this method
        # to emit their markup.
        fun emit(v: MarkdownEmitter) do v.decorator.add_char(v, char)
end
1967
# A token without a specific meaning.
#
# Nothing is redefined: the default `Token::emit` is used.
class TokenNone
        super Token
end
1972
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        #
        # If no matching token is found, only `char` is added.
        redef fun emit(v) do
                # The emphasized text is emitted in a separate buffer.
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, inner)
                v.current_pos = stop
        end
end
1989
# An emphasis star token.
#
# Concrete `TokenEm`; `emit` is inherited unchanged.
class TokenEmStar
        super TokenEm
end
1994
# An emphasis underscore token.
#
# Concrete `TokenEm`; `emit` is inherited unchanged.
class TokenEmUnderscore
        super TokenEm
end
1999
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        #
        # If no matching token is found, only `char` is added.
        redef fun emit(v) do
                # The strong text is emitted in a separate buffer.
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, inner)
                v.current_pos = stop + 1
        end
end
2016
# A strong star token.
#
# Concrete `TokenStrong`; `emit` is inherited unchanged.
class TokenStrongStar
        super TokenStrong
end
2021
# A strong underscore token.
#
# Concrete `TokenStrong`; `emit` is inherited unchanged.
class TokenStrongUnderscore
        super TokenStrong
end
2026
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span code.
        #
        # The spaces around the code are not part of the emitted span, and
        # nothing is emitted for a span made only of spaces.
        # If no matching token is found, only `char` is added.
        redef fun emit(v) do
                var lo = pos + next_pos + 1
                var hi = v.processor.find_token(v.current_text.as(not null), lo, self)
                if hi <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = hi + next_pos
                # Skip the spaces at the beginning of the span.
                while lo < hi and v.current_text[lo] == ' ' do lo += 1
                if lo >= hi then return
                # Skip the spaces at the end of the span.
                while v.current_text[hi - 1] == ' ' do hi -= 1
                v.decorator.add_span_code(v, v.current_text.as(not null), lo, hi)
        end

        # Offset added to the positions computed by `emit`
        # (`0` in `TokenCodeSingle`, `1` in `TokenCodeDouble`).
        private fun next_pos: Int is abstract
end
2049
# A span code token.
class TokenCodeSingle
        super TokenCode

        # No additional offset is needed by `TokenCode::emit`.
        redef fun next_pos do return 0
end
2056
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One additional position is skipped by `TokenCode::emit`
        # (presumably the second delimiter character -- TODO confirm).
        redef fun next_pos do return 1
end
2063
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the construct with `emit_hyper` if `check_link` accepts it.
        #
        # Otherwise only `char` is added.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # `start` is the position of the token in `v.current_text`.
        # On success `name`, `link` and, depending on the form, `comment` and
        # `is_abbrev` are filled and the position where the construct ends is
        # returned. Return `-1` if the construct is not a valid link.
        #
        # `out` is not used.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                # Skip the opening delimiter: one character for a link, two otherwise.
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                # Every position lower than `start` is handled as a failure
                # (`skip_spaces` returns -1 when the end of the text is reached).
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # Nothing but spaces after the name: the name itself must
                        # be a known reference.
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline form: `(link "optional title")`.
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        # The link may be written between `<` and `>`.
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # The `>` may be the last character of the text: there is
                        # then no closing `)` and `md[pos]` must not be read.
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                if pos > start and md[pos] == '"' then
                                        # Optional title between double quotes.
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference form: `[id]`, the name is used as id if `[]` is empty.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Name alone: it must be a known reference
                        # (line feeds in the name are looked up as spaces).
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                # An unknown reference leaves `link` unset.
                if link == null then return -1
                return pos
        end
end
2183
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation if the link is one and has a title,
        # a link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                else
                        v.decorator.add_link(v, link.as(not null), name.as(not null), title)
                end
        end
end
2196
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with its link, its name and its optional title.
        redef fun emit_hyper(v) do
                var title = comment
                v.decorator.add_image(v, link.as(not null), name.as(not null), title)
        end
end
2205
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the markup accepted by `check_html`.
        #
        # If nothing is accepted, `char` is emitted escaped.
        redef fun emit(v) do
                var html = new FlatBuffer
                var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # An auto link is emitted directly with the decorator and nothing is
        # added to `out`; inline HTML is read into `out`.
        # Return the position where the construct ends, or `-1`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var target = new FlatBuffer
                var stop = md.read_until(target, start + 1, ':', ' ', '>', '\n')
                if stop != -1 and md[stop] == ':' and target.is_link_prefix then
                        # Read the remainder of the link, up to the closing `>`.
                        stop = md.read_until(target, stop, '>')
                        if stop != -1 then
                                var url = target.write_to_string
                                v.decorator.add_link(v, url, url, null)
                                return stop
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2243
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity accepted by `check_entity` as is.
        #
        # If no entity is accepted, `char` is emitted escaped.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # The text from `start` to the next `;` is read into `out`.
        # Three forms are accepted:
        #
        # * hexadecimal: `#x` or `#X` followed by hexadecimal digits
        # * decimal: `#` followed by decimal digits
        # * named: letters and digits only
        #
        # On success the `;` is added to `out` and its position in `md` is
        # returned. Return `-1` otherwise (`out` may then be partially filled).
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # At least one digit is needed after the `x`.
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        # Each character must be in one of the three
                                        # hexadecimal ranges.
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
2297
# A markdown escape token.
class TokenEscape
        super Token

        # Move to the next position and add the character found there as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
2307
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        # Emit the text up to the matching token as a strike.
        #
        # If no matching token is found, only `char` is added.
        redef fun emit(v) do
                # The striked text is emitted in a separate buffer.
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, inner)
                v.current_pos = stop + 1
        end
end
2326
redef class Text

        # Get the position of the next non-space character.
        #
        # Spaces and new lines are skipped starting at `start`.
        # Return `-1` if the end of `self` is reached before any other character.
        private fun skip_spaces(start: Int): Int do
                var pos = start
                while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
                        pos += 1
                end
                if pos < length then return pos
                return -1
        end

        # Read `self` until `nend` and append it to the `out` buffer.
        # Escape markdown special chars.
        #
        # Reading starts at `start` and stops on the first unescaped character
        # found in `nend`.
        # Return the position of that character, or `-1` if none was found.
        private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                # `escape` moves `pos` on the escaped char when it consumes it.
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if nend.has(c) then break
                                out.add c
                        end
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as raw text until `nend` and append it to the `out` buffer.
        # No escape is made.
        #
        # Return the position of the first character found in `nend`,
        # or `-1` if the end of `self` is reached first.
        private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if nend.has(c) then break
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as XML until `to` and append it to the `out` buffer.
        #
        # Characters are copied verbatim. Text enclosed in `"` or `'` is treated
        # as a string: characters of `to` found inside it do not stop the reading,
        # and a backslash keeps the character that follows it inside the string.
        #
        # Return the position of the first character of `to` found outside a
        # string, or `-1` if the end of `self` is reached first.
        private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
                var pos = from
                var in_str = false
                var str_char: nullable Char = null
                while pos < length do
                        var c = self[pos]
                        if in_str then
                                if c == '\\' then
                                        # Copy the backslash and the char it escapes,
                                        # so an escaped quote does not close the string.
                                        out.add c
                                        pos += 1
                                        if pos < length then
                                                out.add self[pos]
                                                pos += 1
                                        end
                                        continue
                                end
                                # Only the quote that opened the string can close it;
                                # the other kind of quote is plain string content.
                                if c == str_char then in_str = false
                        else if c == '"' or c == '\'' then
                                in_str = true
                                str_char = c
                        else if to.has(c) then
                                break
                        end
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as XML and append it to the `out` buffer.
        # Safe mode can be activated to limit reading to valid xml.
        #
        # `start` is the position of the tag opening (the char before the tag
        # name, `/` or `!`).
        #
        # Return the position of the closing `>`, or `-1` if no complete tag can
        # be read. For a tag starting with `<!`, only `<!` is appended and
        # `start + 1` is returned.
        #
        # In safe mode:
        #
        # * a tag name rejected by `is_valid_html_tag` appends `&lt;` and returns `-1`
        # * a tag accepted by `is_html_unsafe` is copied with `<` and `>` replaced
        #   by `&lt;` and `&gt;`
        private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
                var pos = 0
                var is_valid = true
                var is_close_tag = false
                if start + 1 >= length then return -1
                if self[start + 1] == '/' then
                        is_close_tag = true
                        pos = start + 2
                else if self[start + 1] == '!' then
                        out.append "<!"
                        return start + 1
                else
                        is_close_tag = false
                        pos = start + 1
                end
                if safe_mode then
                        # Read the tag name apart so it can be checked before output.
                        var tmp = new FlatBuffer
                        pos = read_xml_until(tmp, pos, ' ', '/', '>')
                        if pos == -1 then return -1
                        var tag = tmp.write_to_string.trim.to_lower
                        if not tag.is_valid_html_tag then
                                out.append "&lt;"
                                pos = -1
                        else if tag.is_html_unsafe then
                                is_valid = false
                                out.append "&lt;"
                                if is_close_tag then out.add '/'
                                out.append tmp
                        else
                                out.append "<"
                                if is_close_tag then out.add '/'
                                out.append tmp
                        end
                else
                        out.add '<'
                        if is_close_tag then out.add '/'
                        pos = read_xml_until(out, pos, ' ', '/', '>')
                end
                if pos == -1 then return -1
                # Copy what follows the tag name up to `/` or `>`.
                pos = read_xml_until(out, pos, '/', '>')
                if pos == -1 then return -1
                if self[pos] == '/' then
                        out.append " /"
                        pos = self.read_xml_until(out, pos + 1, '>')
                        if pos == -1 then return -1
                end
                if self[pos] == '>' then
                        if is_valid then
                                out.add '>'
                        else
                                out.append "&gt;"
                        end
                        return pos
                end
                return -1
        end

        # Read a markdown link address and append it to the `out` buffer.
        #
        # Parentheses are counted from a depth of 1 (presumably the caller has
        # already consumed the opening one). Reading stops on a space found at
        # depth 1 or on the `)` that brings the depth back to 0.
        # Backslash escapes are handled by `escape`.
        #
        # Return the position of the stop character, or `-1` if the end of
        # `self` is reached first.
        private fun read_md_link(out: FlatBuffer, start: Int): Int do
                var pos = start
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                var end_reached = false
                                if c == '(' then
                                        counter += 1
                                else if c == ' ' then
                                        if counter == 1 then end_reached = true
                                else if c == ')' then
                                        counter -= 1
                                        if counter == 0 then end_reached = true
                                end
                                if end_reached then break
                                out.add c
                        end
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read a markdown link text and append it to the `out` buffer.
        #
        # Brackets are counted from a depth of 1; nested `[` and `]` are copied
        # to `out`. Reading stops on the `]` that brings the depth back to 0,
        # which is not copied.
        #
        # Return the position of that `]`, or `-1` if it is not found.
        private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
                var pos = start
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        var end_reached = false
                        if c == '[' then
                                counter += 1
                                out.add c
                        else if c == ']' then
                                counter -= 1
                                if counter == 0 then
                                        end_reached = true
                                else
                                        out.add c
                                end
                        else
                                out.add c
                        end
                        if end_reached then break
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Extract the XML tag name from a XML tag.
        #
        # The first character is skipped, as well as a `/` in second position.
        # The following letters and digits are collected (the last character
        # of `self` is never read) and returned in lower case.
        private fun xml_tag: String do
                var tpl = new FlatBuffer
                var pos = 1
                if pos < length and self[1] == '/' then pos += 1
                while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
                        tpl.add self[pos]
                        pos += 1
                end
                return tpl.write_to_string.to_lower
        end

        # Is `self` a non-empty string made only of `is_alpha` characters?
        #
        # NOTE(review): digits are presumably rejected by `is_alpha`, so names
        # like `h1` would not be valid here although `html_block_tags` lists
        # them. Confirm this is intended.
        private fun is_valid_html_tag: Bool do
                if is_empty then return false
                for c in self do
                        if not c.is_alpha then return false
                end
                return true
        end

        # Handle the backslash found at `pos` that is followed by `c`.
        #
        # If `c` is a markdown special char, `c` alone is appended to `out` and
        # `pos + 1` (the position of `c`) is returned so the caller skips it.
        # Otherwise the backslash is appended as is and `pos` is returned.
        private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
                if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
                   c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
                   c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
                   c == '`' or c == '~' or c == '^' then
                        out.add c
                        return pos + 1
                end
                out.add '\\'
                return pos
        end

        # Extract string found at end of fence opening.
        #
        # Return the trimmed text starting at the first character that is
        # neither a space, a backtick nor a tilde, or `null` if there is none.
        private fun meta_from_fence: nullable Text do
                for i in [0..chars.length[ do
                        var c = chars[i]
                        if c != ' ' and c != '`' and c != '~' then
                                return substring_from(i).trim
                        end
                end
                return null
        end

        # Is `self` an unsafe HTML element?
        private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

        # Is `self` a HTML block element?
        private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

        # Is `self` a link prefix?
        private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

        # Names of the HTML elements considered unsafe (see `is_html_unsafe`).
        private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

        # Names of the HTML block elements (see `is_html_block`).
        private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

        # Prefixes recognized as links (see `is_link_prefix`).
        private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2596
redef class String

        # Render `self`, read as markdown, to its HTML representation.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end