lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
# Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
        # Work in extended mode (default).
        #
        # Behavior changes when using extended mode:
        #
        # * Lists and code blocks end a paragraph
        #
        #   In normal markdown the following:
        #
        # ~~~md
        # This is a paragraph
        # * and this is not a list
        # ~~~
        #
        #   Will produce:
        #
        # ~~~html
        # <p>This is a paragraph
        # * and this is not a list</p>
        # ~~~
        #
        #   When using extended mode this changes to:
        #
        # ~~~html
        # <p>This is a paragraph</p>
        # <ul>
        # <li>and this is not a list</li>
        # </ul>
        # ~~~
        #
        # * Fenced code blocks
        #
        #   If you don't want to indent all your code with 4 spaces,
        #   you can wrap your code in ``` ``` ``` or `~~~`.
        #
        #   Here's an example:
        #
        # ~~~md
        # fun test do
        #    print "Hello World!"
        # end
        # ~~~
        #
        # * Code blocks meta
        #
        #   If you want to use syntax highlighting tools, most of them need to know what kind
        #   of language they are highlighting.
        #   You can add an optional language identifier after the fence declaration to output
        #   it in the HTML render.
        #
        # ```nit
        # import markdown
        #
        # print "# Hello World!".md_to_html
        # ```
        #
        #   Becomes
        #
        # ~~~html
        # <pre class="nit"><code>import markdown
        #
        # print "# Hello World!".md_to_html
        # </code></pre>
        # ~~~
        #
        # * Underscores (Emphasis)
        #
        #   Underscores in the middle of a word like:
        #
        # ~~~md
        # Con_cat_this
        # ~~~
        #
        #   normally produce this:
        #
        # ~~~html
        # <p>Con<em>cat</em>this</p>
        # ~~~
        #
        #   With extended mode they don't result in emphasis.
        #
        # ~~~html
        # <p>Con_cat_this</p>
        # ~~~
        #
        # * Strikethrough
        #
        #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
        #   a strikethrough span is marked with `~~`.
        #
        # ~~~md
        # ~~Mistaken text.~~
        # ~~~
        #
        #   becomes
        #
        # ~~~html
        # <del>Mistaken text.</del>
        # ~~~
        var ext_mode = true
 132
        # Disable attaching an `MDLocation` to tokens.
        #
        # Locations are useful for some tools but they may
        # cause a significant time and space overhead.
        # When set to `true`, `token_at` creates its tokens with a `null` location.
        #
        # Default = `false` (locations are attached).
        var no_location = false is writable
 140
 141         # Process the mardown `input` string and return the processed output.
 142         fun process(input: String): Writable do
 143                 # init processor
 144                 link_refs.clear
 145                 last_link_ref = null
 146                 current_line = null
 147                 current_block = null
 148                 # parse markdown
 149                 var parent = read_lines(input)
 150                 parent.remove_surrounding_empty_lines
 151                 recurse(parent, false)
 152                 # output processed text
 153                 return emit(parent.kind)
 154         end
 155
 156         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 157         private fun read_lines(input: String): MDBlock do
 158                 var block = new MDBlock(new MDLocation(1, 1, 1, 1))
 159                 var value = new FlatBuffer
 160                 var i = 0
 161
 162                 var line_pos = 0
 163                 var col_pos = 0
 164
 165                 while i < input.length do
 166                         value.clear
 167                         var pos = 0
 168                         var eol = false
 169                         while not eol and i < input.length do
 170                                 col_pos += 1
 171                                 var c = input[i]
 172                                 if c == '\n' then
 173                                         eol = true
 174                                 else if c == '\r' then
 175                                 else if c == '\t' then
 176                                         var np = pos + (4 - (pos & 3))
 177                                         while pos < np do
 178                                                 value.add ' '
 179                                                 pos += 1
 180                                         end
 181                                 else
 182                                         pos += 1
 183                                         value.add c
 184                                 end
 185                                 i += 1
 186                         end
 187                         line_pos += 1
 188
 189                         var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
 190                         var line = new MDLine(loc, value.write_to_string)
 191                         var is_link_ref = check_link_ref(line)
 192                         # Skip link refs
 193                         if not is_link_ref then block.add_line line
 194                         col_pos = 0
 195                 end
 196                 return block
 197         end
 198
 199         # Check if line is a block link definition.
 200         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 201         private fun check_link_ref(line: MDLine): Bool do
 202                 var md = line.value
 203                 var is_link_ref = false
 204                 var id = new FlatBuffer
 205                 var link = new FlatBuffer
 206                 var comment = new FlatBuffer
 207                 var pos = -1
 208                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 209                         pos = line.leading + 1
 210                         pos = md.read_until(id, pos, ']')
 211                         if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
 212                                 if line.value[pos + 1] == ':' then
 213                                         pos += 2
 214                                         pos = md.skip_spaces(pos)
 215                                         if pos >= 0 and line.value[pos] == '<' then
 216                                                 pos += 1
 217                                                 pos = md.read_until(link, pos, '>')
 218                                                 pos += 1
 219                                         else if pos >= 0 then
 220                                                 pos = md.read_until(link, pos, ' ', '\n')
 221                                         end
 222                                         if not link.is_empty then
 223                                                 pos = md.skip_spaces(pos)
 224                                                 if pos > 0 and pos < line.value.length then
 225                                                         var c = line.value[pos]
 226                                                         if c == '\"' or c == '\'' or c == '(' then
 227                                                                 pos += 1
 228                                                                 if c == '(' then
 229                                                                         pos = md.read_until(comment, pos, ')')
 230                                                                 else
 231                                                                         pos = md.read_until(comment, pos, c)
 232                                                                 end
 233                                                                 if pos > 0 then is_link_ref = true
 234                                                         end
 235                                                 else
 236                                                         is_link_ref = true
 237                                                 end
 238                                         end
 239                                 end
 240                         end
 241                 end
 242                 if is_link_ref and not id.is_empty and not link.is_empty then
 243                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 244                         add_link_ref(id.write_to_string, lr)
 245                         if comment.is_empty then last_link_ref = lr
 246                         return true
 247                 else
 248                         comment = new FlatBuffer
 249                         if not line.is_empty and last_link_ref != null then
 250                                 pos = line.leading
 251                                 var c = line.value[pos]
 252                                 if c == '\"' or c == '\'' or c ==  '(' then
 253                                         pos += 1
 254                                         if c == '(' then
 255                                                 pos = md.read_until(comment, pos, ')')
 256                                         else
 257                                                 pos = md.read_until(comment, pos, c)
 258                                         end
 259                                 end
 260                                 var last_link_ref = self.last_link_ref
 261                                 if not comment.is_empty and last_link_ref != null then
 262                                         last_link_ref.title = comment.write_to_string
 263                                 end
 264                         end
 265                         if comment.is_empty then return false
 266                         return true
 267                 end
 268         end
 269
        # Known link refs, indexed by their id.
        #
        # This map is needed during output to expand links.
        # `add_link_ref` stores the ids in lower case.
        var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

        # Last encountered link ref (for multiline definitions)
        #
        # Markdown allows link refs to be defined over two lines:
        #
        # ~~~md
        # [id]: http://example.com/longish/path/to/resource/here
        #       "Optional Title Here"
        # ~~~
        private var last_link_ref: nullable LinkRef = null
 284
 285         # Add a link ref to the list
 286         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 287
 288         # Recursively split a `block`.
 289         #
 290         # The block is splitted according to the type of lines it contains.
 291         # Some blocks can be splited again recursively like lists.
 292         # The `in_list` mode is used to recurse on list and build
 293         # nested paragraphs or code blocks.
 294         fun recurse(root: MDBlock, in_list: Bool) do
 295                 var old_mode = self.in_list
 296                 var old_root = self.current_block
 297                 self.in_list = in_list
 298
 299                 var line = root.first_line
 300                 while line != null and line.is_empty do
 301                         line = line.next
 302                         if line == null then return
 303                 end
 304
 305                 current_line = line
 306                 current_block = root
 307                 while current_line != null do
 308                         line_kind(current_line.as(not null)).process(self)
 309                 end
 310                 self.in_list = old_mode
 311                 self.current_block = old_root
 312         end
 313
        # Currently processed line.
        #
        # Used when visiting blocks with `recurse`, which loops until it is `null`.
        var current_line: nullable MDLine = null is writable

        # Currently processed block.
        #
        # Used when visiting blocks with `recurse`, which sets it to the visited
        # block and restores the previous value afterwards.
        var current_block: nullable MDBlock = null is writable

        # Is the current recursion in list mode?
        #
        # Used when visiting blocks with `recurse`.
        private var in_list = false
 325
 326         # The type of line.
 327         # see: `md_line_*`
 328         fun line_kind(md: MDLine): Line do
 329                 var value = md.value
 330                 var leading = md.leading
 331                 var trailing = md.trailing
 332                 if md.is_empty then return new LineEmpty
 333                 if md.leading > 3 then return new LineCode
 334                 if value[leading] == '#' then return new LineHeadline
 335                 if value[leading] == '>' then return new LineBlockquote
 336
 337                 if ext_mode then
 338                         if value.length - leading - trailing > 2 then
 339                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 340                                         return new LineFence
 341                                 end
 342                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 343                                         return new LineFence
 344                                 end
 345                         end
 346                 end
 347
 348                 if value.length - leading - trailing > 2 and
 349                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 350                    if md.count_chars(value[leading]) >= 3 then
 351                                 return new LineHR
 352                    end
 353                 end
 354
 355                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 356                         var c = value[leading]
 357                         if c == '*' or c == '-' or c == '+' then return new LineUList
 358                 end
 359
 360                 if value.length - leading >= 3 and value[leading].is_digit then
 361                         var i = leading + 1
 362                         while i < value.length and value[i].is_digit do i += 1
 363                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 364                                 return new LineOList
 365                         end
 366                 end
 367
 368                 if value[leading] == '<' and md.check_html then return new LineXML
 369
 370                 var next = md.next
 371                 if next != null and not next.is_empty then
 372                         if next.count_chars('=') > 0 then
 373                                 return new LineHeadline1
 374                         end
 375                         if next.count_chars('-') > 0 then
 376                                 return new LineHeadline2
 377                         end
 378                 end
 379                 return new LineOther
 380         end
 381
 382         # Get the token kind at `pos`.
 383         fun token_at(text: Text, pos: Int): Token do
 384                 var c0: Char
 385                 var c1: Char
 386                 var c2: Char
 387
 388                 if pos > 0 then
 389                         c0 = text[pos - 1]
 390                 else
 391                         c0 = ' '
 392                 end
 393                 var c = text[pos]
 394
 395                 if pos + 1 < text.length then
 396                         c1 = text[pos + 1]
 397                 else
 398                         c1 = ' '
 399                 end
 400                 if pos + 2 < text.length then
 401                         c2 = text[pos + 2]
 402                 else
 403                         c2 = ' '
 404                 end
 405
 406                 var loc
 407                 if no_location then
 408                         loc = null
 409                 else
 410                         loc = new MDLocation(
 411                                 current_loc.line_start,
 412                                 current_loc.column_start + pos,
 413                                 current_loc.line_start,
 414                                 current_loc.column_start + pos)
 415                 end
 416
 417                 if c == '*' then
 418                         if c1 == '*' then
 419                                 if c0 != ' ' or c2 != ' ' then
 420                                         return new TokenStrongStar(loc, pos, c)
 421                                 else
 422                                         return new TokenEmStar(loc, pos, c)
 423                                 end
 424                         end
 425                         if c0 != ' ' or c1 != ' ' then
 426                                 return new TokenEmStar(loc, pos, c)
 427                         else
 428                                 return new TokenNone(loc, pos, c)
 429                         end
 430                 else if c == '_' then
 431                         if c1 == '_' then
 432                                 if c0 != ' ' or c2 != ' ' then
 433                                         return new TokenStrongUnderscore(loc, pos, c)
 434                                 else
 435                                         return new TokenEmUnderscore(loc, pos, c)
 436                                 end
 437                         end
 438                         if ext_mode then
 439                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 440                                    (c1.is_letter or c1.is_digit) then
 441                                         return new TokenNone(loc, pos, c)
 442                                 else
 443                                         return new TokenEmUnderscore(loc, pos, c)
 444                                 end
 445                         end
 446                         if c0 != ' ' or c1 != ' ' then
 447                                 return new TokenEmUnderscore(loc, pos, c)
 448                         else
 449                                 return new TokenNone(loc, pos, c)
 450                         end
 451                 else if c == '!' then
 452                         if c1 == '[' then return new TokenImage(loc, pos, c)
 453                         return new TokenNone(loc, pos, c)
 454                 else if c == '[' then
 455                         return new TokenLink(loc, pos, c)
 456                 else if c == ']' then
 457                         return new TokenNone(loc, pos, c)
 458                 else if c == '`' then
 459                         if c1 == '`' then
 460                                 return new TokenCodeDouble(loc, pos, c)
 461                         else
 462                                 return new TokenCodeSingle(loc, pos, c)
 463                         end
 464                 else if c == '\\' then
 465                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 466                                 return new TokenEscape(loc, pos, c)
 467                         else
 468                                 return new TokenNone(loc, pos, c)
 469                         end
 470                 else if c == '<' then
 471                         return new TokenHTML(loc, pos, c)
 472                 else if c == '&' then
 473                         return new TokenEntity(loc, pos, c)
 474                 else
 475                         if ext_mode then
 476                                 if c == '~' and c1 == '~' then
 477                                         return new TokenStrike(loc, pos, c)
 478                                 end
 479                         end
 480                         return new TokenNone(loc, pos, c)
 481                 end
 482         end
 483
 484         # Find the position of a `token` in `self`.
 485         fun find_token(text: Text, start: Int, token: Token): Int do
 486                 var pos = start
 487                 while pos < text.length do
 488                         if token_at(text, pos).is_same_type(token) then
 489                                 return pos
 490                         end
 491                         pos += 1
 492                 end
 493                 return -1
 494         end
 495
        # Kind of decorator used for decoration.
        type DECORATOR: Decorator

        # Decorator used for output.
        #
        # Default is a new `HTMLDecorator`, created lazily.
        var decorator: DECORATOR is writable, lazy do
                return new HTMLDecorator
        end
 504
 505         # Create a new `MarkdownEmitter` using a custom `decorator`.
 506         init with_decorator(decorator: DECORATOR) do
 507                 self.decorator = decorator
 508         end
 509
 510         # Output `block` using `decorator` in the current buffer.
 511         fun emit(block: Block): Text do
 512                 var buffer = push_buffer
 513                 block.emit(self)
 514                 pop_buffer
 515                 return buffer
 516         end
 517
 518         # Output the content of `block`.
 519         fun emit_in(block: Block) do block.emit_in(self)
 520
 521         # Transform and emit mardown text
 522         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 523
 524         # Transform and emit mardown text starting at `start` and
 525         # until a token with the same type as `token` is found.
 526         # Go until the end of `text` if `token` is null.
 527         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 528                 var old_text = current_text
 529                 var old_pos = current_pos
 530                 current_text = text
 531                 current_pos = start
 532                 while current_pos < text.length do
 533                         if text[current_pos] == '\n' then
 534                                 current_loc.line_start += 1
 535                                 current_loc.column_start = -current_pos
 536                         end
 537                         var mt = token_at(text, current_pos)
 538                         if (token != null and not token isa TokenNone) and
 539                         (mt.is_same_type(token) or
 540                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 541                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 542                                 return current_pos
 543                         end
 544                         mt.emit(self)
 545                         current_pos += 1
 546                 end
 547                 current_text = old_text
 548                 current_pos = old_pos
 549                 return -1
 550         end
 551
        # Currently processed position in `current_text`.
        #
        # Used when visiting inline production with `emit_text_until`.
        # `-1` until `emit_text_until` is first called.
        private var current_pos: Int = -1

        # Currently processed text.
        #
        # Used when visiting inline production with `emit_text_until`.
        private var current_text: nullable Text = null
 559
 560         # Stacked buffers.
 561         private var buffer_stack = new List[FlatBuffer]
 562
 563         # Push a new buffer on the stack.
 564         private fun push_buffer: FlatBuffer do
 565                 var buffer = new FlatBuffer
 566                 buffer_stack.add buffer
 567                 return buffer
 568         end
 569
 570         # Pop the last buffer.
 571         private fun pop_buffer do buffer_stack.pop
 572
 573         # Current output buffer.
 574         private fun current_buffer: FlatBuffer do
 575                 assert not buffer_stack.is_empty
 576                 return buffer_stack.last
 577         end
 578
 579         # Stacked locations.
 580         private var loc_stack = new List[MDLocation]
 581
 582         # Push a new MDLocation on the stack.
 583         private fun push_loc(location: MDLocation) do loc_stack.add location
 584
 585         # Pop the last buffer.
 586         private fun pop_loc: MDLocation do return loc_stack.pop
 587
 588         # Current output buffer.
 589         private fun current_loc: MDLocation do
 590                 assert not loc_stack.is_empty
 591                 return loc_stack.last
 592         end
 593
 594         # Append `e` to current buffer.
 595         fun add(e: Writable) do
 596                 if e isa Text then
 597                         current_buffer.append e
 598                 else
 599                         current_buffer.append e.write_to_string
 600                 end
 601         end
 602
 603         # Append `c` to current buffer.
 604         fun addc(c: Char) do
 605                 current_buffer.add c
 606         end
 607
 608         # Append a "\n" line break.
 609         fun addn do addc '\n'
 610 end
 611
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

        # Link href
        var link: String

        # Optional link title
        var title: nullable String = null

        # Is the link an abbreviation?
        var is_abbrev = false

        # Create a link with a title.
        #
        # `link` is passed to the default initializer, then `title` is set.
        init with_title(link: String, title: nullable String) do
                init(link)
                self.title = title
        end
end
 635
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Apart from `add_char`, all the services are abstract.
interface Decorator

        # Kind of processor used
        type PROCESSOR: MarkdownProcessor

        # Render a single plain char.
        #
        # Redefine this method to add special escaping for plain text.
        # By default, `c` is appended as is to the output of `v`.
        fun add_char(v: PROCESSOR, c: Char) do v.addc c

        # Render a ruler block.
        fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract

        # Render a headline block with corresponding level.
        fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract

        # Render a paragraph block.
        fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract

        # Render a code or fence block.
        fun add_code(v: PROCESSOR, block: BlockCode) is abstract

        # Render a blockquote.
        fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract

        # Render an unordered list.
        fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract

        # Render an ordered list.
        fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract

        # Render a list item.
        fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract

        # Render an emphasis text.
        fun add_em(v: PROCESSOR, text: Text) is abstract

        # Render a strong text.
        fun add_strong(v: PROCESSOR, text: Text) is abstract

        # Render a strike text.
        #
        # Extended mode only (see `MarkdownProcessor::ext_mode`)
        fun add_strike(v: PROCESSOR, text: Text) is abstract

        # Render a link.
        fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

        # Render an image.
        fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

        # Render an abbreviation.
        fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract

        # Render a code span reading from a buffer.
        fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

        # Render a text and escape it.
        fun append_value(v: PROCESSOR, value: Text) is abstract

        # Render code text from buffer and escape it.
        fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

        # Render a character escape.
        fun escape_char(v: PROCESSOR, char: Char) is abstract

        # Render a line break
        fun add_line_break(v: PROCESSOR) is abstract

        # Generate a new html valid id from a `String`.
        fun strip_id(txt: String): String is abstract

        # Found headlines during the processing labeled by their ids.
        fun headlines: ArrayMap[String, HeadLine] is abstract
end
 713
# Class representing a markdown headline.
class HeadLine
        # Unique identifier of this headline.
        var id: String

        # Text of the headline.
        var title: String

        # Level of this headline.
        #
        # According to the markdown specification, level must be in `[1..6]`.
        var level: Int
end
 727
 728 # `Decorator` that outputs HTML.
 729 class HTMLDecorator
 730         super Decorator
 731
        # Headlines recorded by `add_headline`, indexed by their ids.
        redef var headlines = new ArrayMap[String, HeadLine]

        # Render a ruler as `<hr/>` followed by a line break.
        redef fun add_ruler(v, block) do v.add "<hr/>\n"
 735
 736         redef fun add_headline(v, block) do
 737                 # save headline
 738                 var line = block.block.first_line
 739                 if line == null then return
 740                 var txt = line.value
 741                 var id = strip_id(txt)
 742                 var lvl = block.depth
 743                 headlines[id] = new HeadLine(id, txt, lvl)
 744                 # output it
 745                 v.add "<h{lvl} id=\"{id}\">"
 746                 v.emit_in block
 747                 v.add "</h{lvl}>\n"
 748         end
 749
 750         redef fun add_paragraph(v, block) do
 751                 v.add "<p>"
 752                 v.emit_in block
 753                 v.add "</p>\n"
 754         end
 755
 756         redef fun add_code(v, block) do
 757                 var meta = block.meta
 758                 if meta != null then
 759                         v.add "<pre class=\""
 760                         append_value(v, meta)
 761                         v.add "\"><code>"
 762                 else
 763                         v.add "<pre><code>"
 764                 end
 765                 v.emit_in block
 766                 v.add "</code></pre>\n"
 767         end
 768
 769         redef fun add_blockquote(v, block) do
 770                 v.add "<blockquote>\n"
 771                 v.emit_in block
 772                 v.add "</blockquote>\n"
 773         end
 774
 775         redef fun add_unorderedlist(v, block) do
 776                 v.add "<ul>\n"
 777                 v.emit_in block
 778                 v.add "</ul>\n"
 779         end
 780
 781         redef fun add_orderedlist(v, block) do
 782                 v.add "<ol>\n"
 783                 v.emit_in block
 784                 v.add "</ol>\n"
 785         end
 786
 787         redef fun add_listitem(v, block) do
 788                 v.add "<li>"
 789                 v.emit_in block
 790                 v.add "</li>\n"
 791         end
 792
 793         redef fun add_em(v, text) do
 794                 v.add "<em>"
 795                 v.add text
 796                 v.add "</em>"
 797         end
 798
 799         redef fun add_strong(v, text) do
 800                 v.add "<strong>"
 801                 v.add text
 802                 v.add "</strong>"
 803         end
 804
 805         redef fun add_strike(v, text) do
 806                 v.add "<del>"
 807                 v.add text
 808                 v.add "</del>"
 809         end
 810
 811         redef fun add_image(v, link, name, comment) do
 812                 v.add "<img src=\""
 813                 append_value(v, link)
 814                 v.add "\" alt=\""
 815                 append_value(v, name)
 816                 v.add "\""
 817                 if comment != null and not comment.is_empty then
 818                         v.add " title=\""
 819                         append_value(v, comment)
 820                         v.add "\""
 821                 end
 822                 v.add "/>"
 823         end
 824
 825         redef fun add_link(v, link, name, comment) do
 826                 v.add "<a href=\""
 827                 append_value(v, link)
 828                 v.add "\""
 829                 if comment != null and not comment.is_empty then
 830                         v.add " title=\""
 831                         append_value(v, comment)
 832                         v.add "\""
 833                 end
 834                 v.add ">"
 835                 v.emit_text(name)
 836                 v.add "</a>"
 837         end
 838
 839         redef fun add_abbr(v, name, comment) do
 840                 v.add "<abbr title=\""
 841                 append_value(v, comment)
 842                 v.add "\">"
 843                 v.emit_text(name)
 844                 v.add "</abbr>"
 845         end
 846
 847         redef fun add_span_code(v, text, from, to) do
 848                 v.add "<code>"
 849                 append_code(v, text, from, to)
 850                 v.add "</code>"
 851         end
 852
 853         redef fun add_line_break(v) do
 854                 v.add "<br/>"
 855         end
 856
 857         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 858
 859         redef fun escape_char(v, c) do
 860                 if c == '&' then
 861                         v.add "&amp;"
 862                 else if c == '<' then
 863                         v.add "&lt;"
 864                 else if c == '>' then
 865                         v.add "&gt;"
 866                 else if c == '"' then
 867                         v.add "&quot;"
 868                 else if c == '\'' then
 869                         v.add "&apos;"
 870                 else
 871                         v.addc c
 872                 end
 873         end
 874
 875         redef fun append_code(v, buffer, from, to) do
 876                 for i in [from..to[ do
 877                         var c = buffer[i]
 878                         if c == '&' then
 879                                 v.add "&amp;"
 880                         else if c == '<' then
 881                                 v.add "&lt;"
 882                         else if c == '>' then
 883                                 v.add "&gt;"
 884                         else
 885                                 v.addc c
 886                         end
 887                 end
 888         end
 889
 890         redef fun strip_id(txt) do
 891                 # strip id
 892                 var b = new FlatBuffer
 893                 for c in txt do
 894                         if c == ' ' then
 895                                 b.add '_'
 896                         else
 897                                 if not c.is_letter and
 898                                    not c.is_digit and
 899                                    not allowed_id_chars.has(c) then continue
 900                                 b.add c
 901                         end
 902                 end
 903                 var res = b.to_s
 904                 var key = res
 905                 # check for multiple id definitions
 906                 if headlines.has_key(key) then
 907                         var i = 1
 908                         key = "{res}_{i}"
 909                         while headlines.has_key(key) do
 910                                 i += 1
 911                                 key = "{res}_{i}"
 912                         end
 913                 end
 914                 return key
 915         end
 916
 917         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 918 end
 919
 920 # Location in a Markdown input.
 921 class MDLocation
 922
 923         # Starting line number (starting from 1).
 924         var line_start: Int
 925
 926         # Starting column number (starting from 1).
 927         var column_start: Int
 928
 929         # Stopping line number (starting from 1).
 930         var line_end: Int
 931
 932         # Stopping column number (starting from 1).
 933         var column_end: Int
 934
 935         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 936
 937         # Return a copy of `self`.
 938         fun copy: MDLocation do
 939                 return new MDLocation(line_start, column_start, line_end, column_end)
 940         end
 941 end
 942
 943 # A block of markdown lines.
 944 # A `MDBlock` can contains lines and/or sub-blocks.
 945 class MDBlock
 946
 947         # Position of `self` in the input.
 948         var location: MDLocation
 949
 950         # Kind of block.
 951         # See `Block`.
 952         var kind: Block = new BlockNone(self) is writable
 953
 954         # First line if any.
 955         var first_line: nullable MDLine = null is writable
 956
 957         # Last line if any.
 958         var last_line: nullable MDLine = null is writable
 959
 960         # First sub-block if any.
 961         var first_block: nullable MDBlock = null is writable
 962
 963         # Last sub-block if any.
 964         var last_block: nullable MDBlock = null is writable
 965
 966         # Previous block if any.
 967         var prev: nullable MDBlock = null is writable
 968
 969         # Next block if any.
 970         var next: nullable MDBlock = null is writable
 971
 972         # Does this block contain subblocks?
 973         fun has_blocks: Bool do return first_block != null
 974
 975         # Count sub-blocks.
 976         fun count_blocks: Int do
 977                 var count = 0
 978                 var block = first_block
 979                 while block != null do
 980                         count += 1
 981                         block = block.next
 982                 end
 983                 return count
 984         end
 985
 986         # Does this block contain lines?
 987         fun has_lines: Bool do return first_line != null
 988
 989         # Count block lines.
 990         fun count_lines: Int do
 991                 var count = 0
 992                 var line = first_line
 993                 while line != null do
 994                         count += 1
 995                         line = line.next
 996                 end
 997                 return count
 998         end
 999
1000         # Split `self` creating a new sub-block having `line` has `last_line`.
1001         fun split(line: MDLine): MDBlock do
1002                 # location for new block
1003                 var new_loc = new MDLocation(
1004                         first_line.as(not null).location.line_start,
1005                         first_line.as(not null).location.column_start,
1006                         line.location.line_end,
1007                         line.location.column_end)
1008                 # create block
1009                 var block = new MDBlock(new_loc)
1010                 block.first_line = first_line
1011                 block.last_line = line
1012                 first_line = line.next
1013                 line.next = null
1014                 if first_line == null then
1015                         last_line = null
1016                 else
1017                         first_line.as(not null).prev = null
1018                         # update current block loc
1019                         location.line_start = first_line.as(not null).location.line_start
1020                         location.column_start = first_line.as(not null).location.column_start
1021                 end
1022                 if first_block == null then
1023                         first_block = block
1024                         last_block = block
1025                 else
1026                         last_block.as(not null).next = block
1027                         last_block = block
1028                 end
1029                 return block
1030         end
1031
1032         # Add a `line` to this block.
1033         fun add_line(line: MDLine) do
1034                 if last_line == null then
1035                         first_line = line
1036                         last_line = line
1037                 else
1038                         last_line.as(not null).next_empty = line.is_empty
1039                         line.prev_empty = last_line.as(not null).is_empty
1040                         line.prev = last_line
1041                         last_line.as(not null).next = line
1042                         last_line = line
1043                 end
1044         end
1045
1046         # Remove `line` from this block.
1047         fun remove_line(line: MDLine) do
1048                 if line.prev == null then
1049                         first_line = line.next
1050                 else
1051                         line.prev.as(not null).next = line.next
1052                 end
1053                 if line.next == null then
1054                         last_line = line.prev
1055                 else
1056                         line.next.as(not null).prev = line.prev
1057                 end
1058                 line.prev = null
1059                 line.next = null
1060         end
1061
1062         # Remove leading empty lines.
1063         fun remove_leading_empty_lines: Bool do
1064                 var was_empty = false
1065                 var line = first_line
1066                 while line != null and line.is_empty do
1067                         remove_line line
1068                         line = first_line
1069                         was_empty = true
1070                 end
1071                 return was_empty
1072         end
1073
1074         # Remove trailing empty lines.
1075         fun remove_trailing_empty_lines: Bool do
1076                 var was_empty = false
1077                 var line = last_line
1078                 while line != null and line.is_empty do
1079                         remove_line line
1080                         line = last_line
1081                         was_empty = true
1082                 end
1083                 return was_empty
1084         end
1085
1086         # Remove leading and trailing empty lines.
1087         fun remove_surrounding_empty_lines: Bool do
1088                 var was_empty = false
1089                 if remove_leading_empty_lines then was_empty = true
1090                 if remove_trailing_empty_lines then was_empty = true
1091                 return was_empty
1092         end
1093
1094         # Remove list markers and up to 4 leading spaces.
1095         # Used to clean nested lists.
1096         fun remove_list_indent(v: MarkdownProcessor) do
1097                 var line = first_line
1098                 while line != null do
1099                         if not line.is_empty then
1100                                 var kind = v.line_kind(line)
1101                                 if kind isa LineList then
1102                                         line.value = kind.extract_value(line)
1103                                 else
1104                                         line.value = line.value.substring_from(line.leading.min(4))
1105                                 end
1106                                 line.leading = line.process_leading
1107                         end
1108                         line = line.next
1109                 end
1110         end
1111
1112         # Collect block line text.
1113         fun text: String do
1114                 var text = new FlatBuffer
1115                 var line = first_line
1116                 while line != null do
1117                         if not line.is_empty then
1118                                 text.append line.text
1119                         end
1120                         text.append "\n"
1121                         line = line.next
1122                 end
1123                 return text.write_to_string
1124         end
1125 end
1126
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` with `v`.
        #
        # By default, only the content of `self` is emitted, see `emit_in`.
        fun emit(v: MarkdownProcessor) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed from `block` first.
        # Lines are emitted if there are any, sub-blocks otherwise.
        fun emit_in(v: MarkdownProcessor) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are first collected in a buffer pushed on `v`, without their
        # leading and trailing spaces, then the buffer is passed to `v.emit_text`.
        # A line ending with two spaces or more is followed by a line break.
        fun emit_lines(v: MarkdownProcessor) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` takes a number of characters, not an end index:
                                # the text between the leading and the trailing spaces is
                                # `length - leading - trailing` characters long.
                                var count = line.value.length - line.leading - line.trailing
                                # `value` may have been shortened since `trailing` was computed,
                                # never ask for a negative count.
                                if count < 0 then count = 0
                                v.add line.value.substring(line.leading, count)
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        # Lines are separated by a new line, nothing is added after the last one.
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while it is emitted.
        fun emit_blocks(v: MarkdownProcessor) do
                var sub = self.block.first_block
                while sub != null do
                        v.push_loc(sub.location)
                        sub.kind.emit(v)
                        v.pop_loc
                        sub = sub.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Outside of a `BlockFence`, a 4 spaces prefix is removed from the lines.
        # Empty lines only contribute their `\n`.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        # NOTE(review): the second argument of `substring` is a count,
                                        # so `str.length - line.trailing` overshoots by 4 and trailing
                                        # spaces are not reliably removed here. Kept as is since callers
                                        # may rely on the current raw output -- confirm the intent.
                                        text.append str.substring(4, str.length - line.trailing)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1196
# A block without any markdown specificities.
#
# Nothing is added to `Block`,
# this class is only used for typing purposes.
class BlockNone
        super Block
end
1204
# A markdown blockquote.
class BlockQuote
        super Block

        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove the blockquote marker of each line of `block`.
        #
        # The marker is the `>` found right after the leading spaces,
        # plus the space following it if any.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty and line.value[line.leading] == '>' then
                                # Number of characters to drop: leading spaces and marker...
                                var rem = line.leading + 1
                                # ... and one space after the marker.
                                if rem < line.value.length and line.value[rem] == ' ' then rem += 1
                                line.value = line.value.substring_from(rem)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1230
# A markdown code block.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Number of chars to skip at the beginning of each line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line from `line_start` with `append_code`, followed by a new line.
        #
        # Empty lines only emit the new line.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                var code = line.value
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        line = line.next
                end
        end
end
1256
# A markdown code-fence block.
#
# The implementation is inherited from `BlockCode`, only `line_start` differs.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1267
# A markdown headline.
class BlockHeadline
        super Block

        # Emit the headline with a location shifted by `start` columns.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Index of the first title character, set by `transform_headline`.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from the first line of `block`.
        #
        # Sets `depth` from the number of opening `#` (at most 6) and `start`.
        # Nothing is done if `depth` is already set, or if the first line
        # is missing or empty.
        # A line without any title text is marked as empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null then return
                if line.is_empty then return
                # Skip the opening marks, counting them...
                var start = line.leading
                while start < line.value.length and line.value[start] == '#' do
                        level += 1
                        start += 1
                end
                # ... and the spaces following them.
                while start < line.value.length and line.value[start] == ' ' do
                        start += 1
                end
                if start >= line.value.length then
                        line.is_empty = true
                else
                        # Skip backward the closing marks and the spaces before them.
                        # The scan never goes before `start`: a headline made only of
                        # closing marks (like `# #`) used to end with `nend < start`,
                        # hence a negative count given to `substring`.
                        var nend = line.value.length - line.trailing - 1
                        while nend >= start and line.value[nend] == '#' do nend -= 1
                        while nend >= start and line.value[nend] == ' ' do nend -= 1
                        if nend < start then
                                # Nothing left between the marks.
                                line.is_empty = true
                        else
                                line.value = line.value.substring(start, nend - start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                self.start = start
                depth = level.min(6)
        end
end
1314
# An item of a markdown list.
class BlockListItem
        super Block

        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1321
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # A new item starts at each list line, and at each non-empty line
        # without leading spaces that follows an empty line.
        private fun init_block(v: MarkdownProcessor) do
                var first = block.first_line
                if first == null then return
                var line = first.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                # Lines before `line` form an item.
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                # Remaining lines form the last item.
                var last = block.split(block.last_line.as(not null))
                last.kind = new BlockListItem(last)
        end

        # Expand list items as paragraphs if needed.
        #
        # If a sub-block of any list item of `block` is a `BlockParagraph`, then
        # every `BlockNone` sub-block of the list items becomes a `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                # Look for a paragraph inside the list items.
                var has_paragraph = false
                var outer = block.first_block
                while outer != null and not has_paragraph do
                        if outer.kind isa BlockListItem then
                                var inner = outer.first_block
                                while inner != null do
                                        if inner.kind isa BlockParagraph then
                                                has_paragraph = true
                                                break
                                        end
                                        inner = inner.next
                                end
                        end
                        outer = outer.next
                end
                if not has_paragraph then return
                # Turn the untyped sub-blocks of the list items into paragraphs.
                outer = block.first_block
                while outer != null do
                        if outer.kind isa BlockListItem then
                                var inner = outer.first_block
                                while inner != null do
                                        if inner.kind isa BlockNone then
                                                inner.kind = new BlockParagraph(inner)
                                        end
                                        inner = inner.next
                                end
                        end
                        outer = outer.next
                end
        end
end
1380
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1387
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1394
# A markdown paragraph.
class BlockParagraph
        super Block

        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1401
# A markdown horizontal ruler.
class BlockRuler
        super Block

        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1408
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit each line value as is, followed by a new line.
        #
        # Empty lines only emit the new line.
        redef fun emit_lines(v) do
                var cur = block.first_line
                while cur != null do
                        if not cur.is_empty then
                                v.add cur.value
                        end
                        v.addn
                        cur = cur.next
                end
        end
end
1422
1423 # A markdown line.
1424 class MDLine
1425
1426         # Location of `self` in the original input.
1427         var location: MDLocation
1428
1429         # Text contained in this line.
1430         var value: String is writable
1431
1432         # Is this line empty?
1433         # Lines containing only spaces are considered empty.
1434         var is_empty: Bool = true is writable
1435
1436         # Previous line in `MDBlock` or null if first line.
1437         var prev: nullable MDLine = null is writable
1438
1439         # Next line in `MDBlock` or null if last line.
1440         var next: nullable MDLine = null is writable
1441
1442         # Is the previous line empty?
1443         var prev_empty: Bool = false is writable
1444
1445         # Is the next line empty?
1446         var next_empty: Bool = false is writable
1447
1448         # Initialize a new MDLine from its string value
1449         init do
1450                 self.leading = process_leading
1451                 if leading != value.length then
1452                         self.is_empty = false
1453                         self.trailing = process_trailing
1454                 end
1455         end
1456
1457         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1458         fun clear do
1459                 value = ""
1460                 leading = 0
1461                 trailing = 0
1462                 is_empty = true
1463                 if prev != null then prev.as(not null).next_empty = true
1464                 if next != null then next.as(not null).prev_empty = true
1465         end
1466
1467         # Number or leading spaces on this line.
1468         var leading: Int = 0 is writable
1469
1470         # Compute `leading` depending on `value`.
1471         fun process_leading: Int do
1472                 var count = 0
1473                 var value = self.value
1474                 while count < value.length and value[count] == ' ' do count += 1
1475                 if leading == value.length then clear
1476                 return count
1477         end
1478
1479         # Number of trailing spaces on this line.
1480         var trailing: Int = 0 is writable
1481
1482         # Compute `trailing` depending on `value`.
1483         fun process_trailing: Int do
1484                 var count = 0
1485                 var value = self.value
1486                 while value[value.length - count - 1] == ' ' do
1487                         count += 1
1488                 end
1489                 return count
1490         end
1491
1492         # Count the amount of `ch` in this line.
1493         # Return A value > 0 if this line only consists of `ch` end spaces.
1494         fun count_chars(ch: Char): Int do
1495                 var count = 0
1496                 for c in value do
1497                         if c == ' ' then
1498                                 continue
1499                         end
1500                         if c == ch then
1501                                 count += 1
1502                                 continue
1503                         end
1504                         count = 0
1505                         break
1506                 end
1507                 return count
1508         end
1509
1510         # Count the amount of `ch` at the start of this line ignoring spaces.
1511         fun count_chars_start(ch: Char): Int do
1512                 var count = 0
1513                 for c in value do
1514                         if c == ' ' then
1515                                 continue
1516                         end
1517                         if c == ch then
1518                                 count += 1
1519                         else
1520                                 break
1521                         end
1522                 end
1523                 return count
1524         end
1525
1526         # Last XML line if any.
1527         private var xml_end_line: nullable MDLine = null
1528
1529         # Does `value` contains valid XML markup?
1530         private fun check_html: Bool do
1531                 var tags = new Array[String]
1532                 var tmp = new FlatBuffer
1533                 var pos = leading
1534                 if pos + 1 < value.length and value[pos + 1] == '!' then
1535                         if read_xml_comment(self, pos) > 0 then return true
1536                 end
1537                 pos = value.read_xml(tmp, pos, false)
1538                 var tag: String
1539                 if pos > -1 then
1540                         tag = tmp.xml_tag
1541                         if not tag.is_html_block then
1542                                 return false
1543                         end
1544                         if tag == "hr" then
1545                                 xml_end_line = self
1546                                 return true
1547                         end
1548                         tags.add tag
1549                         var line: nullable MDLine = self
1550                         while line != null do
1551                                 while pos < line.value.length and line.value[pos] != '<' do
1552                                         pos += 1
1553                                 end
1554                                 if pos >= line.value.length then
1555                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1556                                                 tags.pop
1557                                                 if tags.is_empty then
1558                                                         xml_end_line = line
1559                                                         break
1560                                                 end
1561                                         end
1562                                         line = line.next
1563                                         pos = 0
1564                                 else
1565                                         tmp = new FlatBuffer
1566                                         var new_pos = line.value.read_xml(tmp, pos, false)
1567                                         if new_pos > 0 then
1568                                                 tag = tmp.xml_tag
1569                                                 if tag.is_html_block and not tag == "hr" then
1570                                                         if tmp[1] == '/' then
1571                                                                 if tags.last != tag then
1572                                                                         return false
1573                                                                 end
1574                                                                 tags.pop
1575                                                         else
1576                                                                 tags.add tag
1577                                                         end
1578                                                 end
1579                                                 if tags.is_empty then
1580                                                         xml_end_line = line
1581                                                         break
1582                                                 end
1583                                                 pos = new_pos
1584                                         else
1585                                                 pos += 1
1586                                         end
1587                                 end
1588                         end
1589                         return tags.is_empty
1590                 end
1591                 return false
1592         end
1593
1594         # Read a XML comment.
1595         # Used by `check_html`.
1596         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1597                 var line: nullable MDLine = first_line
1598                 if start + 3 < line.as(not null).value.length then
1599                         if line.as(not null).value[2] == '-' and line.as(not null).value[3] == '-' then
1600                                 var pos = start + 4
1601                                 while line != null do
1602                                         while pos < line.value.length and line.value[pos] != '-' do
1603                                                 pos += 1
1604                                         end
1605                                         if pos == line.value.length then
1606                                                 line = line.next
1607                                                 pos = 0
1608                                         else
1609                                                 if pos + 2 < line.value.length then
1610                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1611                                                                 first_line.xml_end_line = line
1612                                                                 return pos + 3
1613                                                         end
1614                                                 end
1615                                                 pos += 1
1616                                         end
1617                                 end
1618                         end
1619                 end
1620                 return -1
1621         end
1622
1623         # Extract the text of `self` without leading and trailing.
1624         fun text: String do return value.substring(leading, value.length - trailing)
1625 end
1626
# A markdown line.
#
# Each kind of line is a subclass that provides its own `process`.
interface Line

        # Parse the line.
        #
        # Abstract here; `v` is the processor the line is processed with.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1634
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the line: the line following the current one becomes the current line.
        redef fun process(v) do
                var current = v.current_line.as(not null)
                v.current_line = current.next
        end
end
1643
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines from the current one up to the end of the block, then split
        # them off the current block as a `BlockParagraph` or a `BlockNone`.
        redef fun process(v) do
                var line = v.current_line
                var was_empty = line.as(not null).prev_empty
                # Look for the first line that does not belong to the block.
                while line != null and not line.is_empty do
                        var found = v.line_kind(line)
                        # `LineHeadline1` and `LineHeadline2` are covered by `LineHeadline`.
                        var stop = found isa LineHeadline or found isa LineHR or
                                   found isa LineBlockquote or found isa LineXML
                        if found isa LineList and (v.in_list or v.ext_mode) then stop = true
                        if v.ext_mode and (found isa LineCode or found isa LineFence) then stop = true
                        if stop then break
                        line = line.next
                end
                var current_block = v.current_block.as(not null)
                # Did the lookup stop on an empty line?
                var ends_on_empty = line != null and line.is_empty
                # Last line of the new block.
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else if ends_on_empty then
                        last = line
                else
                        last = line.prev.as(not null)
                end
                var block = current_block.split(last)
                # `BlockNone` is only used inside a list, when no empty line surrounds the block.
                if v.in_list and not was_empty and not ends_on_empty then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                current_block.remove_leading_empty_lines
                v.current_line = current_block.first_line
        end
end
1694
# A line of markdown code.
class LineCode
        super Line

        # Split off the run of code lines (empty lines included) as a `BlockCode`.
        redef fun process(v) do
                # Look for the first line that is neither empty nor code.
                var line = v.current_line
                while line != null do
                        if not line.is_empty and not v.line_kind(line) isa LineCode then break
                        line = line.next
                end
                # The block ends just before that line, or with the current block.
                var current_block = v.current_block.as(not null)
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var code = current_block.split(last)
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = current_block.first_line
        end
end
1718
# A line of raw XML.
class LineXML
        super Line

        # Split off the lines from the current one to its `xml_end_line` as a `BlockXML`.
        #
        # Does nothing if there is no current line.
        redef fun process(v) do
                var first = v.current_line
                if first == null then return
                var parent = v.current_block.as(not null)
                # Detach what precedes the XML in the current block.
                var before = first.prev
                if before != null then parent.split(before)
                var last = first.xml_end_line.as(not null)
                var xml = parent.split(last)
                xml.kind = new BlockXML(xml)
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1735
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Split off the quote as a `BlockQuote`, remove the quote prefix and process
        # the quoted content recursively.
        redef fun process(v) do
                var line = v.current_line
                var current_block = v.current_block.as(not null)
                # The quote stops on the first non-empty line with no leading that follows
                # an empty line and is not a blockquote line itself.
                while line != null do
                        if not line.is_empty and line.prev_empty and line.leading == 0 then
                                if not v.line_kind(line) isa LineBlockquote then break
                        end
                        line = line.next
                end
                # Last line of the quote.
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var quote = current_block.split(last)
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                v.current_line = line
                v.recurse(quote, false)
                v.current_line = current_block.first_line
        end
end
1766
# A markdown ruler line.
class LineHR
        super Line

        # Split off the current line as a `BlockRuler`.
        #
        # Does nothing if there is no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var parent = v.current_block.as(not null)
                # Detach what precedes the ruler in the current block.
                var before = line.prev
                if before != null then parent.split(before)
                var ruler = parent.split(line)
                ruler.kind = new BlockRuler(ruler)
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1782
# A markdown fence code line.
class LineFence
        super Line

        # Split off the lines up to the next fence line (included) as a `BlockFence`.
        #
        # If no other fence line is found, the block runs to the end of the current block.
        redef fun process(v) do
                var opening_line = v.current_line.as(not null)
                var current_block = v.current_block.as(not null)
                # Stop on the line that follows the next fence, if any.
                var line = opening_line.next
                while line != null do
                        var closes = v.line_kind(line) isa LineFence
                        line = line.next
                        if closes then break
                end
                # Last line of the fence block.
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var block = current_block.split(last)
                block.remove_surrounding_empty_lines
                # The first line gives the fence meta, then it is cleared.
                var head = block.first_line.as(not null)
                block.kind = new BlockFence(block, head.value.meta_from_fence)
                head.clear
                # The last line is cleared too when it is a fence line.
                var tail = block.last_line
                if tail != null and v.line_kind(tail) isa LineFence then tail.clear
                block.remove_surrounding_empty_lines
                v.current_line = line
        end
end
1817
# A markdown headline.
class LineHeadline
        super Line

        # Split off the current line as a `BlockHeadline` and transform it.
        #
        # Does nothing if there is no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var parent = v.current_block.as(not null)
                # Detach what precedes the headline in the current block.
                var before = line.prev
                if before != null then parent.split(before)
                var block = parent.split(line)
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1836
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Split off the current line as a `BlockHeadline` of depth 1.
        #
        # The line following the headline is cleared.
        # Does nothing if there is no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var parent = v.current_block.as(not null)
                # Detach what precedes the headline in the current block.
                var before = line.prev
                if before != null then parent.split(before)
                var below = line.next.as(not null)
                below.clear
                var block = parent.split(line)
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1857
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Split off the current line as a `BlockHeadline` of depth 2.
        #
        # The line following the headline is cleared.
        # Does nothing if there is no current line.
        redef fun process(v) do
                var line = v.current_line
                if line == null then return
                var parent = v.current_block.as(not null)
                # Detach what precedes the headline in the current block.
                var before = line.prev
                if before != null then parent.split(before)
                var below = line.next.as(not null)
                below.clear
                var block = parent.split(line)
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1878
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Split off the whole list as a block of kind `block_kind`, then process each
        # of its sub-blocks recursively.
        redef fun process(v) do
                var line = v.current_line
                # The list stops on the first non-empty line with no leading that follows
                # an empty line and is not a list line itself.
                while line != null do
                        var found = v.line_kind(line)
                        if not line.is_empty then
                                if line.prev_empty and line.leading == 0 and not found isa LineList then break
                        end
                        line = line.next
                end
                # Last line of the list.
                var current_block = v.current_block.as(not null)
                var last: MDLine
                if line == null then
                        last = current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var list = current_block.split(last)
                var list_kind = block_kind(list)
                list.kind = list_kind
                # The flags are reset both before and after the empty lines removal.
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                list_kind.init_block(v)
                # Process each sub-block of the list.
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                list_kind.expand_paragraphs(list)
                v.current_line = line
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1925
# An ordered list line.
class LineOList
        super LineList

        # Lists made of such lines are `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two chars after the first `.` of the line.
        redef fun extract_value(line) do
                var text = line.value
                var dot = text.index_of('.')
                return text.substring_from(dot + 2)
        end
end
1936
# An unordered list line.
class LineUList
        super LineList

        # Lists made of such lines are `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two chars after the leading of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1947
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Location of `self` in the original input.
        var location: nullable MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token using `MarkdownProcessor::decorator`.
        #
        # By default, `char` is simply added through the decorator.
        fun emit(v: MarkdownProcessor) do
                var decorator = v.decorator
                decorator.add_char(v, char)
        end
end
1964
# A token without a specific meaning.
#
# Declares nothing of its own: `emit` is the one inherited from `Token`.
class TokenNone
        super Token
end
1969
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text following the token into a separate buffer, then output it
        # as emphasis if `emit_text_until` returned a position greater than 0.
        #
        # Otherwise `char` is output as is.
        redef fun emit(v) do
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        # No usable end position: fall back to the raw char.
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = stop
        end
end
1986
# An emphasis star token.
#
# Declares nothing of its own: `emit` is the one inherited from `TokenEm`.
class TokenEmStar
        super TokenEm
end
1991
# An emphasis underscore token.
#
# Declares nothing of its own: `emit` is the one inherited from `TokenEm`.
class TokenEmUnderscore
        super TokenEm
end
1996
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text located two chars after the token into a separate buffer, then
        # output it as strong if `emit_text_until` returned a position greater than 0.
        #
        # Otherwise `char` is output as is.
        redef fun emit(v) do
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        # No usable end position: fall back to the raw char.
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = stop + 1
        end
end
2013
# A strong star token.
#
# Declares nothing of its own: `emit` is the one inherited from `TokenStrong`.
class TokenStrongStar
        super TokenStrong
end
2018
# A strong underscore token.
#
# Declares nothing of its own: `emit` is the one inherited from `TokenStrong`.
class TokenStrongUnderscore
        super TokenStrong
end
2023
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Look for the matching token with `find_token` and output the text in between
        # as span code, without the spaces at both ends.
        #
        # If `find_token` does not return a position greater than 0, `char` is output as is.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var offset = next_pos
                var span_start = pos + offset + 1
                var span_end = v.find_token(text, span_start, self)
                if span_end <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = span_end + offset
                # Skip the spaces at the beginning of the span.
                while span_start < span_end and text[span_start] == ' ' do span_start += 1
                # Nothing is output for a span made only of spaces.
                if span_start >= span_end then return
                # Skip the spaces at the end of the span.
                while text[span_end - 1] == ' ' do span_end -= 1
                v.decorator.add_span_code(v, text, span_start, span_end)
        end

        # Extra offset added to the positions computed by `emit`.
        private fun next_pos: Int is abstract
end
2047
# A span code token.
class TokenCodeSingle
        super TokenCode

        # No extra offset is added by `TokenCode::emit`.
        redef fun next_pos do return 0
end
2054
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One extra position is added by `TokenCode::emit`.
        redef fun next_pos do return 1
end
2061
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Output the link or image with `emit_hyper` if `check_link` accepts it,
        # output `char` as is otherwise.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownProcessor) is abstract

        # Check if the link is a valid link.
        #
        # Parses `v.current_text` from `start` and fills `name`, `link`, `comment` and
        # `is_abbrev`. Three forms are handled after the link id:
        #
        # * `(...)`: the address is given inline, optionally followed by a quoted title
        # * `[...]`: the address is looked up in `v.link_refs` (using `name` if the id is empty)
        # * anything else, or the end of the text: `name` itself is looked up in `v.link_refs`
        #
        # Returns the position `emit` assigns to `v.current_pos`, or `-1` if the
        # construct is not a valid link.
        #
        # `out` is not used by the current implementation.
        private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                if md == null then return -1
                # The id starts one char further for a `TokenLink`, two for other tokens.
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # End of text reached: `name` must be a known reference.
                        var tid = name.as(not null).write_to_string.to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        # The address may be wrapped in `<...>`.
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # Skipping the `>` may reach the end of the text: the link is then
                        # unterminated and `md[pos]` must not be read.
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                if pos > start and md[pos] == '"' then
                                        # Optional quoted title.
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        # An empty id means that `name` is the id.
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.as(not null).write_to_string.to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
                        if v.link_refs.has_key(tid) then
                                var lr = v.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                # An unknown reference leaves `link` unset.
                if link == null then return -1
                return pos
        end
end
2182
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Output an abbreviation if the link is one and has a `comment`,
        # output a regular link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                        return
                end
                v.decorator.add_link(v, link.as(not null), name.as(not null), title)
        end
end
2195
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Output the image through the decorator, with `link`, `name` and `comment`.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var label = name.as(not null)
                v.decorator.add_image(v, address, label, comment)
        end
end
2204
# A HTML/XML token.
class TokenHTML
        super Token

        # Output what `check_html` wrote if it accepts the markup at `v.current_pos`,
        # output the escaped `char` otherwise.
        redef fun emit(v) do
                var html = new FlatBuffer
                var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # An auto link is output right away through the decorator and nothing is
        # written to `out`; inline HTML is read into `out` with `read_xml`.
        #
        # Returns the position reached in `md`, or `-1` if nothing was recognized.
        private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var url = new FlatBuffer
                var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and url.is_link_prefix then
                        # Read the rest of the address, up to the closing `>`.
                        pos = md.read_until(url, pos, '>')
                        if pos != -1 then
                                var link = url.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2242
# An HTML entity token.
class TokenEntity
        super Token

        # Output the entity as is if `check_entity` accepts it,
        # output the escaped `char` otherwise.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Reads `md` from `start` up to the next `;` into `out`, then checks what was read:
        #
        # * after `#x` or `#X`: at least one char, hexadecimal digits only
        # * after `#`: decimal digits only
        # * otherwise: letters and digits only
        #
        # On success the `;` is appended to `out` and the position of the `;` in `md`
        # is returned. Returns `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        # Reject anything outside of `0-9`, `a-f` and `A-F`.
                                        var is_hex = (c >= '0' and c <= '9') or
                                                     (c >= 'a' and c <= 'f') or
                                                     (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
2296
# A markdown escape token.
class TokenEscape
        super Token

        # Move `v.current_pos` one char forward and output the char found there as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                var text = v.current_text.as(not null)
                v.addc text[escaped]
        end
end
2306
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        # Emit the text located two chars after the token into a separate buffer, then
        # output it as strike if `emit_text_until` returned a position greater than 0.
        #
        # Otherwise `char` is output as is.
        redef fun emit(v) do
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        # No usable end position: fall back to the raw char.
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                v.current_pos = stop + 1
        end
end
2325
redef class Text

        # Get the position of the first character, at or after `start`, that is
        # neither a space nor a newline.
        #
        # Returns `-1` if the end of `self` is reached first.
        private fun skip_spaces(start: Int): Int do
                var pos = start
                while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
                        pos += 1
                end
                if pos < length then return pos
                return -1
        end

        # Read `self` from `start` until one of the `nend` characters and append
        # what was read to the `out` buffer.
        #
        # A backslash followed by another character is handled by `escape`.
        #
        # Returns the position of the terminator found, or `-1` if the end of
        # `self` is reached first.
        private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else if nend.has(c) then
                                break
                        else
                                out.add c
                        end
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as raw text from `start` until one of the `nend` characters
        # and append what was read to the `out` buffer.
        #
        # No escape is made.
        #
        # Returns the position of the terminator found, or `-1` if the end of
        # `self` is reached first.
        private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if nend.has(c) then break
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as XML from `from` until one of the `to` characters and
        # append what was read to the `out` buffer.
        #
        # Characters are copied as they are.
        # Terminators located inside a single or double quoted string are ignored.
        # Inside such a string, a backslash and the character that follows it are
        # copied without being interpreted.
        #
        # Returns the position of the terminator found, or `-1` if the end of
        # `self` is reached first.
        private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
                var pos = from
                var in_str = false
                var str_char: nullable Char = null
                while pos < length do
                        var c = self[pos]
                        if in_str then
                                if c == '\\' then
                                        # Copy the backslash then the character it protects,
                                        # so that an escaped quote does not close the string.
                                        out.add c
                                        pos += 1
                                        if pos < length then
                                                out.add self[pos]
                                                pos += 1
                                        end
                                        continue
                                end
                                # Only the quote that opened the string closes it,
                                # the other kind of quote is plain content.
                                if c == str_char then in_str = false
                        else if c == '"' or c == '\'' then
                                in_str = true
                                str_char = c
                        else if to.has(c) then
                                break
                        end
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read the XML tag opened at `start` and append it to the `out` buffer.
        #
        # When `safe_mode` is set, the tag name is checked: if it is not made of
        # letters only, `&lt;` is appended and the reading fails; if it is one of
        # the `html_unsafe_tags`, the angle brackets of the tag are appended as
        # `&lt;` and `&gt;`.
        #
        # A tag starting with `<!` is not read further: `<!` is appended and the
        # position of the `!` is returned.
        #
        # Returns the position of the closing `>`, or `-1` if no complete tag
        # can be read.
        private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
                if start + 1 >= length then return -1
                var is_valid = true
                var is_close_tag = false
                var pos = start + 1
                if self[pos] == '/' then
                        is_close_tag = true
                        pos += 1
                else if self[pos] == '!' then
                        out.append "<!"
                        return pos
                end
                if safe_mode then
                        # Read the tag name apart so it can be checked before output.
                        var tmp = new FlatBuffer
                        pos = read_xml_until(tmp, pos, ' ', '/', '>')
                        if pos == -1 then return -1
                        var tag = tmp.write_to_string.trim.to_lower
                        if not tag.is_valid_html_tag then
                                out.append "&lt;"
                                pos = -1
                        else if tag.is_html_unsafe then
                                is_valid = false
                                out.append "&lt;"
                                if is_close_tag then out.add '/'
                                out.append tmp
                        else
                                out.append "<"
                                if is_close_tag then out.add '/'
                                out.append tmp
                        end
                else
                        out.add '<'
                        if is_close_tag then out.add '/'
                        pos = read_xml_until(out, pos, ' ', '/', '>')
                end
                if pos == -1 then return -1
                # Read what follows the tag name (attributes).
                pos = read_xml_until(out, pos, '/', '>')
                if pos == -1 then return -1
                if self[pos] == '/' then
                        out.append " /"
                        pos = self.read_xml_until(out, pos + 1, '>')
                        if pos == -1 then return -1
                end
                if self[pos] == '>' then
                        if is_valid then
                                out.add '>'
                        else
                                out.append "&gt;"
                        end
                        return pos
                end
                return -1
        end

        # Read a markdown link address from `start` and append it to the `out` buffer.
        #
        # The reading stops on a space found outside nested parentheses or on
        # the `)` that closes the link.
        # A backslash followed by another character is handled by `escape`.
        #
        # Returns the position where the reading stopped, or `-1` if the end of
        # `self` is reached first.
        private fun read_md_link(out: FlatBuffer, start: Int): Int do
                var pos = start
                # Number of parentheses currently opened, the link one included.
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if c == '(' then
                                        counter += 1
                                else if c == ' ' then
                                        if counter == 1 then break
                                else if c == ')' then
                                        counter -= 1
                                        if counter == 0 then break
                                end
                                out.add c
                        end
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read a markdown link text from `start` and append it to the `out` buffer.
        #
        # Nested square brackets are kept in the text, the reading stops on the
        # `]` that closes the link text.
        #
        # Returns the position of that `]`, or `-1` if the end of `self` is
        # reached first.
        private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
                var pos = start
                # Number of square brackets currently opened, the link one included.
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '[' then
                                counter += 1
                        else if c == ']' then
                                counter -= 1
                                if counter == 0 then break
                        end
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Extract the XML tag name from a XML tag.
        #
        # The first character and an optional `/` following it are skipped, then
        # letters and digits are read. The last character of `self` is never read.
        # The name is returned in lower case.
        private fun xml_tag: String do
                var tpl = new FlatBuffer
                var pos = 1
                if pos < length and self[1] == '/' then pos += 1
                while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
                        tpl.add self[pos]
                        pos += 1
                end
                return tpl.write_to_string.to_lower
        end

        # Is `self` a non-empty sequence of alphabetic characters only?
        private fun is_valid_html_tag: Bool do
                if is_empty then return false
                for c in self do
                        if not c.is_alpha then return false
                end
                return true
        end

        # Handle the backslash located at `pos` and followed by the character `c`.
        #
        # If `c` is a markdown special character, only `c` is appended to `out`
        # and `pos + 1` (the position of `c`) is returned.
        # Else the backslash is appended to `out` and `pos` is returned.
        private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
                if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
                   c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
                   c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
                   c == '`' or c == '~' or c == '^' then
                        out.add c
                        return pos + 1
                end
                out.add '\\'
                return pos
        end

        # Extract string found at end of fence opening.
        #
        # Leading spaces, backticks and tildes are skipped and the remaining text
        # is returned trimmed.
        # Returns `null` if `self` contains nothing else.
        private fun meta_from_fence: nullable Text do
                for i in [0..chars.length[ do
                        var c = chars[i]
                        if c != ' ' and c != '`' and c != '~' then
                                return substring_from(i).trim
                        end
                end
                return null
        end

        # Is `self` an unsafe HTML element?
        private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

        # Is `self` a HTML block element?
        private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

        # Is `self` a link prefix?
        private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

        # Names of the HTML elements considered unsafe by `is_html_unsafe`.
        private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

        # Names of the HTML elements considered as blocks by `is_html_block`.
        private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

        # Prefixes accepted by `is_link_prefix`.
        private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2595
redef class String

        # Process `self` as markdown with a new `MarkdownProcessor` and return
        # the result.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end