lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for output.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         # ~~~md
  45         # This is a paragraph
  46         # * and this is not a list
  47         # ~~~
  48         #
  49         #   Will produce:
  50         #
  51         # ~~~html
  52         # <p>This is a paragraph
  53         # * and this is not a list</p>
  54         # ~~~
  55         #
  56         #   When using extended mode this changes to:
  57         #
  58         # ~~~html
  59         # <p>This is a paragraph</p>
  60         # <ul>
  61         # <li>and this is not a list</li>
  62         # </ul>
  63         # ~~~
  64         #
  65         # * Fences code blocks
  66         #
  67         #   If you don't want to indent all your code with 4 spaces,
  68         #   you can wrap your code in ``` ``` ``` or `~~~`.
  69         #
  70         #   Here's an example:
  71         #
  72         # ~~~md
  73         # fun test do
  74         #    print "Hello World!"
  75         # end
  76         # ~~~
  77         #
  78         # * Code blocks meta
  79         #
  80         #   If you want to use syntax highlighting tools, most of them need to know what kind
  81         #   of language they are highlighting.
  82         #   You can add an optional language identifier after the fence declaration to output
  83         #   it in the HTML render.
  84         #
  85         # ```nit
  86         # import markdown
  87         #
  88         # print "# Hello World!".md_to_html
  89         # ```
  90         #
  91         #   Becomes
  92         #
  93         # ~~~html
  94         # <pre class="nit"><code>import markdown
  95         #
  96         # print "# Hello World!".md_to_html
  97         # </code></pre>
  98         # ~~~
  99         #
 100         # * Underscores (Emphasis)
 101         #
 102         #   Underscores in the middle of a word like:
 103         #
 104         # ~~~md
 105         # Con_cat_this
 106         # ~~~
 107         #
 108         #   normally produces this:
 109         #
 110         # ~~~html
 111         # <p>Con<em>cat</em>this</p>
 112         # ~~~
 113         #
 114         #   With extended mode they don't result in emphasis.
 115         #
 116         # ~~~html
 117         # <p>Con_cat_this</p>
 118         # ~~~
 119         #
 120         # * Strikethrough
 121         #
 122         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 123         #   a strikethrough span is marked with `~~`.
 124         #
 125         # ~~~md
 126         # ~~Mistaken text.~~
 127         # ~~~
 128         #
 129         #   becomes
 130         #
 131         # ~~~html
 132         # <del>Mistaken text.</del>
 133         # ~~~
 134         var ext_mode = true
 135
 136         # Disable attaching MDLocation to Tokens
 137         #
 138         # Locations are useful for some tools but they may
 139         # cause an important time and space overhead.
 140         #
 141         # Default = `false`
 142         var no_location = false is writable
 143
 144         init do self.emitter = new MarkdownEmitter(self)
 145
 146         # Render the markdown `input` and return what the emitter produced.
             #
             # Link refs and the current line/block are reset before `input` is read.
 147         fun process(input: String): Writable do
                     # drop the state kept from an earlier run
                     current_block = null
                     current_line = null
                     last_link_ref = null
                     link_refs.clear
                     # split the input into lines, then into blocks
                     var document = read_lines(input)
                     document.remove_surrounding_empty_lines
                     recurse(document, false)
                     # render the block tree
                     var rendered = emitter.emit(document.kind)
                     return rendered
 159         end
 160
 161         # Cut `input` into `MDLine`s and gather them under a fresh root `MDBlock`.
             #
             # Tabs are expanded up to the next multiple of 4 columns and `\r` is dropped.
             # Lines accepted by `check_link_ref` are not added to the block.
 162         private fun read_lines(input: String): MDBlock do
                     var root = new MDBlock(new MDLocation(1, 1, 1, 1))
                     var text = new FlatBuffer
                     var total = input.length
                     var cursor = 0
                     var line_no = 0

                     while cursor < total do
                             text.clear
                             # `width` counts expanded columns, `consumed` counts raw chars
                             var width = 0
                             var consumed = 0
                             while cursor < total do
                                     var c = input[cursor]
                                     cursor += 1
                                     consumed += 1
                                     if c == '\n' then break
                                     if c == '\r' then continue
                                     if c == '\t' then
                                             var pad = 4 - (width & 3)
                                             for k in [0..pad[ do text.add ' '
                                             width += pad
                                     else
                                             text.add c
                                             width += 1
                                     end
                             end
                             line_no += 1

                             var span = new MDLocation(line_no, 1, line_no, consumed)
                             var md_line = new MDLine(span, text.write_to_string)
                             # link definitions are recorded, not kept as content
                             if check_link_ref(md_line) then continue
                             root.add_line md_line
                     end
                     return root
 202         end
 203
 204         # Check if `line` belongs to a block link definition.
 205         #
             # Return `true` when the line must be dropped from the content: either it
             # holds a link ref (which is saved into `link_refs`), or it holds the title
             # of a definition found on a previous line.
             #
             # A definition waiting for its title is forgotten as soon as one more
             # non-empty line has been examined, so a quoted line found later in the
             # document is kept as regular content instead of being taken as a title.
 206         private fun check_link_ref(line: MDLine): Bool do
 207                 var md = line.value
 208                 var is_link_ref = false
 209                 var id = new FlatBuffer
 210                 var link = new FlatBuffer
 211                 var comment = new FlatBuffer
 212                 var pos = -1
                     # A definition starts with `[id]:` indented by less than 4 columns.
 213                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 214                         pos = line.leading + 1
 215                         pos = md.read_until(id, pos, ']')
 216                         if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
 217                                 if line.value[pos + 1] == ':' then
 218                                         pos += 2
 219                                         pos = md.skip_spaces(pos)
                                             # The target is either `<...>` or a run ended by a space.
 220                                         if pos >= 0 and line.value[pos] == '<' then
 221                                                 pos += 1
 222                                                 pos = md.read_until(link, pos, '>')
 223                                                 pos += 1
 224                                         else if pos >= 0 then
 225                                                 pos = md.read_until(link, pos, ' ', '\n')
 226                                         end
 227                                         if not link.is_empty then
 228                                                 pos = md.skip_spaces(pos)
 229                                                 if pos > 0 and pos < line.value.length then
                                                             # Optional title: "...", '...' or (...)
 230                                                         var c = line.value[pos]
 231                                                         if c == '\"' or c == '\'' or c == '(' then
 232                                                                 pos += 1
 233                                                                 if c == '(' then
 234                                                                         pos = md.read_until(comment, pos, ')')
 235                                                                 else
 236                                                                         pos = md.read_until(comment, pos, c)
 237                                                                 end
 238                                                                 if pos > 0 then is_link_ref = true
 239                                                         end
 240                                                 else
 241                                                         is_link_ref = true
 242                                                 end
 243                                         end
 244                                 end
 245                         end
 246                 end
 247                 if is_link_ref and not id.is_empty and not link.is_empty then
 248                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 249                         add_link_ref(id.write_to_string, lr)
                             # Only a definition without a title can be completed by a later line.
 250                         if comment.is_empty then
                                     last_link_ref = lr
                             else
                                     last_link_ref = null
                             end
 251                         return true
 252                 else
 253                         comment = new FlatBuffer
                             var pending = last_link_ref
 254                         if not line.is_empty and pending != null then
 255                                 pos = line.leading
 256                                 var c = line.value[pos]
 257                                 if c == '\"' or c == '\'' or c ==  '(' then
 258                                         pos += 1
 259                                         if c == '(' then
 260                                                 pos = md.read_until(comment, pos, ')')
 261                                         else
 262                                                 pos = md.read_until(comment, pos, c)
 263                                         end
 264                                 end
 265                                 if not comment.is_empty then pending.title = comment.write_to_string
                                     # Whether or not it was a title, this non-empty line ends the
                                     # definition: later lines must not be attached to it.
                                     last_link_ref = null
 266                         end
 267                         if comment.is_empty then return false
 268                         return true
 269                 end
 270         end
 271
 272         # Known link refs
 273         # This list will be needed during output to expand links.
 274         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 275
 276         # Last encountered link ref (for multiline definitions)
 277         #
 278         # Markdown allows link refs to be defined over two lines:
 279         #
 280         # ~~~md
 281         # [id]: http://example.com/longish/path/to/resource/here
 282         #       "Optional Title Here"
 283         # ~~~
 284         #
 285         private var last_link_ref: nullable LinkRef = null
 286
 287         # Register `ref` in `link_refs` under the lower-cased `key`.
 288         fun add_link_ref(key: String, ref: LinkRef) do
                     var normalized = key.to_lower
                     link_refs[normalized] = ref
             end
 289
 290         # Recursively split the `root` block.
 291         #
 292         # The block is split according to the type of lines it contains.
 293         # Some blocks, like lists, can be split again recursively.
 294         # The `in_list` mode is used to recurse on list and build
 295         # nested paragraphs or code blocks.
             #
             # `in_list` and `current_block` hold their previous values again when
             # this method returns; `current_line` is not restored.
 296         fun recurse(root: MDBlock, in_list: Bool) do
 297                 var old_mode = self.in_list
 298                 var old_root = self.current_block
 299                 self.in_list = in_list
 300
                     # Skip the empty lines found at the start of the block.
 301                 var line = root.first_line
 302                 while line != null and line.is_empty do
 303                         line = line.next
 304                         if line == null then
                                     # Only empty lines: nothing to split, but the list mode
                                     # switched above must not leak to the caller.
                                     self.in_list = old_mode
                                     return
                             end
 305                 end
 306
 307                 current_line = line
 308                 current_block = root
                     # Each line kind consumes one or more lines and moves `current_line`.
 309                 while current_line != null do
 310                         line_kind(current_line.as(not null)).process(self)
 311                 end
 312                 self.in_list = old_mode
 313                 self.current_block = old_root
 314         end
 315
 316         # Currently processed line.
 317         # Used when visiting blocks with `recurse`.
 318         var current_line: nullable MDLine = null is writable
 319
 320         # Currently processed block.
 321         # Used when visiting blocks with `recurse`.
 322         var current_block: nullable MDBlock = null is writable
 323
 324         # Is the current recursion in list mode?
 325         # Used when visiting blocks with `recurse`
 326         private var in_list = false
 327
 328         # Classify `md` and return the kind of line it holds.
             #
             # Rules are tried in a fixed order and the first one that matches wins;
             # `LineOther` is the fallback.
 330         fun line_kind(md: MDLine): Line do
                     var text = md.value
                     var lead = md.leading
                     var tail = md.trailing
                     if md.is_empty then return new LineEmpty
                     if lead > 3 then return new LineCode

                     # every remaining rule looks at the first significant char
                     var first = text[lead]
                     if first == '#' then return new LineHeadline
                     if first == '>' then return new LineBlockquote

                     # length left once `leading` and `trailing` are subtracted
                     var core = text.length - lead - tail
                     if core > 2 then
                             # fences only exist in extended mode
                             if ext_mode and (first == '`' or first == '~') then
                                     if md.count_chars_start(first) >= 3 then return new LineFence
                             end
                             if first == '*' or first == '-' or first == '_' then
                                     if md.count_chars(first) >= 3 then return new LineHR
                             end
                     end

                     # bullet followed by a space
                     var rest = text.length - lead
                     if rest >= 2 and text[lead + 1] == ' ' then
                             if first == '*' or first == '-' or first == '+' then return new LineUList
                     end

                     # digits followed by ". "
                     if rest >= 3 and first.is_digit then
                             var i = lead + 1
                             while i < text.length and text[i].is_digit do i += 1
                             var dotted = i + 1 < text.length and text[i] == '.' and text[i + 1] == ' '
                             if dotted then return new LineOList
                     end

                     if first == '<' and md.check_html then return new LineXML

                     # a headline may be announced by the line that follows
                     var below = md.next
                     if below != null and not below.is_empty then
                             if below.count_chars('=') > 0 then return new LineHeadline1
                             if below.count_chars('-') > 0 then return new LineHeadline2
                     end
                     return new LineOther
 382         end
 383
 384         # Get the token kind at `pos` in `text`.
             #
             # The decision looks at the char at `pos` (`c`), the char before it (`c0`)
             # and the two chars after it (`c1`, `c2`). A space stands in for any of
             # these neighbours that falls outside of `text`.
             #
             # Unless `no_location` is set, the returned token carries a one-column
             # `MDLocation` computed from `current_loc` and `pos`.
 385         fun token_at(text: Text, pos: Int): Token do
 386                 var c0: Char
 387                 var c1: Char
 388                 var c2: Char
 389
 390                 if pos > 0 then
 391                         c0 = text[pos - 1]
 392                 else
 393                         c0 = ' '
 394                 end
 395                 var c = text[pos]
 396
 397                 if pos + 1 < text.length then
 398                         c1 = text[pos + 1]
 399                 else
 400                         c1 = ' '
 401                 end
 402                 if pos + 2 < text.length then
 403                         c2 = text[pos + 2]
 404                 else
 405                         c2 = ' '
 406                 end
 407
 408                 var loc
 409                 if no_location then
 410                         loc = null
 411                 else
                             # start and end are the same line and the same column
 412                         loc = new MDLocation(
 413                                 current_loc.line_start,
 414                                 current_loc.column_start + pos,
 415                                 current_loc.line_start,
 416                                 current_loc.column_start + pos)
 417                 end
 418
 419                 if c == '*' then
 420                         if c1 == '*' then
                                     # `**` is strong unless it has a space on both sides
 421                                 if c0 != ' ' or c2 != ' ' then
 422                                         return new TokenStrongStar(loc, pos, c)
 423                                 else
 424                                         return new TokenEmStar(loc, pos, c)
 425                                 end
 426                         end
                             # a lone `*` with a space on both sides is plain text
 427                         if c0 != ' ' or c1 != ' ' then
 428                                 return new TokenEmStar(loc, pos, c)
 429                         else
 430                                 return new TokenNone(loc, pos, c)
 431                         end
 432                 else if c == '_' then
 433                         if c1 == '_' then
 434                                 if c0 != ' ' or c2 != ' ' then
 435                                         return new TokenStrongUnderscore(loc, pos, c)
 436                                 else
 437                                         return new TokenEmUnderscore(loc, pos, c)
 438                                 end
 439                         end
 440                         if ext_mode then
                                     # extended mode: `_` between two letters or digits is plain text
 441                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 442                                    (c1.is_letter or c1.is_digit) then
 443                                         return new TokenNone(loc, pos, c)
 444                                 else
 445                                         return new TokenEmUnderscore(loc, pos, c)
 446                                 end
 447                         end
 448                         if c0 != ' ' or c1 != ' ' then
 449                                 return new TokenEmUnderscore(loc, pos, c)
 450                         else
 451                                 return new TokenNone(loc, pos, c)
 452                         end
 453                 else if c == '!' then
                             # `![` opens an image
 454                         if c1 == '[' then return new TokenImage(loc, pos, c)
 455                         return new TokenNone(loc, pos, c)
 456                 else if c == '[' then
 457                         return new TokenLink(loc, pos, c)
 458                 else if c == ']' then
 459                         return new TokenNone(loc, pos, c)
 460                 else if c == '`' then
 461                         if c1 == '`' then
 462                                 return new TokenCodeDouble(loc, pos, c)
 463                         else
 464                                 return new TokenCodeSingle(loc, pos, c)
 465                         end
 466                 else if c == '\\' then
                             # a backslash is an escape only in front of one of these chars
 467                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 468                                 return new TokenEscape(loc, pos, c)
 469                         else
 470                                 return new TokenNone(loc, pos, c)
 471                         end
 472                 else if c == '<' then
 473                         return new TokenHTML(loc, pos, c)
 474                 else if c == '&' then
 475                         return new TokenEntity(loc, pos, c)
 476                 else
 477                         if ext_mode then
                                     # `~~` is a strike marker in extended mode only
 478                                 if c == '~' and c1 == '~' then
 479                                         return new TokenStrike(loc, pos, c)
 480                                 end
 481                         end
 482                         return new TokenNone(loc, pos, c)
 483                 end
 484         end
 485
 486         # Return the index of the first token of `text`, at or after `start`, that
             # has the same type as `token`; return `-1` when there is none.
 487         fun find_token(text: Text, start: Int, token: Token): Int do
                     for index in [start..text.length[ do
                             if token_at(text, index).is_same_type(token) then return index
                     end
 495                 return -1
 496         end
 497
 498         # Location that `token_at` uses as the origin of the tokens it builds.
             #
             # It is read from the emitter, which shifts it when it walks over a `\n`.
 502         private fun current_loc: MDLocation do
                     var origin = emitter.current_loc
                     return origin
             end
 503 end
 504
 505 # Emit output corresponding to blocks content.
 506 #
 507 # Blocks are created by a previous pass in `MarkdownProcessor`.
 508 # The emitter uses a `Decorator` to select the output format.
     #
     # Output goes to the buffer on top of a stack of buffers, and the location of
     # the text being emitted is read from the top of a stack of locations.
 509 class MarkdownEmitter
 510
 511         # Kind of processor used for parsing.
 512         type PROCESSOR: MarkdownProcessor
 513
 514         # Processor containing link refs.
 515         var processor: PROCESSOR
 516
 517         # Kind of decorator used for decoration.
 518         type DECORATOR: Decorator
 519
 520         # Decorator used for output.
 521         # Default is `HTMLDecorator`
 522         var decorator: DECORATOR is writable, lazy do
 523                 return new HTMLDecorator
 524         end
 525
 526         # Create a new `MarkdownEmitter` using a custom `decorator`.
 527         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
 528                 init processor
 529                 self.decorator = decorator
 530         end
 531
 532         # Emit `block` into a new buffer pushed on the stack and return that buffer.
 533         fun emit(block: Block): Text do
 534                 var buffer = push_buffer
 535                 block.emit(self)
 536                 pop_buffer
 537                 return buffer
 538         end
 539
 540         # Output the content of `block`.
 541         fun emit_in(block: Block) do block.emit_in(self)
 542
 543         # Transform and emit markdown text
 544         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 545
 546         # Transform and emit markdown text starting at `start` and
 547         # until a token with the same type as `token` is found.
 548         # Go until the end of `text` if `token` is null.
             #
             # Return the position of the token that stopped the emission, or `-1`
             # when the end of `text` was reached.
             #
             # `current_text` and `current_pos` get their previous values back only
             # when the end of `text` is reached; when a stop token is found they
             # keep pointing at `text` and at the returned position.
 549         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 550                 var old_text = current_text
 551                 var old_pos = current_pos
 552                 current_text = text
 553                 current_pos = start
 554                 while current_pos < text.length do
 555                         if text[current_pos] == '\n' then
                                     # Move the current location to the next line. The negative
                                     # column makes `column_start + pos`, as computed by
                                     # `MarkdownProcessor::token_at`, count from this `\n`.
 556                                 current_loc.line_start += 1
 557                                 current_loc.column_start = -current_pos
 558                         end
 559                         var mt = processor.token_at(text, current_pos)
                             # An emphasis stop token is also satisfied by the strong
                             # token built from the same char.
 560                         if (token != null and not token isa TokenNone) and
 561                         (mt.is_same_type(token) or
 562                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 563                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 564                                 return current_pos
 565                         end
                             # NOTE(review): `mt.emit` presumably advances `current_pos` past
                             # what it consumed; confirm against the `Token` classes.
 566                         mt.emit(self)
 567                         current_pos += 1
 568                 end
 569                 current_text = old_text
 570                 current_pos = old_pos
 571                 return -1
 572         end
 573
 574         # Currently processed position in `current_text`.
 575         # Used when visiting inline production with `emit_text_until`.
 576         private var current_pos: Int = -1
 577
 578         # Currently processed text.
 579         # Used when visiting inline production with `emit_text_until`.
 580         private var current_text: nullable Text = null
 581
 582         # Stacked buffers.
 583         private var buffer_stack = new List[FlatBuffer]
 584
 585         # Push a new buffer on the stack.
 586         private fun push_buffer: FlatBuffer do
 587                 var buffer = new FlatBuffer
 588                 buffer_stack.add buffer
 589                 return buffer
 590         end
 591
 592         # Pop the last buffer.
 593         private fun pop_buffer do buffer_stack.pop
 594
 595         # Current output buffer.
             #
             # The stack of buffers must not be empty.
 596         private fun current_buffer: FlatBuffer do
 597                 assert not buffer_stack.is_empty
 598                 return buffer_stack.last
 599         end
 600
 601         # Stacked locations.
 602         private var loc_stack = new List[MDLocation]
 603
 604         # Push a new MDLocation on the stack.
 605         private fun push_loc(location: MDLocation) do loc_stack.add location
 606
 607         # Pop the last location and return it.
 608         private fun pop_loc: MDLocation do return loc_stack.pop
 609
 610         # Location on top of the stack.
             #
             # The stack of locations must not be empty.
 611         private fun current_loc: MDLocation do
 612                 assert not loc_stack.is_empty
 613                 return loc_stack.last
 614         end
 615
 616         # Append `e` to current buffer.
             #
             # A `Text` is appended as is, anything else through `write_to_string`.
 617         fun add(e: Writable) do
 618                 if e isa Text then
 619                         current_buffer.append e
 620                 else
 621                         current_buffer.append e.write_to_string
 622                 end
 623         end
 624
 625         # Append `c` to current buffer.
 626         fun addc(c: Char) do
 627                 current_buffer.add c
 628         end
 629
 630         # Append a "\n" line break.
 631         fun addn do addc '\n'
 632 end
 633
 634 # A Link Reference.
 635 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
 636 #
 637 # ~~~raw
 638 # [1]: http://example.com/ "Optional title"
 639 # ~~~
 640 class LinkRef
 641
 642         # Link href
 643         var link: String
 644
 645         # Optional link title
 646         var title: nullable String = null
 647
 648         # Is the link an abbreviation?
 649         var is_abbrev = false
 650
 651         # Create a link with a title.
             #
             # `title` is stored as given, `null` included.
 652         init with_title(link: String, title: nullable String) do
 653                 init(link)
 654                 self.title = title
 655         end
 656 end
 657
 658 # A `Decorator` is used to emit markdown into a specific format.
 659 # Default decorator used is `HTMLDecorator`.
     #
     # Every service receives the emitter `v` to write into. Only `add_char` has a
     # default implementation; all the other services are abstract.
 660 interface Decorator
 661
 662         # Kind of emitter used for decoration.
 663         type EMITTER: MarkdownEmitter
 664
 665         # Render a single plain char.
 666         #
 667         # Redefine this method to add special escaping for plain text.
             # By default `c` is appended unchanged to the emitter.
 668         fun add_char(v: EMITTER, c: Char) do v.addc c
 669
 670         # Render a ruler block.
 671         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
 672
 673         # Render a headline block with corresponding level.
 674         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
 675
 676         # Render a paragraph block.
 677         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
 678
 679         # Render a code or fence block.
 680         fun add_code(v: EMITTER, block: BlockCode) is abstract
 681
 682         # Render a blockquote.
 683         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
 684
 685         # Render an unordered list.
 686         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
 687
 688         # Render an ordered list.
 689         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
 690
 691         # Render a list item.
 692         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
 693
 694         # Render an emphasis text.
 695         fun add_em(v: EMITTER, text: Text) is abstract
 696
 697         # Render a strong text.
 698         fun add_strong(v: EMITTER, text: Text) is abstract
 699
 700         # Render a strike text.
 701         #
 702         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 703         fun add_strike(v: EMITTER, text: Text) is abstract
 704
 705         # Render a link.
 706         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 707
 708         # Render an image.
 709         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 710
 711         # Render an abbreviation.
 712         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
 713
 714         # Render a code span reading from a buffer.
             #
             # NOTE(review): `from` and `to` presumably delimit the span inside
             # `buffer`; confirm whether `to` is inclusive against the implementations.
 715         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 716
 717         # Render a text and escape it.
 718         fun append_value(v: EMITTER, value: Text) is abstract
 719
 720         # Render code text from buffer and escape it.
 721         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 722
 723         # Render a character escape.
 724         fun escape_char(v: EMITTER, char: Char) is abstract
 725
 726         # Render a line break
 727         fun add_line_break(v: EMITTER) is abstract
 728
 729         # Generate a new html valid id from a `String`.
 730         fun strip_id(txt: String): String is abstract
 731
 732         # Headlines found during the processing, labeled by their ids.
 733         fun headlines: ArrayMap[String, HeadLine] is abstract
 734 end
 735
 736 # Class representing a markdown headline.
 737 class HeadLine
 738         # Unique identifier of this headline.
 739         var id: String
 740
 741         # Text of the headline.
 742         var title: String
 743
 744         # Level of this headline.
 745         #
 746         # According to the markdown specification, level must be in `[1..6]`.
 747         var level: Int
 748 end
 749
 750 # `Decorator` that outputs HTML.
 751 class HTMLDecorator
 752         super Decorator
 753
 754         redef var headlines = new ArrayMap[String, HeadLine]
 755
 756         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 757
 758         redef fun add_headline(v, block) do
 759                 # save headline
 760                 var txt = block.block.first_line.value
 761                 var id = strip_id(txt)
 762                 var lvl = block.depth
 763                 headlines[id] = new HeadLine(id, txt, lvl)
 764                 # output it
 765                 v.add "<h{lvl} id=\"{id}\">"
 766                 v.emit_in block
 767                 v.add "</h{lvl}>\n"
 768         end
 769
 770         redef fun add_paragraph(v, block) do
 771                 v.add "<p>"
 772                 v.emit_in block
 773                 v.add "</p>\n"
 774         end
 775
 776         redef fun add_code(v, block) do
 777                 var meta = block.meta
 778                 if meta != null then
 779                         v.add "<pre class=\""
 780                         append_value(v, meta)
 781                         v.add "\"><code>"
 782                 else
 783                         v.add "<pre><code>"
 784                 end
 785                 v.emit_in block
 786                 v.add "</code></pre>\n"
 787         end
 788
 789         redef fun add_blockquote(v, block) do
 790                 v.add "<blockquote>\n"
 791                 v.emit_in block
 792                 v.add "</blockquote>\n"
 793         end
 794
 795         redef fun add_unorderedlist(v, block) do
 796                 v.add "<ul>\n"
 797                 v.emit_in block
 798                 v.add "</ul>\n"
 799         end
 800
 801         redef fun add_orderedlist(v, block) do
 802                 v.add "<ol>\n"
 803                 v.emit_in block
 804                 v.add "</ol>\n"
 805         end
 806
 807         redef fun add_listitem(v, block) do
 808                 v.add "<li>"
 809                 v.emit_in block
 810                 v.add "</li>\n"
 811         end
 812
 813         redef fun add_em(v, text) do
 814                 v.add "<em>"
 815                 v.add text
 816                 v.add "</em>"
 817         end
 818
 819         redef fun add_strong(v, text) do
 820                 v.add "<strong>"
 821                 v.add text
 822                 v.add "</strong>"
 823         end
 824
 825         redef fun add_strike(v, text) do
 826                 v.add "<del>"
 827                 v.add text
 828                 v.add "</del>"
 829         end
 830
 831         redef fun add_image(v, link, name, comment) do
 832                 v.add "<img src=\""
 833                 append_value(v, link)
 834                 v.add "\" alt=\""
 835                 append_value(v, name)
 836                 v.add "\""
 837                 if comment != null and not comment.is_empty then
 838                         v.add " title=\""
 839                         append_value(v, comment)
 840                         v.add "\""
 841                 end
 842                 v.add "/>"
 843         end
 844
 845         redef fun add_link(v, link, name, comment) do
 846                 v.add "<a href=\""
 847                 append_value(v, link)
 848                 v.add "\""
 849                 if comment != null and not comment.is_empty then
 850                         v.add " title=\""
 851                         append_value(v, comment)
 852                         v.add "\""
 853                 end
 854                 v.add ">"
 855                 v.emit_text(name)
 856                 v.add "</a>"
 857         end
 858
 859         redef fun add_abbr(v, name, comment) do
 860                 v.add "<abbr title=\""
 861                 append_value(v, comment)
 862                 v.add "\">"
 863                 v.emit_text(name)
 864                 v.add "</abbr>"
 865         end
 866
 867         redef fun add_span_code(v, text, from, to) do
 868                 v.add "<code>"
 869                 append_code(v, text, from, to)
 870                 v.add "</code>"
 871         end
 872
 873         redef fun add_line_break(v) do
 874                 v.add "<br/>"
 875         end
 876
 877         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 878
 879         redef fun escape_char(v, c) do
 880                 if c == '&' then
 881                         v.add "&amp;"
 882                 else if c == '<' then
 883                         v.add "&lt;"
 884                 else if c == '>' then
 885                         v.add "&gt;"
 886                 else if c == '"' then
 887                         v.add "&quot;"
 888                 else if c == '\'' then
 889                         v.add "&apos;"
 890                 else
 891                         v.addc c
 892                 end
 893         end
 894
 895         redef fun append_code(v, buffer, from, to) do
 896                 for i in [from..to[ do
 897                         var c = buffer[i]
 898                         if c == '&' then
 899                                 v.add "&amp;"
 900                         else if c == '<' then
 901                                 v.add "&lt;"
 902                         else if c == '>' then
 903                                 v.add "&gt;"
 904                         else
 905                                 v.addc c
 906                         end
 907                 end
 908         end
 909
 910         redef fun strip_id(txt) do
 911                 # strip id
 912                 var b = new FlatBuffer
 913                 for c in txt do
 914                         if c == ' ' then
 915                                 b.add '_'
 916                         else
 917                                 if not c.is_letter and
 918                                    not c.is_digit and
 919                                    not allowed_id_chars.has(c) then continue
 920                                 b.add c
 921                         end
 922                 end
 923                 var res = b.to_s
 924                 var key = res
 925                 # check for multiple id definitions
 926                 if headlines.has_key(key) then
 927                         var i = 1
 928                         key = "{res}_{i}"
 929                         while headlines.has_key(key) do
 930                                 i += 1
 931                                 key = "{res}_{i}"
 932                         end
 933                 end
 934                 return key
 935         end
 936
 937         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 938 end
 939
 940 # Location in a Markdown input.
 941 class MDLocation
 942
 943         # Starting line number (starting from 1).
 944         var line_start: Int
 945
 946         # Starting column number (starting from 1).
 947         var column_start: Int
 948
 949         # Stopping line number (starting from 1).
 950         var line_end: Int
 951
 952         # Stopping column number (starting from 1).
 953         var column_end: Int
 954
 955         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 956
 957         # Return a copy of `self`.
 958         fun copy: MDLocation do
 959                 return new MDLocation(line_start, column_start, line_end, column_end)
 960         end
 961 end
 962
 963 # A block of markdown lines.
 964 # A `MDBlock` can contains lines and/or sub-blocks.
 965 class MDBlock
 966
 967         # Position of `self` in the input.
 968         var location: MDLocation
 969
 970         # Kind of block.
 971         # See `Block`.
 972         var kind: Block = new BlockNone(self) is writable
 973
 974         # First line if any.
 975         var first_line: nullable MDLine = null is writable
 976
 977         # Last line if any.
 978         var last_line: nullable MDLine = null is writable
 979
 980         # First sub-block if any.
 981         var first_block: nullable MDBlock = null is writable
 982
 983         # Last sub-block if any.
 984         var last_block: nullable MDBlock = null is writable
 985
 986         # Previous block if any.
 987         var prev: nullable MDBlock = null is writable
 988
 989         # Next block if any.
 990         var next: nullable MDBlock = null is writable
 991
 992         # Does this block contain subblocks?
 993         fun has_blocks: Bool do return first_block != null
 994
 995         # Count sub-blocks.
 996         fun count_blocks: Int do
 997                 var count = 0
 998                 var block = first_block
 999                 while block != null do
1000                         count += 1
1001                         block = block.next
1002                 end
1003                 return count
1004         end
1005
1006         # Does this block contain lines?
1007         fun has_lines: Bool do return first_line != null
1008
1009         # Count block lines.
1010         fun count_lines: Int do
1011                 var count = 0
1012                 var line = first_line
1013                 while line != null do
1014                         count += 1
1015                         line = line.next
1016                 end
1017                 return count
1018         end
1019
1020         # Split `self` creating a new sub-block having `line` has `last_line`.
1021         fun split(line: MDLine): MDBlock do
1022                 # location for new block
1023                 var new_loc = new MDLocation(
1024                         first_line.location.line_start,
1025                         first_line.location.column_start,
1026                         line.location.line_end,
1027                         line.location.column_end)
1028                 # create block
1029                 var block = new MDBlock(new_loc)
1030                 block.first_line = first_line
1031                 block.last_line = line
1032                 first_line = line.next
1033                 line.next = null
1034                 if first_line == null then
1035                         last_line = null
1036                 else
1037                         first_line.prev = null
1038                         # update current block loc
1039                         location.line_start = first_line.location.line_start
1040                         location.column_start = first_line.location.column_start
1041                 end
1042                 if first_block == null then
1043                         first_block = block
1044                         last_block = block
1045                 else
1046                         last_block.next = block
1047                         last_block = block
1048                 end
1049                 return block
1050         end
1051
1052         # Add a `line` to this block.
1053         fun add_line(line: MDLine) do
1054                 if last_line == null then
1055                         first_line = line
1056                         last_line = line
1057                 else
1058                         last_line.next_empty = line.is_empty
1059                         line.prev_empty = last_line.is_empty
1060                         line.prev = last_line
1061                         last_line.next = line
1062                         last_line = line
1063                 end
1064         end
1065
1066         # Remove `line` from this block.
1067         fun remove_line(line: MDLine) do
1068                 if line.prev == null then
1069                         first_line = line.next
1070                 else
1071                         line.prev.next = line.next
1072                 end
1073                 if line.next == null then
1074                         last_line = line.prev
1075                 else
1076                         line.next.prev = line.prev
1077                 end
1078                 line.prev = null
1079                 line.next = null
1080         end
1081
1082         # Remove leading empty lines.
1083         fun remove_leading_empty_lines: Bool do
1084                 var was_empty = false
1085                 var line = first_line
1086                 while line != null and line.is_empty do
1087                         remove_line line
1088                         line = first_line
1089                         was_empty = true
1090                 end
1091                 return was_empty
1092         end
1093
1094         # Remove trailing empty lines.
1095         fun remove_trailing_empty_lines: Bool do
1096                 var was_empty = false
1097                 var line = last_line
1098                 while line != null and line.is_empty do
1099                         remove_line line
1100                         line = last_line
1101                         was_empty = true
1102                 end
1103                 return was_empty
1104         end
1105
1106         # Remove leading and trailing empty lines.
1107         fun remove_surrounding_empty_lines: Bool do
1108                 var was_empty = false
1109                 if remove_leading_empty_lines then was_empty = true
1110                 if remove_trailing_empty_lines then was_empty = true
1111                 return was_empty
1112         end
1113
1114         # Remove list markers and up to 4 leading spaces.
1115         # Used to clean nested lists.
1116         fun remove_list_indent(v: MarkdownProcessor) do
1117                 var line = first_line
1118                 while line != null do
1119                         if not line.is_empty then
1120                                 var kind = v.line_kind(line)
1121                                 if kind isa LineList then
1122                                         line.value = kind.extract_value(line)
1123                                 else
1124                                         line.value = line.value.substring_from(line.leading.min(4))
1125                                 end
1126                                 line.leading = line.process_leading
1127                         end
1128                         line = line.next
1129                 end
1130         end
1131
1132         # Collect block line text.
1133         fun text: String do
1134                 var text = new FlatBuffer
1135                 var line = first_line
1136                 while line != null do
1137                         if not line.is_empty then
1138                                 text.append line.text
1139                         end
1140                         text.append "\n"
1141                         line = line.next
1142                 end
1143                 var block = first_block
1144                 while block != null do
1145                         text.append block.text
1146                         text.append "\n"
1147                         block = block.next
1148                 end
1149                 return text.write_to_string
1150         end
1151 end
1152
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v.decorator`.
        #
        # By default only the content of `self` is emitted (see `emit_in`).
        fun emit(v: MarkdownEmitter) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed first.
        # If lines remain they are emitted, else the sub-blocks are.
        fun emit_in(v: MarkdownEmitter) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # The lines are first gathered, without leading and trailing spaces, in a buffer
        # pushed on `v`, then the whole buffer is emitted with `v.emit_text`.
        # A line break is added after each line ending with 2 trailing spaces or more.
        fun emit_lines(v: MarkdownEmitter) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a number of chars, not an end index:
                                # the leading spaces must be subtracted too, or as many
                                # trailing spaces would be kept.
                                var len = line.value.length - line.leading - line.trailing
                                if len < 0 then len = 0
                                v.add line.value.substring(line.leading, len)
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while its kind is emitted.
        fun emit_blocks(v: MarkdownEmitter) do
                var block = self.block.first_block
                while block != null do
                        v.push_loc(block.location)
                        block.kind.emit(v)
                        v.pop_loc
                        block = block.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Empty lines only give a new line.
        # Except in a `BlockFence`, the 4 first spaces of lines starting with 4 spaces are removed.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        # NOTE(review): `substring` takes a count, so this strips
                                        # trailing spaces only beyond the fourth, while the other
                                        # branch keeps them all; confirm the intent before changing.
                                        text.append str.substring(4, str.length - line.trailing)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1222
# A block without any markdown specificities.
#
# It redefines nothing and so uses the default `Block` implementation.
# `MDBlock::kind` is initialized with an instance of this class,
# and `BlockList::expand_paragraphs` tests for it.
class BlockNone
        super Block
end
1230
# A markdown blockquote.
class BlockQuote
        super Block

        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` starting (after its indentation) with `>`,
        # the indentation, the marker and one following space, if any, are dropped,
        # then the indentation of the line is computed again.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty and line.value[line.leading] == '>' then
                                # count of chars to drop: indentation + marker (+ one space)
                                var rem = line.leading + 1
                                if rem < line.value.length and line.value[rem] == ' ' then rem += 1
                                line.value = line.value.substring_from(rem)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1256
# A markdown code block.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Index of the first char of each line to output.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Output each line from `line_start` with the decorator `append_code`.
        # Every line, even an empty one, is followed by a new line.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                var code = line.value
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        line = line.next
                end
        end
end
1282
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`, except for `line_start`.
# `Block::raw_content` also tests for this class to keep the lines unchanged.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1293
# A markdown headline.
class BlockHeadline
        super Block

        # Emit the headline with the decorator.
        #
        # The location pushed on `v` is a copy of the block location
        # whose starting column is shifted by `start`.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Index of the first char of the title in the original line.
        #
        # Set by `transform_headline`.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from the first line of `block`.
        #
        # Opening `#` (and the spaces after them) as well as closing `#`
        # (and the spaces before them) are removed from the line.
        # `depth` is set to the number of opening `#`, capped to 6,
        # and `start` to the index of the first char of the title.
        # A line with no title left is marked as empty.
        #
        # Does nothing if `depth` is already set, or if there is no first line
        # or an empty one.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var line = block.first_line
                if line == null or line.is_empty then return
                var value = line.value
                # skip opening marks and the following spaces
                var level = 0
                var start = line.leading
                while start < value.length and value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < value.length and value[start] == ' ' do start += 1
                # skip closing marks and the preceding spaces,
                # never going back before the start of the title
                var nend = value.length - line.trailing - 1
                while nend >= start and value[nend] == '#' do nend -= 1
                while nend >= start and value[nend] == ' ' do nend -= 1
                if nend < start then
                        # only marks on this line (e.g. `#` or `# #`)
                        line.is_empty = true
                else
                        line.value = value.substring(start, nend - start + 1)
                        line.leading = 0
                        line.trailing = 0
                end
                self.start = start
                depth = level.min(6)
        end
end
1339
# An item of a markdown list.
class BlockListItem
        super Block

        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1346
# A markdown list block.
# Either ordered or unordered, this class mainly factorizes the code shared by both.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins on each list line and on each
        # non-empty, unindented line following an empty one.
        # The lines before such a line are split into a `BlockListItem` sub-block,
        # and so are the lines left at the end.
        private fun init_block(v: MarkdownProcessor) do
                var first = block.first_line
                var line = first.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # As soon as one list item of `block` contains a `BlockParagraph`,
        # every `BlockNone` found in the list items becomes a `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                # First pass: look for a paragraph in any list item.
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null do
                                        if child.kind isa BlockParagraph then
                                                found = true
                                                break
                                        end
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # Second pass: promote the blocks without kind.
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var sub = item.first_block
                                while sub != null do
                                        if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
                                        sub = sub.next
                                end
                        end
                        item = item.next
                end
        end
end
1404
# A markdown list whose items are ordered.
class BlockOrderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1411
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1418
# A paragraph of markdown text.
class BlockParagraph
        super Block

        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1425
# A markdown horizontal ruler.
class BlockRuler
        super Block

        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1432
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Output the lines unchanged, each one followed by a new line.
        # Empty lines only give the new line.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                v.add line.value
                        end
                        v.addn
                        line = line.next
                end
        end
end
1446
1447 # A markdown line.
1448 class MDLine
1449
        # Location of `self` in the original input.
        var location: MDLocation

        # Text contained in this line.
        #
        # The value is rewritten while blocks are processed,
        # e.g. to remove list, blockquote or headline markers.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        #
        # `true` by default, `init` sets it to `false` when `value` has other chars.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        #
        # Set by `MDBlock::add_line` and `clear`.
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        #
        # Set by `MDBlock::add_line` and `clear`.
        var next_empty: Bool = false is writable
1471
1472         # Initialize a new MDLine from its string value
1473         init do
1474                 self.leading = process_leading
1475                 if leading != value.length then
1476                         self.is_empty = false
1477                         self.trailing = process_trailing
1478                 end
1479         end
1480
1481         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1482         fun clear do
1483                 value = ""
1484                 leading = 0
1485                 trailing = 0
1486                 is_empty = true
1487                 if prev != null then prev.next_empty = true
1488                 if next != null then next.prev_empty = true
1489         end
1490
        # Number of leading spaces on this line.
        #
        # Callers assign it the result of `process_leading` after changing `value`.
        var leading: Int = 0 is writable
1493
1494         # Compute `leading` depending on `value`.
1495         fun process_leading: Int do
1496                 var count = 0
1497                 var value = self.value
1498                 while count < value.length and value[count] == ' ' do count += 1
1499                 if leading == value.length then clear
1500                 return count
1501         end
1502
        # Number of trailing spaces on this line.
        #
        # Computed by `process_trailing` in `init` for non-empty lines, and reset to 0 by `clear`.
        var trailing: Int = 0 is writable
1505
1506         # Compute `trailing` depending on `value`.
1507         fun process_trailing: Int do
1508                 var count = 0
1509                 var value = self.value
1510                 while value[value.length - count - 1] == ' ' do
1511                         count += 1
1512                 end
1513                 return count
1514         end
1515
1516         # Count the amount of `ch` in this line.
1517         # Return A value > 0 if this line only consists of `ch` end spaces.
1518         fun count_chars(ch: Char): Int do
1519                 var count = 0
1520                 for c in value do
1521                         if c == ' ' then
1522                                 continue
1523                         end
1524                         if c == ch then
1525                                 count += 1
1526                                 continue
1527                         end
1528                         count = 0
1529                         break
1530                 end
1531                 return count
1532         end
1533
1534         # Count the amount of `ch` at the start of this line ignoring spaces.
1535         fun count_chars_start(ch: Char): Int do
1536                 var count = 0
1537                 for c in value do
1538                         if c == ' ' then
1539                                 continue
1540                         end
1541                         if c == ch then
1542                                 count += 1
1543                         else
1544                                 break
1545                         end
1546                 end
1547                 return count
1548         end
1549
        # Last XML line if any.
        #
        # Set by `check_html` to the line where the XML markup starting on `self` ends.
        private var xml_end_line: nullable MDLine = null
1552
1553         # Does `value` contains valid XML markup?
1554         private fun check_html: Bool do
1555                 var tags = new Array[String]
1556                 var tmp = new FlatBuffer
1557                 var pos = leading
1558                 if pos + 1 < value.length and value[pos + 1] == '!' then
1559                         if read_xml_comment(self, pos) > 0 then return true
1560                 end
1561                 pos = value.read_xml(tmp, pos, false)
1562                 var tag: String
1563                 if pos > -1 then
1564                         tag = tmp.xml_tag
1565                         if not tag.is_html_block then
1566                                 return false
1567                         end
1568                         if tag == "hr" then
1569                                 xml_end_line = self
1570                                 return true
1571                         end
1572                         tags.add tag
1573                         var line: nullable MDLine = self
1574                         while line != null do
1575                                 while pos < line.value.length and line.value[pos] != '<' do
1576                                         pos += 1
1577                                 end
1578                                 if pos >= line.value.length then
1579                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1580                                                 tags.pop
1581                                                 if tags.is_empty then
1582                                                         xml_end_line = line
1583                                                         break
1584                                                 end
1585                                         end
1586                                         line = line.next
1587                                         pos = 0
1588                                 else
1589                                         tmp = new FlatBuffer
1590                                         var new_pos = line.value.read_xml(tmp, pos, false)
1591                                         if new_pos > 0 then
1592                                                 tag = tmp.xml_tag
1593                                                 if tag.is_html_block and not tag == "hr" then
1594                                                         if tmp[1] == '/' then
1595                                                                 if tags.last != tag then
1596                                                                         return false
1597                                                                 end
1598                                                                 tags.pop
1599                                                         else
1600                                                                 tags.add tag
1601                                                         end
1602                                                 end
1603                                                 if tags.is_empty then
1604                                                         xml_end_line = line
1605                                                         break
1606                                                 end
1607                                                 pos = new_pos
1608                                         else
1609                                                 pos += 1
1610                                         end
1611                                 end
1612                         end
1613                         return tags.is_empty
1614                 end
1615                 return false
1616         end
1617
1618         # Read a XML comment.
1619         # Used by `check_html`.
1620         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1621                 var line: nullable MDLine = first_line
1622                 if start + 3 < line.value.length then
1623                         if line.value[2] == '-' and line.value[3] == '-' then
1624                                 var pos = start + 4
1625                                 while line != null do
1626                                         while pos < line.value.length and line.value[pos] != '-' do
1627                                                 pos += 1
1628                                         end
1629                                         if pos == line.value.length then
1630                                                 line = line.next
1631                                                 pos = 0
1632                                         else
1633                                                 if pos + 2 < line.value.length then
1634                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1635                                                                 first_line.xml_end_line = line
1636                                                                 return pos + 3
1637                                                         end
1638                                                 end
1639                                                 pos += 1
1640                                         end
1641                                 end
1642                         end
1643                 end
1644                 return -1
1645         end
1646
1647         # Extract the text of `self` without leading and trailing.
1648         fun text: String do return value.substring(leading, value.length - trailing)
1649 end
1650
# A kind of markdown line.
#
# Each kind knows how to turn the lines found at the current position of a
# `MarkdownProcessor` into blocks.
interface Line

        # Process the line found at `v.current_line`.
        #
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1658
# A blank markdown line.
class LineEmpty
        super Line

        # Skip the line: processing resumes at the following one.
        redef fun process(v) do
                var blank = v.current_line
                v.current_line = blank.next
        end
end
1667
# A line of plain text, without any specific markdown construct.
#
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the following lines into a `BlockParagraph` or a `BlockNone`.
        redef fun process(v) do
                var line = v.current_line
                var was_empty = line.prev_empty
                # look for the first line that does not belong to this block
                while line != null and not line.is_empty do
                        var kind = v.line_kind(line)
                        var starts_list = kind isa LineList and (v.in_list or v.ext_mode)
                        var starts_code = v.ext_mode and (kind isa LineCode or kind isa LineFence)
                        # `LineHeadline1` and `LineHeadline2` are `LineHeadline` too
                        var starts_block = kind isa LineHeadline or kind isa LineHR or
                                kind isa LineBlockquote or kind isa LineXML
                        if starts_list or starts_code or starts_block then break
                        line = line.next
                end
                # split the block out and choose its kind
                var block: MDBlock
                var is_paragraph: Bool
                if line == null then
                        # ran to the end of the current block
                        block = v.current_block.split(v.current_block.last_line.as(not null))
                        is_paragraph = not v.in_list or was_empty
                else if line.is_empty then
                        # stopped on an empty line, which is taken along: always a paragraph
                        block = v.current_block.split(line)
                        is_paragraph = true
                else
                        # interrupted by another construct: stop right before it
                        block = v.current_block.split(line.prev.as(not null))
                        is_paragraph = not v.in_list or was_empty
                end
                if is_paragraph then
                        block.kind = new BlockParagraph(block)
                else
                        block.kind = new BlockNone(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1717
# A line of an indented markdown code block.
class LineCode
        super Line

        # Gather the consecutive code lines, and the empty lines between them,
        # into a `BlockCode`.
        redef fun process(v) do
                # first line that is neither empty nor code
                var after = v.current_line
                while after != null and (after.is_empty or v.line_kind(after) isa LineCode) do
                        after = after.next
                end
                # the block ends right before it, or with the current block
                var last: nullable MDLine
                if after != null then
                        last = after.prev
                else
                        last = v.current_block.last_line
                end
                var code = v.current_block.split(last.as(not null))
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1740
# A line opening a block of raw XML.
class LineXML
        super Line

        # Isolate the lines up to `xml_end_line` into a `BlockXML`.
        redef fun process(v) do
                var first = v.current_line
                # close whatever precedes the XML
                var before = first.prev
                if before != null then v.current_block.split(before)
                var last = first.xml_end_line.as(not null)
                var xml = v.current_block.split(last)
                xml.kind = new BlockXML(xml)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1755
# A line of a markdown blockquote.
class LineBlockquote
        super Line

        # Gather the quoted lines into a `BlockQuote` and process its content.
        redef fun process(v) do
                # The quote stops at a non-empty, unindented line that follows an
                # empty line and is not itself a blockquote line.
                var after = v.current_line
                while after != null do
                        var starts_new_block = not after.is_empty and after.prev_empty and
                                after.leading == 0 and not v.line_kind(after) isa LineBlockquote
                        if starts_new_block then break
                        after = after.next
                end
                # build sub block
                var last: nullable MDLine
                if after != null then
                        last = after.prev
                else
                        last = v.current_block.last_line
                end
                var quote = v.current_block.split(last.as(not null))
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                v.current_line = after
                # the quoted content is markdown too
                v.recurse(quote, false)
                v.current_line = v.current_block.first_line
        end
end
1785
# A markdown horizontal ruler line.
class LineHR
        super Line

        # Isolate the line into a `BlockRuler`.
        redef fun process(v) do
                var line = v.current_line
                # close whatever precedes the ruler
                var before = line.prev
                if before != null then v.current_block.split(before)
                var ruler = v.current_block.split(line.as(not null))
                ruler.kind = new BlockRuler(ruler)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1799
# A line delimiting a markdown fenced code block.
class LineFence
        super Line

        # Gather the lines up to the closing fence into a `BlockFence`.
        #
        # Without a closing fence, the block runs to the end of the current block.
        redef fun process(v) do
                # look for the closing fence, starting after the opening one
                var line = v.current_line.next
                while line != null and not v.line_kind(line) isa LineFence do
                        line = line.next
                end
                # the closing fence is part of the block: step past it
                if line != null then line = line.next
                # build fence block
                var last: nullable MDLine
                if line != null then
                        last = line.prev
                else
                        last = v.current_block.last_line
                end
                var block = v.current_block.split(last.as(not null))
                block.remove_surrounding_empty_lines
                # the meta is read from the opening fence before it gets cleared
                var meta = block.first_line.value.meta_from_fence
                block.kind = new BlockFence(block, meta)
                block.first_line.clear
                var closing = block.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                block.remove_surrounding_empty_lines
                v.current_line = line
        end
end
1833
# A markdown headline line.
class LineHeadline
        super Line

        # Isolate the line into a `BlockHeadline`.
        redef fun process(v) do
                var line = v.current_line
                # close whatever precedes the headline
                var before = line.prev
                if before != null then v.current_block.split(before)
                var headline = v.current_block.split(line.as(not null))
                var kind = new BlockHeadline(headline)
                headline.kind = kind
                kind.transform_headline(headline)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1850
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Isolate the line into a `BlockHeadline` of depth 1.
        #
        # The line that follows is cleared.
        redef fun process(v) do
                var line = v.current_line
                # close whatever precedes the headline
                var before = line.prev
                if before != null then v.current_block.split(before)
                line.next.clear
                var headline = v.current_block.split(line.as(not null))
                var kind = new BlockHeadline(headline)
                kind.depth = 1
                kind.transform_headline(headline)
                headline.kind = kind
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1869
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Isolate the line into a `BlockHeadline` of depth 2.
        #
        # The line that follows is cleared.
        redef fun process(v) do
                var line = v.current_line
                # close whatever precedes the headline
                var before = line.prev
                if before != null then v.current_block.split(before)
                line.next.clear
                var headline = v.current_block.split(line.as(not null))
                var kind = new BlockHeadline(headline)
                kind.depth = 2
                kind.transform_headline(headline)
                headline.kind = kind
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1888
# A line of a markdown list.
#
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Gather the lines of the list into a list block and process its items.
        redef fun process(v) do
                # The list stops at a non-empty, unindented line that follows an
                # empty line and is not itself a list line.
                var after = v.current_line
                while after != null do
                        var kind_here = v.line_kind(after)
                        var starts_new_block = not after.is_empty and after.prev_empty and
                                after.leading == 0 and not kind_here isa LineList
                        if starts_new_block then break
                        after = after.next
                end
                # build list block
                var last: nullable MDLine
                if after != null then
                        last = after.prev
                else
                        last = v.current_block.last_line
                end
                var list = v.current_block.split(last.as(not null))
                var kind = block_kind(list)
                list.kind = kind
                # The flags are reset on both sides of the trimming, presumably
                # because it may change which lines come first and last.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # each sub block is processed as markdown on its own
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = after
        end

        # Create the kind of list block that matches this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract the string value of `line`.
        protected fun extract_value(line: MDLine): String is abstract
end
1934
# A line of an ordered list.
class LineOList
        super LineList

        redef fun block_kind(block) do return new BlockOrderedList(block)

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var text = line.value
                var dot = text.index_of('.')
                return text.substring_from(dot + 2)
        end
end
1945
# A line of an unordered list.
class LineUList
        super LineList

        redef fun block_kind(block) do return new BlockUnorderedList(block)

        # The value starts two characters after the `leading` ones.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1956
# A token represents a character of the markdown input.
#
# Tokens with a specific markup behaviour handle it by redefining `emit`.
abstract class Token

        # Location of `self` in the original input.
        var location: nullable MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output this token using `MarkdownEmitter::decorator`.
        #
        # By default, `char` is simply added to the output.
        fun emit(v: MarkdownEmitter) do v.decorator.add_char(v, char)
end
1973
# A token without any specific meaning.
#
# It is emitted with the default `Token::emit`.
class TokenNone
        super Token
end
1978
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        #
        # When no match is found, `char` is emitted as is.
        redef fun emit(v) do
                # the content is rendered in its own buffer
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = stop
        end
end
1995
# An emphasis token written with a star.
class TokenEmStar
        super TokenEm
end
2000
# An emphasis token written with an underscore.
class TokenEmUnderscore
        super TokenEm
end
2005
# A strong emphasis token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        #
        # When no match is found, `char` is emitted as is.
        redef fun emit(v) do
                # the content starts after the two characters of the opening token
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = stop + 1
        end
end
2022
# A strong emphasis token written with stars.
class TokenStrongStar
        super TokenStrong
end
2027
# A strong emphasis token written with underscores.
class TokenStrongUnderscore
        super TokenStrong
end
2032
# A span code token.
#
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span of code.
        #
        # Spaces surrounding the code are dropped and nothing is emitted for a
        # span made only of spaces.
        # When no match is found, `char` is emitted as is.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var code_start = pos + next_pos + 1
                var code_end = v.processor.find_token(text, code_start, self)
                if code_end <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = code_end + next_pos
                while code_start < code_end and text[code_start] == ' ' do code_start += 1
                if code_start >= code_end then return
                while text[code_end - 1] == ' ' do code_end -= 1
                v.decorator.add_span_code(v, text, code_start, code_end)
        end

        # Number of extra characters in the token, after the first one.
        private fun next_pos: Int is abstract
end
2055
# A span code token made of a single character.
class TokenCodeSingle
        super TokenCode

        # No extra character.
        redef fun next_pos do return 0
end
2062
# A span code token made of a doubled character.
class TokenCodeDouble
        super TokenCode

        # One extra character.
        redef fun next_pos do return 1
end
2069
# A link or image token.
#
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image when `check_link` accepts it, else `char` as is.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # Parse the construct starting at `start` in `v.current_text` and fill
        # `name`, `link`, `comment` and `is_abbrev` accordingly.
        # The id read first can be followed by:
        #
        # * `(`: an inline address with an optional quoted title
        # * `[`: an explicit reference id (the name is used when it is empty)
        # * anything else, or the end of the text: the name itself is looked up
        #   in `v.processor.link_refs`
        #
        # Return the position where the construct ends, or `-1` if it is not a
        # valid link. `out` is not used.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                var pos
                # the opening token is one character long for links, two otherwise
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # end of text reached: only a reference can match
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # inline link
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        # the address may be enclosed in `<` and `>`
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # optional title between double quotes
                                if pos > start and md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # explicit reference
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # implicit reference followed by something else
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                # as in the end-of-text case, an abbreviation stays
                                # an abbreviation when other text follows it
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
2189
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the link is one and has a title, a link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                        return
                end
                v.decorator.add_link(v, link.as(not null), name.as(not null), title)
        end
end
2202
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with its address, its text and its optional title.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var label = name.as(not null)
                v.decorator.add_image(v, address, label, comment)
        end
end
2211
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the markup accepted by `check_html`, else `char` escaped.
        redef fun emit(v) do
                var markup = new FlatBuffer
                var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add markup
                v.current_pos = stop
        end

        # Is the HTML valid?
        #
        # Also take care of link shortcuts such as `<http://example.com>`: these
        # are emitted directly as links and nothing is written to `out`.
        # Otherwise the markup read from `md` at `start` is appended to `out`.
        #
        # Return the position where the construct ends, or `-1`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var url = new FlatBuffer
                var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and url.is_link_prefix then
                        # read the rest of the address, up to the closing `>`
                        pos = md.read_until(url, pos, '>')
                        if pos != -1 then
                                var link = url.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2249
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity accepted by `check_entity`, else `char` escaped.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` into `out` and accept:
        #
        # * `&#x` or `&#X` followed by hexadecimal digits
        # * `&#` followed by decimal digits
        # * `&` followed by letters and digits
        #
        # On success, `;` is appended to `out` and the position of the `;` in `md`
        # is returned. Return `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # at least one digit is needed after the `x`
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
2303
# A markdown escape token.
class TokenEscape
        super Token

        # Step over the escape character and emit the character that follows as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
2313
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        # Emit the text up to the matching token as a strike.
        #
        # When no match is found, `char` is emitted as is.
        redef fun emit(v) do
                # the content starts after the two characters of the opening token
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                v.current_pos = stop + 1
        end
end
2332
2333 redef class Text
2334
2335         # Get the position of the next non-space character.
2336         private fun skip_spaces(start: Int): Int do
2337                 var pos = start
2338                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2339                         pos += 1
2340                 end
2341                 if pos < length then return pos
2342                 return -1
2343         end
2344
2345         # Read `self` until `nend` and append it to the `out` buffer.
2346         # Escape markdown special chars.
2347         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2348                 var pos = start
2349                 while pos < length do
2350                         var c = self[pos]
2351                         if c == '\\' and pos + 1 < length then
2352                                 pos = escape(out, self[pos + 1], pos)
2353                         else
2354                                 for n in nend do if c == n then break label
2355                                 out.add c
2356                         end
2357                         pos += 1
2358                 end label
2359                 if pos == length then return -1
2360                 return pos
2361         end
2362
2363         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2364         # No escape is made.
2365         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2366                 var pos = start
2367                 while pos < length do
2368                         var c = self[pos]
2369                         var end_reached = false
2370                         for n in nend do
2371                                 if c == n then
2372                                         end_reached = true
2373                                         break
2374                                 end
2375                         end
2376                         if end_reached then break
2377                         out.add c
2378                         pos += 1
2379                 end
2380                 if pos == length then return -1
2381                 return pos
2382         end
2383
2384         # Read `self` as XML until `to` and append it to the `out` buffer.
2385         # Escape HTML special chars.
2386         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2387                 var pos = from
2388                 var in_str = false
2389                 var str_char: nullable Char = null
2390                 while pos < length do
2391                         var c = self[pos]
2392                         if in_str then
2393                                 if c == '\\' then
2394                                         out.add c
2395                                         pos += 1
2396                                         if pos < length then
2397                                                 out.add c
2398                                                 pos += 1
2399                                         end
2400                                         continue
2401                                 end
2402                                 if c == str_char then
2403                                         in_str = false
2404                                         out.add c
2405                                         pos += 1
2406                                         continue
2407                                 end
2408                         end
2409                         if c == '"' or c == '\'' then
2410                                 in_str = true
2411                                 str_char = c
2412                         end
2413                         if not in_str then
2414                                 var end_reached = false
2415                                 for n in [0..to.length[ do
2416                                         if c == to[n] then
2417                                                 end_reached = true
2418                                                 break
2419                                         end
2420                                 end
2421                                 if end_reached then break
2422                         end
2423                         out.add c
2424                         pos += 1
2425                 end
2426                 if pos == length then return -1
2427                 return pos
2428         end
2429
2430         # Read `self` as XML and append it to the `out` buffer.
2431         # Safe mode can be activated to limit reading to valid xml.
2432         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2433                 var pos = 0
2434                 var is_valid = true
2435                 var is_close_tag = false
2436                 if start + 1 >= length then return -1
2437                 if self[start + 1] == '/' then
2438                         is_close_tag = true
2439                         pos = start + 2
2440                 else if self[start + 1] == '!' then
2441                         out.append "<!"
2442                         return start + 1
2443                 else
2444                         is_close_tag = false
2445                         pos = start + 1
2446                 end
2447                 if safe_mode then
2448                         var tmp = new FlatBuffer
2449                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2450                         if pos == -1 then return -1
2451                         var tag = tmp.write_to_string.trim.to_lower
2452                         if not tag.is_valid_html_tag then
2453                                 out.append "&lt;"
2454                                 pos = -1
2455                         else if tag.is_html_unsafe then
2456                                 is_valid = false
2457                                 out.append "&lt;"
2458                                 if is_close_tag then out.add '/'
2459                                 out.append tmp
2460                         else
2461                                 out.append "<"
2462                                 if is_close_tag then out.add '/'
2463                                 out.append tmp
2464                         end
2465                 else
2466                         out.add '<'
2467                         if is_close_tag then out.add '/'
2468                         pos = read_xml_until(out, pos, ' ', '/', '>')
2469                 end
2470                 if pos == -1 then return -1
2471                 pos = read_xml_until(out, pos, '/', '>')
2472                 if pos == -1 then return -1
2473                 if self[pos] == '/' then
2474                         out.append " /"
2475                         pos = self.read_xml_until(out, pos + 1, '>')
2476                         if pos == -1 then return -1
2477                 end
2478                 if self[pos] == '>' then
2479                         if is_valid then
2480                                 out.add '>'
2481                         else
2482                                 out.append "&gt;"
2483                         end
2484                         return pos
2485                 end
2486                 return -1
2487         end
2488
2489         # Read a markdown link address and append it to the `out` buffer.
2490         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2491                 var pos = start
2492                 var counter = 1
2493                 while pos < length do
2494                         var c = self[pos]
2495                         if c == '\\' and pos + 1 < length then
2496                                 pos = escape(out, self[pos + 1], pos)
2497                         else
2498                                 var end_reached = false
2499                                 if c == '(' then
2500                                         counter += 1
2501                                 else if c == ' ' then
2502                                         if counter == 1 then end_reached = true
2503                                 else if c == ')' then
2504                                         counter -= 1
2505                                         if counter == 0 then end_reached = true
2506                                 end
2507                                 if end_reached then break
2508                                 out.add c
2509                         end
2510                         pos += 1
2511                 end
2512                 if pos == length then return -1
2513                 return pos
2514         end
2515
2516         # Read a markdown link text and append it to the `out` buffer.
2517         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2518                 var pos = start
2519                 var counter = 1
2520                 while pos < length do
2521                         var c = self[pos]
2522                         var end_reached = false
2523                         if c == '[' then
2524                                 counter += 1
2525                                 out.add c
2526                         else if c == ']' then
2527                                 counter -= 1
2528                                 if counter == 0 then
2529                                         end_reached = true
2530                                 else
2531                                         out.add c
2532                                 end
2533                         else
2534                                 out.add c
2535                         end
2536                         if end_reached then break
2537                         pos += 1
2538                 end
2539                 if pos == length then return -1
2540                 return pos
2541         end
2542
2543         # Extract the XML tag name from a XML tag.
2544         private fun xml_tag: String do
2545                 var tpl = new FlatBuffer
2546                 var pos = 1
2547                 if pos < length and self[1] == '/' then pos += 1
2548                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2549                         tpl.add self[pos]
2550                         pos += 1
2551                 end
2552                 return tpl.write_to_string.to_lower
2553         end
2554
2555         private fun is_valid_html_tag: Bool do
2556                 if is_empty then return false
2557                 for c in self do
2558                         if not c.is_alpha then return false
2559                 end
2560                 return true
2561         end
2562
2563         # Read and escape the markdown contained in `self`.
2564         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2565                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2566                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2567                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2568                    c == '`' or c == '~' or c == '^' then
2569                         out.add c
2570                         return pos + 1
2571                 end
2572                 out.add '\\'
2573                 return pos
2574         end
2575
2576         # Extract string found at end of fence opening.
2577         private fun meta_from_fence: nullable Text do
2578                 for i in [0..chars.length[ do
2579                         var c = chars[i]
2580                         if c != ' ' and c != '`' and c != '~' then
2581                                 return substring_from(i).trim
2582                         end
2583                 end
2584                 return null
2585         end
2586
2587         # Is `self` an unsafe HTML element?
2588         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2589
2590         # Is `self` a HRML block element?
2591         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2592
2593         # Is `self` a link prefix?
2594         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2595
2596         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2597
2598         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2599
2600         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2601 end
2602
redef class String

        # Parse `self` as markdown and return the HTML representation.
        #
        # The conversion is done by a fresh `MarkdownProcessor`.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do
                return (new MarkdownProcessor).process(self)
        end
end