lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for output.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         # ~~~md
  45         # This is a paragraph
  46         # * and this is not a list
  47         # ~~~
  48         #
  49         #   Will produce:
  50         #
  51         # ~~~html
  52         # <p>This is a paragraph
  53         # * and this is not a list</p>
  54         # ~~~
  55         #
  56         #   When using extended mode this changes to:
  57         #
  58         # ~~~html
  59         # <p>This is a paragraph</p>
  60         # <ul>
  61         # <li>and this is not a list</li>
  62         # </ul>
  63         # ~~~
  64         #
  65         # * Fenced code blocks
  66         #
  67         #   If you don't want to indent all your code with 4 spaces,
  68         #   you can wrap your code in ``` ``` ``` or `~~~`.
  69         #
  70         #   Here's an example:
  71         #
  72         # ~~~md
  73         # fun test do
  74         #    print "Hello World!"
  75         # end
  76         # ~~~
  77         #
  78         # * Code blocks meta
  79         #
  80         #   If you want to use syntax highlighting tools, most of them need to know what kind
  81         #   of language they are highlighting.
  82         #   You can add an optional language identifier after the fence declaration to output
  83         #   it in the HTML render.
  84         #
  85         # ```nit
  86         # import markdown
  87         #
  88         # print "# Hello World!".md_to_html
  89         # ```
  90         #
  91         #   Becomes
  92         #
  93         # ~~~html
  94         # <pre class="nit"><code>import markdown
  95         #
  96         # print "# Hello World!".md_to_html
  97         # </code></pre>
  98         # ~~~
  99         #
 100         # * Underscores (Emphasis)
 101         #
 102         #   Underscores in the middle of a word like:
 103         #
 104         # ~~~md
 105         # Con_cat_this
 106         # ~~~
 107         #
 108         #   normally produces this:
 109         #
 110         # ~~~html
 111         # <p>Con<em>cat</em>this</p>
 112         # ~~~
 113         #
 114         #   With extended mode they don't result in emphasis.
 115         #
 116         # ~~~html
 117         # <p>Con_cat_this</p>
 118         # ~~~
 119         #
 120         # * Strikethrough
 121         #
 122         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 123         #   a strikethrough span is marked with `~~`.
 124         #
 125         # ~~~md
 126         # ~~Mistaken text.~~
 127         # ~~~
 128         #
 129         #   becomes
 130         #
 131         # ~~~html
 132         # <del>Mistaken text.</del>
 133         # ~~~
 134         var ext_mode = true
 135
 136         init do self.emitter = new MarkdownEmitter(self) # default emitter, bound to this processor
 137
 138         # Process the markdown `input` string and return the processed output.
 139         fun process(input: String): Writable do
 140                 # Reset the parsing state left by a previous run.
 141                 current_block = null
 142                 current_line = null
 143                 last_link_ref = null
 144                 link_refs.clear
 145                 # Cut the input into lines, then split the root block recursively.
 146                 var root = read_lines(input)
 147                 root.remove_surrounding_empty_lines
 148                 recurse(root, false)
 149                 # Let the emitter render the kind of block found at the root.
 150                 return emitter.emit(root.kind)
 151         end
 152
 153         # Cut the `input` string into `MDLine`s gathered in a new root `MDBlock`.
 154         #
 155         # Tabs are expanded to spaces up to the next multiple of 4 columns and the
 156         # lines accepted by `check_link_ref` are left out of the returned block.
 157         private fun read_lines(input: String): MDBlock do
 158                 var root = new MDBlock(new MDLocation(1, 1, 1, 1))
 159                 var text = new FlatBuffer
 160                 var total = input.length
 161                 var line_no = 0
 162                 var cursor = 0
 163                 while cursor < total do
 164                         text.clear
 165                         # `width` counts the characters of the expanded line, `column`
 166                         # the characters consumed from `input` (line feed included).
 167                         var width = 0
 168                         var column = 0
 169                         loop
 170                                 var c = input[cursor]
 171                                 cursor += 1
 172                                 column += 1
 173                                 if c == '\n' then break
 174                                 if c == '\t' then
 175                                         var stop = width + (4 - (width & 3))
 176                                         while width < stop do
 177                                                 text.add ' '
 178                                                 width += 1
 179                                         end
 180                                 else
 181                                         text.add c
 182                                         width += 1
 183                                 end
 184                                 if cursor >= total then break
 185                         end
 186                         line_no += 1
 187
 188                         var loc = new MDLocation(line_no, 1, line_no, column)
 189                         var line = new MDLine(loc, text.write_to_string)
 190                         if not check_link_ref(line) then root.add_line line
 191                 end
 192                 return root
 193         end
 194
 195         # Check if `line` is a block link definition.
 196         #
 197         # Return `true` if the line was consumed and must be left out of the document:
 198         # it either defined a link ref (saved into `link_refs`) or titled `last_link_ref`.
 199         private fun check_link_ref(line: MDLine): Bool do
 200                 var md = line.value
 201                 var is_link_ref = false
 202                 var id = new FlatBuffer
 203                 var link = new FlatBuffer
 204                 var comment = new FlatBuffer
 205                 var pos = -1
 206                 if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
 207                         pos = md.read_until(id, line.leading + 1, ']')
 208                         if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
 209                                 pos = md.skip_spaces(pos + 2)
 210                                 # FIX: `pos` may be negative here (cf. the `pos > 0` tests below); do not index with it.
 211                                 if pos >= 0 then
 212                                         if md[pos] == '<' then
 213                                                 pos = md.read_until(link, pos + 1, '>')
 214                                                 pos += 1
 215                                         else
 216                                                 pos = md.read_until(link, pos, ' ', '\n')
 217                                         end
 218                                 end
 219                                 if not link.is_empty then
 220                                         pos = md.skip_spaces(pos)
 221                                         if pos > 0 and pos < md.length then
 222                                                 var c = md[pos]
 223                                                 if c == '\"' or c == '\'' or c == '(' then
 224                                                         pos += 1
 225                                                         if c == '(' then
 226                                                                 pos = md.read_until(comment, pos, ')')
 227                                                         else
 228                                                                 pos = md.read_until(comment, pos, c)
 229                                                         end
 230                                                         if pos > 0 then is_link_ref = true
 231                                                 end
 232                                         else
 233                                                 is_link_ref = true
 234                                         end
 235                                 end
 236                         end
 237                 end
 238                 if is_link_ref and not id.is_empty and not link.is_empty then
 239                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 240                         add_link_ref(id.write_to_string, lr)
 241                         # Only a definition without title can be completed by a following line.
 242                         last_link_ref = null
 243                         if comment.is_empty then last_link_ref = lr
 244                         return true
 245                 end
 246                 # Not a definition: the line may be the title of the pending link ref.
 247                 var pending = last_link_ref
 248                 if pending == null then return false
 249                 if line.is_empty then return false
 250                 # FIX: drop the pending ref, else any later quoted line would be eaten as title.
 251                 last_link_ref = null
 252                 var opening = md[line.leading]
 253                 if opening != '\"' and opening != '\'' and opening != '(' then return false
 254                 var closing = if opening == '(' then ')' else opening
 255                 comment = new FlatBuffer
 256                 pos = md.read_until(comment, line.leading + 1, closing)
 257                 # FIX: like above, the title only counts if its closing delimiter was found.
 258                 if pos <= 0 or comment.is_empty then return false
 259                 pending.title = comment.write_to_string
 260                 return true
 261         end
 262
 263         # Known link refs, indexed by their lower-cased id (see `add_link_ref`).
 264         # This map is needed during output to expand links.
 265         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 266
 267         # Last encountered link ref that has no title yet (for multiline definitions)
 268         #
 269         # Markdown allows link refs to be defined over two lines:
 270         #
 271         # ~~~md
 272         # [id]: http://example.com/longish/path/to/resource/here
 273         #       "Optional Title Here"
 274         # ~~~
 275         # `check_link_ref` sets it, and gives it the title found on a following line.
 276         private var last_link_ref: nullable LinkRef = null
 277
 278         # Add a link ref to `link_refs`; the `key` is stored in lower case.
 279         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 280
 281         # Recursively split the `root` block.
 282         #
 283         # The block is split according to the kind of lines it contains.
 284         # Some blocks, like lists, can be split again recursively.
 285         # The `in_list` mode is used to recurse on lists and build
 286         # nested paragraphs or code blocks.
 287         fun recurse(root: MDBlock, in_list: Bool) do
 288                 # Skip leading empty lines before touching any state: returning
 289                 # from here must not leave the `in_list` mode changed.
 290                 var line = root.first_line
 291                 while line != null and line.is_empty do
 292                         line = line.next
 293                         if line == null then return
 294                 end
 295                 var old_mode = self.in_list
 296                 var old_root = self.current_block
 297                 self.in_list = in_list
 298                 current_line = line
 299                 current_block = root
 300                 while current_line != null do
 301                         line_kind(current_line.as(not null)).process(self)
 302                 end
 303                 self.in_list = old_mode
 304                 self.current_block = old_root
 305         end
 306
 307         # Currently processed line.
 308         # `recurse` runs until it is `null`, so processing a line has to move it on.
 309         var current_line: nullable MDLine = null is writable
 310
 311         # Currently processed block.
 312         # Set by `recurse` during a visit, and put back to its old value afterwards.
 313         var current_block: nullable MDBlock = null is writable
 314
 315         # Is the current recursion in list mode?
 316         # Set by `recurse` from its `in_list` argument.
 317         private var in_list = false
 318
 319         # Classify the line `md`.
 320         #
 321         # Return a new instance of the `Line` subclass matching its content.
 322         # The checks are ordered: the first kind that matches is the one returned,
 323         # and `LineOther` is the answer when nothing else matches.
 324         fun line_kind(md: MDLine): Line do
 325                 if md.is_empty then return new LineEmpty
 326                 # A `leading` of 4 or more makes a code line.
 327                 var lead = md.leading
 328                 if lead > 3 then return new LineCode
 329
 330                 var text = md.value
 331                 var size = text.length
 332                 var first = text[lead]
 333                 if first == '#' then return new LineHeadline
 334                 if first == '>' then return new LineBlockquote
 335
 336                 # Length left once `leading` and `trailing` are taken away.
 337                 var core = size - lead - md.trailing
 338
 339                 # Fence of a code block (extended mode only).
 340                 if ext_mode and core > 2 and (first == '`' or first == '~') then
 341                         if md.count_chars_start(first) >= 3 then return new LineFence
 342                 end
 343
 344                 # Horizontal rule.
 345                 if core > 2 and (first == '*' or first == '-' or first == '_') then
 346                         if md.count_chars(first) >= 3 then return new LineHR
 347                 end
 348
 349                 # Unordered list item: `*`, `-` or `+` followed by a space.
 350                 if size - lead >= 2 and text[lead + 1] == ' ' then
 351                         if first == '*' or first == '-' or first == '+' then return new LineUList
 352                 end
 353
 354                 # Ordered list item: digits followed by a dot and a space.
 355                 if size - lead >= 3 and first.is_digit then
 356                         var i = lead + 1
 357                         while i < size and text[i].is_digit do i += 1
 358                         if i + 1 < size and text[i] == '.' and text[i + 1] == ' ' then
 359                                 return new LineOList
 360                         end
 361                 end
 362
 363                 # Markup accepted by `check_html`.
 364                 if first == '<' and md.check_html then return new LineXML
 365
 366                 # The next line may turn this one into a headline.
 367                 var following = md.next
 368                 if following != null and not following.is_empty then
 369                         if following.count_chars('=') > 0 then return new LineHeadline1
 370                         if following.count_chars('-') > 0 then return new LineHeadline2
 371                 end
 372                 return new LineOther
 373         end
 374
 375         # Get the kind of token found at `pos` in `text`.
 376         #
 377         # The decision depends on the character at `pos`, on the one before it
 378         # and on the two after it; a space stands for those outside of `text`.
 379         fun token_at(text: Text, pos: Int): Token do
 380                 var prev = ' '
 381                 var next = ' '
 382                 var after = ' '
 383                 if pos > 0 then prev = text[pos - 1]
 384                 var c = text[pos]
 385                 if pos + 1 < text.length then next = text[pos + 1]
 386                 if pos + 2 < text.length then after = text[pos + 2]
 387
 388                 # The token location is a single position on the current line.
 389                 var here = current_loc
 390                 var line = here.line_start
 391                 var column = here.column_start + pos
 392                 var loc = new MDLocation(line, column, line, column)
 393
 394                 # Stars: emphasis or strong marks.
 395                 if c == '*' then
 396                         if next == '*' then
 397                                 # A `**` standing alone between spaces is not a strong mark.
 398                                 if prev == ' ' and after == ' ' then
 399                                         return new TokenEmStar(loc, pos, c)
 400                                 end
 401                                 return new TokenStrongStar(loc, pos, c)
 402                         end
 403                         if prev == ' ' and next == ' ' then
 404                                 return new TokenNone(loc, pos, c)
 405                         end
 406                         return new TokenEmStar(loc, pos, c)
 407                 end
 408
 409                 if c == '_' then
 410                         if next == '_' then
 411                                 if prev == ' ' and after == ' ' then
 412                                         return new TokenEmUnderscore(loc, pos, c)
 413                                 end
 414                                 return new TokenStrongUnderscore(loc, pos, c)
 415                         end
 416                         if ext_mode then
 417                                 # In extended mode an underscore inside a word is plain text.
 418                                 var word_before = (prev.is_letter or prev.is_digit) and prev != '_'
 419                                 var word_after = next.is_letter or next.is_digit
 420                                 if word_before and word_after then
 421                                         return new TokenNone(loc, pos, c)
 422                                 end
 423                                 return new TokenEmUnderscore(loc, pos, c)
 424                         end
 425                         if prev == ' ' and next == ' ' then
 426                                 return new TokenNone(loc, pos, c)
 427                         end
 428                         return new TokenEmUnderscore(loc, pos, c)
 429                 end
 430
 431                 if c == '!' then
 432                         if next == '[' then
 433                                 return new TokenImage(loc, pos, c)
 434                         end
 435                         return new TokenNone(loc, pos, c)
 436                 end
 437
 438                 if c == '[' then return new TokenLink(loc, pos, c)
 439                 if c == ']' then return new TokenNone(loc, pos, c)
 440
 441                 if c == '`' then
 442                         if next == '`' then
 443                                 return new TokenCodeDouble(loc, pos, c)
 444                         end
 445                         return new TokenCodeSingle(loc, pos, c)
 446                 end
 447
 448                 if c == '\\' then
 449                         # A backslash only escapes the characters listed here.
 450                         if next == '\\' or next == '[' or next == ']' or
 451                            next == '(' or next == ')' or next == '{' or
 452                            next == '}' or next == '#' or next == '"' or
 453                            next == '\'' or next == '.' or next == '<' or
 454                            next == '>' or next == '*' or next == '+' or
 455                            next == '-' or next == '_' or next == '!' or
 456                            next == '`' or next == '~' or next == '^' then
 457                                 return new TokenEscape(loc, pos, c)
 458                         end
 459                         return new TokenNone(loc, pos, c)
 460                 end
 461
 462                 if c == '<' then return new TokenHTML(loc, pos, c)
 463                 if c == '&' then return new TokenEntity(loc, pos, c)
 464
 465                 # Strikethrough marks only exist in extended mode.
 466                 if ext_mode and c == '~' and next == '~' then
 467                         return new TokenStrike(loc, pos, c)
 468                 end
 469                 return new TokenNone(loc, pos, c)
 470         end
 471
 472         # Find the position of the first token of the same type as `token` in `text`.
 473         #
 474         # The search starts at `start` and goes on until the end of `text`.
 475         # Return `-1` if no such token is found.
 476         fun find_token(text: Text, start: Int, token: Token): Int do
 477                 var limit = text.length
 478                 for pos in [start .. limit[ do
 479                         if token_at(text, pos).is_same_type(token) then return pos
 480                 end
 481                 return -1
 482         end
 483
 484         # Location used for the next parsed token.
 485         #
 486         # It is the current location of the `emitter`, which adjusts it when a
 487         # `\n` is found in the emitted text.
 488         private fun current_loc: MDLocation do return emitter.current_loc
 489 end
 490
 491 # Emit output corresponding to blocks content.
 492 #
 493 # Blocks are created by a previous pass in `MarkdownProcessor`.
 494 # The emitter uses a `Decorator` to select the output format.
 495 class MarkdownEmitter
 496
 497         # Kind of processor used for parsing.
 498         type PROCESSOR: MarkdownProcessor
 499
 500         # Processor containing link refs; also asked for the token kinds (`token_at`).
 501         var processor: PROCESSOR
 502
 503         # Kind of decorator used for decoration.
 504         type DECORATOR: Decorator
 505
 506         # Decorator used for output.
 507         # Default is a new `HTMLDecorator`, created the first time it is needed.
 508         var decorator: DECORATOR is writable, lazy do
 509                 return new HTMLDecorator
 510         end
 511
 512         # Create a new `MarkdownEmitter` using a custom `decorator`.
 513         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
 514                 init processor
 515                 self.decorator = decorator
 516         end
 517
 518         # Output `block` using `decorator` in a new buffer and return that buffer.
 519         fun emit(block: Block): Text do
 520                 var buffer = push_buffer
 521                 block.emit(self)
 522                 pop_buffer
 523                 return buffer
 524         end
 525
 526         # Output the content of `block`.
 527         fun emit_in(block: Block) do block.emit_in(self)
 528
 529         # Transform and emit the whole markdown `text`.
 530         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 531
 532         # Transform and emit markdown `text` from `start`; stop and return the position
 533         # of the first token of the same type as `token`, leaving the cursor there.
 534         # If `token` is null or never found, go until the end and return `-1`.
 535         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 536                 var old_text = current_text
 537                 var old_pos = current_pos
 538                 current_text = text
 539                 current_pos = start
 540                 while current_pos < text.length do
 541                         if text[current_pos] == '\n' then
 542                                 current_loc.line_start += 1
 543                                 current_loc.column_start = -current_pos
 544                         end
 545                         var mt = processor.token_at(text, current_pos)
 546                         if (token != null and not token isa TokenNone) and
 547                         (mt.is_same_type(token) or
 548                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 549                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 550                                 return current_pos
 551                         end
 552                         mt.emit(self)
 553                         current_pos += 1
 554                 end
 555                 current_text = old_text
 556                 current_pos = old_pos
 557                 return -1
 558         end
 559
 560         # Currently processed position in `current_text`.
 561         # Used when visiting inline production with `emit_text_until`.
 562         private var current_pos: Int = -1
 563
 564         # Currently processed text.
 565         # Used when visiting inline production with `emit_text_until`.
 566         private var current_text: nullable Text = null
 567
 568         # Stack of output buffers; the last one is the current one.
 569         private var buffer_stack = new List[FlatBuffer]
 570
 571         # Push a new buffer on the stack.
 572         private fun push_buffer: FlatBuffer do
 573                 var buffer = new FlatBuffer
 574                 buffer_stack.add buffer
 575                 return buffer
 576         end
 577
 578         # Pop the last buffer.
 579         private fun pop_buffer do buffer_stack.pop
 580
 581         # Current output buffer (the stack must not be empty).
 582         private fun current_buffer: FlatBuffer do
 583                 assert not buffer_stack.is_empty
 584                 return buffer_stack.last
 585         end
 586
 587         # Stack of locations; the last one is the current one.
 588         private var loc_stack = new List[MDLocation]
 589
 590         # Push a new MDLocation on the stack.
 591         private fun push_loc(location: MDLocation) do loc_stack.add location
 592
 593         # Pop the last location.
 594         private fun pop_loc: MDLocation do return loc_stack.pop
 595
 596         # Current location (the stack must not be empty).
 597         private fun current_loc: MDLocation do
 598                 assert not loc_stack.is_empty
 599                 return loc_stack.last
 600         end
 601
 602         # Append `e` to current buffer (as is for a `Text`, else its `write_to_string`).
 603         fun add(e: Writable) do
 604                 if e isa Text then
 605                         current_buffer.append e
 606                 else
 607                         current_buffer.append e.write_to_string
 608                 end
 609         end
 610
 611         # Append `c` to current buffer.
 612         fun addc(c: Char) do add c.to_s
 613
 614         # Append a "\n" line break.
 615         fun addn do add "\n"
 616 end
 617
 618 # A Link Reference.
 619 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
 620 #
 621 # ~~~raw
 622 # [1]: http://example.com/ "Optional title"
 623 # ~~~
 624 class LinkRef
 625
 626         # Link href
 627         var link: String
 628
 629         # Optional link title
 630         var title: nullable String = null
 631
 632         # Is the link an abbreviation?
 633         var is_abbrev = false
 634
 635         # Create a link with a title.
 636         init with_title(link: String, title: nullable String) do
 637                 self.link = link
 638                 self.title = title
 639         end
 640 end
 641
 642 # A `Decorator` is used to emit markdown into a specific format.
 643 # Default decorator used is `HTMLDecorator`.
 644 interface Decorator
 645
 646         # Kind of emitter used for decoration.
 647         type EMITTER: MarkdownEmitter
 648
 649         # Render a single plain char.
 650         #
 651         # Redefine this method to add special escaping for plain text.
 652         fun add_char(v: EMITTER, c: Char) do v.addc c
 653
 654         # Render a ruler block.
 655         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
 656
 657         # Render a headline block with corresponding level.
 658         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
 659
 660         # Render a paragraph block.
 661         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
 662
 663         # Render a code or fence block.
 664         fun add_code(v: EMITTER, block: BlockCode) is abstract
 665
 666         # Render a blockquote.
 667         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
 668
 669         # Render an unordered list.
 670         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
 671
 672         # Render an ordered list.
 673         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
 674
 675         # Render a list item.
 676         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
 677
 678         # Render an emphasis text.
 679         fun add_em(v: EMITTER, text: Text) is abstract
 680
 681         # Render a strong text.
 682         fun add_strong(v: EMITTER, text: Text) is abstract
 683
 684         # Render a strike text.
 685         #
 686         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 687         fun add_strike(v: EMITTER, text: Text) is abstract
 688
 689         # Render a link.
 690         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 691
 692         # Render an image.
 693         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 694
 695         # Render an abbreviation.
 696         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
 697
 698         # Render the part of `buffer` delimited by `from` and `to` as a code span.
 699         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 700
 701         # Render a text and escape it.
 702         fun append_value(v: EMITTER, value: Text) is abstract
 703
 704         # Render the part of `buffer` delimited by `from` and `to` as escaped code text.
 705         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 706
 707         # Render a character escape.
 708         fun escape_char(v: EMITTER, char: Char) is abstract
 709
 710         # Render a line break.
 711         fun add_line_break(v: EMITTER) is abstract
 712
 713         # Generate a new html valid id from a `String`.
 714         fun strip_id(txt: String): String is abstract
 715
 716         # Headlines found during the processing, indexed by their ids.
 717         fun headlines: ArrayMap[String, HeadLine] is abstract
 718 end
 719
 720 # Class representing a markdown headline.
 721 class HeadLine
 722         # Unique identifier of this headline.
 723         var id: String
 724
 725         # Text of the headline.
 726         var title: String
 727
 728         # Level of this headline.
 729         #
 730         # According to the markdown specification, level must be in `[1..6]`.
 731         var level: Int
 732 end
 733
 734 # `Decorator` that outputs HTML.
 735 class HTMLDecorator
 736         super Decorator
 737
 738         redef var headlines = new ArrayMap[String, HeadLine] # filled by `add_headline`
 739
 740         redef fun add_ruler(v, block) do v.add "<hr/>\n" # the `block` itself is not used
 741
 742         redef fun add_headline(v, block) do
 743                 # Record the headline in `headlines`, under the id made from its first line.
 744                 var txt = block.block.first_line.value
 745                 var id = strip_id(txt)
 746                 var lvl = block.depth
 747                 headlines[id] = new HeadLine(id, txt, lvl)
 748                 # Render the content inside a `<hN>` element (N is the depth) carrying that id.
 749                 v.add "<h{lvl} id=\"{id}\">"
 750                 v.emit_in block
 751                 v.add "</h{lvl}>\n"
 752         end
 753
 754         redef fun add_paragraph(v, block) do
 755                 v.add "<p>"
 756                 v.emit_in block
 757                 v.add "</p>\n"
 758         end
 759
 760         redef fun add_code(v, block) do
 761                 var meta = block.meta
 762                 if meta != null then
 763                         v.add "<pre class=\""
 764                         append_value(v, meta)
 765                         v.add "\"><code>"
 766                 else
 767                         v.add "<pre><code>"
 768                 end
 769                 v.emit_in block
 770                 v.add "</code></pre>\n"
 771         end
 772
 773         redef fun add_blockquote(v, block) do
 774                 v.add "<blockquote>\n"
 775                 v.emit_in block
 776                 v.add "</blockquote>\n"
 777         end
 778
 779         redef fun add_unorderedlist(v, block) do
 780                 v.add "<ul>\n"
 781                 v.emit_in block
 782                 v.add "</ul>\n"
 783         end
 784
 785         redef fun add_orderedlist(v, block) do
 786                 v.add "<ol>\n"
 787                 v.emit_in block
 788                 v.add "</ol>\n"
 789         end
 790
 791         redef fun add_listitem(v, block) do
 792                 v.add "<li>"
 793                 v.emit_in block
 794                 v.add "</li>\n"
 795         end
 796
 797         redef fun add_em(v, text) do
 798                 v.add "<em>"
 799                 v.add text
 800                 v.add "</em>"
 801         end
 802
 803         redef fun add_strong(v, text) do
 804                 v.add "<strong>"
 805                 v.add text
 806                 v.add "</strong>"
 807         end
 808
 809         redef fun add_strike(v, text) do
 810                 v.add "<del>"
 811                 v.add text
 812                 v.add "</del>"
 813         end
 814
 815         redef fun add_image(v, link, name, comment) do
 816                 v.add "<img src=\""
 817                 append_value(v, link)
 818                 v.add "\" alt=\""
 819                 append_value(v, name)
 820                 v.add "\""
 821                 if comment != null and not comment.is_empty then
 822                         v.add " title=\""
 823                         append_value(v, comment)
 824                         v.add "\""
 825                 end
 826                 v.add "/>"
 827         end
 828
 829         redef fun add_link(v, link, name, comment) do
 830                 v.add "<a href=\""
 831                 append_value(v, link)
 832                 v.add "\""
 833                 if comment != null and not comment.is_empty then
 834                         v.add " title=\""
 835                         append_value(v, comment)
 836                         v.add "\""
 837                 end
 838                 v.add ">"
 839                 v.emit_text(name)
 840                 v.add "</a>"
 841         end
 842
 843         redef fun add_abbr(v, name, comment) do
 844                 v.add "<abbr title=\""
 845                 append_value(v, comment)
 846                 v.add "\">"
 847                 v.emit_text(name)
 848                 v.add "</abbr>"
 849         end
 850
 851         redef fun add_span_code(v, text, from, to) do
 852                 v.add "<code>"
 853                 append_code(v, text, from, to)
 854                 v.add "</code>"
 855         end
 856
 857         redef fun add_line_break(v) do
 858                 v.add "<br/>"
 859         end
 860
 861         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 862
 863         redef fun escape_char(v, c) do
 864                 if c == '&' then
 865                         v.add "&amp;"
 866                 else if c == '<' then
 867                         v.add "&lt;"
 868                 else if c == '>' then
 869                         v.add "&gt;"
 870                 else if c == '"' then
 871                         v.add "&quot;"
 872                 else if c == '\'' then
 873                         v.add "&apos;"
 874                 else
 875                         v.addc c
 876                 end
 877         end
 878
 879         redef fun append_code(v, buffer, from, to) do
 880                 for i in [from..to[ do
 881                         var c = buffer[i]
 882                         if c == '&' then
 883                                 v.add "&amp;"
 884                         else if c == '<' then
 885                                 v.add "&lt;"
 886                         else if c == '>' then
 887                                 v.add "&gt;"
 888                         else
 889                                 v.addc c
 890                         end
 891                 end
 892         end
 893
 894         redef fun strip_id(txt) do
 895                 # strip id
 896                 var b = new FlatBuffer
 897                 for c in txt do
 898                         if c == ' ' then
 899                                 b.add '_'
 900                         else
 901                                 if not c.is_letter and
 902                                    not c.is_digit and
 903                                    not allowed_id_chars.has(c) then continue
 904                                 b.add c
 905                         end
 906                 end
 907                 var res = b.to_s
 908                 var key = res
 909                 # check for multiple id definitions
 910                 if headlines.has_key(key) then
 911                         var i = 1
 912                         key = "{res}_{i}"
 913                         while headlines.has_key(key) do
 914                                 i += 1
 915                                 key = "{res}_{i}"
 916                         end
 917                 end
 918                 return key
 919         end
 920
 921         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 922 end
 923
 924 # Location in a Markdown input.
 925 class MDLocation
 926
 927         # Starting line number (starting from 1).
 928         var line_start: Int
 929
 930         # Starting column number (starting from 1).
 931         var column_start: Int
 932
 933         # Stopping line number (starting from 1).
 934         var line_end: Int
 935
 936         # Stopping column number (starting from 1).
 937         var column_end: Int
 938
 939         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 940
 941         # Return a copy of `self`.
 942         fun copy: MDLocation do
 943                 return new MDLocation(line_start, column_start, line_end, column_end)
 944         end
 945 end
 946
 947 # A block of markdown lines.
 948 # A `MDBlock` can contains lines and/or sub-blocks.
 949 class MDBlock
 950
 951         # Position of `self` in the input.
 952         var location: MDLocation
 953
 954         # Kind of block.
 955         # See `Block`.
 956         var kind: Block = new BlockNone(self) is writable
 957
 958         # First line if any.
 959         var first_line: nullable MDLine = null is writable
 960
 961         # Last line if any.
 962         var last_line: nullable MDLine = null is writable
 963
 964         # First sub-block if any.
 965         var first_block: nullable MDBlock = null is writable
 966
 967         # Last sub-block if any.
 968         var last_block: nullable MDBlock = null is writable
 969
 970         # Previous block if any.
 971         var prev: nullable MDBlock = null is writable
 972
 973         # Next block if any.
 974         var next: nullable MDBlock = null is writable
 975
 976         # Does this block contain subblocks?
 977         fun has_blocks: Bool do return first_block != null
 978
 979         # Count sub-blocks.
 980         fun count_blocks: Int do
 981                 var count = 0
 982                 var block = first_block
 983                 while block != null do
 984                         count += 1
 985                         block = block.next
 986                 end
 987                 return count
 988         end
 989
 990         # Does this block contain lines?
 991         fun has_lines: Bool do return first_line != null
 992
 993         # Count block lines.
 994         fun count_lines: Int do
 995                 var count = 0
 996                 var line = first_line
 997                 while line != null do
 998                         count += 1
 999                         line = line.next
1000                 end
1001                 return count
1002         end
1003
1004         # Split `self` creating a new sub-block having `line` has `last_line`.
1005         fun split(line: MDLine): MDBlock do
1006                 # location for new block
1007                 var new_loc = new MDLocation(
1008                         first_line.location.line_start,
1009                         first_line.location.column_start,
1010                         line.location.line_end,
1011                         line.location.column_end)
1012                 # create block
1013                 var block = new MDBlock(new_loc)
1014                 block.first_line = first_line
1015                 block.last_line = line
1016                 first_line = line.next
1017                 line.next = null
1018                 if first_line == null then
1019                         last_line = null
1020                 else
1021                         first_line.prev = null
1022                         # update current block loc
1023                         location.line_start = first_line.location.line_start
1024                         location.column_start = first_line.location.column_start
1025                 end
1026                 if first_block == null then
1027                         first_block = block
1028                         last_block = block
1029                 else
1030                         last_block.next = block
1031                         last_block = block
1032                 end
1033                 return block
1034         end
1035
1036         # Add a `line` to this block.
1037         fun add_line(line: MDLine) do
1038                 if last_line == null then
1039                         first_line = line
1040                         last_line = line
1041                 else
1042                         last_line.next_empty = line.is_empty
1043                         line.prev_empty = last_line.is_empty
1044                         line.prev = last_line
1045                         last_line.next = line
1046                         last_line = line
1047                 end
1048         end
1049
1050         # Remove `line` from this block.
1051         fun remove_line(line: MDLine) do
1052                 if line.prev == null then
1053                         first_line = line.next
1054                 else
1055                         line.prev.next = line.next
1056                 end
1057                 if line.next == null then
1058                         last_line = line.prev
1059                 else
1060                         line.next.prev = line.prev
1061                 end
1062                 line.prev = null
1063                 line.next = null
1064         end
1065
1066         # Remove leading empty lines.
1067         fun remove_leading_empty_lines: Bool do
1068                 var was_empty = false
1069                 var line = first_line
1070                 while line != null and line.is_empty do
1071                         remove_line line
1072                         line = first_line
1073                         was_empty = true
1074                 end
1075                 return was_empty
1076         end
1077
1078         # Remove trailing empty lines.
1079         fun remove_trailing_empty_lines: Bool do
1080                 var was_empty = false
1081                 var line = last_line
1082                 while line != null and line.is_empty do
1083                         remove_line line
1084                         line = last_line
1085                         was_empty = true
1086                 end
1087                 return was_empty
1088         end
1089
1090         # Remove leading and trailing empty lines.
1091         fun remove_surrounding_empty_lines: Bool do
1092                 var was_empty = false
1093                 if remove_leading_empty_lines then was_empty = true
1094                 if remove_trailing_empty_lines then was_empty = true
1095                 return was_empty
1096         end
1097
1098         # Remove list markers and up to 4 leading spaces.
1099         # Used to clean nested lists.
1100         fun remove_list_indent(v: MarkdownProcessor) do
1101                 var line = first_line
1102                 while line != null do
1103                         if not line.is_empty then
1104                                 var kind = v.line_kind(line)
1105                                 if kind isa LineList then
1106                                         line.value = kind.extract_value(line)
1107                                 else
1108                                         line.value = line.value.substring_from(line.leading.min(4))
1109                                 end
1110                                 line.leading = line.process_leading
1111                         end
1112                         line = line.next
1113                 end
1114         end
1115
1116         # Collect block line text.
1117         fun text: String do
1118                 var text = new FlatBuffer
1119                 var line = first_line
1120                 while line != null do
1121                         if not line.is_empty then
1122                                 text.append line.text
1123                         end
1124                         text.append "\n"
1125                         line = line.next
1126                 end
1127                 return text.write_to_string
1128         end
1129 end
1130
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v`.
        #
        # By default the content is emitted without any decoration;
        # subclasses redefine this method to call `v.decorator`.
        fun emit(v: MarkdownEmitter) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed first. Lines are emitted if there
        # are some left, sub-blocks otherwise.
        fun emit_in(v: MarkdownEmitter) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are collected in a temporary buffer, without their leading and
        # trailing spaces, then the whole buffer is passed to `v.emit_text`.
        # A line ending with two spaces or more is followed by a line break.
        fun emit_lines(v: MarkdownEmitter) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a length, not an end index: the leading
                                # spaces must be subtracted or trailing spaces leak in the output.
                                var count = line.value.length - line.leading - line.trailing
                                v.add line.value.substring(line.leading, count.max(0))
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        # separate lines with a new line, except after the last one
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while it is emitted.
        fun emit_blocks(v: MarkdownEmitter) do
                var block = self.block.first_block
                while block != null do
                        v.push_loc(block.location)
                        block.kind.emit(v)
                        v.pop_loc
                        block = block.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Outside of a `BlockFence`, a prefix of 4 spaces is removed from lines.
        # Each line, even an empty one, is followed by `"\n"`.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        # NOTE(review): the second argument of `substring` is a length,
                                        # so trailing spaces are only partially removed here — confirm
                                        # whether they should be stripped or kept.
                                        text.append str.substring(4, str.length - line.trailing)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1200
# A block without any markdown specificities.
#
# Inherits the default behavior of `Block` without redefining anything,
# this class is only used for typing purposes.
class BlockNone
        super Block
end
1208
# A markdown blockquote.
class BlockQuote
        super Block

        # Delegate the rendering to the decorator's `add_blockquote`.
        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` starting with `>` (after its leading
        # spaces), the marker and one optional following space are removed, then
        # `leading` is computed again.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty and line.value[line.leading] == '>' then
                                var after = line.leading + 1
                                var cut = after
                                # also drop the space following the marker, if any
                                if after < line.value.length and line.value[after] == ' ' then cut += 1
                                line.value = line.value.substring_from(cut)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1234
# A markdown code block.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Number of char to skip at the beginning of the line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        # Delegate the rendering to the decorator's `add_code`.
        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line as escaped code, skipping the first `line_start` characters.
        #
        # Every line, empty ones included, is followed by a new line.
        redef fun emit_lines(v) do
                var line = block.first_line
                loop
                        if line == null then break
                        if not line.is_empty then
                                var code = line.value
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        line = line.next
                end
        end
end
1260
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode` except for `line_start`,
# this class is mainly used for typing purposes.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1271
# A markdown headline.
class BlockHeadline
        super Block

        # Render the headline through the decorator's `add_headline`.
        #
        # The location pushed during the rendering is a copy of the block location
        # whose starting column is shifted by `start`.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Index of the headline text in its original line, set by `transform_headline`.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from lines contained in `self`.
        #
        # Count the leading `#` of the first line of `block` to set `depth` (at most 6),
        # then reduce the line to the headline text, without the marks, the
        # surrounding spaces and the optional closing `#` sequence.
        # The line is flagged as empty when no text remains.
        #
        # Do nothing if `depth` is already set or if the first line is missing or empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null or line.is_empty then return
                var value = line.value
                var length = value.length
                # skip the opening `#` marks, then the spaces following them
                var text_start = line.leading
                while text_start < length and value[text_start] == '#' do
                        level += 1
                        text_start += 1
                end
                while text_start < length and value[text_start] == ' ' do
                        text_start += 1
                end
                if text_start >= length then
                        line.is_empty = true
                else
                        # skip the closing `#` marks and the spaces before them, never
                        # going before `text_start` (e.g. `# #` has no text at all)
                        var nend = length - line.trailing - 1
                        while nend >= text_start and value[nend] == '#' do nend -= 1
                        while nend >= text_start and value[nend] == ' ' do nend -= 1
                        if nend < text_start then
                                line.is_empty = true
                        else
                                line.value = value.substring(text_start, nend - text_start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                self.start = text_start
                depth = level.min(6)
        end
end
1317
# A markdown list item block.
class BlockListItem
        super Block

        # Delegate the rendering to the decorator's `add_listitem`.
        redef fun emit(v) do v.decorator.add_listitem(v, self)
end
1324
# A markdown list block.
#
# Common behavior of ordered and unordered lists, mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins at each list line and
        # at each non-empty unindented line that follows an empty line.
        # The lines before such a line are split into a `BlockListItem`,
        # and so are the lines remaining at the end.
        private fun init_block(v: MarkdownProcessor) do
                var line = block.first_line.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # If any list item of `block` contains a `BlockParagraph`, every `BlockNone`
        # found in the list items becomes a `BlockParagraph` too.
        private fun expand_paragraphs(block: MDBlock) do
                # look for a paragraph in the list items, stop at the first one
                var has_paragraph = false
                var item = block.first_block
                while item != null and not has_paragraph do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null and not has_paragraph do
                                        has_paragraph = child.kind isa BlockParagraph
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not has_paragraph then return
                # promote the untyped blocks of every list item
                var outer = block.first_block
                while outer != null do
                        if outer.kind isa BlockListItem then
                                var inner = outer.first_block
                                while inner != null do
                                        if inner.kind isa BlockNone then inner.kind = new BlockParagraph(inner)
                                        inner = inner.next
                                end
                        end
                        outer = outer.next
                end
        end
end
1382
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        # Delegate the rendering to the decorator's `add_orderedlist`.
        redef fun emit(v) do v.decorator.add_orderedlist(v, self)
end
1389
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Delegate the rendering to the decorator's `add_unorderedlist`.
        redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
end
1396
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Delegate the rendering to the decorator's `add_paragraph`.
        redef fun emit(v) do v.decorator.add_paragraph(v, self)
end
1403
# A markdown ruler.
class BlockRuler
        super Block

        # Delegate the rendering to the decorator's `add_ruler`.
        redef fun emit(v) do v.decorator.add_ruler(v, self)
end
1410
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the lines as they are, each one followed by a new line.
        #
        # Nothing but the new line is emitted for empty lines.
        redef fun emit_lines(v) do
                var line = block.first_line
                loop
                        if line == null then break
                        if not line.is_empty then
                                v.add line.value
                        end
                        v.addn
                        line = line.next
                end
        end
end
1424
1425 # A markdown line.
1426 class MDLine
1427
        # Location of `self` in the original input.
        var location: MDLocation

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        #
        # Lines containing only spaces are considered empty.
        # The constructor sets this flag to `false` when `value` holds more than spaces.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        #
        # Set by `MDBlock::add_line` and by `clear` on the next line.
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        #
        # Set by `MDBlock::add_line` and by `clear` on the previous line.
        var next_empty: Bool = false is writable
1449
1450         # Initialize a new MDLine from its string value
1451         init do
1452                 self.leading = process_leading
1453                 if leading != value.length then
1454                         self.is_empty = false
1455                         self.trailing = process_trailing
1456                 end
1457         end
1458
1459         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1460         fun clear do
1461                 value = ""
1462                 leading = 0
1463                 trailing = 0
1464                 is_empty = true
1465                 if prev != null then prev.next_empty = true
1466                 if next != null then next.prev_empty = true
1467         end
1468
        # Number of leading spaces on this line.
1470         var leading: Int = 0 is writable
1471
1472         # Compute `leading` depending on `value`.
1473         fun process_leading: Int do
1474                 var count = 0
1475                 var value = self.value
1476                 while count < value.length and value[count] == ' ' do count += 1
1477                 if leading == value.length then clear
1478                 return count
1479         end
1480
1481         # Number of trailing spaces on this line.
1482         var trailing: Int = 0 is writable
1483
1484         # Compute `trailing` depending on `value`.
1485         fun process_trailing: Int do
1486                 var count = 0
1487                 var value = self.value
1488                 while value[value.length - count - 1] == ' ' do
1489                         count += 1
1490                 end
1491                 return count
1492         end
1493
1494         # Count the amount of `ch` in this line.
1495         # Return A value > 0 if this line only consists of `ch` end spaces.
1496         fun count_chars(ch: Char): Int do
1497                 var count = 0
1498                 for c in value do
1499                         if c == ' ' then
1500                                 continue
1501                         end
1502                         if c == ch then
1503                                 count += 1
1504                                 continue
1505                         end
1506                         count = 0
1507                         break
1508                 end
1509                 return count
1510         end
1511
1512         # Count the amount of `ch` at the start of this line ignoring spaces.
1513         fun count_chars_start(ch: Char): Int do
1514                 var count = 0
1515                 for c in value do
1516                         if c == ' ' then
1517                                 continue
1518                         end
1519                         if c == ch then
1520                                 count += 1
1521                         else
1522                                 break
1523                         end
1524                 end
1525                 return count
1526         end
1527
1528         # Last XML line if any.
1529         private var xml_end_line: nullable MDLine = null
1530
        # Does `value` contain valid XML markup?
        #
        # Starting at `leading`, read a first tag and then follow the next lines
        # while keeping a stack of the open HTML block tags.
        # Return `true` once the stack is empty; `xml_end_line` is then set to the
        # line where the markup ends.
        #
        # Return `false` if no tag can be read, if the first tag is not an HTML
        # block tag, if a closing tag does not match the last open tag or if the
        # lines are exhausted before all tags are closed.
        private fun check_html: Bool do
                # stack of open HTML block tags
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                # `!` in second position: try to read an XML comment first
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                # read the first tag into `tmp`
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        # `hr` is complete on its own line
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        var line: nullable MDLine = self
                        while line != null do
                                # move to the next `<` of the current line
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # end of line reached
                                        # NOTE(review): a `/` two characters before the end of the line
                                        # presumably stands for a self-closing tag (`/>`) — confirm.
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        # continue at the beginning of the next line
                                        line = line.next
                                        pos = 0
                                else
                                        # try to read a tag starting at this `<`
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                # only HTML block tags other than `hr` are tracked
                                                if tag.is_html_block and not tag == "hr" then
                                                        if tmp[1] == '/' then
                                                                # closing tag: must match the last open tag
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                # no tag could be read here: skip this `<`
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end
1595
1596         # Read a XML comment.
1597         # Used by `check_html`.
1598         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1599                 var line: nullable MDLine = first_line
1600                 if start + 3 < line.value.length then
1601                         if line.value[2] == '-' and line.value[3] == '-' then
1602                                 var pos = start + 4
1603                                 while line != null do
1604                                         while pos < line.value.length and line.value[pos] != '-' do
1605                                                 pos += 1
1606                                         end
1607                                         if pos == line.value.length then
1608                                                 line = line.next
1609                                                 pos = 0
1610                                         else
1611                                                 if pos + 2 < line.value.length then
1612                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1613                                                                 first_line.xml_end_line = line
1614                                                                 return pos + 3
1615                                                         end
1616                                                 end
1617                                                 pos += 1
1618                                         end
1619                                 end
1620                         end
1621                 end
1622                 return -1
1623         end
1624
1625         # Extract the text of `self` without leading and trailing.
1626         fun text: String do return value.substring(leading, value.length - trailing)
1627 end
1628
# A markdown line.
#
# Each kind of line is a subclass that implements `process`.
interface Line

        # Parse the line.
        #
        # `v` is the processor whose current line and current block are handled.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1636
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the empty line: the processor moves to the next line.
        redef fun process(v) do
                var skipped = v.current_line
                v.current_line = skipped.next
        end
end
1645
# A non-specific markdown construction.
#
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines up to the end of the block and split them from
        # the current block, as a `BlockParagraph` or a `BlockNone`.
        redef fun process(v) do
                var line = v.current_line
                var after_blank = line.prev_empty
                # Look for the line that ends the block.
                while line != null and not line.is_empty do
                        var kind = v.line_kind(line)
                        var ends_block: Bool
                        if kind isa LineList then
                                # Lists end the block in lists and in extended mode.
                                ends_block = v.in_list or v.ext_mode
                        else if kind isa LineCode or kind isa LineFence then
                                # Code ends the block in extended mode only.
                                ends_block = v.ext_mode
                        else
                                # `LineHeadline` also covers `LineHeadline1` and `LineHeadline2`.
                                ends_block = kind isa LineHeadline or kind isa LineHR or
                                        kind isa LineBlockquote or kind isa LineXML
                        end
                        if ends_block then break
                        line = line.next
                end
                # Select the last line of the new block.
                var stop: MDLine
                if line == null then
                        stop = v.current_block.last_line.as(not null)
                else if line.is_empty then
                        stop = line
                else
                        stop = line.prev.as(not null)
                end
                var block = v.current_block.split(stop)
                # In a list, a block that was not preceded nor ended by an empty
                # line is not a paragraph.
                if v.in_list and not after_blank and (line == null or not line.is_empty) then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1695
# A line of markdown code.
class LineCode
        super Line

        # Group the following code lines and empty lines in a `BlockCode`.
        redef fun process(v) do
                # Stop on the first non-empty line that is not code.
                var stop = v.current_line
                while stop != null do
                        if not stop.is_empty and not v.line_kind(stop) isa LineCode then break
                        stop = stop.next
                end
                # The block ends right before `stop`, or with the current block.
                var last: MDLine
                if stop == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = stop.prev.as(not null)
                end
                var block = v.current_block.split(last)
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1718
# A line of raw XML.
class LineXML
        super Line

        # Split the lines up to `MDLine::xml_end_line` in a `BlockXML`.
        redef fun process(v) do
                var first = v.current_line
                # Lines found before the XML go in their own block.
                var before = first.prev
                if before != null then v.current_block.split(before)
                var last = first.xml_end_line.as(not null)
                var block = v.current_block.split(last)
                block.kind = new BlockXML(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1733
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Split the quoted lines in a `BlockQuote` and process its content.
        redef fun process(v) do
                # The quote ends on a non-empty and non-indented line that follows
                # an empty line and that is not a quote line itself.
                var stop = v.current_line
                while stop != null do
                        if not stop.is_empty then
                                var detached = stop.prev_empty and stop.leading == 0
                                if detached and not v.line_kind(stop) isa LineBlockquote then break
                        end
                        stop = stop.next
                end
                # Build the sub block.
                var last: MDLine
                if stop == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = stop.prev.as(not null)
                end
                var block = v.current_block.split(last)
                var quote = new BlockQuote(block)
                block.kind = quote
                block.remove_surrounding_empty_lines
                quote.remove_block_quote_prefix(block)
                v.current_line = stop
                # Parse the content of the quote.
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1763
# A markdown ruler line.
class LineHR
        super Line

        # Split the current line in its own `BlockRuler`.
        redef fun process(v) do
                var ruler = v.current_line
                # Lines found before the ruler go in their own block.
                var before = ruler.prev
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(ruler.as(not null))
                block.kind = new BlockRuler(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1777
# A markdown fence code line.
class LineFence
        super Line

        # Split the lines up to the next fence line in a `BlockFence`.
        #
        # Without another fence line, the block runs to the end of the current block.
        redef fun process(v) do
                # Look for the next fence line.
                var closing = v.current_line.next
                while closing != null and not v.line_kind(closing) isa LineFence do
                        closing = closing.next
                end
                # The processing resumes after that fence line.
                var resume: nullable MDLine = null
                if closing != null then resume = closing.next
                # Build the fence block.
                var end_line: MDLine
                if resume == null then
                        end_line = v.current_block.last_line.as(not null)
                else
                        end_line = resume.prev.as(not null)
                end
                var block = v.current_block.split(end_line)
                block.remove_surrounding_empty_lines
                # The meta is read from the first fence line before clearing it.
                var meta = block.first_line.value.meta_from_fence
                block.kind = new BlockFence(block, meta)
                block.first_line.clear
                # Clear the last line only if it is a fence line.
                var last = block.last_line
                if last != null and v.line_kind(last) isa LineFence then
                        block.last_line.clear
                end
                block.remove_surrounding_empty_lines
                v.current_line = resume
        end
end
1811
# A markdown headline.
class LineHeadline
        super Line

        # Split the current line in its own `BlockHeadline`.
        redef fun process(v) do
                var title = v.current_line
                # Lines found before the headline go in their own block.
                var before = title.prev
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1828
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Split the current line in a `BlockHeadline` of depth 1.
        #
        # The line that follows the headline is cleared.
        redef fun process(v) do
                var title = v.current_line
                # Lines found before the headline go in their own block.
                var before = title.prev
                if before != null then v.current_block.split(before)
                var following = title.next
                following.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1847
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Split the current line in a `BlockHeadline` of depth 2.
        #
        # The line that follows the headline is cleared.
        redef fun process(v) do
                var title = v.current_line
                # Lines found before the headline go in their own block.
                var before = title.prev
                if before != null then v.current_block.split(before)
                var following = title.next
                following.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1866
# A markdown list line.
#
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Split the lines of the list in a list block, then process each item.
        redef fun process(v) do
                # The list ends on a non-empty and non-indented line that follows
                # an empty line and that is not a list line itself.
                var stop = v.current_line
                while stop != null do
                        var kind = v.line_kind(stop)
                        var detached = stop.prev_empty and stop.leading == 0 and not kind isa LineList
                        if detached and not stop.is_empty then break
                        stop = stop.next
                end
                # Build the list block.
                var last: MDLine
                if stop == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = stop.prev.as(not null)
                end
                var list = v.current_block.split(last)
                var kind = block_kind(list)
                list.kind = kind
                # The flags are reset before and after the empty lines removal.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # Process each sub block of the list.
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = stop
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1912
# An ordered list line.
class LineOList
        super LineList

        # Ordered lists are built as `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two chars after the first `.` of the line.
        redef fun extract_value(line) do
                var value = line.value
                var dot = value.index_of('.')
                return value.substring_from(dot + 2)
        end
end
1923
# An unordered list line.
class LineUList
        super LineList

        # Unordered lists are built as `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two chars after the leading spaces of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1934
# A token represents a character in the markdown input.
#
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Location of `self` in the original input.
        var location: MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token using `MarkdownEmitter::decorator`.
        #
        # By default, `char` is added as is.
        fun emit(v: MarkdownEmitter) do
                var decorator = v.decorator
                decorator.add_char(v, char)
        end
end
1951
# A token without a specific meaning.
#
# It redefines nothing: the `emit` inherited from `Token` is used.
class TokenNone
        super Token
end
1956
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text found up to the matching token as an emphasis.
        #
        # Without a matching token, `char` is added as is.
        redef fun emit(v) do
                # The emphasized text is emitted in its own buffer.
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = stop
        end
end
1973
# An emphasis star token.
#
# It redefines nothing: the behaviour is inherited from `TokenEm`.
class TokenEmStar
        super TokenEm
end
1978
# An emphasis underscore token.
#
# It redefines nothing: the behaviour is inherited from `TokenEm`.
class TokenEmUnderscore
        super TokenEm
end
1983
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text found up to the matching token as a strong text.
        #
        # Without a matching token, `char` is added as is.
        redef fun emit(v) do
                # The text starts two chars after `pos` and is emitted in its own buffer.
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = stop + 1
        end
end
2000
# A strong star token.
#
# It redefines nothing: the behaviour is inherited from `TokenStrong`.
class TokenStrongStar
        super TokenStrong
end
2005
# A strong underscore token.
#
# It redefines nothing: the behaviour is inherited from `TokenStrong`.
class TokenStrongUnderscore
        super TokenStrong
end
2010
# A code token.
#
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text found up to the matching token as a span code.
        #
        # Spaces around the code are ignored and nothing is emitted for a code
        # made of spaces only.
        # Without a matching token, `char` is added as is.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var shift = next_pos
                var from = pos + shift + 1
                var to = v.processor.find_token(text, from, self)
                if to <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = to + shift
                # Ignore the spaces around the code.
                while from < to and text[from] == ' ' do from += 1
                if from >= to then return
                while text[to - 1] == ' ' do to -= 1
                v.decorator.add_span_code(v, text, from, to)
        end

        # Offset added to `pos` to find the code, and to the position of the
        # matching token to find where the emitter resumes.
        private fun next_pos: Int is abstract
end
2033
# A span code token.
class TokenCodeSingle
        super TokenCode

        # No additional offset is used by `TokenCode::emit`.
        redef fun next_pos do return 0
end
2040
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One additional char is skipped by `TokenCode::emit`.
        redef fun next_pos do return 1
end
2047
# A link or image token.
#
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image if `check_link` accepts it.
        #
        # Else, `char` is added as is.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # The construct is read from `start` in `v.current_text`.
        # On success `name`, `link` and `comment` (and `is_abbrev` for a
        # reference used alone) are set and the position of the last char
        # read for the construct is returned.
        # Returns `-1` if the construct is not valid or if no link was found.
        #
        # `out` is not used by this implementation.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                var pos
                # Skip the opening: one char for a link, two chars otherwise.
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                # Read the text of the link.
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                # Position used to resume when nothing follows the text.
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # Nothing but spaces until the end of the text:
                        # `name` is used as a reference id.
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline link: the address is given between parentheses.
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        # The address may be enclosed in `<` and `>`.
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        link = tmp.write_to_string
                        # An optional title between double quotes may follow.
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                if pos > start and md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        # The closing parenthesis is mandatory.
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference link: the id is given between brackets.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        # An empty id means that `name` is the id.
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        # For an unknown id, `link` is not set here.
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Anything else: `name` is used as a reference id,
                        # its line breaks are replaced by spaces.
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
2167
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation if the link is one and has a title, a link otherwise.
        redef fun emit_hyper(v) do
                var text = name.as(not null)
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, text, title)
                        return
                end
                v.decorator.add_link(v, link.as(not null), text, title)
        end
end
2180
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with its address, its text and its optional title.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, address, text, comment)
        end
end
2189
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the markup accepted by `check_html`.
        #
        # Else, `char` is escaped.
        redef fun emit(v) do
                var markup = new FlatBuffer
                var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add markup
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # An auto link is emitted right away with the decorator of `v`,
        # inline HTML is appended to `out`.
        # Returns the position where the construct ends in `md` or `-1`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var address = new FlatBuffer
                var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and address.is_link_prefix then
                        # read the remainder of the address
                        pos = md.read_until(address, pos, '>')
                        if pos != -1 then
                                var link = address.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2227
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity accepted by `check_entity`.
        #
        # Else, `char` is escaped.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # The entity is read from `start` in `md` up to the next `;` and is
        # appended to `out`.
        # Three forms are accepted after the first char: `#` followed by
        # decimal digits, `#x` or `#X` followed by hexadecimal digits, or a
        # name made of letters and digits.
        #
        # Returns the position of the `;` in `md`, or `-1` if the entity is not valid.
        # Note that `out` may contain a partial content when `-1` is returned.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # At least one hexadecimal digit is needed.
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check that the name is a known entity
                end
                # `read_until` stops before the `;`: add it back.
                out.add ';'
                return pos
        end
end
2281
# A markdown escape token.
class TokenEscape
        super Token

        # Move to the next char and add it as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
2291
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        # Emit the text found up to the matching token as a striked text.
        #
        # Without a matching token, `char` is added as is.
        redef fun emit(v) do
                # The text starts two chars after `pos` and is emitted in its own buffer.
                var content = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                v.current_pos = stop + 1
        end
end
2310
redef class Text

        # Skip spaces and newlines starting from `start`.
        #
        # Return the position reached if it is before the end of `self`,
        # `-1` otherwise.
        private fun skip_spaces(start: Int): Int do
                var pos = start
                while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
                        pos += 1
                end
                if pos < length then return pos
                return -1
        end

        # Read `self` from `start` until one of the `nend` chars and append
        # what was read to the `out` buffer.
        #
        # A backslash followed by another char is handled by `escape`.
        #
        # Return the position of the delimiter found, or `-1` if the end of
        # `self` is reached first.
        private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if nend.has(c) then break
                                out.add c
                        end
                        pos += 1
                end
                # `>=` so that a `start` located after the end is also reported as a failure
                if pos >= length then return -1
                return pos
        end

        # Read `self` as raw text from `start` until one of the `nend` chars
        # and append what was read to the `out` buffer.
        #
        # No escape is made.
        #
        # Return the position of the delimiter found, or `-1` if the end of
        # `self` is reached first.
        private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if nend.has(c) then break
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read `self` as XML from `from` until one of the `to` chars and
        # append what was read to the `out` buffer.
        #
        # Chars located inside a quoted string (`"..."` or `'...'`) never end
        # the reading. Inside a string, a backslash and the char that follows
        # it are copied verbatim.
        #
        # Return the position of the delimiter found, or `-1` if the end of
        # `self` is reached first.
        private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
                var pos = from
                var in_str = false
                var str_char: nullable Char = null
                while pos < length do
                        var c = self[pos]
                        if in_str then
                                if c == '\\' then
                                        # Copy the backslash, then the escaped char itself
                                        out.add c
                                        pos += 1
                                        if pos < length then
                                                out.add self[pos]
                                                pos += 1
                                        end
                                        continue
                                end
                                # Only the quote that opened the string can close it,
                                # the other kind of quote is plain content.
                                if c == str_char then in_str = false
                        else if c == '"' or c == '\'' then
                                in_str = true
                                str_char = c
                        else if to.has(c) then
                                break
                        end
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read the XML tag opened at `start` and append it to the `out` buffer.
        #
        # `start` is expected to be the position of the opening `<`.
        #
        # When `safe_mode` is set, the tag name is checked:
        #
        # * a name rejected by `is_valid_html_tag` aborts the reading,
        # * a name accepted by `is_html_unsafe` gets its `<` and `>` replaced
        #   by `&lt;` and `&gt;`.
        #
        # Return the position of the closing `>`, or `-1` if no complete tag
        # can be read. For a tag starting with `<!`, only `<!` is appended and
        # `start + 1` is returned.
        #
        # Note that `out` may have been partially filled when `-1` is returned.
        private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
                if start + 1 >= length then return -1
                var lead = self[start + 1]
                if lead == '!' then
                        out.append "<!"
                        return start + 1
                end
                var is_close_tag = (lead == '/')
                var pos = start + 1
                if is_close_tag then pos += 1
                var is_valid = true
                if safe_mode then
                        # Read the tag name apart so it can be checked before being emitted
                        var tmp = new FlatBuffer
                        pos = read_xml_until(tmp, pos, ' ', '/', '>')
                        if pos == -1 then return -1
                        var tag = tmp.write_to_string.trim.to_lower
                        if not tag.is_valid_html_tag then
                                out.append "&lt;"
                                return -1
                        end
                        if tag.is_html_unsafe then
                                is_valid = false
                                out.append "&lt;"
                        else
                                out.append "<"
                        end
                        if is_close_tag then out.add '/'
                        out.append tmp
                else
                        out.add '<'
                        if is_close_tag then out.add '/'
                        pos = read_xml_until(out, pos, ' ', '/', '>')
                end
                if pos == -1 then return -1
                # Read what follows the tag name up to `/` or `>`
                pos = read_xml_until(out, pos, '/', '>')
                if pos == -1 then return -1
                if self[pos] == '/' then
                        out.append " /"
                        pos = self.read_xml_until(out, pos + 1, '>')
                        if pos == -1 then return -1
                end
                if self[pos] == '>' then
                        if is_valid then
                                out.add '>'
                        else
                                out.append "&gt;"
                        end
                        return pos
                end
                return -1
        end

        # Read a markdown link address from `start` and append it to the `out` buffer.
        #
        # The reading stops on the `)` that balances the parentheses (one is
        # considered already open at `start`) or on a space found outside of
        # nested parentheses.
        # A backslash followed by another char is handled by `escape`.
        #
        # Return the position where the reading stopped, or `-1` if the end
        # of `self` is reached first.
        private fun read_md_link(out: FlatBuffer, start: Int): Int do
                var pos = start
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if c == '(' then
                                        counter += 1
                                else if c == ' ' then
                                        if counter == 1 then break
                                else if c == ')' then
                                        counter -= 1
                                        if counter == 0 then break
                                end
                                out.add c
                        end
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read a markdown link text from `start` and append it to the `out` buffer.
        #
        # The reading stops on the `]` that balances the brackets (one is
        # considered already open at `start`). Nested brackets are copied to `out`.
        #
        # Return the position of the closing `]`, or `-1` if the end of
        # `self` is reached first.
        private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
                var pos = start
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '[' then
                                counter += 1
                        else if c == ']' then
                                counter -= 1
                                if counter == 0 then break
                        end
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Extract the XML tag name from a XML tag.
        #
        # The first char of `self` and an optional `/` that follows it are skipped.
        # The name is made of the following letters and digits, the last char
        # of `self` being never read.
        # The name is returned in lower case.
        private fun xml_tag: String do
                var tpl = new FlatBuffer
                var pos = 1
                if pos < length and self[1] == '/' then pos += 1
                while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
                        tpl.add self[pos]
                        pos += 1
                end
                return tpl.write_to_string.to_lower
        end

        # Is `self` a well formed HTML tag name?
        #
        # A name must start with a letter and contain only letters and digits,
        # so `h1` is accepted while an empty name or `a-b` is rejected.
        private fun is_valid_html_tag: Bool do
                if is_empty or not self[0].is_letter then return false
                for c in self do
                        if not (c.is_letter or c.is_digit) then return false
                end
                return true
        end

        # Handle a backslash located at `pos` and followed by the char `c`.
        #
        # If `c` is one of the escapable markdown chars, `c` is appended to
        # `out` and `pos + 1` is returned.
        # Otherwise a backslash is appended to `out` and `pos` is returned.
        private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
                if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
                   c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
                   c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
                   c == '`' or c == '~' or c == '^' then
                        out.add c
                        return pos + 1
                end
                out.add '\\'
                return pos
        end

        # Extract string found at end of fence opening.
        #
        # Return the trimmed text that starts at the first char that is not a
        # space, a backtick or a tilde, or `null` if there is no such char.
        private fun meta_from_fence: nullable Text do
                for i in [0..chars.length[ do
                        var c = chars[i]
                        if c != ' ' and c != '`' and c != '~' then
                                return substring_from(i).trim
                        end
                end
                return null
        end

        # Is `self` an unsafe HTML element?
        private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

        # Is `self` a HTML block element?
        private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

        # Is `self` a link prefix?
        private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

        # Names of the HTML elements considered unsafe (see `is_html_unsafe`).
        private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

        # Names of the HTML elements considered as blocks (see `is_html_block`).
        private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

        # Prefixes recognized as link prefixes (see `is_link_prefix`).
        private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2587
redef class String

        # Parse `self` as markdown and return the HTML representation.
        #
        # A new `MarkdownProcessor` is used for each call.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end