lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then outputed by an `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for ouput.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         # ~~~md
  45         # This is a paragraph
  46         # * and this is not a list
  47         # ~~~
  48         #
  49         #   Will produce:
  50         #
  51         # ~~~html
  52         # <p>This is a paragraph
  53         # * and this is not a list</p>
  54         # ~~~
  55         #
  56         #   When using extended mode this changes to:
  57         #
  58         # ~~~html
  59         # <p>This is a paragraph</p>
  60         # <ul>
  61         # <li>and this is not a list</li>
  62         # </ul>
  63         # ~~~
  64         #
  65         # * Fences code blocks
  66         #
  67         #   If you don't want to indent your all your code with 4 spaces,
  68         #   you can wrap your code in ``` ``` ``` or `~~~`.
  69         #
  70         #   Here's an example:
  71         #
  72         # ~~~md
  73         # fun test do
  74         #    print "Hello World!"
  75         # end
  76         # ~~~
  77         #
  78         # * Code blocks meta
  79         #
  80         #   If you want to use syntax highlighting tools, most of them need to know what kind
  81         #   of language they are highlighting.
  82         #   You can add an optional language identifier after the fence declaration to output
  83         #   it in the HTML render.
  84         #
  85         # ```nit
  86         # import markdown
  87         #
  88         # print "# Hello World!".md_to_html
  89         # ```
  90         #
  91         #   Becomes
  92         #
  93         # ~~~html
  94         # <pre class="nit"><code>import markdown
  95         #
  96         # print "Hello World!".md_to_html
  97         # </code></pre>
  98         # ~~~
  99         #
 100         # * Underscores (Emphasis)
 101         #
 102         #   Underscores in the middle of a word like:
 103         #
 104         # ~~~md
 105         # Con_cat_this
 106         # ~~~
 107         #
 108         #   normally produces this:
 109         #
 110         # ~~~html
 111         # <p>Con<em>cat</em>this</p>
 112         # ~~~
 113         #
 114         #   With extended mode they don't result in emphasis.
 115         #
 116         # ~~~html
 117         # <p>Con_cat_this</p>
 118         # ~~~
 119         #
 120         # * Strikethrough
 121         #
 122         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 123         #   strikethrought span is marked with `~~`.
 124         #
 125         # ~~~md
 126         # ~~Mistaken text.~~
 127         # ~~~
 128         #
 129         #   becomes
 130         #
 131         # ~~~html
 132         # <del>Mistaken text.</del>
 133         # ~~~
 134         var ext_mode = true
 135
 136         init do self.emitter = new MarkdownEmitter(self)
 137
 138         # Process the mardown `input` string and return the processed output.
 139         fun process(input: String): Writable do
 140                 # init processor
 141                 link_refs.clear
 142                 last_link_ref = null
 143                 current_line = null
 144                 current_block = null
 145                 # parse markdown
 146                 var parent = read_lines(input)
 147                 parent.remove_surrounding_empty_lines
 148                 recurse(parent, false)
 149                 # output processed text
 150                 return emitter.emit(parent.kind)
 151         end
 152
 153         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 154         private fun read_lines(input: String): MDBlock do
 155                 var block = new MDBlock(new MDLocation(1, 1, 1, 1))
 156                 var value = new FlatBuffer
 157                 var i = 0
 158
 159                 var line_pos = 0
 160                 var col_pos = 0
 161
 162                 while i < input.length do
 163                         value.clear
 164                         var pos = 0
 165                         var eol = false
 166                         while not eol and i < input.length do
 167                                 col_pos += 1
 168                                 var c = input[i]
 169                                 if c == '\n' then
 170                                         eol = true
 171                                 else if c == '\r' then
 172                                 else if c == '\t' then
 173                                         var np = pos + (4 - (pos & 3))
 174                                         while pos < np do
 175                                                 value.add ' '
 176                                                 pos += 1
 177                                         end
 178                                 else
 179                                         pos += 1
 180                                         value.add c
 181                                 end
 182                                 i += 1
 183                         end
 184                         line_pos += 1
 185
 186                         var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
 187                         var line = new MDLine(loc, value.write_to_string)
 188                         var is_link_ref = check_link_ref(line)
 189                         # Skip link refs
 190                         if not is_link_ref then block.add_line line
 191                         col_pos = 0
 192                 end
 193                 return block
 194         end
 195
 196         # Check if line is a block link definition.
 197         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 198         private fun check_link_ref(line: MDLine): Bool do
 199                 var md = line.value
 200                 var is_link_ref = false
 201                 var id = new FlatBuffer
 202                 var link = new FlatBuffer
 203                 var comment = new FlatBuffer
 204                 var pos = -1
 205                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 206                         pos = line.leading + 1
 207                         pos = md.read_until(id, pos, ']')
 208                         if not id.is_empty and pos + 2 < line.value.length then
 209                                 if line.value[pos + 1] == ':' then
 210                                         pos += 2
 211                                         pos = md.skip_spaces(pos)
 212                                         if line.value[pos] == '<' then
 213                                                 pos += 1
 214                                                 pos = md.read_until(link, pos, '>')
 215                                                 pos += 1
 216                                         else
 217                                                 pos = md.read_until(link, pos, ' ', '\n')
 218                                         end
 219                                         if not link.is_empty then
 220                                                 pos = md.skip_spaces(pos)
 221                                                 if pos > 0 and pos < line.value.length then
 222                                                         var c = line.value[pos]
 223                                                         if c == '\"' or c == '\'' or c == '(' then
 224                                                                 pos += 1
 225                                                                 if c == '(' then
 226                                                                         pos = md.read_until(comment, pos, ')')
 227                                                                 else
 228                                                                         pos = md.read_until(comment, pos, c)
 229                                                                 end
 230                                                                 if pos > 0 then is_link_ref = true
 231                                                         end
 232                                                 else
 233                                                         is_link_ref = true
 234                                                 end
 235                                         end
 236                                 end
 237                         end
 238                 end
 239                 if is_link_ref and not id.is_empty and not link.is_empty then
 240                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 241                         add_link_ref(id.write_to_string, lr)
 242                         if comment.is_empty then last_link_ref = lr
 243                         return true
 244                 else
 245                         comment = new FlatBuffer
 246                         if not line.is_empty and last_link_ref != null then
 247                                 pos = line.leading
 248                                 var c = line.value[pos]
 249                                 if c == '\"' or c == '\'' or c ==  '(' then
 250                                         pos += 1
 251                                         if c == '(' then
 252                                                 pos = md.read_until(comment, pos, ')')
 253                                         else
 254                                                 pos = md.read_until(comment, pos, c)
 255                                         end
 256                                 end
 257                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 258                         end
 259                         if comment.is_empty then return false
 260                         return true
 261                 end
 262         end
 263
 264         # Known link refs
 265         # This list will be needed during output to expand links.
 266         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 267
 268         # Last encountered link ref (for multiline definitions)
 269         #
 270         # Markdown allows link refs to be defined over two lines:
 271         #
 272         # ~~~md
 273         # [id]: http://example.com/longish/path/to/resource/here
 274         #       "Optional Title Here"
 275         # ~~~
 276         #
 277         private var last_link_ref: nullable LinkRef = null
 278
 279         # Add a link ref to the list
 280         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 281
 282         # Recursively split a `block`.
 283         #
 284         # The block is splitted according to the type of lines it contains.
 285         # Some blocks can be splited again recursively like lists.
 286         # The `in_list` mode is used to recurse on list and build
 287         # nested paragraphs or code blocks.
 288         fun recurse(root: MDBlock, in_list: Bool) do
 289                 var old_mode = self.in_list
 290                 var old_root = self.current_block
 291                 self.in_list = in_list
 292
 293                 var line = root.first_line
 294                 while line != null and line.is_empty do
 295                         line = line.next
 296                         if line == null then return
 297                 end
 298
 299                 current_line = line
 300                 current_block = root
 301                 while current_line != null do
 302                         line_kind(current_line.as(not null)).process(self)
 303                 end
 304                 self.in_list = old_mode
 305                 self.current_block = old_root
 306         end
 307
 308         # Currently processed line.
 309         # Used when visiting blocks with `recurse`.
 310         var current_line: nullable MDLine = null is writable
 311
 312         # Currently processed block.
 313         # Used when visiting blocks with `recurse`.
 314         var current_block: nullable MDBlock = null is writable
 315
 316         # Is the current recursion in list mode?
 317         # Used when visiting blocks with `recurse`
 318         private var in_list = false
 319
 320         # The type of line.
 321         # see: `md_line_*`
 322         fun line_kind(md: MDLine): Line do
 323                 var value = md.value
 324                 var leading = md.leading
 325                 var trailing = md.trailing
 326                 if md.is_empty then return new LineEmpty
 327                 if md.leading > 3 then return new LineCode
 328                 if value[leading] == '#' then return new LineHeadline
 329                 if value[leading] == '>' then return new LineBlockquote
 330
 331                 if ext_mode then
 332                         if value.length - leading - trailing > 2 then
 333                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 334                                         return new LineFence
 335                                 end
 336                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 337                                         return new LineFence
 338                                 end
 339                         end
 340                 end
 341
 342                 if value.length - leading - trailing > 2 and
 343                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 344                    if md.count_chars(value[leading]) >= 3 then
 345                                 return new LineHR
 346                    end
 347                 end
 348
 349                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 350                         var c = value[leading]
 351                         if c == '*' or c == '-' or c == '+' then return new LineUList
 352                 end
 353
 354                 if value.length - leading >= 3 and value[leading].is_digit then
 355                         var i = leading + 1
 356                         while i < value.length and value[i].is_digit do i += 1
 357                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 358                                 return new LineOList
 359                         end
 360                 end
 361
 362                 if value[leading] == '<' and md.check_html then return new LineXML
 363
 364                 var next = md.next
 365                 if next != null and not next.is_empty then
 366                         if next.count_chars('=') > 0 then
 367                                 return new LineHeadline1
 368                         end
 369                         if next.count_chars('-') > 0 then
 370                                 return new LineHeadline2
 371                         end
 372                 end
 373                 return new LineOther
 374         end
 375
 376         # Get the token kind at `pos`.
 377         fun token_at(text: Text, pos: Int): Token do
 378                 var c0: Char
 379                 var c1: Char
 380                 var c2: Char
 381
 382                 if pos > 0 then
 383                         c0 = text[pos - 1]
 384                 else
 385                         c0 = ' '
 386                 end
 387                 var c = text[pos]
 388
 389                 if pos + 1 < text.length then
 390                         c1 = text[pos + 1]
 391                 else
 392                         c1 = ' '
 393                 end
 394                 if pos + 2 < text.length then
 395                         c2 = text[pos + 2]
 396                 else
 397                         c2 = ' '
 398                 end
 399
 400                 var loc = new MDLocation(
 401                         current_loc.line_start,
 402                         current_loc.column_start + pos,
 403                         current_loc.line_start,
 404                         current_loc.column_start + pos)
 405
 406                 if c == '*' then
 407                         if c1 == '*' then
 408                                 if c0 != ' ' or c2 != ' ' then
 409                                         return new TokenStrongStar(loc, pos, c)
 410                                 else
 411                                         return new TokenEmStar(loc, pos, c)
 412                                 end
 413                         end
 414                         if c0 != ' ' or c1 != ' ' then
 415                                 return new TokenEmStar(loc, pos, c)
 416                         else
 417                                 return new TokenNone(loc, pos, c)
 418                         end
 419                 else if c == '_' then
 420                         if c1 == '_' then
 421                                 if c0 != ' ' or c2 != ' ' then
 422                                         return new TokenStrongUnderscore(loc, pos, c)
 423                                 else
 424                                         return new TokenEmUnderscore(loc, pos, c)
 425                                 end
 426                         end
 427                         if ext_mode then
 428                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 429                                    (c1.is_letter or c1.is_digit) then
 430                                         return new TokenNone(loc, pos, c)
 431                                 else
 432                                         return new TokenEmUnderscore(loc, pos, c)
 433                                 end
 434                         end
 435                         if c0 != ' ' or c1 != ' ' then
 436                                 return new TokenEmUnderscore(loc, pos, c)
 437                         else
 438                                 return new TokenNone(loc, pos, c)
 439                         end
 440                 else if c == '!' then
 441                         if c1 == '[' then return new TokenImage(loc, pos, c)
 442                         return new TokenNone(loc, pos, c)
 443                 else if c == '[' then
 444                         return new TokenLink(loc, pos, c)
 445                 else if c == ']' then
 446                         return new TokenNone(loc, pos, c)
 447                 else if c == '`' then
 448                         if c1 == '`' then
 449                                 return new TokenCodeDouble(loc, pos, c)
 450                         else
 451                                 return new TokenCodeSingle(loc, pos, c)
 452                         end
 453                 else if c == '\\' then
 454                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 455                                 return new TokenEscape(loc, pos, c)
 456                         else
 457                                 return new TokenNone(loc, pos, c)
 458                         end
 459                 else if c == '<' then
 460                         return new TokenHTML(loc, pos, c)
 461                 else if c == '&' then
 462                         return new TokenEntity(loc, pos, c)
 463                 else
 464                         if ext_mode then
 465                                 if c == '~' and c1 == '~' then
 466                                         return new TokenStrike(loc, pos, c)
 467                                 end
 468                         end
 469                         return new TokenNone(loc, pos, c)
 470                 end
 471         end
 472
 473         # Find the position of a `token` in `self`.
 474         fun find_token(text: Text, start: Int, token: Token): Int do
 475                 var pos = start
 476                 while pos < text.length do
 477                         if token_at(text, pos).is_same_type(token) then
 478                                 return pos
 479                         end
 480                         pos += 1
 481                 end
 482                 return -1
 483         end
 484
 485         # Location used for next parsed token.
 486         #
 487         # This location can be changed by the emitter to adjust with `\n` found
 488         # in the input.
 489         private fun current_loc: MDLocation do return emitter.current_loc
 490 end
 491
 492 # Emit output corresponding to blocks content.
 493 #
 494 # Blocks are created by a previous pass in `MarkdownProcessor`.
 495 # The emitter use a `Decorator` to select the output format.
 496 class MarkdownEmitter
 497
 498         # Kind of processor used for parsing.
 499         type PROCESSOR: MarkdownProcessor
 500
 501         # Processor containing link refs.
 502         var processor: PROCESSOR
 503
 504         # Kind of decorator used for decoration.
 505         type DECORATOR: Decorator
 506
 507         # Decorator used for output.
 508         # Default is `HTMLDecorator`
 509         var decorator: DECORATOR is writable, lazy do
 510                 return new HTMLDecorator
 511         end
 512
 513         # Create a new `MarkdownEmitter` using a custom `decorator`.
 514         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
 515                 init processor
 516                 self.decorator = decorator
 517         end
 518
 519         # Output `block` using `decorator` in the current buffer.
 520         fun emit(block: Block): Text do
 521                 var buffer = push_buffer
 522                 block.emit(self)
 523                 pop_buffer
 524                 return buffer
 525         end
 526
 527         # Output the content of `block`.
 528         fun emit_in(block: Block) do block.emit_in(self)
 529
 530         # Transform and emit mardown text
 531         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 532
 533         # Transform and emit mardown text starting at `start` and
 534         # until a token with the same type as `token` is found.
 535         # Go until the end of `text` if `token` is null.
 536         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 537                 var old_text = current_text
 538                 var old_pos = current_pos
 539                 current_text = text
 540                 current_pos = start
 541                 while current_pos < text.length do
 542                         if text[current_pos] == '\n' then
 543                                 current_loc.line_start += 1
 544                                 current_loc.column_start = -current_pos
 545                         end
 546                         var mt = processor.token_at(text, current_pos)
 547                         if (token != null and not token isa TokenNone) and
 548                         (mt.is_same_type(token) or
 549                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 550                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 551                                 return current_pos
 552                         end
 553                         mt.emit(self)
 554                         current_pos += 1
 555                 end
 556                 current_text = old_text
 557                 current_pos = old_pos
 558                 return -1
 559         end
 560
 561         # Currently processed position in `current_text`.
 562         # Used when visiting inline production with `emit_text_until`.
 563         private var current_pos: Int = -1
 564
 565         # Currently processed text.
 566         # Used when visiting inline production with `emit_text_until`.
 567         private var current_text: nullable Text = null
 568
 569         # Stacked buffers.
 570         private var buffer_stack = new List[FlatBuffer]
 571
 572         # Push a new buffer on the stack.
 573         private fun push_buffer: FlatBuffer do
 574                 var buffer = new FlatBuffer
 575                 buffer_stack.add buffer
 576                 return buffer
 577         end
 578
 579         # Pop the last buffer.
 580         private fun pop_buffer do buffer_stack.pop
 581
 582         # Current output buffer.
 583         private fun current_buffer: FlatBuffer do
 584                 assert not buffer_stack.is_empty
 585                 return buffer_stack.last
 586         end
 587
 588         # Stacked locations.
 589         private var loc_stack = new List[MDLocation]
 590
 591         # Push a new MDLocation on the stack.
 592         private fun push_loc(location: MDLocation) do loc_stack.add location
 593
 594         # Pop the last buffer.
 595         private fun pop_loc: MDLocation do return loc_stack.pop
 596
 597         # Current output buffer.
 598         private fun current_loc: MDLocation do
 599                 assert not loc_stack.is_empty
 600                 return loc_stack.last
 601         end
 602
 603         # Append `e` to current buffer.
 604         fun add(e: Writable) do
 605                 if e isa Text then
 606                         current_buffer.append e
 607                 else
 608                         current_buffer.append e.write_to_string
 609                 end
 610         end
 611
 612         # Append `c` to current buffer.
 613         fun addc(c: Char) do add c.to_s
 614
 615         # Append a "\n" line break.
 616         fun addn do add "\n"
 617 end
 618
 619 # A Link Reference.
 620 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 621 #
 622 # ~~~raw
 623 # [1]: http://example.com/ "Optional title"
 624 # ~~~
 625 class LinkRef
 626
 627         # Link href
 628         var link: String
 629
 630         # Optional link title
 631         var title: nullable String = null
 632
 633         # Is the link an abreviation?
 634         var is_abbrev = false
 635
 636         # Create a link with a title.
 637         init with_title(link: String, title: nullable String) do
 638                 init(link)
 639                 self.title = title
 640         end
 641 end
 642
 643 # A `Decorator` is used to emit mardown into a specific format.
 644 # Default decorator used is `HTMLDecorator`.
 645 interface Decorator
 646
 647         # Kind of emitter used for decoration.
 648         type EMITTER: MarkdownEmitter
 649
 650         # Render a single plain char.
 651         #
 652         # Redefine this method to add special escaping for plain text.
 653         fun add_char(v: EMITTER, c: Char) do v.addc c
 654
 655         # Render a ruler block.
 656         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
 657
 658         # Render a headline block with corresponding level.
 659         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
 660
 661         # Render a paragraph block.
 662         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
 663
 664         # Render a code or fence block.
 665         fun add_code(v: EMITTER, block: BlockCode) is abstract
 666
 667         # Render a blockquote.
 668         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
 669
 670         # Render an unordered list.
 671         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
 672
 673         # Render an ordered list.
 674         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
 675
 676         # Render a list item.
 677         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
 678
 679         # Render an emphasis text.
 680         fun add_em(v: EMITTER, text: Text) is abstract
 681
 682         # Render a strong text.
 683         fun add_strong(v: EMITTER, text: Text) is abstract
 684
 685         # Render a strike text.
 686         #
 687         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 688         fun add_strike(v: EMITTER, text: Text) is abstract
 689
 690         # Render a link.
 691         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 692
 693         # Render an image.
 694         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 695
 696         # Render an abbreviation.
 697         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
 698
 699         # Render a code span reading from a buffer.
 700         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 701
 702         # Render a text and escape it.
 703         fun append_value(v: EMITTER, value: Text) is abstract
 704
 705         # Render code text from buffer and escape it.
 706         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 707
 708         # Render a character escape.
 709         fun escape_char(v: EMITTER, char: Char) is abstract
 710
 711         # Render a line break
 712         fun add_line_break(v: EMITTER) is abstract
 713
 714         # Generate a new html valid id from a `String`.
 715         fun strip_id(txt: String): String is abstract
 716
 717         # Found headlines during the processing labeled by their ids.
 718         fun headlines: ArrayMap[String, HeadLine] is abstract
 719 end
 720
 721 # Class representing a markdown headline.
 722 class HeadLine
 723         # Unique identifier of this headline.
 724         var id: String
 725
 726         # Text of the headline.
 727         var title: String
 728
 729         # Level of this headline.
 730         #
 731         # According toe the markdown specification, level must be in `[1..6]`.
 732         var level: Int
 733 end
 734
 735 # `Decorator` that outputs HTML.
 736 class HTMLDecorator
 737         super Decorator
 738
 739         redef var headlines = new ArrayMap[String, HeadLine]
 740
 741         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 742
 743         redef fun add_headline(v, block) do
 744                 # save headline
 745                 var txt = block.block.first_line.value
 746                 var id = strip_id(txt)
 747                 var lvl = block.depth
 748                 headlines[id] = new HeadLine(id, txt, lvl)
 749                 # output it
 750                 v.add "<h{lvl} id=\"{id}\">"
 751                 v.emit_in block
 752                 v.add "</h{lvl}>\n"
 753         end
 754
 755         redef fun add_paragraph(v, block) do
 756                 v.add "<p>"
 757                 v.emit_in block
 758                 v.add "</p>\n"
 759         end
 760
 761         redef fun add_code(v, block) do
 762                 var meta = block.meta
 763                 if meta != null then
 764                         v.add "<pre class=\""
 765                         append_value(v, meta)
 766                         v.add "\"><code>"
 767                 else
 768                         v.add "<pre><code>"
 769                 end
 770                 v.emit_in block
 771                 v.add "</code></pre>\n"
 772         end
 773
 774         redef fun add_blockquote(v, block) do
 775                 v.add "<blockquote>\n"
 776                 v.emit_in block
 777                 v.add "</blockquote>\n"
 778         end
 779
 780         redef fun add_unorderedlist(v, block) do
 781                 v.add "<ul>\n"
 782                 v.emit_in block
 783                 v.add "</ul>\n"
 784         end
 785
 786         redef fun add_orderedlist(v, block) do
 787                 v.add "<ol>\n"
 788                 v.emit_in block
 789                 v.add "</ol>\n"
 790         end
 791
 792         redef fun add_listitem(v, block) do
 793                 v.add "<li>"
 794                 v.emit_in block
 795                 v.add "</li>\n"
 796         end
 797
 798         redef fun add_em(v, text) do
 799                 v.add "<em>"
 800                 v.add text
 801                 v.add "</em>"
 802         end
 803
 804         redef fun add_strong(v, text) do
 805                 v.add "<strong>"
 806                 v.add text
 807                 v.add "</strong>"
 808         end
 809
 810         redef fun add_strike(v, text) do
 811                 v.add "<del>"
 812                 v.add text
 813                 v.add "</del>"
 814         end
 815
 816         redef fun add_image(v, link, name, comment) do
 817                 v.add "<img src=\""
 818                 append_value(v, link)
 819                 v.add "\" alt=\""
 820                 append_value(v, name)
 821                 v.add "\""
 822                 if comment != null and not comment.is_empty then
 823                         v.add " title=\""
 824                         append_value(v, comment)
 825                         v.add "\""
 826                 end
 827                 v.add "/>"
 828         end
 829
 830         redef fun add_link(v, link, name, comment) do
 831                 v.add "<a href=\""
 832                 append_value(v, link)
 833                 v.add "\""
 834                 if comment != null and not comment.is_empty then
 835                         v.add " title=\""
 836                         append_value(v, comment)
 837                         v.add "\""
 838                 end
 839                 v.add ">"
 840                 v.emit_text(name)
 841                 v.add "</a>"
 842         end
 843
 844         redef fun add_abbr(v, name, comment) do
 845                 v.add "<abbr title=\""
 846                 append_value(v, comment)
 847                 v.add "\">"
 848                 v.emit_text(name)
 849                 v.add "</abbr>"
 850         end
 851
 852         redef fun add_span_code(v, text, from, to) do
 853                 v.add "<code>"
 854                 append_code(v, text, from, to)
 855                 v.add "</code>"
 856         end
 857
 858         redef fun add_line_break(v) do
 859                 v.add "<br/>"
 860         end
 861
 862         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 863
 864         redef fun escape_char(v, c) do
 865                 if c == '&' then
 866                         v.add "&amp;"
 867                 else if c == '<' then
 868                         v.add "&lt;"
 869                 else if c == '>' then
 870                         v.add "&gt;"
 871                 else if c == '"' then
 872                         v.add "&quot;"
 873                 else if c == '\'' then
 874                         v.add "&apos;"
 875                 else
 876                         v.addc c
 877                 end
 878         end
 879
 880         redef fun append_code(v, buffer, from, to) do
 881                 for i in [from..to[ do
 882                         var c = buffer[i]
 883                         if c == '&' then
 884                                 v.add "&amp;"
 885                         else if c == '<' then
 886                                 v.add "&lt;"
 887                         else if c == '>' then
 888                                 v.add "&gt;"
 889                         else
 890                                 v.addc c
 891                         end
 892                 end
 893         end
 894
 895         redef fun strip_id(txt) do
 896                 # strip id
 897                 var b = new FlatBuffer
 898                 for c in txt do
 899                         if c == ' ' then
 900                                 b.add '_'
 901                         else
 902                                 if not c.is_letter and
 903                                    not c.is_digit and
 904                                    not allowed_id_chars.has(c) then continue
 905                                 b.add c
 906                         end
 907                 end
 908                 var res = b.to_s
 909                 var key = res
 910                 # check for multiple id definitions
 911                 if headlines.has_key(key) then
 912                         var i = 1
 913                         key = "{res}_{i}"
 914                         while headlines.has_key(key) do
 915                                 i += 1
 916                                 key = "{res}_{i}"
 917                         end
 918                 end
 919                 return key
 920         end
 921
 922         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 923 end
 924
 925 # Location in a Markdown input.
 926 class MDLocation
 927
 928         # Starting line number (starting from 1).
 929         var line_start: Int
 930
 931         # Starting column number (starting from 1).
 932         var column_start: Int
 933
 934         # Stopping line number (starting from 1).
 935         var line_end: Int
 936
 937         # Stopping column number (starting from 1).
 938         var column_end: Int
 939
 940         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 941
 942         # Return a copy of `self`.
 943         fun copy: MDLocation do
 944                 return new MDLocation(line_start, column_start, line_end, column_end)
 945         end
 946 end
 947
 948 # A block of markdown lines.
 949 # A `MDBlock` can contains lines and/or sub-blocks.
 950 class MDBlock
 951
 952         # Position of `self` in the input.
 953         var location: MDLocation
 954
 955         # Kind of block.
 956         # See `Block`.
 957         var kind: Block = new BlockNone(self) is writable
 958
 959         # First line if any.
 960         var first_line: nullable MDLine = null is writable
 961
 962         # Last line if any.
 963         var last_line: nullable MDLine = null is writable
 964
 965         # First sub-block if any.
 966         var first_block: nullable MDBlock = null is writable
 967
 968         # Last sub-block if any.
 969         var last_block: nullable MDBlock = null is writable
 970
 971         # Previous block if any.
 972         var prev: nullable MDBlock = null is writable
 973
 974         # Next block if any.
 975         var next: nullable MDBlock = null is writable
 976
 977         # Does this block contain subblocks?
 978         fun has_blocks: Bool do return first_block != null
 979
 980         # Count sub-blocks.
 981         fun count_blocks: Int do
 982                 var count = 0
 983                 var block = first_block
 984                 while block != null do
 985                         count += 1
 986                         block = block.next
 987                 end
 988                 return count
 989         end
 990
 991         # Does this block contain lines?
 992         fun has_lines: Bool do return first_line != null
 993
 994         # Count block lines.
 995         fun count_lines: Int do
 996                 var count = 0
 997                 var line = first_line
 998                 while line != null do
 999                         count += 1
1000                         line = line.next
1001                 end
1002                 return count
1003         end
1004
1005         # Split `self` creating a new sub-block having `line` has `last_line`.
1006         fun split(line: MDLine): MDBlock do
1007                 # location for new block
1008                 var new_loc = new MDLocation(
1009                         first_line.location.line_start,
1010                         first_line.location.column_start,
1011                         line.location.line_end,
1012                         line.location.column_end)
1013                 # create block
1014                 var block = new MDBlock(new_loc)
1015                 block.first_line = first_line
1016                 block.last_line = line
1017                 first_line = line.next
1018                 line.next = null
1019                 if first_line == null then
1020                         last_line = null
1021                 else
1022                         first_line.prev = null
1023                         # update current block loc
1024                         location.line_start = first_line.location.line_start
1025                         location.column_start = first_line.location.column_start
1026                 end
1027                 if first_block == null then
1028                         first_block = block
1029                         last_block = block
1030                 else
1031                         last_block.next = block
1032                         last_block = block
1033                 end
1034                 return block
1035         end
1036
1037         # Add a `line` to this block.
1038         fun add_line(line: MDLine) do
1039                 if last_line == null then
1040                         first_line = line
1041                         last_line = line
1042                 else
1043                         last_line.next_empty = line.is_empty
1044                         line.prev_empty = last_line.is_empty
1045                         line.prev = last_line
1046                         last_line.next = line
1047                         last_line = line
1048                 end
1049         end
1050
1051         # Remove `line` from this block.
1052         fun remove_line(line: MDLine) do
1053                 if line.prev == null then
1054                         first_line = line.next
1055                 else
1056                         line.prev.next = line.next
1057                 end
1058                 if line.next == null then
1059                         last_line = line.prev
1060                 else
1061                         line.next.prev = line.prev
1062                 end
1063                 line.prev = null
1064                 line.next = null
1065         end
1066
1067         # Remove leading empty lines.
1068         fun remove_leading_empty_lines: Bool do
1069                 var was_empty = false
1070                 var line = first_line
1071                 while line != null and line.is_empty do
1072                         remove_line line
1073                         line = first_line
1074                         was_empty = true
1075                 end
1076                 return was_empty
1077         end
1078
1079         # Remove trailing empty lines.
1080         fun remove_trailing_empty_lines: Bool do
1081                 var was_empty = false
1082                 var line = last_line
1083                 while line != null and line.is_empty do
1084                         remove_line line
1085                         line = last_line
1086                         was_empty = true
1087                 end
1088                 return was_empty
1089         end
1090
1091         # Remove leading and trailing empty lines.
1092         fun remove_surrounding_empty_lines: Bool do
1093                 var was_empty = false
1094                 if remove_leading_empty_lines then was_empty = true
1095                 if remove_trailing_empty_lines then was_empty = true
1096                 return was_empty
1097         end
1098
1099         # Remove list markers and up to 4 leading spaces.
1100         # Used to clean nested lists.
1101         fun remove_list_indent(v: MarkdownProcessor) do
1102                 var line = first_line
1103                 while line != null do
1104                         if not line.is_empty then
1105                                 var kind = v.line_kind(line)
1106                                 if kind isa LineList then
1107                                         line.value = kind.extract_value(line)
1108                                 else
1109                                         line.value = line.value.substring_from(line.leading.min(4))
1110                                 end
1111                                 line.leading = line.process_leading
1112                         end
1113                         line = line.next
1114                 end
1115         end
1116
1117         # Collect block line text.
1118         fun text: String do
1119                 var text = new FlatBuffer
1120                 var line = first_line
1121                 while line != null do
1122                         if not line.is_empty then
1123                                 text.append line.text
1124                         end
1125                         text.append "\n"
1126                         line = line.next
1127                 end
1128                 return text.write_to_string
1129         end
1130 end
1131
1132 # Representation of a markdown block in the AST.
1133 # Each `Block` is linked to a `MDBlock` that contains mardown code.
1134 abstract class Block
1135
1136         # The markdown block `self` is related to.
1137         var block: MDBlock
1138
1139         # Output `self` using `v.decorator`.
1140         fun emit(v: MarkdownEmitter) do v.emit_in(self)
1141
1142         # Emit the containts of `self`, lines or blocks.
1143         fun emit_in(v: MarkdownEmitter) do
1144                 block.remove_surrounding_empty_lines
1145                 if block.has_lines then
1146                         emit_lines(v)
1147                 else
1148                         emit_blocks(v)
1149                 end
1150         end
1151
1152         # Emit lines contained in `block`.
1153         fun emit_lines(v: MarkdownEmitter) do
1154                 var tpl = v.push_buffer
1155                 var line = block.first_line
1156                 while line != null do
1157                         if not line.is_empty then
1158                                 v.add line.value.substring(line.leading, line.value.length - line.trailing)
1159                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
1160                         end
1161                         if line.next != null then
1162                                 v.addn
1163                         end
1164                         line = line.next
1165                 end
1166                 v.pop_buffer
1167                 v.emit_text(tpl)
1168         end
1169
1170         # Emit sub-blocks contained in `block`.
1171         fun emit_blocks(v: MarkdownEmitter) do
1172                 var block = self.block.first_block
1173                 while block != null do
1174                         v.push_loc(block.location)
1175                         block.kind.emit(v)
1176                         v.pop_loc
1177                         block = block.next
1178                 end
1179         end
1180
1181         # The raw content of the block as a multi-line string.
1182         fun raw_content: String do
1183                 var infence = self isa BlockFence
1184                 var text = new FlatBuffer
1185                 var line = self.block.first_line
1186                 while line != null do
1187                         if not line.is_empty then
1188                                 var str = line.value
1189                                 if not infence and str.has_prefix("    ") then
1190                                         text.append str.substring(4, str.length - line.trailing)
1191                                 else
1192                                         text.append str
1193                                 end
1194                         end
1195                         text.append "\n"
1196                         line = line.next
1197                 end
1198                 return text.write_to_string
1199         end
1200 end
1201
1202 # A block without any markdown specificities.
1203 #
1204 # Actually use the same implementation than `BlockCode`,
1205 # this class is only used for typing purposes.
1206 class BlockNone
1207         super Block
1208 end
1209
1210 # A markdown blockquote.
1211 class BlockQuote
1212         super Block
1213
1214         redef fun emit(v) do v.decorator.add_blockquote(v, self)
1215
1216         # Remove blockquote markers.
1217         private fun remove_block_quote_prefix(block: MDBlock) do
1218                 var line = block.first_line
1219                 while line != null do
1220                         if not line.is_empty then
1221                                 if line.value[line.leading] == '>' then
1222                                         var rem = line.leading + 1
1223                                         if line.leading + 1 < line.value.length and
1224                                            line.value[line.leading + 1] == ' ' then
1225                                                 rem += 1
1226                                         end
1227                                         line.value = line.value.substring_from(rem)
1228                                         line.leading = line.process_leading
1229                                 end
1230                         end
1231                         line = line.next
1232                 end
1233         end
1234 end
1235
1236 # A markdown code block.
1237 class BlockCode
1238         super Block
1239
1240         # Any string found after fence token.
1241         var meta: nullable Text
1242
1243         # Number of char to skip at the beginning of the line.
1244         #
1245         # Block code lines start at 4 spaces.
1246         protected var line_start = 4
1247
1248         redef fun emit(v) do v.decorator.add_code(v, self)
1249
1250         redef fun emit_lines(v) do
1251                 var line = block.first_line
1252                 while line != null do
1253                         if not line.is_empty then
1254                                 v.decorator.append_code(v, line.value, line_start, line.value.length)
1255                         end
1256                         v.addn
1257                         line = line.next
1258                 end
1259         end
1260 end
1261
1262 # A markdown code-fence block.
1263 #
1264 # Actually use the same implementation than `BlockCode`,
1265 # this class is only used for typing purposes.
1266 class BlockFence
1267         super BlockCode
1268
1269         # Fence code lines start at 0 spaces.
1270         redef var line_start = 0
1271 end
1272
1273 # A markdown headline.
1274 class BlockHeadline
1275         super Block
1276
1277         redef fun emit(v) do
1278                 var loc = block.location.copy
1279                 loc.column_start += start
1280                 v.push_loc(loc)
1281                 v.decorator.add_headline(v, self)
1282                 v.pop_loc
1283         end
1284
1285         private var start = 0
1286
1287         # Depth of the headline used to determine the headline level.
1288         var depth = 0
1289
1290         # Remove healine marks from lines contained in `self`.
1291         private fun transform_headline(block: MDBlock) do
1292                 if depth > 0 then return
1293                 var level = 0
1294                 var line = block.first_line
1295                 if line.is_empty then return
1296                 var start = line.leading
1297                 while start < line.value.length and line.value[start] == '#' do
1298                         level += 1
1299                         start += 1
1300                 end
1301                 while start < line.value.length and line.value[start] == ' ' do
1302                         start += 1
1303                 end
1304                 if start >= line.value.length then
1305                         line.is_empty = true
1306                 else
1307                         var nend = line.value.length - line.trailing - 1
1308                         while line.value[nend] == '#' do nend -= 1
1309                         while line.value[nend] == ' ' do nend -= 1
1310                         line.value = line.value.substring(start, nend - start + 1)
1311                         line.leading = 0
1312                         line.trailing = 0
1313                 end
1314                 self.start = start
1315                 depth = level.min(6)
1316         end
1317 end
1318
1319 # A markdown list item block.
1320 class BlockListItem
1321         super Block
1322
1323         redef fun emit(v) do v.decorator.add_listitem(v, self)
1324 end
1325
1326 # A markdown list block.
1327 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
1328 abstract class BlockList
1329         super Block
1330
1331         # Split list block into list items sub-blocks.
1332         private fun init_block(v: MarkdownProcessor) do
1333                 var line = block.first_line
1334                 line = line.next
1335                 while line != null do
1336                         var t = v.line_kind(line)
1337                         if t isa LineList or
1338                            (not line.is_empty and (line.prev_empty and line.leading == 0 and
1339                            not (t isa LineList))) then
1340                                    var sblock = block.split(line.prev.as(not null))
1341                                    sblock.kind = new BlockListItem(sblock)
1342                         end
1343                         line = line.next
1344                 end
1345                 var sblock = block.split(block.last_line.as(not null))
1346                 sblock.kind = new BlockListItem(sblock)
1347         end
1348
1349         # Expand list items as paragraphs if needed.
1350         private fun expand_paragraphs(block: MDBlock) do
1351                 var outer = block.first_block
1352                 var inner: nullable MDBlock
1353                 var has_paragraph = false
1354                 while outer != null and not has_paragraph do
1355                         if outer.kind isa BlockListItem then
1356                                 inner = outer.first_block
1357                                 while inner != null and not has_paragraph do
1358                                         if inner.kind isa BlockParagraph then
1359                                                 has_paragraph = true
1360                                         end
1361                                         inner = inner.next
1362                                 end
1363                         end
1364                         outer = outer.next
1365                 end
1366                 if has_paragraph then
1367                         outer = block.first_block
1368                         while outer != null do
1369                                 if outer.kind isa BlockListItem then
1370                                         inner = outer.first_block
1371                                         while inner != null do
1372                                                 if inner.kind isa BlockNone then
1373                                                         inner.kind = new BlockParagraph(inner)
1374                                                 end
1375                                                 inner = inner.next
1376                                         end
1377                                 end
1378                                 outer = outer.next
1379                         end
1380                 end
1381         end
1382 end
1383
1384 # A markdown ordered list.
1385 class BlockOrderedList
1386         super BlockList
1387
1388         redef fun emit(v) do v.decorator.add_orderedlist(v, self)
1389 end
1390
1391 # A markdown unordred list.
1392 class BlockUnorderedList
1393         super BlockList
1394
1395         redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
1396 end
1397
1398 # A markdown paragraph block.
1399 class BlockParagraph
1400         super Block
1401
1402         redef fun emit(v) do v.decorator.add_paragraph(v, self)
1403 end
1404
1405 # A markdown ruler.
1406 class BlockRuler
1407         super Block
1408
1409         redef fun emit(v) do v.decorator.add_ruler(v, self)
1410 end
1411
1412 # Xml blocks that can be found in markdown markup.
1413 class BlockXML
1414         super Block
1415
1416         redef fun emit_lines(v) do
1417                 var line = block.first_line
1418                 while line != null do
1419                         if not line.is_empty then v.add line.value
1420                         v.addn
1421                         line = line.next
1422                 end
1423         end
1424 end
1425
1426 # A markdown line.
1427 class MDLine
1428
1429         # Location of `self` in the original input.
1430         var location: MDLocation
1431
1432         # Text contained in this line.
1433         var value: String is writable
1434
1435         # Is this line empty?
1436         # Lines containing only spaces are considered empty.
1437         var is_empty: Bool = true is writable
1438
1439         # Previous line in `MDBlock` or null if first line.
1440         var prev: nullable MDLine = null is writable
1441
1442         # Next line in `MDBlock` or null if last line.
1443         var next: nullable MDLine = null is writable
1444
1445         # Is the previous line empty?
1446         var prev_empty: Bool = false is writable
1447
1448         # Is the next line empty?
1449         var next_empty: Bool = false is writable
1450
1451         # Initialize a new MDLine from its string value
1452         init do
1453                 self.leading = process_leading
1454                 if leading != value.length then
1455                         self.is_empty = false
1456                         self.trailing = process_trailing
1457                 end
1458         end
1459
1460         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1461         fun clear do
1462                 value = ""
1463                 leading = 0
1464                 trailing = 0
1465                 is_empty = true
1466                 if prev != null then prev.next_empty = true
1467                 if next != null then next.prev_empty = true
1468         end
1469
1470         # Number or leading spaces on this line.
1471         var leading: Int = 0 is writable
1472
1473         # Compute `leading` depending on `value`.
1474         fun process_leading: Int do
1475                 var count = 0
1476                 var value = self.value
1477                 while count < value.length and value[count] == ' ' do count += 1
1478                 if leading == value.length then clear
1479                 return count
1480         end
1481
1482         # Number of trailing spaces on this line.
1483         var trailing: Int = 0 is writable
1484
1485         # Compute `trailing` depending on `value`.
1486         fun process_trailing: Int do
1487                 var count = 0
1488                 var value = self.value
1489                 while value[value.length - count - 1] == ' ' do
1490                         count += 1
1491                 end
1492                 return count
1493         end
1494
1495         # Count the amount of `ch` in this line.
1496         # Return A value > 0 if this line only consists of `ch` end spaces.
1497         fun count_chars(ch: Char): Int do
1498                 var count = 0
1499                 for c in value do
1500                         if c == ' ' then
1501                                 continue
1502                         end
1503                         if c == ch then
1504                                 count += 1
1505                                 continue
1506                         end
1507                         count = 0
1508                         break
1509                 end
1510                 return count
1511         end
1512
1513         # Count the amount of `ch` at the start of this line ignoring spaces.
1514         fun count_chars_start(ch: Char): Int do
1515                 var count = 0
1516                 for c in value do
1517                         if c == ' ' then
1518                                 continue
1519                         end
1520                         if c == ch then
1521                                 count += 1
1522                         else
1523                                 break
1524                         end
1525                 end
1526                 return count
1527         end
1528
1529         # Last XML line if any.
1530         private var xml_end_line: nullable MDLine = null
1531
1532         # Does `value` contains valid XML markup?
1533         private fun check_html: Bool do
1534                 var tags = new Array[String]
1535                 var tmp = new FlatBuffer
1536                 var pos = leading
1537                 if pos + 1 < value.length and value[pos + 1] == '!' then
1538                         if read_xml_comment(self, pos) > 0 then return true
1539                 end
1540                 pos = value.read_xml(tmp, pos, false)
1541                 var tag: String
1542                 if pos > -1 then
1543                         tag = tmp.xml_tag
1544                         if not tag.is_html_block then
1545                                 return false
1546                         end
1547                         if tag == "hr" then
1548                                 xml_end_line = self
1549                                 return true
1550                         end
1551                         tags.add tag
1552                         var line: nullable MDLine = self
1553                         while line != null do
1554                                 while pos < line.value.length and line.value[pos] != '<' do
1555                                         pos += 1
1556                                 end
1557                                 if pos >= line.value.length then
1558                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1559                                                 tags.pop
1560                                                 if tags.is_empty then
1561                                                         xml_end_line = line
1562                                                         break
1563                                                 end
1564                                         end
1565                                         line = line.next
1566                                         pos = 0
1567                                 else
1568                                         tmp = new FlatBuffer
1569                                         var new_pos = line.value.read_xml(tmp, pos, false)
1570                                         if new_pos > 0 then
1571                                                 tag = tmp.xml_tag
1572                                                 if tag.is_html_block and not tag == "hr" then
1573                                                         if tmp[1] == '/' then
1574                                                                 if tags.last != tag then
1575                                                                         return false
1576                                                                 end
1577                                                                 tags.pop
1578                                                         else
1579                                                                 tags.add tag
1580                                                         end
1581                                                 end
1582                                                 if tags.is_empty then
1583                                                         xml_end_line = line
1584                                                         break
1585                                                 end
1586                                                 pos = new_pos
1587                                         else
1588                                                 pos += 1
1589                                         end
1590                                 end
1591                         end
1592                         return tags.is_empty
1593                 end
1594                 return false
1595         end
1596
1597         # Read a XML comment.
1598         # Used by `check_html`.
1599         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1600                 var line: nullable MDLine = first_line
1601                 if start + 3 < line.value.length then
1602                         if line.value[2] == '-' and line.value[3] == '-' then
1603                                 var pos = start + 4
1604                                 while line != null do
1605                                         while pos < line.value.length and line.value[pos] != '-' do
1606                                                 pos += 1
1607                                         end
1608                                         if pos == line.value.length then
1609                                                 line = line.next
1610                                                 pos = 0
1611                                         else
1612                                                 if pos + 2 < line.value.length then
1613                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1614                                                                 first_line.xml_end_line = line
1615                                                                 return pos + 3
1616                                                         end
1617                                                 end
1618                                                 pos += 1
1619                                         end
1620                                 end
1621                         end
1622                 end
1623                 return -1
1624         end
1625
1626         # Extract the text of `self` without leading and trailing.
1627         fun text: String do return value.substring(leading, value.length - trailing)
1628 end
1629
1630 # A markdown line.
1631 interface Line
1632
1633         # Parse the line.
1634         # See `MarkdownProcessor::recurse`.
1635         fun process(v: MarkdownProcessor) is abstract
1636 end
1637
1638 # An empty markdown line.
1639 class LineEmpty
1640         super Line
1641
1642         redef fun process(v) do
1643                 v.current_line = v.current_line.next
1644         end
1645 end
1646
1647 # A non-specific markdown construction.
1648 # Mainly used as part of another line construct such as paragraphs or lists.
1649 class LineOther
1650         super Line
1651
1652         redef fun process(v) do
1653                 var line = v.current_line
1654                 # go to block end
1655                 var was_empty = line.prev_empty
1656                 while line != null and not line.is_empty do
1657                         var t = v.line_kind(line)
1658                         if (v.in_list or v.ext_mode) and t isa LineList then
1659                                 break
1660                         end
1661                         if v.ext_mode and (t isa LineCode or t isa LineFence) then
1662                                 break
1663                         end
1664                         if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
1665                            t isa LineHR or t isa LineBlockquote or t isa LineXML then
1666                                    break
1667                         end
1668                         line = line.next
1669                 end
1670                 # build block
1671                 if line != null and not line.is_empty then
1672                         var block = v.current_block.split(line.prev.as(not null))
1673                         if v.in_list and not was_empty then
1674                                 block.kind = new BlockNone(block)
1675                         else
1676                                 block.kind = new BlockParagraph(block)
1677                         end
1678                         v.current_block.remove_leading_empty_lines
1679                 else
1680                         var block: MDBlock
1681                         if line != null then
1682                                 block = v.current_block.split(line)
1683                         else
1684                                 block = v.current_block.split(v.current_block.last_line.as(not null))
1685                         end
1686                         if v.in_list and (line == null or not line.is_empty) and not was_empty then
1687                                 block.kind = new BlockNone(block)
1688                         else
1689                                 block.kind = new BlockParagraph(block)
1690                         end
1691                         v.current_block.remove_leading_empty_lines
1692                 end
1693                 v.current_line = v.current_block.first_line
1694         end
1695 end
1696
1697 # A line of markdown code.
1698 class LineCode
1699         super Line
1700
1701         redef fun process(v) do
1702                 var line = v.current_line
1703                 # lookup block end
1704                 while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
1705                         line = line.next
1706                 end
1707                 # split at block end line
1708                 var block: MDBlock
1709                 if line != null then
1710                         block = v.current_block.split(line.prev.as(not null))
1711                 else
1712                         block = v.current_block.split(v.current_block.last_line.as(not null))
1713                 end
1714                 block.kind = new BlockCode(block)
1715                 block.remove_surrounding_empty_lines
1716                 v.current_line = v.current_block.first_line
1717         end
1718 end
1719
1720 # A line of raw XML.
1721 class LineXML
1722         super Line
1723
1724         redef fun process(v) do
1725                 var line = v.current_line
1726                 var prev = line.prev
1727                 if prev != null then v.current_block.split(prev)
1728                 var block = v.current_block.split(line.xml_end_line.as(not null))
1729                 block.kind = new BlockXML(block)
1730                 v.current_block.remove_leading_empty_lines
1731                 v.current_line = v.current_block.first_line
1732         end
1733 end
1734
1735 # A markdown blockquote line.
1736 class LineBlockquote
1737         super Line
1738
1739         redef fun process(v) do
1740                 var line = v.current_line
1741                 # go to bquote end
1742                 while line != null do
1743                         if not line.is_empty and (line.prev_empty and
1744                            line.leading == 0 and
1745                            not v.line_kind(line) isa LineBlockquote) then break
1746                         line = line.next
1747                 end
1748                 # build sub block
1749                 var block: MDBlock
1750                 if line != null then
1751                         block = v.current_block.split(line.prev.as(not null))
1752                 else
1753                         block = v.current_block.split(v.current_block.last_line.as(not null))
1754                 end
1755                 var kind = new BlockQuote(block)
1756                 block.kind = kind
1757                 block.remove_surrounding_empty_lines
1758                 kind.remove_block_quote_prefix(block)
1759                 v.current_line = line
1760                 v.recurse(block, false)
1761                 v.current_line = v.current_block.first_line
1762         end
1763 end
1764
1765 # A markdown ruler line.
1766 class LineHR
1767         super Line
1768
1769         redef fun process(v) do
1770                 var line = v.current_line
1771                 if line.prev != null then v.current_block.split(line.prev.as(not null))
1772                 var block = v.current_block.split(line.as(not null))
1773                 block.kind = new BlockRuler(block)
1774                 v.current_block.remove_leading_empty_lines
1775                 v.current_line = v.current_block.first_line
1776         end
1777 end
1778
1779 # A markdown fence code line.
1780 class LineFence
1781         super Line
1782
1783         redef fun process(v) do
1784                 # go to fence end
1785                 var line = v.current_line.next
1786                 while line != null do
1787                         if v.line_kind(line) isa LineFence then break
1788                         line = line.next
1789                 end
1790                 if line != null then
1791                         line = line.next
1792                 end
1793                 # build fence block
1794                 var block: MDBlock
1795                 if line != null then
1796                         block = v.current_block.split(line.prev.as(not null))
1797                 else
1798                         block = v.current_block.split(v.current_block.last_line.as(not null))
1799                 end
1800                 block.remove_surrounding_empty_lines
1801                 var meta = block.first_line.value.meta_from_fence
1802                 block.kind = new BlockFence(block, meta)
1803                 block.first_line.clear
1804                 var last = block.last_line
1805                 if last != null and v.line_kind(last) isa LineFence then
1806                         block.last_line.clear
1807                 end
1808                 block.remove_surrounding_empty_lines
1809                 v.current_line = line
1810         end
1811 end
1812
1813 # A markdown headline.
1814 class LineHeadline
1815         super Line
1816
1817         redef fun process(v) do
1818                 var line = v.current_line
1819                 var lprev = line.prev
1820                 if lprev != null then v.current_block.split(lprev)
1821                 var block = v.current_block.split(line.as(not null))
1822                 var kind = new BlockHeadline(block)
1823                 block.kind = kind
1824                 kind.transform_headline(block)
1825                 v.current_block.remove_leading_empty_lines
1826                 v.current_line = v.current_block.first_line
1827         end
1828 end
1829
1830 # A markdown headline of level 1.
1831 class LineHeadline1
1832         super LineHeadline
1833
1834         redef fun process(v) do
1835                 var line = v.current_line
1836                 var lprev = line.prev
1837                 if lprev != null then v.current_block.split(lprev)
1838                 line.next.clear
1839                 var block = v.current_block.split(line.as(not null))
1840                 var kind = new BlockHeadline(block)
1841                 kind.depth = 1
1842                 kind.transform_headline(block)
1843                 block.kind = kind
1844                 v.current_block.remove_leading_empty_lines
1845                 v.current_line = v.current_block.first_line
1846         end
1847 end
1848
1849 # A markdown headline of level 2.
1850 class LineHeadline2
1851         super LineHeadline
1852
1853         redef fun process(v) do
1854                 var line = v.current_line
1855                 var lprev = line.prev
1856                 if lprev != null then v.current_block.split(lprev)
1857                 line.next.clear
1858                 var block = v.current_block.split(line.as(not null))
1859                 var kind = new BlockHeadline(block)
1860                 kind.depth = 2
1861                 kind.transform_headline(block)
1862                 block.kind = kind
1863                 v.current_block.remove_leading_empty_lines
1864                 v.current_line = v.current_block.first_line
1865         end
1866 end
1867
1868 # A markdown list line.
1869 # Mainly used to factorize code between ordered and unordered lists.
1870 abstract class LineList
1871         super Line
1872
1873         redef fun process(v) do
1874                 var line = v.current_line
1875                 # go to list end
1876                 while line != null do
1877                         var t = v.line_kind(line)
1878                         if not line.is_empty and (line.prev_empty and line.leading == 0 and
1879                            not t isa LineList) then break
1880                         line = line.next
1881                 end
1882                 # build list block
1883                 var list: MDBlock
1884                 if line != null then
1885                         list = v.current_block.split(line.prev.as(not null))
1886                 else
1887                         list = v.current_block.split(v.current_block.last_line.as(not null))
1888                 end
1889                 var kind = block_kind(list)
1890                 list.kind = kind
1891                 list.first_line.prev_empty = false
1892                 list.last_line.next_empty = false
1893                 list.remove_surrounding_empty_lines
1894                 list.first_line.prev_empty = false
1895                 list.last_line.next_empty = false
1896                 kind.init_block(v)
1897                 var block = list.first_block
1898                 while block != null do
1899                         block.remove_list_indent(v)
1900                         v.recurse(block, true)
1901                         block = block.next
1902                 end
1903                 kind.expand_paragraphs(list)
1904                 v.current_line = line
1905         end
1906
1907         # Create a new block kind based on this line.
1908         protected fun block_kind(block: MDBlock): BlockList is abstract
1909
1910         # Extract string value from `MDLine`.
1911         protected fun extract_value(line: MDLine): String is abstract
1912 end
1913
1914 # An ordered list line.
1915 class LineOList
1916         super LineList
1917
1918         redef fun block_kind(block) do return new BlockOrderedList(block)
1919
1920         redef fun extract_value(line) do
1921                 return line.value.substring_from(line.value.index_of('.') + 2)
1922         end
1923 end
1924
1925 # An unordered list line.
1926 class LineUList
1927         super LineList
1928
1929         redef fun block_kind(block) do return new BlockUnorderedList(block)
1930
1931         redef fun extract_value(line) do
1932                 return line.value.substring_from(line.leading + 2)
1933         end
1934 end
1935
1936 # A token represent a character in the markdown input.
1937 # Some tokens have a specific markup behaviour that is handled here.
1938 abstract class Token
1939
1940         # Location of `self` in the original input.
1941         var location: MDLocation
1942
1943         # Position of `self` in input independant from lines.
1944         var pos: Int
1945
1946         # Character found at `pos` in the markdown input.
1947         var char: Char
1948
1949         # Output that token using `MarkdownEmitter::decorator`.
1950         fun emit(v: MarkdownEmitter) do v.decorator.add_char(v, char)
1951 end
1952
1953 # A token without a specific meaning.
1954 class TokenNone
1955         super Token
1956 end
1957
1958 # An emphasis token.
1959 abstract class TokenEm
1960         super Token
1961
1962         redef fun emit(v) do
1963                 var tmp = v.push_buffer
1964                 var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
1965                 v.pop_buffer
1966                 if b > 0 then
1967                         v.decorator.add_em(v, tmp)
1968                         v.current_pos = b
1969                 else
1970                         v.addc char
1971                 end
1972         end
1973 end
1974
1975 # An emphasis star token.
1976 class TokenEmStar
1977         super TokenEm
1978 end
1979
1980 # An emphasis underscore token.
1981 class TokenEmUnderscore
1982         super TokenEm
1983 end
1984
1985 # A strong token.
1986 abstract class TokenStrong
1987         super Token
1988
1989         redef fun emit(v) do
1990                 var tmp = v.push_buffer
1991                 var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
1992                 v.pop_buffer
1993                 if b > 0 then
1994                         v.decorator.add_strong(v, tmp)
1995                         v.current_pos = b + 1
1996                 else
1997                         v.addc char
1998                 end
1999         end
2000 end
2001
2002 # A strong star token.
2003 class TokenStrongStar
2004         super TokenStrong
2005 end
2006
2007 # A strong underscore token.
2008 class TokenStrongUnderscore
2009         super TokenStrong
2010 end
2011
2012 # A code token.
2013 # This class is mainly used to factorize work between single and double quoted span codes.
2014 abstract class TokenCode
2015         super Token
2016
2017         redef fun emit(v) do
2018                 var a = pos + next_pos + 1
2019                 var b = v.processor.find_token(v.current_text.as(not null), a, self)
2020                 if b > 0 then
2021                         v.current_pos = b + next_pos
2022                         while a < b and v.current_text[a] == ' ' do a += 1
2023                         if a < b then
2024                                 while v.current_text[b - 1] == ' ' do b -= 1
2025                                 v.decorator.add_span_code(v, v.current_text.as(not null), a, b)
2026                         end
2027                 else
2028                         v.addc char
2029                 end
2030         end
2031
2032         private fun next_pos: Int is abstract
2033 end
2034
2035 # A span code token.
2036 class TokenCodeSingle
2037         super TokenCode
2038
2039         redef fun next_pos do return 0
2040 end
2041
2042 # A doubled span code token.
2043 class TokenCodeDouble
2044         super TokenCode
2045
2046         redef fun next_pos do return 1
2047 end
2048
2049 # A link or image token.
2050 # This class is mainly used to factorize work between images and links.
2051 abstract class TokenLinkOrImage
2052         super Token
2053
2054         # Link adress
2055         var link: nullable Text = null
2056
2057         # Link text
2058         var name: nullable Text = null
2059
2060         # Link title
2061         var comment: nullable Text = null
2062
2063         # Is the link construct an abbreviation?
2064         var is_abbrev = false
2065
2066         redef fun emit(v) do
2067                 var tmp = new FlatBuffer
2068                 var b = check_link(v, tmp, pos, self)
2069                 if b > 0 then
2070                         emit_hyper(v)
2071                         v.current_pos = b
2072                 else
2073                         v.addc char
2074                 end
2075         end
2076
2077         # Emit the hyperlink as link or image.
2078         private fun emit_hyper(v: MarkdownEmitter) is abstract
2079
2080         # Check if the link is a valid link.
2081         private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
2082                 var md = v.current_text
2083                 var pos
2084                 if token isa TokenLink then
2085                         pos = start + 1
2086                 else
2087                         pos = start + 2
2088                 end
2089                 var tmp = new FlatBuffer
2090                 pos = md.read_md_link_id(tmp, pos)
2091                 if pos < start then return -1
2092                 name = tmp
2093                 var old_pos = pos
2094                 pos += 1
2095                 pos = md.skip_spaces(pos)
2096                 if pos < start then
2097                         var tid = name.write_to_string.to_lower
2098                         if v.processor.link_refs.has_key(tid) then
2099                                 var lr = v.processor.link_refs[tid]
2100                                 is_abbrev = lr.is_abbrev
2101                                 link = lr.link
2102                                 comment = lr.title
2103                                 pos = old_pos
2104                         else
2105                                 return -1
2106                         end
2107                 else if md[pos] == '(' then
2108                         pos += 1
2109                         pos = md.skip_spaces(pos)
2110                         if pos < start then return -1
2111                         tmp = new FlatBuffer
2112                         var use_lt = md[pos] == '<'
2113                         if use_lt then
2114                                 pos = md.read_until(tmp, pos + 1, '>')
2115                         else
2116                                 pos = md.read_md_link(tmp, pos)
2117                         end
2118                         if pos < start then return -1
2119                         if use_lt then pos += 1
2120                         link = tmp.write_to_string
2121                         if md[pos] == ' ' then
2122                                 pos = md.skip_spaces(pos)
2123                                 if pos > start and md[pos] == '"' then
2124                                         pos += 1
2125                                         tmp = new FlatBuffer
2126                                         pos = md.read_until(tmp, pos, '"')
2127                                         if pos < start then return -1
2128                                         comment = tmp.write_to_string
2129                                         pos += 1
2130                                         pos = md.skip_spaces(pos)
2131                                         if pos == -1 then return -1
2132                                 end
2133                         end
2134                         if pos < start then return -1
2135                         if md[pos] != ')' then return -1
2136                 else if md[pos] == '[' then
2137                         pos += 1
2138                         tmp = new FlatBuffer
2139                         pos = md.read_raw_until(tmp, pos, ']')
2140                         if pos < start then return -1
2141                         var id
2142                         if tmp.length > 0 then
2143                                 id = tmp
2144                         else
2145                                 id = name
2146                         end
2147                         var tid = id.write_to_string.to_lower
2148                         if v.processor.link_refs.has_key(tid) then
2149                                 var lr = v.processor.link_refs[tid]
2150                                 link = lr.link
2151                                 comment = lr.title
2152                         end
2153                 else
2154                         var tid = name.write_to_string.replace("\n", " ").to_lower
2155                         if v.processor.link_refs.has_key(tid) then
2156                                 var lr = v.processor.link_refs[tid]
2157                                 link = lr.link
2158                                 comment = lr.title
2159                                 pos = old_pos
2160                         else
2161                                 return -1
2162                         end
2163                 end
2164                 if link == null then return -1
2165                 return pos
2166         end
2167 end
2168
2169 # A markdown link token.
2170 class TokenLink
2171         super TokenLinkOrImage
2172
2173         redef fun emit_hyper(v) do
2174                 if is_abbrev and comment != null then
2175                         v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
2176                 else
2177                         v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
2178                 end
2179         end
2180 end
2181
2182 # A markdown image token.
2183 class TokenImage
2184         super TokenLinkOrImage
2185
2186         redef fun emit_hyper(v) do
2187                 v.decorator.add_image(v, link.as(not null), name.as(not null), comment)
2188         end
2189 end
2190
2191 # A HTML/XML token.
2192 class TokenHTML
2193         super Token
2194
2195         redef fun emit(v) do
2196                 var tmp = new FlatBuffer
2197                 var b = check_html(v, tmp, v.current_text.as(not null), v.current_pos)
2198                 if b > 0 then
2199                         v.add tmp
2200                         v.current_pos = b
2201                 else
2202                         v.decorator.escape_char(v, char)
2203                 end
2204         end
2205
2206         # Is the HTML valid?
2207         # Also take care of link and mailto shortcuts.
2208         private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
2209                 # check for auto links
2210                 var tmp = new FlatBuffer
2211                 var pos = md.read_until(tmp, start + 1, ':', ' ', '>', '\n')
2212                 if pos != -1 and md[pos] == ':' and tmp.is_link_prefix then
2213                         pos = md.read_until(tmp, pos, '>')
2214                         if pos != -1 then
2215                                 var link = tmp.write_to_string
2216                                 v.decorator.add_link(v, link, link, null)
2217                                 return pos
2218                         end
2219                 end
2220                 # TODO check for mailto
2221                 # check for inline html
2222                 if start + 2 < md.length then
2223                         return md.read_xml(out, start, true)
2224                 end
2225                 return -1
2226         end
2227 end
2228
2229 # An HTML entity token.
2230 class TokenEntity
2231         super Token
2232
2233         redef fun emit(v) do
2234                 var tmp = new FlatBuffer
2235                 var b = check_entity(tmp, v.current_text.as(not null), pos)
2236                 if b > 0 then
2237                         v.add tmp
2238                         v.current_pos = b
2239                 else
2240                         v.decorator.escape_char(v, char)
2241                 end
2242         end
2243
2244         # Is the entity valid?
2245         private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
2246                 var pos = md.read_until(out, start, ';')
2247                 if pos < 0 or out.length < 3 then
2248                         return -1
2249                 end
2250                 if out[1] == '#' then
2251                         if out[2] == 'x' or out[2] == 'X' then
2252                                 if out.length < 4 then return -1
2253                                 for i in [3..out.length[ do
2254                                         var c = out[i]
2255                                         if (c < '0' or c > '9') and (c < 'a' and c > 'f') and (c < 'A' and c > 'F') then
2256                                                 return -1
2257                                         end
2258                                 end
2259                         else
2260                                 for i in [2..out.length[ do
2261                                         var c = out[i]
2262                                         if c < '0' or c > '9' then return -1
2263                                 end
2264                         end
2265                         out.add ';'
2266                 else
2267                         for i in [1..out.length[ do
2268                                 var c = out[i]
2269                                 if not c.is_digit and not c.is_letter then return -1
2270                         end
2271                         out.add ';'
2272                         # TODO check entity is valid
2273                         # if out.is_entity then
2274                                 return pos
2275                         # else
2276                                 # return -1
2277                         # end
2278                 end
2279                 return pos
2280         end
2281 end
2282
2283 # A markdown escape token.
2284 class TokenEscape
2285         super Token
2286
2287         redef fun emit(v) do
2288                 v.current_pos += 1
2289                 v.addc v.current_text[v.current_pos]
2290         end
2291 end
2292
2293 # A markdown strike token.
2294 #
2295 # Extended mode only (see `MarkdownProcessor::ext_mode`)
2296 class TokenStrike
2297         super Token
2298
2299         redef fun emit(v) do
2300                 var tmp = v.push_buffer
2301                 var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
2302                 v.pop_buffer
2303                 if b > 0 then
2304                         v.decorator.add_strike(v, tmp)
2305                         v.current_pos = b + 1
2306                 else
2307                         v.addc char
2308                 end
2309         end
2310 end
2311
2312 redef class Text
2313
2314         # Get the position of the next non-space character.
2315         private fun skip_spaces(start: Int): Int do
2316                 var pos = start
2317                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2318                         pos += 1
2319                 end
2320                 if pos < length then return pos
2321                 return -1
2322         end
2323
2324         # Read `self` until `nend` and append it to the `out` buffer.
2325         # Escape markdown special chars.
2326         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2327                 var pos = start
2328                 while pos < length do
2329                         var c = self[pos]
2330                         if c == '\\' and pos + 1 < length then
2331                                 pos = escape(out, self[pos + 1], pos)
2332                         else
2333                                 var end_reached = false
2334                                 for n in nend do
2335                                         if c == n then
2336                                                 end_reached = true
2337                                                 break
2338                                         end
2339                                 end
2340                                 if end_reached then break
2341                                 out.add c
2342                         end
2343                         pos += 1
2344                 end
2345                 if pos == length then return -1
2346                 return pos
2347         end
2348
2349         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2350         # No escape is made.
2351         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2352                 var pos = start
2353                 while pos < length do
2354                         var c = self[pos]
2355                         var end_reached = false
2356                         for n in nend do
2357                                 if c == n then
2358                                         end_reached = true
2359                                         break
2360                                 end
2361                         end
2362                         if end_reached then break
2363                         out.add c
2364                         pos += 1
2365                 end
2366                 if pos == length then return -1
2367                 return pos
2368         end
2369
2370         # Read `self` as XML until `to` and append it to the `out` buffer.
2371         # Escape HTML special chars.
2372         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2373                 var pos = from
2374                 var in_str = false
2375                 var str_char: nullable Char = null
2376                 while pos < length do
2377                         var c = self[pos]
2378                         if in_str then
2379                                 if c == '\\' then
2380                                         out.add c
2381                                         pos += 1
2382                                         if pos < length then
2383                                                 out.add c
2384                                                 pos += 1
2385                                         end
2386                                         continue
2387                                 end
2388                                 if c == str_char then
2389                                         in_str = false
2390                                         out.add c
2391                                         pos += 1
2392                                         continue
2393                                 end
2394                         end
2395                         if c == '"' or c == '\'' then
2396                                 in_str = true
2397                                 str_char = c
2398                         end
2399                         if not in_str then
2400                                 var end_reached = false
2401                                 for n in [0..to.length[ do
2402                                         if c == to[n] then
2403                                                 end_reached = true
2404                                                 break
2405                                         end
2406                                 end
2407                                 if end_reached then break
2408                         end
2409                         out.add c
2410                         pos += 1
2411                 end
2412                 if pos == length then return -1
2413                 return pos
2414         end
2415
2416         # Read `self` as XML and append it to the `out` buffer.
2417         # Safe mode can be activated to limit reading to valid xml.
2418         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2419                 var pos = 0
2420                 var is_valid = true
2421                 var is_close_tag = false
2422                 if start + 1 >= length then return -1
2423                 if self[start + 1] == '/' then
2424                         is_close_tag = true
2425                         pos = start + 2
2426                 else if self[start + 1] == '!' then
2427                         out.append "<!"
2428                         return start + 1
2429                 else
2430                         is_close_tag = false
2431                         pos = start + 1
2432                 end
2433                 if safe_mode then
2434                         var tmp = new FlatBuffer
2435                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2436                         if pos == -1 then return -1
2437                         var tag = tmp.write_to_string.trim.to_lower
2438                         if not tag.is_valid_html_tag then
2439                                 out.append "&lt;"
2440                                 pos = -1
2441                         else if tag.is_html_unsafe then
2442                                 is_valid = false
2443                                 out.append "&lt;"
2444                                 if is_close_tag then out.add '/'
2445                                 out.append tmp
2446                         else
2447                                 out.append "<"
2448                                 if is_close_tag then out.add '/'
2449                                 out.append tmp
2450                         end
2451                 else
2452                         out.add '<'
2453                         if is_close_tag then out.add '/'
2454                         pos = read_xml_until(out, pos, ' ', '/', '>')
2455                 end
2456                 if pos == -1 then return -1
2457                 pos = read_xml_until(out, pos, '/', '>')
2458                 if pos == -1 then return -1
2459                 if self[pos] == '/' then
2460                         out.append " /"
2461                         pos = self.read_xml_until(out, pos + 1, '>')
2462                         if pos == -1 then return -1
2463                 end
2464                 if self[pos] == '>' then
2465                         if is_valid then
2466                                 out.add '>'
2467                         else
2468                                 out.append "&gt;"
2469                         end
2470                         return pos
2471                 end
2472                 return -1
2473         end
2474
2475         # Read a markdown link address and append it to the `out` buffer.
2476         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2477                 var pos = start
2478                 var counter = 1
2479                 while pos < length do
2480                         var c = self[pos]
2481                         if c == '\\' and pos + 1 < length then
2482                                 pos = escape(out, self[pos + 1], pos)
2483                         else
2484                                 var end_reached = false
2485                                 if c == '(' then
2486                                         counter += 1
2487                                 else if c == ' ' then
2488                                         if counter == 1 then end_reached = true
2489                                 else if c == ')' then
2490                                         counter -= 1
2491                                         if counter == 0 then end_reached = true
2492                                 end
2493                                 if end_reached then break
2494                                 out.add c
2495                         end
2496                         pos += 1
2497                 end
2498                 if pos == length then return -1
2499                 return pos
2500         end
2501
2502         # Read a markdown link text and append it to the `out` buffer.
2503         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2504                 var pos = start
2505                 var counter = 1
2506                 while pos < length do
2507                         var c = self[pos]
2508                         var end_reached = false
2509                         if c == '[' then
2510                                 counter += 1
2511                                 out.add c
2512                         else if c == ']' then
2513                                 counter -= 1
2514                                 if counter == 0 then
2515                                         end_reached = true
2516                                 else
2517                                         out.add c
2518                                 end
2519                         else
2520                                 out.add c
2521                         end
2522                         if end_reached then break
2523                         pos += 1
2524                 end
2525                 if pos == length then return -1
2526                 return pos
2527         end
2528
2529         # Extract the XML tag name from a XML tag.
2530         private fun xml_tag: String do
2531                 var tpl = new FlatBuffer
2532                 var pos = 1
2533                 if pos < length and self[1] == '/' then pos += 1
2534                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2535                         tpl.add self[pos]
2536                         pos += 1
2537                 end
2538                 return tpl.write_to_string.to_lower
2539         end
2540
2541         private fun is_valid_html_tag: Bool do
2542                 if is_empty then return false
2543                 for c in self do
2544                         if not c.is_alpha then return false
2545                 end
2546                 return true
2547         end
2548
2549         # Read and escape the markdown contained in `self`.
2550         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2551                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2552                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2553                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2554                    c == '`' or c == '~' or c == '^' then
2555                         out.add c
2556                         return pos + 1
2557                 end
2558                 out.add '\\'
2559                 return pos
2560         end
2561
2562         # Extract string found at end of fence opening.
2563         private fun meta_from_fence: nullable Text do
2564                 for i in [0..chars.length[ do
2565                         var c = chars[i]
2566                         if c != ' ' and c != '`' and c != '~' then
2567                                 return substring_from(i).trim
2568                         end
2569                 end
2570                 return null
2571         end
2572
2573         # Is `self` an unsafe HTML element?
2574         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2575
2576         # Is `self` a HRML block element?
2577         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2578
2579         # Is `self` a link prefix?
2580         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2581
2582         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2583
2584         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2585
2586         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2587 end
2588
2589 redef class String
2590
2591         # Parse `self` as markdown and return the HTML representation
2592         #.
2593         #    var md = "**Hello World!**"
2594         #    var html = md.md_to_html
2595         #    assert html == "<p><strong>Hello World!</strong></p>\n"
2596         fun md_to_html: Writable do
2597                 var processor = new MarkdownProcessor
2598                 return processor.process(self)
2599         end
2600 end