lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
   22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
   33         # `MarkdownEmitter` used for output.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         #               This is a paragraph
  45         #               * and this is not a list
  46         #
  47         #   Will produce:
  48         #
  49         #               <p>This is a paragraph
  50         #               * and this is not a list</p>
  51         #
  52         #       When using extended mode this changes to:
  53         #
  54         #               <p>This is a paragraph</p>
  55         #               <ul>
  56         #               <li>and this is not a list</li>
  57         #               </ul>
  58         #
   59         # * Fenced code blocks
   60         #
   61         #   If you don't want to indent all your code with 4 spaces,
  62         #   you can wrap your code in ``` ``` ``` or `~~~`.
  63         #
  64         #       Here's an example:
  65         #
  66         #               ```
  67         #               fun test do
  68         #                       print "Hello World!"
  69         #               end
  70         #               ```
  71         #
  72         # * Code blocks meta
  73         #
  74         #   If you want to use syntax highlighting tools, most of them need to know what kind
  75         #   of language they are highlighting.
  76         #   You can add an optional language identifier after the fence declaration to output
  77         #   it in the HTML render.
  78         #
  79         #               ```nit
  80         #               import markdown
  81         #
  82         #               print "# Hello World!".md_to_html
  83         #               ```
  84         #
  85         #   Becomes
  86         #
  87         #               <pre class="nit"><code>import markdown
  88         #
  89         #               print "Hello World!".md_to_html
  90         #               </code></pre>
  91         #
  92         # * Underscores (Emphasis)
  93         #
  94         #   Underscores in the middle of a word like:
  95         #
  96         #               Con_cat_this
  97         #
  98         #       normally produces this:
  99         #
 100         #               <p>Con<em>cat</em>this</p>
 101         #
 102         #   With extended mode they don't result in emphasis.
 103         #
 104         #               <p>Con_cat_this</p>
 105         #
 106         # * Strikethrough
 107         #
  108         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
  109         #   a strikethrough span is marked with `~~`.
 110         #
 111         #               ~~Mistaken text.~~
 112         #
 113         #   becomes
 114         #
 115         #               <del>Mistaken text.</del>
 116         var ext_mode = true
 117
  118         init do self.emitter = new MarkdownEmitter(self) # default emitter, bound to this processor
 119
 120         # Process the mardown `input` string and return the processed output.
 121         fun process(input: String): Writable do
 122                 # init processor
 123                 link_refs.clear
 124                 last_link_ref = null
 125                 current_line = null
 126                 current_block = null
 127                 # parse markdown
 128                 var parent = read_lines(input)
 129                 parent.remove_surrounding_empty_lines
 130                 recurse(parent, false)
 131                 # output processed text
 132                 return emitter.emit(parent.kind)
 133         end
 134
 135         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 136         private fun read_lines(input: String): MDBlock do
 137                 var block = new MDBlock
 138                 var value = new FlatBuffer
 139                 var i = 0
 140                 while i < input.length do
 141                         value.clear
 142                         var pos = 0
 143                         var eol = false
 144                         while not eol and i < input.length do
 145                                 var c = input[i]
 146                                 if c == '\n' then
 147                                         i += 1
 148                                         eol = true
 149                                 else if c == '\t' then
 150                                         var np = pos + (4 - (pos.bin_and(3)))
 151                                         while pos < np do
 152                                                 value.add ' '
 153                                                 pos += 1
 154                                         end
 155                                         i += 1
 156                                 else
 157                                         pos += 1
 158                                         value.add c
 159                                         i += 1
 160                                 end
 161                         end
 162
 163                         var line = new MDLine(value.write_to_string)
 164                         var is_link_ref = check_link_ref(line)
 165                         # Skip link refs
 166                         if not is_link_ref then block.add_line line
 167                 end
 168                 return block
 169         end
 170
  171         # Check if line is a block link definition.
  172         # Return `true` if line contains a valid link ref and save it into `link_refs`.
              #
              # Recognized form: `[id]: link` or `[id]: <link>`, optionally followed by a
              # title enclosed in `"..."`, `'...'` or `(...)`.
              # A line that is not a definition but starts with such a title delimiter is
              # used as the title of `last_link_ref` when one is set; `true` is also
              # returned in that case.
              #
              # NOTE(review): `last_link_ref` is never cleared in this method, so any later
              # line starting with a title delimiter can overwrite its title — confirm
              # this is intended.
  173         private fun check_link_ref(line: MDLine): Bool do
  174                 var md = line.value
  175                 var is_link_ref = false
  176                 var id = new FlatBuffer
  177                 var link = new FlatBuffer
  178                 var comment = new FlatBuffer
  179                 var pos = -1
                      # A definition starts with `[` and has a `leading` below 4.
  180                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
  181                         pos = line.leading + 1
                              # Read the id up to the closing `]`.
  182                         pos = md.read_until(id, pos, ']')
  183                         if not id.is_empty and pos + 2 < line.value.length then
                                      # The id must be directly followed by `:`.
  184                                 if line.value[pos + 1] == ':' then
  185                                         pos += 2
  186                                         pos = md.skip_spaces(pos)
                                              # NOTE(review): `pos` is used as an index right after
                                              # `skip_spaces`; assumes it is still inside the line — TODO confirm.
  187                                         if line.value[pos] == '<' then
                                                      # Link wrapped in `<...>`.
  188                                                 pos += 1
  189                                                 pos = md.read_until(link, pos, '>')
  190                                                 pos += 1
  191                                         else
                                                      # Bare link: read up to the next space.
  192                                                 pos = md.read_until(link, pos, ' ', '\n')
  193                                         end
  194                                         if not link.is_empty then
  195                                                 pos = md.skip_spaces(pos)
                                                      # Something may follow the link: look for a title.
  196                                                 if pos > 0 and pos < line.value.length then
  197                                                         var c = line.value[pos]
  198                                                         if c == '\"' or c == '\'' or c == '(' then
  199                                                                 pos += 1
                                                                      # A `(` title is closed by `)`, a quoted one by the same quote.
  200                                                                 if c == '(' then
  201                                                                         pos = md.read_until(comment, pos, ')')
  202                                                                 else
  203                                                                         pos = md.read_until(comment, pos, c)
  204                                                                 end
  205                                                                 if pos > 0 then is_link_ref = true
  206                                                         end
  207                                                 else
                                                              # No title candidate after the link.
  208                                                         is_link_ref = true
  209                                                 end
  210                                         end
  211                                 end
  212                         end
  213                 end
  214                 if is_link_ref and not id.is_empty and not link.is_empty then
                              # Register the definition; without a title it is remembered in
                              # `last_link_ref` so that a following line can provide the title.
  215                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
  216                         add_link_ref(id.write_to_string, lr)
  217                         if comment.is_empty then last_link_ref = lr
  218                         return true
  219                 else
                              # Not a definition: the line may hold the title of `last_link_ref`.
  220                         comment = new FlatBuffer
  221                         if not line.is_empty and last_link_ref != null then
  222                                 pos = line.leading
  223                                 var c = line.value[pos]
  224                                 if c == '\"' or c == '\'' or c ==  '(' then
  225                                         pos += 1
  226                                         if c == '(' then
  227                                                 pos = md.read_until(comment, pos, ')')
  228                                         else
  229                                                 pos = md.read_until(comment, pos, c)
  230                                         end
  231                                 end
  232                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
  233                         end
                              # The line is consumed only if a title was read from it.
  234                         if comment.is_empty then return false
  235                         return true
  236                 end
  237         end
 238
 239         # Known link refs
 240         # This list will be needed during output to expand links.
 241         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 242
 243         # Last encountered link ref (for multiline definitions)
 244         #
 245         # Markdown allows link refs to be defined over two lines:
 246         #
 247         #       [id]: http://example.com/longish/path/to/resource/here
 248         #               "Optional Title Here"
 249         #
 250         private var last_link_ref: nullable LinkRef = null
 251
  252         # Add a link ref to the list, indexed by the lower-cased `key`.
  253         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 254
 255         # Recursively split a `block`.
 256         #
 257         # The block is splitted according to the type of lines it contains.
 258         # Some blocks can be splited again recursively like lists.
 259         # The `in_list` mode is used to recurse on list and build
 260         # nested paragraphs or code blocks.
 261         fun recurse(root: MDBlock, in_list: Bool) do
 262                 var old_mode = self.in_list
 263                 var old_root = self.current_block
 264                 self.in_list = in_list
 265
 266                 var line = root.first_line
 267                 while line != null and line.is_empty do
 268                         line = line.next
 269                         if line == null then return
 270                 end
 271
 272                 current_line = line
 273                 current_block = root
 274                 while current_line != null do
 275                         line_kind(current_line.as(not null)).process(self)
 276                 end
 277                 self.in_list = old_mode
 278                 self.current_block = old_root
 279         end
 280
 281         # Currently processed line.
 282         # Used when visiting blocks with `recurse`.
 283         var current_line: nullable MDLine = null is writable
 284
 285         # Currently processed block.
 286         # Used when visiting blocks with `recurse`.
 287         var current_block: nullable MDBlock = null is writable
 288
 289         # Is the current recursion in list mode?
 290         # Used when visiting blocks with `recurse`
 291         private var in_list = false
 292
 293         # The type of line.
 294         # see: `md_line_*`
 295         fun line_kind(md: MDLine): Line do
 296                 var value = md.value
 297                 var leading = md.leading
 298                 var trailing = md.trailing
 299                 if md.is_empty then return new LineEmpty
 300                 if md.leading > 3 then return new LineCode
 301                 if value[leading] == '#' then return new LineHeadline
 302                 if value[leading] == '>' then return new LineBlockquote
 303
 304                 if ext_mode then
 305                         if value.length - leading - trailing > 2 then
 306                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 307                                         return new LineFence
 308                                 end
 309                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 310                                         return new LineFence
 311                                 end
 312                         end
 313                 end
 314
 315                 if value.length - leading - trailing > 2 and
 316                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 317                    if md.count_chars(value[leading]) >= 3 then
 318                                 return new LineHR
 319                    end
 320                 end
 321
 322                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 323                         var c = value[leading]
 324                         if c == '*' or c == '-' or c == '+' then return new LineUList
 325                 end
 326
 327                 if value.length - leading >= 3 and value[leading].is_digit then
 328                         var i = leading + 1
 329                         while i < value.length and value[i].is_digit do i += 1
 330                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 331                                 return new LineOList
 332                         end
 333                 end
 334
 335                 if value[leading] == '<' and md.check_html then return new LineXML
 336
 337                 var next = md.next
 338                 if next != null and not next.is_empty then
 339                         if next.count_chars('=') > 0 then
 340                                 return new LineHeadline1
 341                         end
 342                         if next.count_chars('-') > 0 then
 343                                 return new LineHeadline2
 344                         end
 345                 end
 346                 return new LineOther
 347         end
 348
 349         # Get the token kind at `pos`.
 350         fun token_at(text: Text, pos: Int): Token do
 351                 var c0: Char
 352                 var c1: Char
 353                 var c2: Char
 354
 355                 if pos > 0 then
 356                         c0 = text[pos - 1]
 357                 else
 358                         c0 = ' '
 359                 end
 360                 var c = text[pos]
 361
 362                 if pos + 1 < text.length then
 363                         c1 = text[pos + 1]
 364                 else
 365                         c1 = ' '
 366                 end
 367                 if pos + 2 < text.length then
 368                         c2 = text[pos + 2]
 369                 else
 370                         c2 = ' '
 371                 end
 372
 373                 if c == '*' then
 374                         if c1 == '*' then
 375                                 if c0 != ' ' or c2 != ' ' then
 376                                         return new TokenStrongStar(pos, c)
 377                                 else
 378                                         return new TokenEmStar(pos, c)
 379                                 end
 380                         end
 381                         if c0 != ' ' or c1 != ' ' then
 382                                 return new TokenEmStar(pos, c)
 383                         else
 384                                 return new TokenNone(pos, c)
 385                         end
 386                 else if c == '_' then
 387                         if c1 == '_' then
 388                                 if c0 != ' ' or c2 != ' 'then
 389                                         return new TokenStrongUnderscore(pos, c)
 390                                 else
 391                                         return new TokenEmUnderscore(pos, c)
 392                                 end
 393                         end
 394                         if ext_mode then
 395                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 396                                    (c1.is_letter or c1.is_digit) then
 397                                         return new TokenNone(pos, c)
 398                                 else
 399                                         return new TokenEmUnderscore(pos, c)
 400                                 end
 401                         end
 402                         if c0 != ' ' or c1 != ' ' then
 403                                 return new TokenEmUnderscore(pos, c)
 404                         else
 405                                 return new TokenNone(pos, c)
 406                         end
 407                 else if c == '!' then
 408                         if c1 == '[' then return new TokenImage(pos, c)
 409                         return new TokenNone(pos, c)
 410                 else if c == '[' then
 411                         return new TokenLink(pos, c)
 412                 else if c == ']' then
 413                         return new TokenNone(pos, c)
 414                 else if c == '`' then
 415                         if c1 == '`' then
 416                                 return new TokenCodeDouble(pos, c)
 417                         else
 418                                 return new TokenCodeSingle(pos, c)
 419                         end
 420                 else if c == '\\' then
 421                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 422                                 return new TokenEscape(pos, c)
 423                         else
 424                                 return new TokenNone(pos, c)
 425                         end
 426                 else if c == '<' then
 427                         return new TokenHTML(pos, c)
 428                 else if c == '&' then
 429                         return new TokenEntity(pos, c)
 430                 else
 431                         if ext_mode then
 432                                 if c == '~' and c1 == '~' then
 433                                         return new TokenStrike(pos, c)
 434                                 end
 435                         end
 436                         return new TokenNone(pos, c)
 437                 end
 438         end
 439
 440         # Find the position of a `token` in `self`.
 441         fun find_token(text: Text, start: Int, token: Token): Int do
 442                 var pos = start
 443                 while pos < text.length do
 444                         if token_at(text, pos).is_same_type(token) then
 445                                 return pos
 446                         end
 447                         pos += 1
 448                 end
 449                 return -1
 450         end
 451 end
 452
 453 # Emit output corresponding to blocks content.
 454 #
 455 # Blocks are created by a previous pass in `MarkdownProcessor`.
 456 # The emitter use a `Decorator` to select the output format.
 457 class MarkdownEmitter
 458
 459         # Kind of processor used for parsing.
 460         type PROCESSOR: MarkdownProcessor
 461
 462         # Processor containing link refs.
 463         var processor: PROCESSOR
 464
 465         # Kind of decorator used for decoration.
 466         type DECORATOR: Decorator
 467
 468         # Decorator used for output.
 469         # Default is `HTMLDecorator`
 470         var decorator: DECORATOR is writable, lazy do
 471                 return new HTMLDecorator
 472         end
 473
  474         # Create a new `MarkdownEmitter` using a custom `decorator`.
              # The given `decorator` is used instead of the lazily created default one.
  475         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
  476                 init processor
  477                 self.decorator = decorator
  478         end
 479
 480         # Output `block` using `decorator` in the current buffer.
 481         fun emit(block: Block): Text do
 482                 var buffer = push_buffer
 483                 block.emit(self)
 484                 pop_buffer
 485                 return buffer
 486         end
 487
  488         # Output the content of `block` by delegating to `block.emit_in`.
  489         fun emit_in(block: Block) do block.emit_in(self)
 490
  491         # Transform and emit the whole markdown `text`.
  492         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 493
  494         # Transform and emit markdown text starting at `start` and
  495         # until a token with the same type as `token` is found.
  496         # Go until the end of text if `token` is null.
              #
              # Return the position of the stopping token, or -1 if the end of `text`
              # was reached.
              # A `TokenEmStar` stop token is also matched by a `TokenStrongStar`, and a
              # `TokenEmUnderscore` by a `TokenStrongUnderscore`.
              #
              # NOTE(review): `current_text` and `current_pos` are restored only when -1 is
              # returned; on an early return they keep the values set here — confirm
              # callers rely on that.
  497         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
  498                 var old_text = current_text
  499                 var old_pos = current_pos
  500                 current_text = text
  501                 current_pos = start
  502                 while current_pos < text.length do
  503                         var mt = processor.token_at(text, current_pos)
                              # A null or `TokenNone` stop token never stops the emission.
  504                         if (token != null and not token isa TokenNone) and
  505                         (mt.is_same_type(token) or
  506                         (token isa TokenEmStar and mt isa TokenStrongStar) or
  507                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
  508                                 return current_pos
  509                         end
  510                         mt.emit(self)
  511                         current_pos += 1
  512                 end
  513                 current_text = old_text
  514                 current_pos = old_pos
  515                 return -1
  516         end
 517
 518         # Currently processed position in `current_text`.
 519         # Used when visiting inline production with `emit_text_until`.
 520         private var current_pos: Int = -1
 521
 522         # Currently processed text.
 523         # Used when visiting inline production with `emit_text_until`.
 524         private var current_text: nullable Text = null
 525
 526         # Stacked buffers.
 527         private var buffer_stack = new List[FlatBuffer]
 528
 529         # Push a new buffer on the stack.
 530         private fun push_buffer: FlatBuffer do
 531                 var buffer = new FlatBuffer
 532                 buffer_stack.add buffer
 533                 return buffer
 534         end
 535
  536         # Pop the last buffer of `buffer_stack`; the popped buffer is not returned.
  537         private fun pop_buffer do buffer_stack.pop
 538
  539         # Current output buffer.
              # This is the last buffer of `buffer_stack`, which must not be empty.
  540         private fun current_buffer: FlatBuffer do
  541                 assert not buffer_stack.is_empty
  542                 return buffer_stack.last
  543         end
 544
 545         # Append `e` to current buffer.
 546         fun add(e: Writable) do
 547                 if e isa Text then
 548                         current_buffer.append e
 549                 else
 550                         current_buffer.append e.write_to_string
 551                 end
 552         end
 553
  554         # Append the single character `c` to the current buffer.
  555         fun addc(c: Char) do current_buffer.add c
 556
  557         # Append a "\n" line break to the current buffer.
  558         fun addn do current_buffer.add '\n'
 559 end
 560
  561 # A Link Reference.
  562 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
  563 #
  564 # ~~~raw
  565 # [1]: http://example.com/ "Optional title"
  566 # ~~~
  567 class LinkRef
  568
  569         # Link href
  570         var link: String
  571
  572         # Optional link title
  573         var title: nullable String = null
  574
  575         # Is the link an abbreviation?
  576         var is_abbrev = false
  577
  578         # Create a link with a title.
              # Both `link` and `title` are stored as given; `title` may be null.
  579         init with_title(link: String, title: nullable String) do
  580                 self.link = link
  581                 self.title = title
  582         end
  583 end
 584
  585 # A `Decorator` is used to emit markdown into a specific format.
  586 # Default decorator used is `HTMLDecorator`.
      #
      # Every rendering service is abstract and receives the emitter `v` to write to.
  587 interface Decorator
  588
  589         # Kind of emitter used for decoration.
  590         type EMITTER: MarkdownEmitter
  591
  592         # Render a ruler block.
  593         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
  594
  595         # Render a headline block with corresponding level.
  596         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
  597
  598         # Render a paragraph block.
  599         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
  600
  601         # Render a code or fence block.
  602         fun add_code(v: EMITTER, block: BlockCode) is abstract
  603
  604         # Render a blockquote.
  605         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
  606
  607         # Render an unordered list.
  608         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
  609
  610         # Render an ordered list.
  611         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
  612
  613         # Render a list item.
  614         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
  615
  616         # Render an emphasis text.
  617         fun add_em(v: EMITTER, text: Text) is abstract
  618
  619         # Render a strong text.
  620         fun add_strong(v: EMITTER, text: Text) is abstract
  621
  622         # Render a strike text.
  623         #
  624         # Extended mode only (see `MarkdownProcessor::ext_mode`)
  625         fun add_strike(v: EMITTER, text: Text) is abstract
  626
  627         # Render a link.
  628         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
  629
  630         # Render an image.
  631         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
  632
  633         # Render an abbreviation.
  634         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
  635
  636         # Render a code span reading from a buffer.
  637         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
  638
  639         # Render a text and escape it.
  640         fun append_value(v: EMITTER, value: Text) is abstract
  641
  642         # Render code text from buffer and escape it.
  643         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
  644
  645         # Render a character escape.
  646         fun escape_char(v: EMITTER, char: Char) is abstract
  647
  648         # Render a line break
  649         fun add_line_break(v: EMITTER) is abstract
  650
  651         # Generate a new valid HTML id from a `String`.
  652         fun strip_id(txt: String): String is abstract
  653
  654         # Headlines found during the processing, labeled by their ids.
  655         fun headlines: ArrayMap[String, HeadLine] is abstract
  656 end
 657
  658 # Class representing a markdown headline.
  659 class HeadLine
  660         # Unique identifier of this headline.
  661         var id: String
  662
  663         # Text of the headline.
  664         var title: String
  665
  666         # Level of this headline.
  667         #
  668         # According to the markdown specification, level must be in `[1..6]`.
  669         var level: Int
  670 end
 671
 672 # `Decorator` that outputs HTML.
 673 class HTMLDecorator
 674         super Decorator
 675
 676         redef var headlines = new ArrayMap[String, HeadLine]
 677
  678         redef fun add_ruler(v, block) do v.add "<hr/>\n" # `block` itself is not read
 679
  680         redef fun add_headline(v, block) do
  681                 # save headline
                      # `txt` is the raw value of the first line of the block; the id is
                      # derived from it by `strip_id` and used as key in `headlines`.
  682                 var txt = block.block.first_line.value
  683                 var id = strip_id(txt)
  684                 var lvl = block.depth
  685                 headlines[id] = new HeadLine(id, txt, lvl)
  686                 # output it
                      # The heading level of the tag comes from `block.depth`.
  687                 v.add "<h{lvl} id=\"{id}\">"
  688                 v.emit_in block
  689                 v.add "</h{lvl}>\n"
  690         end
 691
  692         redef fun add_paragraph(v, block) do
                      # The block content is wrapped in a `<p>` element ended by a newline.
  693                 v.add "<p>"
  694                 v.emit_in block
  695                 v.add "</p>\n"
  696         end
 697
 698         redef fun add_code(v, block) do
 699                 if block isa BlockFence and block.meta != null then
 700                         v.add "<pre class=\"{block.meta.to_s}\"><code>"
 701                 else
 702                         v.add "<pre><code>"
 703                 end
 704                 v.emit_in block
 705                 v.add "</code></pre>\n"
 706         end
 707
  708         redef fun add_blockquote(v, block) do
                      # The block content is wrapped in a `<blockquote>` element, one tag per line.
  709                 v.add "<blockquote>\n"
  710                 v.emit_in block
  711                 v.add "</blockquote>\n"
  712         end
 713
  714         redef fun add_unorderedlist(v, block) do
                      # The block content is wrapped in a `<ul>` element.
  715                 v.add "<ul>\n"
  716                 v.emit_in block
  717                 v.add "</ul>\n"
  718         end
 719
  720         redef fun add_orderedlist(v, block) do
                      # The block content is wrapped in an `<ol>` element.
  721                 v.add "<ol>\n"
  722                 v.emit_in block
  723                 v.add "</ol>\n"
  724         end
 725
  726         redef fun add_listitem(v, block) do
                      # The block content is wrapped in a `<li>` element ended by a newline.
  727                 v.add "<li>"
  728                 v.emit_in block
  729                 v.add "</li>\n"
  730         end
 731
  732         redef fun add_em(v, text) do
                      # `text` is added as given, between `<em>` tags.
  733                 v.add "<em>"
  734                 v.add text
  735                 v.add "</em>"
  736         end
 737
  738         redef fun add_strong(v, text) do
                      # `text` is added as given, between `<strong>` tags.
  739                 v.add "<strong>"
  740                 v.add text
  741                 v.add "</strong>"
  742         end
 743
 744         redef fun add_strike(v, text) do
 745                 v.add "<del>"
 746                 v.add text
 747                 v.add "</del>"
 748         end
 749
 750         redef fun add_image(v, link, name, comment) do
 751                 v.add "<img src=\""
 752                 append_value(v, link)
 753                 v.add "\" alt=\""
 754                 append_value(v, name)
 755                 v.add "\""
 756                 if comment != null and not comment.is_empty then
 757                         v.add " title=\""
 758                         append_value(v, comment)
 759                         v.add "\""
 760                 end
 761                 v.add "/>"
 762         end
 763
 764         redef fun add_link(v, link, name, comment) do
 765                 v.add "<a href=\""
 766                 append_value(v, link)
 767                 v.add "\""
 768                 if comment != null and not comment.is_empty then
 769                         v.add " title=\""
 770                         append_value(v, comment)
 771                         v.add "\""
 772                 end
 773                 v.add ">"
 774                 v.emit_text(name)
 775                 v.add "</a>"
 776         end
 777
 778         redef fun add_abbr(v, name, comment) do
 779                 v.add "<abbr title=\""
 780                 append_value(v, comment)
 781                 v.add "\">"
 782                 v.emit_text(name)
 783                 v.add "</abbr>"
 784         end
 785
 786         redef fun add_span_code(v, text, from, to) do
 787                 v.add "<code>"
 788                 append_code(v, text, from, to)
 789                 v.add "</code>"
 790         end
 791
 792         redef fun add_line_break(v) do
 793                 v.add "<br/>"
 794         end
 795
 796         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 797
 798         redef fun escape_char(v, c) do
 799                 if c == '&' then
 800                         v.add "&amp;"
 801                 else if c == '<' then
 802                         v.add "&lt;"
 803                 else if c == '>' then
 804                         v.add "&gt;"
 805                 else if c == '"' then
 806                         v.add "&quot;"
 807                 else if c == '\'' then
 808                         v.add "&apos;"
 809                 else
 810                         v.addc c
 811                 end
 812         end
 813
 814         redef fun append_code(v, buffer, from, to) do
 815                 for i in [from..to[ do
 816                         var c = buffer[i]
 817                         if c == '&' then
 818                                 v.add "&amp;"
 819                         else if c == '<' then
 820                                 v.add "&lt;"
 821                         else if c == '>' then
 822                                 v.add "&gt;"
 823                         else
 824                                 v.addc c
 825                         end
 826                 end
 827         end
 828
 829         redef fun strip_id(txt) do
 830                 # strip id
 831                 var b = new FlatBuffer
 832                 for c in txt do
 833                         if c == ' ' then
 834                                 b.add '_'
 835                         else
 836                                 if not c.is_letter and
 837                                    not c.is_digit and
 838                                    not allowed_id_chars.has(c) then continue
 839                                 b.add c
 840                         end
 841                 end
 842                 var res = b.to_s
 843                 var key = res
 844                 # check for multiple id definitions
 845                 if headlines.has_key(key) then
 846                         var i = 1
 847                         key = "{res}_{i}"
 848                         while headlines.has_key(key) do
 849                                 i += 1
 850                                 key = "{res}_{i}"
 851                         end
 852                 end
 853                 return key
 854         end
 855
        # Characters, other than letters and digits, that `strip_id` keeps
        # unchanged in the identifiers it builds.
        private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 857 end
 858
 859 # A block of markdown lines.
 860 # A `MDBlock` can contains lines and/or sub-blocks.
 861 class MDBlock
 862         # Kind of block.
 863         # See `Block`.
 864         var kind: Block = new BlockNone(self) is writable
 865
 866         # First line if any.
 867         var first_line: nullable MDLine = null is writable
 868
 869         # Last line if any.
 870         var last_line: nullable MDLine = null is writable
 871
 872         # First sub-block if any.
 873         var first_block: nullable MDBlock = null is writable
 874
 875         # Last sub-block if any.
 876         var last_block: nullable MDBlock = null is writable
 877
 878         # Previous block if any.
 879         var prev: nullable MDBlock = null is writable
 880
 881         # Next block if any.
 882         var next: nullable MDBlock = null is writable
 883
 884         # Does this block contain subblocks?
 885         fun has_blocks: Bool do return first_block != null
 886
 887         # Count sub-blocks.
 888         fun count_blocks: Int do
 889                 var count = 0
 890                 var block = first_block
 891                 while block != null do
 892                         count += 1
 893                         block = block.next
 894                 end
 895                 return count
 896         end
 897
 898         # Does this block contain lines?
 899         fun has_lines: Bool do return first_line != null
 900
 901         # Count block lines.
 902         fun count_lines: Int do
 903                 var count = 0
 904                 var line = first_line
 905                 while line != null do
 906                         count += 1
 907                         line = line.next
 908                 end
 909                 return count
 910         end
 911
 912         # Split `self` creating a new sub-block having `line` has `last_line`.
 913         fun split(line: MDLine): MDBlock do
 914                 var block = new MDBlock
 915                 block.first_line = first_line
 916                 block.last_line = line
 917                 first_line = line.next
 918                 line.next = null
 919                 if first_line == null then
 920                         last_line = null
 921                 else
 922                         first_line.prev = null
 923                 end
 924                 if first_block == null then
 925                         first_block = block
 926                         last_block = block
 927                 else
 928                         last_block.next = block
 929                         last_block = block
 930                 end
 931                 return block
 932         end
 933
 934         # Add a `line` to this block.
 935         fun add_line(line: MDLine) do
 936                 if last_line == null then
 937                         first_line = line
 938                         last_line = line
 939                 else
 940                         last_line.next_empty = line.is_empty
 941                         line.prev_empty = last_line.is_empty
 942                         line.prev = last_line
 943                         last_line.next = line
 944                         last_line = line
 945                 end
 946         end
 947
 948         # Remove `line` from this block.
 949         fun remove_line(line: MDLine) do
 950                 if line.prev == null then
 951                         first_line = line.next
 952                 else
 953                         line.prev.next = line.next
 954                 end
 955                 if line.next == null then
 956                         last_line = line.prev
 957                 else
 958                         line.next.prev = line.prev
 959                 end
 960                 line.prev = null
 961                 line.next = null
 962         end
 963
 964         # Remove leading empty lines.
 965         fun remove_leading_empty_lines: Bool do
 966                 var was_empty = false
 967                 var line = first_line
 968                 while line != null and line.is_empty do
 969                         remove_line line
 970                         line = first_line
 971                         was_empty = true
 972                 end
 973                 return was_empty
 974         end
 975
 976         # Remove trailing empty lines.
 977         fun remove_trailing_empty_lines: Bool do
 978                 var was_empty = false
 979                 var line = last_line
 980                 while line != null and line.is_empty do
 981                         remove_line line
 982                         line = last_line
 983                         was_empty = true
 984                 end
 985                 return was_empty
 986         end
 987
 988         # Remove leading and trailing empty lines.
 989         fun remove_surrounding_empty_lines: Bool do
 990                 var was_empty = false
 991                 if remove_leading_empty_lines then was_empty = true
 992                 if remove_trailing_empty_lines then was_empty = true
 993                 return was_empty
 994         end
 995
 996         # Remove list markers and up to 4 leading spaces.
 997         # Used to clean nested lists.
 998         fun remove_list_indent(v: MarkdownProcessor) do
 999                 var line = first_line
1000                 while line != null do
1001                         if not line.is_empty then
1002                                 var kind = v.line_kind(line)
1003                                 if kind isa LineList then
1004                                         line.value = kind.extract_value(line)
1005                                 else
1006                                         line.value = line.value.substring_from(line.leading.min(4))
1007                                 end
1008                                 line.leading = line.process_leading
1009                         end
1010                         line = line.next
1011                 end
1012         end
1013
1014         # Collect block line text.
1015         fun text: String do
1016                 var text = new FlatBuffer
1017                 var line = first_line
1018                 while line != null do
1019                         if not line.is_empty then
1020                                 text.append line.text
1021                         end
1022                         text.append "\n"
1023                         line = line.next
1024                 end
1025                 return text.write_to_string
1026         end
1027 end
1028
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v`.
        #
        # By default the content is emitted as is with `v.emit_in`.
        fun emit(v: MarkdownEmitter) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed from `block` first.
        # Lines take precedence: sub-blocks are emitted only when no line is left.
        fun emit_in(v: MarkdownEmitter) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are first collected, without their leading and trailing spaces,
        # in a buffer pushed on `v`; the buffer is then given to `v.emit_text`.
        # A line break is added after each line ending with 2 spaces or more.
        fun emit_lines(v: MarkdownEmitter) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a number of characters, not an end
                                # index: remove both the leading and the trailing spaces.
                                var count = line.value.length - line.leading - line.trailing
                                v.add line.value.substring(line.leading, count.max(0))
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        fun emit_blocks(v: MarkdownEmitter) do
                var block = self.block.first_block
                while block != null do
                        block.kind.emit(v)
                        block = block.next
                end
        end
end
1076
# A block without any markdown specificities.
#
# Nothing is redefined here: the implementation inherited from `Block` is used,
# this class is only used for typing purposes.
class BlockNone
        super Block
end
1084
# A markdown blockquote.
class BlockQuote
        super Block

        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` whose first non-space character is
        # `>`, everything up to the marker is dropped, together with one space
        # following the marker if there is one. `leading` is then computed again.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty and line.value[line.leading] == '>' then
                                # Index of the first character to keep.
                                var rem = line.leading + 1
                                if rem < line.value.length and line.value[rem] == ' ' then rem += 1
                                line.value = line.value.substring_from(rem)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1110
# A markdown code block.
class BlockCode
        super Block

        # Number of char to skip at the beginning of the line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line through `append_code`, skipping the first `line_start`
        # characters, and end every line (empty ones included) with a newline.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                var code = line.value
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        line = line.next
                end
        end
end
1133
# A markdown code-fence block.
#
# The implementation of `BlockCode` is reused as is.
# The differences are that a fence block keeps the `meta` string and that
# its lines are emitted from their first character (see `line_start`).
class BlockFence
        super BlockCode

        # Any string found after fence token.
        var meta: nullable Text

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1147
# A markdown headline.
class BlockHeadline
        super Block

        redef fun emit(v) do v.decorator.add_headline(v, self)

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from the first line of `block`.
        #
        # The leading `#` characters give the `depth` (at most 6). They are
        # removed together with the spaces following them, and so are the
        # closing `#` characters and the spaces preceding them.
        # A headline with no text left has its line flagged as empty.
        # Nothing is done if `depth` is already set or if `block` has no
        # non-empty first line.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null or line.is_empty then return
                var value = line.value
                var start = line.leading
                while start < value.length and value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < value.length and value[start] == ' ' do
                        start += 1
                end
                if start >= value.length then
                        line.is_empty = true
                else
                        var nend = value.length - line.trailing - 1
                        # Never scan back past `start`: with an input like `# #` the
                        # closing marks are all that remains after the opening ones.
                        while nend >= start and value[nend] == '#' do nend -= 1
                        while nend >= start and value[nend] == ' ' do nend -= 1
                        if nend < start then
                                line.is_empty = true
                        else
                                line.value = value.substring(start, nend - start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                depth = level.min(6)
        end
end
1184
# A markdown list item block.
class BlockListItem
        super Block

        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1191
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins at each line that is
        # a `LineList`, or that is non-empty, not indented and preceded by an
        # empty line. The lines remaining at the end form the last item.
        private fun init_block(v: MarkdownProcessor) do
                var first = block.first_line
                var line = first.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # If at least one list item of `block` contains a `BlockParagraph`,
        # every `BlockNone` found in the list items becomes a `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                # Look for a paragraph, stopping at the first one found.
                var has_paragraph = false
                var item = block.first_block
                while item != null and not has_paragraph do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null do
                                        if child.kind isa BlockParagraph then
                                                has_paragraph = true
                                                break
                                        end
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not has_paragraph then return
                # Promote the plain blocks of all the items.
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null do
                                        if child.kind isa BlockNone then
                                                child.kind = new BlockParagraph(child)
                                        end
                                        child = child.next
                                end
                        end
                        item = item.next
                end
        end
end
1249
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1256
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1263
# A markdown paragraph block.
class BlockParagraph
        super Block

        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1270
# A markdown ruler.
class BlockRuler
        super Block

        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1277
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the lines unchanged: the `value` of each non-empty line is added
        # as is, and every line (empty ones included) is ended with a newline.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                v.add(line.value)
                        end
                        v.addn
                        line = line.next
                end
        end
end
1291
# A markdown line.
#
# Lines are chained together with `prev` and `next`.
class MDLine

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        var next_empty: Bool = false is writable

        # Initialize a new MDLine from its string value
        #
        # `leading`, `trailing` and `is_empty` are computed from `value`.
        init do
                self.leading = process_leading
                # `process_leading` clears blank lines, so the line has some
                # content exactly when `leading` stops before the end of `value`.
                if leading != value.length then
                        self.is_empty = false
                        self.trailing = process_trailing
                end
        end

        # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
        #
        # The `next_empty` and `prev_empty` flags of the neighbors are updated too.
        fun clear do
                value = ""
                leading = 0
                trailing = 0
                is_empty = true
                var before = prev
                if before != null then before.next_empty = true
                var after = next
                if after != null then after.prev_empty = true
        end

        # Number of leading spaces on this line.
        var leading: Int = 0 is writable

        # Compute `leading` depending on `value`.
        #
        # Return the number of spaces found at the beginning of `value`.
        # If `value` contains nothing but spaces, the line is cleared
        # (see `clear`) and 0 is returned.
        fun process_leading: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[count] == ' ' do count += 1
                # Compare the count just computed, not the `leading` attribute:
                # the attribute still describes the previous `value` when this
                # method is called after `value` was changed.
                if count == value.length then
                        clear
                        return 0
                end
                return count
        end

        # Number of trailing spaces on this line.
        var trailing: Int = 0 is writable

        # Compute `trailing` depending on `value`.
        #
        # Return the number of spaces found at the end of `value`.
        fun process_trailing: Int do
                var count = 0
                var value = self.value
                # The bound on `count` keeps the index valid when `value` is
                # empty or only made of spaces.
                while count < value.length and value[value.length - count - 1] == ' ' do
                        count += 1
                end
                return count
        end

        # Count the amount of `ch` in this line.
        # Return a value > 0 if this line only consists of `ch` and spaces.
        fun count_chars(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                                continue
                        end
                        # Any other character disqualifies the whole line.
                        count = 0
                        break
                end
                return count
        end

        # Count the amount of `ch` at the start of this line ignoring spaces.
        fun count_chars_start(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                        else
                                break
                        end
                end
                return count
        end

        # Last XML line if any.
        private var xml_end_line: nullable MDLine = null

        # Does `value` contains valid XML markup?
        #
        # Starting from `self`, the lines are read until the tag opened on this
        # line is closed. On success `xml_end_line` is set to the line where the
        # markup ends.
        private fun check_html: Bool do
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        # `hr` is complete on its own.
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        var line: nullable MDLine = self
                        while line != null do
                                # Move to the next `<` of the current line.
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # End of line reached: a `/` just before the last
                                        # character closes the pending tag.
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                if tag.is_html_block and not tag == "hr" then
                                                        if tmp[1] == '/' then
                                                                # A closing tag must match the last opened one.
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end

        # Read a XML comment.
        # Used by `check_html`.
        #
        # `start` is the index of the `<` opening the comment in `first_line`.
        # Return the index following the closing `-->` in the line where it is
        # found, this line being stored in `first_line.xml_end_line`.
        # Return -1 if there is no complete comment.
        private fun read_xml_comment(first_line: MDLine, start: Int): Int do
                var line: nullable MDLine = first_line
                if start + 3 < line.value.length then
                        # The two dashes are looked for relatively to `start`, so that
                        # indented comments are recognized too.
                        if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
                                var pos = start + 4
                                while line != null do
                                        while pos < line.value.length and line.value[pos] != '-' do
                                                pos += 1
                                        end
                                        if pos == line.value.length then
                                                line = line.next
                                                pos = 0
                                        else
                                                if pos + 2 < line.value.length then
                                                        if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
                                                                first_line.xml_end_line = line
                                                                return pos + 3
                                                        end
                                                end
                                                pos += 1
                                        end
                                end
                        end
                end
                return -1
        end

        # Extract the text of `self` without leading and trailing.
        fun text: String do
                # `substring` expects a number of characters, not an end index.
                var count = value.length - leading - trailing
                if count <= 0 then return ""
                return value.substring(leading, count)
        end
end
1492
# A kind of markdown line.
#
# Each implementation (`LineEmpty` and `LineOther` for example) knows how to
# process the lines of its kind.
interface Line

        # Parse the line.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1500
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the line: the line following the current one becomes the current line.
        redef fun process(v) do
                var current = v.current_line
                v.current_line = current.next
        end
end
1509
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the current line and the following ones into a new sub-block of
        # `v.current_block`.
        #
        # The sub-block is a `BlockParagraph`, or a `BlockNone` when inside a list
        # with no empty line before the text nor after it.
        redef fun process(v) do
                var line = v.current_line
                # Remember whether the text was preceded by an empty line.
                var was_empty = line.prev_empty
                # Advance to the first line that cannot be part of the text.
                while line != null and not line.is_empty do
                        var t = v.line_kind(line)
                        var list_stop = (v.in_list or v.ext_mode) and t isa LineList
                        var code_stop = v.ext_mode and (t isa LineCode or t isa LineFence)
                        var markup_stop = t isa LineHeadline or t isa LineHeadline1 or
                                t isa LineHeadline2 or t isa LineHR or
                                t isa LineBlockquote or t isa LineXML
                        if list_stop or code_stop or markup_stop then break
                        line = line.next
                end
                # Cut the text out of the current block.
                var block: MDBlock
                var plain: Bool
                if line == null then
                        # The text runs until the end of the current block.
                        block = v.current_block.split(v.current_block.last_line.as(not null))
                        plain = v.in_list and not was_empty
                else if line.is_empty then
                        # The text is ended by an empty line, taken along with it.
                        block = v.current_block.split(line)
                        plain = false
                else
                        # The text is ended by another construct, left in place.
                        block = v.current_block.split(line.prev.as(not null))
                        plain = v.in_list and not was_empty
                end
                if plain then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1559
# A line of markdown code.
class LineCode
        super Line

        # Split the consecutive code lines (and the empty lines between them)
        # into a `BlockCode`, then resume at the first remaining line.
        redef fun process(v) do
                var cursor = v.current_line
                # advance to the first line that is neither empty nor code
                while cursor != null do
                        if not cursor.is_empty and not v.line_kind(cursor) isa LineCode then break
                        cursor = cursor.next
                end
                # the block ends just before that line, or at the end of the input
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var block = v.current_block.split(last)
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1582
# A line of raw XML.
class LineXML
        super Line

        # Split the lines from `v.current_line` up to its `xml_end_line`
        # into a `BlockXML`.
        redef fun process(v) do
                var first = v.current_line
                # lines located before the XML go into their own block
                var before = first.prev
                if before != null then v.current_block.split(before)
                var last = first.xml_end_line.as(not null)
                var xml_block = v.current_block.split(last)
                xml_block.kind = new BlockXML(xml_block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1597
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Split the blockquote into a `BlockQuote` and process its content.
        #
        # The quote ends before the first non-empty, unindented line that follows
        # an empty line and is not itself a blockquote line.
        redef fun process(v) do
                var cursor = v.current_line
                # look for the first line after the quote
                while cursor != null do
                        if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 then
                                if not v.line_kind(cursor) isa LineBlockquote then break
                        end
                        cursor = cursor.next
                end
                # the quote ends just before that line, or at the end of the input
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var block = v.current_block.split(last)
                var quote = new BlockQuote(block)
                block.kind = quote
                block.remove_surrounding_empty_lines
                quote.remove_block_quote_prefix(block)
                # process the content of the quote as nested markdown
                v.current_line = cursor
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1627
# A markdown ruler line.
class LineHR
        super Line

        # Split the current line into a `BlockRuler`.
        redef fun process(v) do
                var ruler_line = v.current_line
                # lines located before the ruler go into their own block
                if ruler_line.prev != null then
                        v.current_block.split(ruler_line.prev.as(not null))
                end
                var ruler = v.current_block.split(ruler_line.as(not null))
                ruler.kind = new BlockRuler(ruler)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1641
# A markdown fence code line.
class LineFence
        super Line

        # Split the fenced lines into a `BlockFence`.
        #
        # The block runs from the current line up to the next fence line
        # (included), or up to the end of the input when no fence follows.
        redef fun process(v) do
                # look for the closing fence, starting after the opening one
                var cursor = v.current_line.next
                while cursor != null and not v.line_kind(cursor) isa LineFence do
                        cursor = cursor.next
                end
                # step past the closing fence so that it belongs to the block
                if cursor != null then cursor = cursor.next
                var end_line: MDLine
                if cursor == null then
                        end_line = v.current_block.last_line.as(not null)
                else
                        end_line = cursor.prev.as(not null)
                end
                var block = v.current_block.split(end_line)
                # read the meta from the opening fence before it is cleared
                var meta = block.first_line.value.meta_from_fence
                block.kind = new BlockFence(block, meta)
                block.first_line.clear
                # clear the last line only if it is a fence
                var closing = block.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                block.remove_surrounding_empty_lines
                v.current_line = cursor
        end
end
1674
# A markdown headline.
class LineHeadline
        super Line

        # Split the current line into a `BlockHeadline`.
        redef fun process(v) do
                var current = v.current_line
                # lines located before the headline go into their own block
                var before = current.prev
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(current.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1691
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Split the current line into a `BlockHeadline` of depth 1.
        #
        # The line that follows the headline text is cleared first.
        redef fun process(v) do
                var current = v.current_line
                # lines located before the headline go into their own block
                var before = current.prev
                if before != null then v.current_block.split(before)
                current.next.clear
                var block = v.current_block.split(current.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1710
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Split the current line into a `BlockHeadline` of depth 2.
        #
        # The line that follows the headline text is cleared first.
        redef fun process(v) do
                var current = v.current_line
                # lines located before the headline go into their own block
                var before = current.prev
                if before != null then v.current_block.split(before)
                current.next.clear
                var block = v.current_block.split(current.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1729
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
        super Line

        # Split the whole list into a block and process each of its items.
        #
        # The list ends before the first non-empty, unindented line that follows
        # an empty line and is not itself a list line.
        redef fun process(v) do
                var cursor = v.current_line
                # look for the first line after the list
                while cursor != null do
                        var lk = v.line_kind(cursor)
                        var may_end_list = not cursor.is_empty and cursor.prev_empty and cursor.leading == 0
                        if may_end_list and not lk isa LineList then break
                        cursor = cursor.next
                end
                # the list ends just before that line, or at the end of the input
                var last: MDLine
                if cursor == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = cursor.prev.as(not null)
                end
                var list = v.current_block.split(last)
                var kind = block_kind(list)
                list.kind = kind
                # reset the flags both before and after the empty lines are removed,
                # since the first and last lines may change in between
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # process every sub-block of the list as nested markdown
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = cursor
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1775
# An ordered list line.
class LineOList
        super LineList

        # Ordered list lines produce a `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # Return what follows the first `.` of the line and the character after it.
        redef fun extract_value(line) do
                var raw = line.value
                var dot = raw.index_of('.')
                return raw.substring_from(dot + 2)
        end
end
1786
# An unordered list line.
class LineUList
        super LineList

        # Unordered list lines produce a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # Return what follows the leading characters and the two characters after them.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1797
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Position of `self` in markdown input.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token on `v`.
        #
        # By default, `char` is added to the output with `MarkdownEmitter::addc`.
        fun emit(v: MarkdownEmitter) do
                v.addc(char)
        end
end
1811
# A token without a specific meaning.
#
# Adds no behaviour of its own: `emit` is inherited from `Token`.
class TokenNone
        super Token
end
1816
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        #
        # When `emit_text_until` does not return a positive position,
        # `char` is emitted as is.
        redef fun emit(v) do
                # the emphasized text is collected in a buffer of its own
                var content = v.push_buffer
                var close_pos = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if close_pos <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = close_pos
        end
end
1833
# An emphasis star token.
#
# Adds no behaviour of its own: `emit` is inherited from `TokenEm`.
class TokenEmStar
        super TokenEm
end
1838
# An emphasis underscore token.
#
# Adds no behaviour of its own: `emit` is inherited from `TokenEm`.
class TokenEmUnderscore
        super TokenEm
end
1843
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        #
        # The content starts two characters after `pos`. When `emit_text_until`
        # does not return a positive position, `char` is emitted as is.
        redef fun emit(v) do
                # the strong text is collected in a buffer of its own
                var content = v.push_buffer
                var close_pos = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if close_pos <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                # one more than the returned position, as the marker is two characters long
                v.current_pos = close_pos + 1
        end
end
1860
# A strong star token.
#
# Adds no behaviour of its own: `emit` is inherited from `TokenStrong`.
class TokenStrongStar
        super TokenStrong
end
1865
# A strong underscore token.
#
# Adds no behaviour of its own: `emit` is inherited from `TokenStrong`.
class TokenStrongUnderscore
        super TokenStrong
end
1870
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span of code.
        #
        # Spaces right after the opening marker and right before the closing one
        # are left out. Nothing is emitted when only spaces lie between the
        # markers. When no matching token is found, `char` is emitted as is.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var code_start = pos + next_pos + 1
                var code_end = v.processor.find_token(text, code_start, self)
                if code_end <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = code_end + next_pos
                # trim the spaces on the left
                while code_start < code_end and text[code_start] == ' ' do code_start += 1
                if code_start >= code_end then return
                # trim the spaces on the right
                while text[code_end - 1] == ' ' do code_end -= 1
                v.decorator.add_span_code(v, text, code_start, code_end)
        end

        # Offset added by `emit` to skip the extra characters of the marker.
        private fun next_pos: Int is abstract
end
1893
# A span code token.
class TokenCodeSingle
        super TokenCode

        # No extra offset: `TokenCode::emit` adds this value to its positions.
        redef fun next_pos do return 0
end
1900
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One extra offset: `TokenCode::emit` adds this value to its positions.
        redef fun next_pos do return 1
end
1907
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image if the construct is valid, `char` otherwise.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # Parse the construct located at `start` in `v.current_text` and fill
        # `name`, `link`, `comment` and `is_abbrev` accordingly.
        #
        # Three forms are recognized after the `[name]` part:
        #
        # * `(address "title")`: an inline link, the title being optional
        # * `[id]`: a reference looked up in `MarkdownProcessor::link_refs`
        # * nothing: `name` itself is used as the reference id
        #
        # Return the position of the last character of the construct,
        # or `-1` if the construct is not a valid link.
        # Text that ends in the middle of the construct is rejected with `-1`.
        #
        # `out` is not used.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                var pos
                # links start with `[`, images with `![`
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # nothing follows `[name]`: only a known reference is accepted
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # the text may end right after the address
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # only spaces are left: the closing `)` is missing
                                if pos < start then return -1
                                if md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        # an empty id means that the name is the id
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # something else follows `[name]`: only a known reference is accepted
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
2026
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the link is an abbreviation with a title,
        # a regular link otherwise.
        redef fun emit_hyper(v) do
                var as_abbr = is_abbrev and comment != null
                if as_abbr then
                        v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
                        return
                end
                v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
        end
end
2039
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image through the decorator of `v`.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var label = name.as(not null)
                v.decorator.add_image(v, address, label, comment)
        end
end
2048
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the HTML found at `v.current_pos`, or the escaped `char`
        # when `check_html` does not return a positive position.
        redef fun emit(v) do
                var html = new FlatBuffer
                var end_pos = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if end_pos <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = end_pos
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # Auto links are emitted directly on `v`; inline HTML is appended to `out`.
        # Return the position where the construct ends, or `-1` if none is found.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var candidate = new FlatBuffer
                var pos = md.read_until(candidate, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' then
                        if candidate.is_link_prefix then
                                # read the rest of the address into the same buffer
                                pos = md.read_until(candidate, pos, '>')
                                if pos != -1 then
                                        var address = candidate.write_to_string
                                        v.decorator.add_link(v, address, address, null)
                                        return pos
                                end
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2086
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity found at `pos` as is, or the escaped `char`
        # when the text at `pos` is not a well-formed entity.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` into `out`, then check the
        # characters that follow the first one:
        #
        # * `#x` or `#X` followed by at least one hexadecimal digit
        # * `#` followed by decimal digits only
        # * letters and digits only (the name itself is not checked)
        #
        # On success, `;` is appended to `out` and the position of the `;`
        # in `md` is returned. Return `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then
                        return -1
                end
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # hexadecimal character reference
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                     (c >= 'a' and c <= 'f') or
                                                     (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                # decimal character reference
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        # named entity
                        # TODO check entity is valid
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                end
                out.add ';'
                return pos
        end
end
2140
# A markdown escape token.
class TokenEscape
        super Token

        # Move to the character that follows the token and emit it as is.
        redef fun emit(v) do
                var escaped_at = v.current_pos + 1
                v.current_pos = escaped_at
                v.addc v.current_text[escaped_at]
        end
end
2150
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        # Emit the text up to the matching token as a strike.
        #
        # The content starts two characters after `pos`. When `emit_text_until`
        # does not return a positive position, `char` is emitted as is.
        redef fun emit(v) do
                # the struck text is collected in a buffer of its own
                var content = v.push_buffer
                var close_pos = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if close_pos <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                # one more than the returned position, as the marker is two characters long
                v.current_pos = close_pos + 1
        end
end
2169
2170 redef class Text
2171
2172         # Get the position of the next non-space character.
2173         private fun skip_spaces(start: Int): Int do
2174                 var pos = start
2175                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2176                         pos += 1
2177                 end
2178                 if pos < length then return pos
2179                 return -1
2180         end
2181
2182         # Read `self` until `nend` and append it to the `out` buffer.
2183         # Escape markdown special chars.
2184         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2185                 var pos = start
2186                 while pos < length do
2187                         var c = self[pos]
2188                         if c == '\\' and pos + 1 < length then
2189                                 pos = escape(out, self[pos + 1], pos)
2190                         else
2191                                 var end_reached = false
2192                                 for n in nend do
2193                                         if c == n then
2194                                                 end_reached = true
2195                                                 break
2196                                         end
2197                                 end
2198                                 if end_reached then break
2199                                 out.add c
2200                         end
2201                         pos += 1
2202                 end
2203                 if pos == length then return -1
2204                 return pos
2205         end
2206
2207         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2208         # No escape is made.
2209         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2210                 var pos = start
2211                 while pos < length do
2212                         var c = self[pos]
2213                         var end_reached = false
2214                         for n in nend do
2215                                 if c == n then
2216                                         end_reached = true
2217                                         break
2218                                 end
2219                         end
2220                         if end_reached then break
2221                         out.add c
2222                         pos += 1
2223                 end
2224                 if pos == length then return -1
2225                 return pos
2226         end
2227
2228         # Read `self` as XML until `to` and append it to the `out` buffer.
2229         # Escape HTML special chars.
2230         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2231                 var pos = from
2232                 var in_str = false
2233                 var str_char: nullable Char = null
2234                 while pos < length do
2235                         var c = self[pos]
2236                         if in_str then
2237                                 if c == '\\' then
2238                                         out.add c
2239                                         pos += 1
2240                                         if pos < length then
2241                                                 out.add c
2242                                                 pos += 1
2243                                         end
2244                                         continue
2245                                 end
2246                                 if c == str_char then
2247                                         in_str = false
2248                                         out.add c
2249                                         pos += 1
2250                                         continue
2251                                 end
2252                         end
2253                         if c == '"' or c == '\'' then
2254                                 in_str = true
2255                                 str_char = c
2256                         end
2257                         if not in_str then
2258                                 var end_reached = false
2259                                 for n in [0..to.length[ do
2260                                         if c == to[n] then
2261                                                 end_reached = true
2262                                                 break
2263                                         end
2264                                 end
2265                                 if end_reached then break
2266                         end
2267                         out.add c
2268                         pos += 1
2269                 end
2270                 if pos == length then return -1
2271                 return pos
2272         end
2273
2274         # Read `self` as XML and append it to the `out` buffer.
2275         # Safe mode can be activated to limit reading to valid xml.
2276         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2277                 var pos = 0
2278                 var is_valid = true
2279                 var is_close_tag = false
2280                 if start + 1 >= length then return -1
2281                 if self[start + 1] == '/' then
2282                         is_close_tag = true
2283                         pos = start + 2
2284                 else if self[start + 1] == '!' then
2285                         out.append "<!"
2286                         return start + 1
2287                 else
2288                         is_close_tag = false
2289                         pos = start + 1
2290                 end
2291                 if safe_mode then
2292                         var tmp = new FlatBuffer
2293                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2294                         if pos == -1 then return -1
2295                         var tag = tmp.write_to_string.trim.to_lower
2296                         if not tag.is_valid_html_tag then
2297                                 out.append "&lt;"
2298                                 pos = -1
2299                         else if tag.is_html_unsafe then
2300                                 is_valid = false
2301                                 out.append "&lt;"
2302                                 if is_close_tag then out.add '/'
2303                                 out.append tmp
2304                         else
2305                                 out.append "<"
2306                                 if is_close_tag then out.add '/'
2307                                 out.append tmp
2308                         end
2309                 else
2310                         out.add '<'
2311                         if is_close_tag then out.add '/'
2312                         pos = read_xml_until(out, pos, ' ', '/', '>')
2313                 end
2314                 if pos == -1 then return -1
2315                 pos = read_xml_until(out, pos, '/', '>')
2316                 if pos == -1 then return -1
2317                 if self[pos] == '/' then
2318                         out.append " /"
2319                         pos = self.read_xml_until(out, pos + 1, '>')
2320                         if pos == -1 then return -1
2321                 end
2322                 if self[pos] == '>' then
2323                         if is_valid then
2324                                 out.add '>'
2325                         else
2326                                 out.append "&gt;"
2327                         end
2328                         return pos
2329                 end
2330                 return -1
2331         end
2332
2333         # Read a markdown link address and append it to the `out` buffer.
2334         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2335                 var pos = start
2336                 var counter = 1
2337                 while pos < length do
2338                         var c = self[pos]
2339                         if c == '\\' and pos + 1 < length then
2340                                 pos = escape(out, self[pos + 1], pos)
2341                         else
2342                                 var end_reached = false
2343                                 if c == '(' then
2344                                         counter += 1
2345                                 else if c == ' ' then
2346                                         if counter == 1 then end_reached = true
2347                                 else if c == ')' then
2348                                         counter -= 1
2349                                         if counter == 0 then end_reached = true
2350                                 end
2351                                 if end_reached then break
2352                                 out.add c
2353                         end
2354                         pos += 1
2355                 end
2356                 if pos == length then return -1
2357                 return pos
2358         end
2359
2360         # Read a markdown link text and append it to the `out` buffer.
2361         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2362                 var pos = start
2363                 var counter = 1
2364                 while pos < length do
2365                         var c = self[pos]
2366                         var end_reached = false
2367                         if c == '[' then
2368                                 counter += 1
2369                                 out.add c
2370                         else if c == ']' then
2371                                 counter -= 1
2372                                 if counter == 0 then
2373                                         end_reached = true
2374                                 else
2375                                         out.add c
2376                                 end
2377                         else
2378                                 out.add c
2379                         end
2380                         if end_reached then break
2381                         pos += 1
2382                 end
2383                 if pos == length then return -1
2384                 return pos
2385         end
2386
2387         # Extract the XML tag name from a XML tag.
2388         private fun xml_tag: String do
2389                 var tpl = new FlatBuffer
2390                 var pos = 1
2391                 if pos < length and self[1] == '/' then pos += 1
2392                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2393                         tpl.add self[pos]
2394                         pos += 1
2395                 end
2396                 return tpl.write_to_string.to_lower
2397         end
2398
2399         private fun is_valid_html_tag: Bool do
2400                 if is_empty then return false
2401                 for c in self do
2402                         if not c.is_alpha then return false
2403                 end
2404                 return true
2405         end
2406
2407         # Read and escape the markdown contained in `self`.
2408         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2409                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2410                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2411                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2412                    c == '`' or c == '~' or c == '^' then
2413                         out.add c
2414                         return pos + 1
2415                 end
2416                 out.add '\\'
2417                 return pos
2418         end
2419
2420         # Extract string found at end of fence opening.
2421         private fun meta_from_fence: nullable Text do
2422                 for i in [0..chars.length[ do
2423                         var c = chars[i]
2424                         if c != ' ' and c != '`' and c != '~' then
2425                                 return substring_from(i).trim
2426                         end
2427                 end
2428                 return null
2429         end
2430
2431         # Is `self` an unsafe HTML element?
2432         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2433
2434         # Is `self` a HRML block element?
2435         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2436
2437         # Is `self` a link prefix?
2438         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2439
2440         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2441
2442         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2443
2444         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2445 end
2446
redef class String

        # Parse `self` as markdown and return its HTML representation.
        #
        # A new `MarkdownProcessor` is created for each call.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do
                return (new MarkdownProcessor).process(self)
        end
end