lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then outputed by an `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for ouput.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         #               This is a paragraph
  45         #               * and this is not a list
  46         #
  47         #   Will produce:
  48         #
  49         #               <p>This is a paragraph
  50         #               * and this is not a list</p>
  51         #
  52         #       When using extended mode this changes to:
  53         #
  54         #               <p>This is a paragraph</p>
  55         #               <ul>
  56         #               <li>and this is not a list</li>
  57         #               </ul>
  58         #
  59         # * Fences code blocks
  60         #
  61         #   If you don't want to indent your all your code with 4 spaces,
  62         #   you can wrap your code in ``` ``` ``` or `~~~`.
  63         #
  64         #       Here's an example:
  65         #
  66         #               ```
  67         #               fun test do
  68         #                       print "Hello World!"
  69         #               end
  70         #               ```
  71         #
  72         # * Code blocks meta
  73         #
  74         #   If you want to use syntax highlighting tools, most of them need to know what kind
  75         #   of language they are highlighting.
  76         #   You can add an optional language identifier after the fence declaration to output
  77         #   it in the HTML render.
  78         #
  79         #               ```nit
  80         #               import markdown
  81         #
  82         #               print "# Hello World!".md_to_html
  83         #               ```
  84         #
  85         #   Becomes
  86         #
  87         #               <pre class="nit"><code>import markdown
  88         #
  89         #               print "Hello World!".md_to_html
  90         #               </code></pre>
  91         #
  92         # * Underscores (Emphasis)
  93         #
  94         #   Underscores in the middle of a word like:
  95         #
  96         #               Con_cat_this
  97         #
  98         #       normally produces this:
  99         #
 100         #               <p>Con<em>cat</em>this</p>
 101         #
 102         #   With extended mode they don't result in emphasis.
 103         #
 104         #               <p>Con_cat_this</p>
 105         #
 106         # * Strikethrough
 107         #
 108         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 109         #   strikethrought span is marked with `~~`.
 110         #
 111         #               ~~Mistaken text.~~
 112         #
 113         #   becomes
 114         #
 115         #               <del>Mistaken text.</del>
 116         var ext_mode = true
 117
 118         init do self.emitter = new MarkdownEmitter(self)
 119
 120         # Process the mardown `input` string and return the processed output.
 121         fun process(input: String): Streamable do
 122                 # init processor
 123                 link_refs.clear
 124                 last_link_ref = null
 125                 current_line = null
 126                 current_block = null
 127                 # parse markdown
 128                 var parent = read_lines(input)
 129                 parent.remove_surrounding_empty_lines
 130                 recurse(parent, false)
 131                 # output processed text
 132                 return emitter.emit(parent.kind)
 133         end
 134
 135         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 136         private fun read_lines(input: String): MDBlock do
 137                 var block = new MDBlock
 138                 var value = new FlatBuffer
 139                 var i = 0
 140                 while i < input.length do
 141                         value.clear
 142                         var pos = 0
 143                         var eol = false
 144                         while not eol and i < input.length do
 145                                 var c = input[i]
 146                                 if c == '\n' then
 147                                         i += 1
 148                                         eol = true
 149                                 else if c == '\t' then
 150                                         var np = pos + (4 - (pos.bin_and(3)))
 151                                         while pos < np do
 152                                                 value.add ' '
 153                                                 pos += 1
 154                                         end
 155                                         i += 1
 156                                 else
 157                                         pos += 1
 158                                         value.add c
 159                                         i += 1
 160                                 end
 161                         end
 162
 163                         var line = new MDLine(value.write_to_string)
 164                         var is_link_ref = check_link_ref(line)
 165                         # Skip link refs
 166                         if not is_link_ref then block.add_line line
 167                 end
 168                 return block
 169         end
 170
 171         # Check if line is a block link definition.
 172         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 173         private fun check_link_ref(line: MDLine): Bool do
 174                 var md = line.value
 175                 var is_link_ref = false
 176                 var id = new FlatBuffer
 177                 var link = new FlatBuffer
 178                 var comment = new FlatBuffer
 179                 var pos = -1
 180                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 181                         pos = line.leading + 1
 182                         pos = md.read_until(id, pos, ']')
 183                         if not id.is_empty and pos + 2 < line.value.length then
 184                                 if line.value[pos + 1] == ':' then
 185                                         pos += 2
 186                                         pos = md.skip_spaces(pos)
 187                                         if line.value[pos] == '<' then
 188                                                 pos += 1
 189                                                 pos = md.read_until(link, pos, '>')
 190                                                 pos += 1
 191                                         else
 192                                                 pos = md.read_until(link, pos, ' ', '\n')
 193                                         end
 194                                         if not link.is_empty then
 195                                                 pos = md.skip_spaces(pos)
 196                                                 if pos > 0 and pos < line.value.length then
 197                                                         var c = line.value[pos]
 198                                                         if c == '\"' or c == '\'' or c == '(' then
 199                                                                 pos += 1
 200                                                                 if c == '(' then
 201                                                                         pos = md.read_until(comment, pos, ')')
 202                                                                 else
 203                                                                         pos = md.read_until(comment, pos, c)
 204                                                                 end
 205                                                                 if pos > 0 then is_link_ref = true
 206                                                         end
 207                                                 else
 208                                                         is_link_ref = true
 209                                                 end
 210                                         end
 211                                 end
 212                         end
 213                 end
 214                 if is_link_ref and not id.is_empty and not link.is_empty then
 215                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 216                         add_link_ref(id.write_to_string, lr)
 217                         if comment.is_empty then last_link_ref = lr
 218                         return true
 219                 else
 220                         comment = new FlatBuffer
 221                         if not line.is_empty and last_link_ref != null then
 222                                 pos = line.leading
 223                                 var c = line.value[pos]
 224                                 if c == '\"' or c == '\'' or c ==  '(' then
 225                                         pos += 1
 226                                         if c == '(' then
 227                                                 pos = md.read_until(comment, pos, ')')
 228                                         else
 229                                                 pos = md.read_until(comment, pos, c)
 230                                         end
 231                                 end
 232                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 233                         end
 234                         if comment.is_empty then return false
 235                         return true
 236                 end
 237         end
 238
 239         # Known link refs
 240         # This list will be needed during output to expand links.
 241         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 242
 243         # Last encountered link ref (for multiline definitions)
 244         #
 245         # Markdown allows link refs to be defined over two lines:
 246         #
 247         #       [id]: http://example.com/longish/path/to/resource/here
 248         #               "Optional Title Here"
 249         #
 250         private var last_link_ref: nullable LinkRef = null
 251
 252         # Add a link ref to the list
 253         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 254
 255         # Recursively split a `block`.
 256         #
 257         # The block is splitted according to the type of lines it contains.
 258         # Some blocks can be splited again recursively like lists.
 259         # The `in_list` mode is used to recurse on list and build
 260         # nested paragraphs or code blocks.
 261         fun recurse(root: MDBlock, in_list: Bool) do
 262                 var old_mode = self.in_list
 263                 var old_root = self.current_block
 264                 self.in_list = in_list
 265
 266                 var line = root.first_line
 267                 while line != null and line.is_empty do
 268                         line = line.next
 269                         if line == null then return
 270                 end
 271
 272                 current_line = line
 273                 current_block = root
 274                 while current_line != null do
 275                         line_kind(current_line.as(not null)).process(self)
 276                 end
 277                 self.in_list = old_mode
 278                 self.current_block = old_root
 279         end
 280
 281         # Currently processed line.
 282         # Used when visiting blocks with `recurse`.
 283         var current_line: nullable MDLine = null is writable
 284
 285         # Currently processed block.
 286         # Used when visiting blocks with `recurse`.
 287         var current_block: nullable MDBlock = null is writable
 288
 289         # Is the current recursion in list mode?
 290         # Used when visiting blocks with `recurse`
 291         private var in_list = false
 292
 293         # The type of line.
 294         # see: `md_line_*`
 295         fun line_kind(md: MDLine): Line do
 296                 var value = md.value
 297                 var leading = md.leading
 298                 var trailing = md.trailing
 299                 if md.is_empty then return new LineEmpty
 300                 if md.leading > 3 then return new LineCode
 301                 if value[leading] == '#' then return new LineHeadline
 302                 if value[leading] == '>' then return new LineBlockquote
 303
 304                 if ext_mode then
 305                         if value.length - leading - trailing > 2 then
 306                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 307                                         return new LineFence
 308                                 end
 309                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 310                                         return new LineFence
 311                                 end
 312                         end
 313                 end
 314
 315                 if value.length - leading - trailing > 2 and
 316                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 317                    if md.count_chars(value[leading]) >= 3 then
 318                                 return new LineHR
 319                    end
 320                 end
 321
 322                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 323                         var c = value[leading]
 324                         if c == '*' or c == '-' or c == '+' then return new LineUList
 325                 end
 326
 327                 if value.length - leading >= 3 and value[leading].is_digit then
 328                         var i = leading + 1
 329                         while i < value.length and value[i].is_digit do i += 1
 330                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 331                                 return new LineOList
 332                         end
 333                 end
 334
 335                 if value[leading] == '<' and md.check_html then return new LineXML
 336
 337                 var next = md.next
 338                 if next != null and not next.is_empty then
 339                         if next.count_chars('=') > 0 then
 340                                 return new LineHeadline1
 341                         end
 342                         if next.count_chars('-') > 0 then
 343                                 return new LineHeadline2
 344                         end
 345                 end
 346                 return new LineOther
 347         end
 348
 349         # Get the token kind at `pos`.
 350         fun token_at(text: Text, pos: Int): Token do
 351                 var c0: Char
 352                 var c1: Char
 353                 var c2: Char
 354
 355                 if pos > 0 then
 356                         c0 = text[pos - 1]
 357                 else
 358                         c0 = ' '
 359                 end
 360                 var c = text[pos]
 361
 362                 if pos + 1 < text.length then
 363                         c1 = text[pos + 1]
 364                 else
 365                         c1 = ' '
 366                 end
 367                 if pos + 2 < text.length then
 368                         c2 = text[pos + 2]
 369                 else
 370                         c2 = ' '
 371                 end
 372
 373                 if c == '*' then
 374                         if c1 == '*' then
 375                                 if c0 != ' ' or c2 != ' ' then
 376                                         return new TokenStrongStar(pos, c)
 377                                 else
 378                                         return new TokenEmStar(pos, c)
 379                                 end
 380                         end
 381                         if c0 != ' ' or c1 != ' ' then
 382                                 return new TokenEmStar(pos, c)
 383                         else
 384                                 return new TokenNone(pos, c)
 385                         end
 386                 else if c == '_' then
 387                         if c1 == '_' then
 388                                 if c0 != ' ' or c2 != ' 'then
 389                                         return new TokenStrongUnderscore(pos, c)
 390                                 else
 391                                         return new TokenEmUnderscore(pos, c)
 392                                 end
 393                         end
 394                         if ext_mode then
 395                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 396                                    (c1.is_letter or c1.is_digit) then
 397                                         return new TokenNone(pos, c)
 398                                 else
 399                                         return new TokenEmUnderscore(pos, c)
 400                                 end
 401                         end
 402                         if c0 != ' ' or c1 != ' ' then
 403                                 return new TokenEmUnderscore(pos, c)
 404                         else
 405                                 return new TokenNone(pos, c)
 406                         end
 407                 else if c == '!' then
 408                         if c1 == '[' then return new TokenImage(pos, c)
 409                         return new TokenNone(pos, c)
 410                 else if c == '[' then
 411                         return new TokenLink(pos, c)
 412                 else if c == ']' then
 413                         return new TokenNone(pos, c)
 414                 else if c == '`' then
 415                         if c1 == '`' then
 416                                 return new TokenCodeDouble(pos, c)
 417                         else
 418                                 return new TokenCodeSingle(pos, c)
 419                         end
 420                 else if c == '\\' then
 421                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 422                                 return new TokenEscape(pos, c)
 423                         else
 424                                 return new TokenNone(pos, c)
 425                         end
 426                 else if c == '<' then
 427                         return new TokenHTML(pos, c)
 428                 else if c == '&' then
 429                         return new TokenEntity(pos, c)
 430                 else
 431                         if ext_mode then
 432                                 if c == '~' and c1 == '~' then
 433                                         return new TokenStrike(pos, c)
 434                                 end
 435                         end
 436                         return new TokenNone(pos, c)
 437                 end
 438         end
 439
 440         # Find the position of a `token` in `self`.
 441         fun find_token(text: Text, start: Int, token: Token): Int do
 442                 var pos = start
 443                 while pos < text.length do
 444                         if token_at(text, pos).is_same_type(token) then
 445                                 return pos
 446                         end
 447                         pos += 1
 448                 end
 449                 return -1
 450         end
 451 end
 452
 453 # Emit output corresponding to blocks content.
 454 #
 455 # Blocks are created by a previous pass in `MarkdownProcessor`.
 456 # The emitter use a `Decorator` to select the output format.
 457 class MarkdownEmitter
 458
 459         # Processor containing link refs.
 460         var processor: MarkdownProcessor
 461
 462         # Decorator used for output.
 463         # Default is `HTMLDecorator`
 464         var decorator: Decorator = new HTMLDecorator is writable
 465
 466         # Create a new `MarkdownEmitter` using a custom `decorator`.
 467         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 468                 init processor
 469                 self.decorator = decorator
 470         end
 471
 472         # Output `block` using `decorator` in the current buffer.
 473         fun emit(block: Block): Text do
 474                 var buffer = push_buffer
 475                 block.emit(self)
 476                 pop_buffer
 477                 return buffer
 478         end
 479
 480         # Output the content of `block`.
 481         fun emit_in(block: Block) do block.emit_in(self)
 482
 483         # Transform and emit mardown text
 484         fun emit_text(text: Text) do
 485                 emit_text_until(text, 0, null)
 486         end
 487
 488         # Transform and emit mardown text starting at `from` and
 489         # until a token with the same type as `token` is found.
 490         # Go until the end of text if `token` is null.
 491         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 492                 var old_text = current_text
 493                 var old_pos = current_pos
 494                 current_text = text
 495                 current_pos = start
 496                 while current_pos < text.length do
 497                         var mt = processor.token_at(text, current_pos)
 498                         if (token != null and not token isa TokenNone) and
 499                         (mt.is_same_type(token) or
 500                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 501                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 502                                 return current_pos
 503                         end
 504                         mt.emit(self)
 505                         current_pos += 1
 506                 end
 507                 current_text = old_text
 508                 current_pos = old_pos
 509                 return -1
 510         end
 511
 512         # Currently processed position in `current_text`.
 513         # Used when visiting inline production with `emit_text_until`.
 514         private var current_pos: Int = -1
 515
 516         # Currently processed text.
 517         # Used when visiting inline production with `emit_text_until`.
 518         private var current_text: nullable Text = null
 519
 520         # Stacked buffers.
 521         private var buffer_stack = new List[FlatBuffer]
 522
 523         # Push a new buffer on the stack.
 524         private fun push_buffer: FlatBuffer do
 525                 var buffer = new FlatBuffer
 526                 buffer_stack.add buffer
 527                 return buffer
 528         end
 529
 530         # Pop the last buffer.
 531         private fun pop_buffer do buffer_stack.pop
 532
 533         # Current output buffer.
 534         private fun current_buffer: FlatBuffer do
 535                 assert not buffer_stack.is_empty
 536                 return buffer_stack.last
 537         end
 538
 539         # Append `e` to current buffer.
 540         fun add(e: Streamable) do
 541                 if e isa Text then
 542                         current_buffer.append e
 543                 else
 544                         current_buffer.append e.write_to_string
 545                 end
 546         end
 547
 548         # Append `c` to current buffer.
 549         fun addc(c: Char) do current_buffer.add c
 550
 551         # Append a "\n" line break.
 552         fun addn do current_buffer.add '\n'
 553 end
 554
 555 # A Link Reference.
 556 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 557 #
 558 # ~~~raw
 559 # [1]: http://example.com/ "Optional title"
 560 # ~~~
 561 class LinkRef
 562
 563         # Link href
 564         var link: String
 565
 566         # Optional link title
 567         var title: nullable String = null
 568
 569         # Is the link an abreviation?
 570         var is_abbrev = false
 571
 572         # Create a link with a title.
 573         init with_title(link: String, title: nullable String) do
 574                 self.link = link
 575                 self.title = title
 576         end
 577 end
 578
 579 # A `Decorator` is used to emit mardown into a specific format.
 580 # Default decorator used is `HTMLDecorator`.
 581 interface Decorator
 582
 583         # Render a ruler block.
 584         fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
 585
 586         # Render a headline block with corresponding level.
 587         fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
 588
 589         # Render a paragraph block.
 590         fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
 591
 592         # Render a code or fence block.
 593         fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
 594
 595         # Render a blockquote.
 596         fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
 597
 598         # Render an unordered list.
 599         fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
 600
 601         # Render an ordered list.
 602         fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
 603
 604         # Render a list item.
 605         fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
 606
 607         # Render an emphasis text.
 608         fun add_em(v: MarkdownEmitter, text: Text) is abstract
 609
 610         # Render a strong text.
 611         fun add_strong(v: MarkdownEmitter, text: Text) is abstract
 612
 613         # Render a strike text.
 614         #
 615         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 616         fun add_strike(v: MarkdownEmitter, text: Text) is abstract
 617
 618         # Render a link.
 619         fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 620
 621         # Render an image.
 622         fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 623
 624         # Render an abbreviation.
 625         fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
 626
 627         # Render a code span reading from a buffer.
 628         fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 629
 630         # Render a text and escape it.
 631         fun append_value(v: MarkdownEmitter, value: Text) is abstract
 632
 633         # Render code text from buffer and escape it.
 634         fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 635
 636         # Render a character escape.
 637         fun escape_char(v: MarkdownEmitter, char: Char) is abstract
 638
 639         # Render a line break
 640         fun add_line_break(v: MarkdownEmitter) is abstract
 641
 642         # Generate a new html valid id from a `String`.
 643         fun strip_id(txt: String): String is abstract
 644
 645         # Found headlines during the processing labeled by their ids.
 646         fun headlines: ArrayMap[String, HeadLine] is abstract
 647 end
 648
 649 # Class representing a markdown headline.
 650 class HeadLine
 651         # Unique identifier of this headline.
 652         var id: String
 653
 654         # Text of the headline.
 655         var title: String
 656
 657         # Level of this headline.
 658         #
 659         # According toe the markdown specification, level must be in `[1..6]`.
 660         var level: Int
 661 end
 662
 663 # `Decorator` that outputs HTML.
 664 class HTMLDecorator
 665         super Decorator
 666
 667         redef var headlines = new ArrayMap[String, HeadLine]
 668
 669         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 670
 671         redef fun add_headline(v, block) do
 672                 # save headline
 673                 var txt = block.block.first_line.value
 674                 var id = strip_id(txt)
 675                 var lvl = block.depth
 676                 headlines[id] = new HeadLine(id, txt, lvl)
 677                 # output it
 678                 v.add "<h{lvl} id=\"{id}\">"
 679                 v.emit_in block
 680                 v.add "</h{lvl}>\n"
 681         end
 682
 683         redef fun add_paragraph(v, block) do
 684                 v.add "<p>"
 685                 v.emit_in block
 686                 v.add "</p>\n"
 687         end
 688
 689         redef fun add_code(v, block) do
 690                 if block isa BlockFence and block.meta != null then
 691                         v.add "<pre class=\"{block.meta.to_s}\"><code>"
 692                 else
 693                         v.add "<pre><code>"
 694                 end
 695                 v.emit_in block
 696                 v.add "</code></pre>\n"
 697         end
 698
 699         redef fun add_blockquote(v, block) do
 700                 v.add "<blockquote>\n"
 701                 v.emit_in block
 702                 v.add "</blockquote>\n"
 703         end
 704
 705         redef fun add_unorderedlist(v, block) do
 706                 v.add "<ul>\n"
 707                 v.emit_in block
 708                 v.add "</ul>\n"
 709         end
 710
 711         redef fun add_orderedlist(v, block) do
 712                 v.add "<ol>\n"
 713                 v.emit_in block
 714                 v.add "</ol>\n"
 715         end
 716
 717         redef fun add_listitem(v, block) do
 718                 v.add "<li>"
 719                 v.emit_in block
 720                 v.add "</li>\n"
 721         end
 722
 723         redef fun add_em(v, text) do
 724                 v.add "<em>"
 725                 v.add text
 726                 v.add "</em>"
 727         end
 728
 729         redef fun add_strong(v, text) do
 730                 v.add "<strong>"
 731                 v.add text
 732                 v.add "</strong>"
 733         end
 734
 735         redef fun add_strike(v, text) do
 736                 v.add "<del>"
 737                 v.add text
 738                 v.add "</del>"
 739         end
 740
 741         redef fun add_image(v, link, name, comment) do
 742                 v.add "<img src=\""
 743                 append_value(v, link)
 744                 v.add "\" alt=\""
 745                 append_value(v, name)
 746                 v.add "\""
 747                 if comment != null and not comment.is_empty then
 748                         v.add " title=\""
 749                         append_value(v, comment)
 750                         v.add "\""
 751                 end
 752                 v.add "/>"
 753         end
 754
 755         redef fun add_link(v, link, name, comment) do
 756                 v.add "<a href=\""
 757                 append_value(v, link)
 758                 v.add "\""
 759                 if comment != null and not comment.is_empty then
 760                         v.add " title=\""
 761                         append_value(v, comment)
 762                         v.add "\""
 763                 end
 764                 v.add ">"
 765                 v.emit_text(name)
 766                 v.add "</a>"
 767         end
 768
 769         redef fun add_abbr(v, name, comment) do
 770                 v.add "<abbr title=\""
 771                 append_value(v, comment)
 772                 v.add "\">"
 773                 v.emit_text(name)
 774                 v.add "</abbr>"
 775         end
 776
 777         redef fun add_span_code(v, text, from, to) do
 778                 v.add "<code>"
 779                 append_code(v, text, from, to)
 780                 v.add "</code>"
 781         end
 782
 783         redef fun add_line_break(v) do
 784                 v.add "<br/>"
 785         end
 786
 787         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 788
 789         redef fun escape_char(v, c) do
 790                 if c == '&' then
 791                         v.add "&amp;"
 792                 else if c == '<' then
 793                         v.add "&lt;"
 794                 else if c == '>' then
 795                         v.add "&gt;"
 796                 else if c == '"' then
 797                         v.add "&quot;"
 798                 else if c == '\'' then
 799                         v.add "&apos;"
 800                 else
 801                         v.addc c
 802                 end
 803         end
 804
 805         redef fun append_code(v, buffer, from, to) do
 806                 for i in [from..to[ do
 807                         var c = buffer[i]
 808                         if c == '&' then
 809                                 v.add "&amp;"
 810                         else if c == '<' then
 811                                 v.add "&lt;"
 812                         else if c == '>' then
 813                                 v.add "&gt;"
 814                         else
 815                                 v.addc c
 816                         end
 817                 end
 818         end
 819
 820         redef fun strip_id(txt) do
 821                 # strip id
 822                 var b = new FlatBuffer
 823                 for c in txt do
 824                         if c == ' ' then
 825                                 b.add '_'
 826                         else
 827                                 if not c.is_letter and
 828                                    not c.is_digit and
 829                                    not allowed_id_chars.has(c) then continue
 830                                 b.add c
 831                         end
 832                 end
 833                 var res = b.to_s
 834                 var key = res
 835                 # check for multiple id definitions
 836                 if headlines.has_key(key) then
 837                         var i = 1
 838                         key = "{res}_{i}"
 839                         while headlines.has_key(key) do
 840                                 i += 1
 841                                 key = "{res}_{i}"
 842                         end
 843                 end
 844                 return key
 845         end
 846
        # Punctuation characters that `strip_id` keeps, in addition to letters and digits.
        private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 848 end
 849
 850 # A block of markdown lines.
 851 # A `MDBlock` can contains lines and/or sub-blocks.
 852 class MDBlock
 853         # Kind of block.
 854         # See `Block`.
 855         var kind: Block = new BlockNone(self) is writable
 856
 857         # First line if any.
 858         var first_line: nullable MDLine = null is writable
 859
 860         # Last line if any.
 861         var last_line: nullable MDLine = null is writable
 862
 863         # First sub-block if any.
 864         var first_block: nullable MDBlock = null is writable
 865
 866         # Last sub-block if any.
 867         var last_block: nullable MDBlock = null is writable
 868
 869         # Previous block if any.
 870         var prev: nullable MDBlock = null is writable
 871
 872         # Next block if any.
 873         var next: nullable MDBlock = null is writable
 874
 875         # Does this block contain subblocks?
 876         fun has_blocks: Bool do return first_block != null
 877
 878         # Count sub-blocks.
 879         fun count_blocks: Int do
 880                 var count = 0
 881                 var block = first_block
 882                 while block != null do
 883                         count += 1
 884                         block = block.next
 885                 end
 886                 return count
 887         end
 888
 889         # Does this block contain lines?
 890         fun has_lines: Bool do return first_line != null
 891
 892         # Count block lines.
 893         fun count_lines: Int do
 894                 var count = 0
 895                 var line = first_line
 896                 while line != null do
 897                         count += 1
 898                         line = line.next
 899                 end
 900                 return count
 901         end
 902
 903         # Split `self` creating a new sub-block having `line` has `last_line`.
 904         fun split(line: MDLine): MDBlock do
 905                 var block = new MDBlock
 906                 block.first_line = first_line
 907                 block.last_line = line
 908                 first_line = line.next
 909                 line.next = null
 910                 if first_line == null then
 911                         last_line = null
 912                 else
 913                         first_line.prev = null
 914                 end
 915                 if first_block == null then
 916                         first_block = block
 917                         last_block = block
 918                 else
 919                         last_block.next = block
 920                         last_block = block
 921                 end
 922                 return block
 923         end
 924
 925         # Add a `line` to this block.
 926         fun add_line(line: MDLine) do
 927                 if last_line == null then
 928                         first_line = line
 929                         last_line = line
 930                 else
 931                         last_line.next_empty = line.is_empty
 932                         line.prev_empty = last_line.is_empty
 933                         line.prev = last_line
 934                         last_line.next = line
 935                         last_line = line
 936                 end
 937         end
 938
 939         # Remove `line` from this block.
 940         fun remove_line(line: MDLine) do
 941                 if line.prev == null then
 942                         first_line = line.next
 943                 else
 944                         line.prev.next = line.next
 945                 end
 946                 if line.next == null then
 947                         last_line = line.prev
 948                 else
 949                         line.next.prev = line.prev
 950                 end
 951                 line.prev = null
 952                 line.next = null
 953         end
 954
 955         # Remove leading empty lines.
 956         fun remove_leading_empty_lines: Bool do
 957                 var was_empty = false
 958                 var line = first_line
 959                 while line != null and line.is_empty do
 960                         remove_line line
 961                         line = first_line
 962                         was_empty = true
 963                 end
 964                 return was_empty
 965         end
 966
 967         # Remove trailing empty lines.
 968         fun remove_trailing_empty_lines: Bool do
 969                 var was_empty = false
 970                 var line = last_line
 971                 while line != null and line.is_empty do
 972                         remove_line line
 973                         line = last_line
 974                         was_empty = true
 975                 end
 976                 return was_empty
 977         end
 978
 979         # Remove leading and trailing empty lines.
 980         fun remove_surrounding_empty_lines: Bool do
 981                 var was_empty = false
 982                 if remove_leading_empty_lines then was_empty = true
 983                 if remove_trailing_empty_lines then was_empty = true
 984                 return was_empty
 985         end
 986
 987         # Remove list markers and up to 4 leading spaces.
 988         # Used to clean nested lists.
 989         fun remove_list_indent(v: MarkdownProcessor) do
 990                 var line = first_line
 991                 while line != null do
 992                         if not line.is_empty then
 993                                 var kind = v.line_kind(line)
 994                                 if kind isa LineList then
 995                                         line.value = kind.extract_value(line)
 996                                 else
 997                                         line.value = line.value.substring_from(line.leading.min(4))
 998                                 end
 999                                 line.leading = line.process_leading
1000                         end
1001                         line = line.next
1002                 end
1003         end
1004
1005         # Collect block line text.
1006         fun text: String do
1007                 var text = new FlatBuffer
1008                 var line = first_line
1009                 while line != null do
1010                         if not line.is_empty then
1011                                 text.append line.text
1012                         end
1013                         text.append "\n"
1014                         line = line.next
1015                 end
1016                 return text.write_to_string
1017         end
1018 end
1019
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` with `v`.
        #
        # By default only the content of `self` is emitted (see `emit_in`).
        fun emit(v: MarkdownEmitter) do v.emit_in(self)

        # Emit the content of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed from `block` first.
        # Lines take precedence: sub-blocks are emitted only when `block`
        # has no line left.
        fun emit_in(v: MarkdownEmitter) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are first collected in a buffer pushed on `v`, then the whole
        # buffer is passed to `v.emit_text`.
        fun emit_lines(v: MarkdownEmitter) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` takes a character count, not an end index:
                                # the leading spaces must be subtracted too, otherwise
                                # trailing spaces leak in whenever `leading > 0`.
                                var count = line.value.length - line.leading - line.trailing
                                if count < 0 then count = 0
                                v.add line.value.substring(line.leading, count)
                                # two or more trailing spaces ask for a line break
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        fun emit_blocks(v: MarkdownEmitter) do
                var block = self.block.first_block
                while block != null do
                        block.kind.emit(v)
                        block = block.next
                end
        end
end
1067
# A block without any markdown specificities.
#
# It redefines nothing and relies on the default `Block` implementation;
# this class is only used for typing purposes.
class BlockNone
        super Block
end
1075
# A markdown blockquote.
class BlockQuote
        super Block

        # Output `self` with the blockquote rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Remove blockquote markers.
        #
        # For each non-empty line of `block` whose first non-space character is
        # `>`, everything up to that marker (plus one following space if any)
        # is dropped and `leading` is recomputed.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var line = block.first_line
                while line != null do
                        var value = line.value
                        var marker = line.leading
                        if not line.is_empty and value[marker] == '>' then
                                var skip = marker + 1
                                if skip < value.length and value[skip] == ' ' then skip += 1
                                line.value = value.substring_from(skip)
                                line.leading = line.process_leading
                        end
                        line = line.next
                end
        end
end
1101
# A markdown code block.
class BlockCode
        super Block

        # Index of the first character of each line that belongs to the code.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        # Output `self` with the code rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line from `line_start` to its end, followed by a new line.
        # Empty lines only produce the new line.
        redef fun emit_lines(v) do
                var current = block.first_line
                while current != null do
                        if not current.is_empty then
                                var raw = current.value
                                v.decorator.append_code(v, raw, line_start, raw.length)
                        end
                        v.addn
                        current = current.next
                end
        end
end
1124
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`;
# it only adds the `meta` string and changes where code lines start.
class BlockFence
        super BlockCode

        # Any string found after fence token.
        var meta: nullable Text

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1138
# A markdown headline.
class BlockHeadline
        super Block

        redef fun emit(v) do v.decorator.add_headline(v, self)

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from the first line of `block`.
        #
        # Does nothing if `depth` is already set or if `block` has no usable
        # first line. Otherwise the leading `#` marks are counted to set
        # `depth` (6 at most) and the line is reduced to the text found
        # between the leading marks and the optional closing marks.
        # A headline without any text is flagged as empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null then return
                if line.is_empty then return
                # skip leading marks, then the spaces that follow them
                var start = line.leading
                while start < line.value.length and line.value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < line.value.length and line.value[start] == ' ' do
                        start += 1
                end
                # Walk back over closing marks and spaces, never going before
                # `start`: a line such as `# #` has no text and would otherwise
                # produce a negative substring length.
                var nend = line.value.length - line.trailing - 1
                while nend >= start and line.value[nend] == '#' do nend -= 1
                while nend >= start and line.value[nend] == ' ' do nend -= 1
                if nend < start then
                        line.is_empty = true
                else
                        line.value = line.value.substring(start, nend - start + 1)
                        line.leading = 0
                        line.trailing = 0
                end
                depth = level.min(6)
        end
end
1175
# A markdown list item block.
class BlockListItem
        super Block

        # Output `self` with the list item rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1182
# A markdown list block.
# Either ordered or unordered; this class mainly factorizes the shared code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins on each list line
        # and on each non-empty, non-indented line that follows an empty line.
        # The lines left at the end form the last item.
        private fun init_block(v: MarkdownProcessor) do
                var line = block.first_line
                line = line.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                # everything before `line` belongs to the previous item
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # If at least one list item of `block` contains a paragraph, every
        # `BlockNone` found in the list items is turned into a paragraph.
        private fun expand_paragraphs(block: MDBlock) do
                # first pass: look for a paragraph in any list item
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null and not found do
                                        if child.kind isa BlockParagraph then found = true
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # second pass: promote plain blocks to paragraphs
                var current = block.first_block
                while current != null do
                        if current.kind isa BlockListItem then
                                var sub = current.first_block
                                while sub != null do
                                        if sub.kind isa BlockNone then
                                                sub.kind = new BlockParagraph(sub)
                                        end
                                        sub = sub.next
                                end
                        end
                        current = current.next
                end
        end
end
1240
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        # Output `self` with the ordered list rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1247
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Output `self` with the unordered list rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1254
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Output `self` with the paragraph rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1261
# A markdown ruler.
class BlockRuler
        super Block

        # Output `self` with the ruler rendering of the decorator.
        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1268
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the lines verbatim, each one followed by a new line.
        # Empty lines only produce the new line.
        redef fun emit_lines(v) do
                var current = block.first_line
                while current != null do
                        if not current.is_empty then
                                v.add(current.value)
                        end
                        v.addn
                        current = current.next
                end
        end
end
1282
# A markdown line.
#
# Lines are chained together with `prev` and `next` inside a `MDBlock`.
class MDLine

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        var next_empty: Bool = false is writable

        # Initialize a new MDLine from its string value
        init do
                self.leading = process_leading
                if leading != value.length then
                        self.is_empty = false
                        self.trailing = process_trailing
                end
        end

        # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
        #
        # The `next_empty` and `prev_empty` flags of the neighbours are updated too.
        fun clear do
                value = ""
                leading = 0
                trailing = 0
                is_empty = true
                if prev != null then prev.next_empty = true
                if next != null then next.prev_empty = true
        end

        # Number of leading spaces on this line.
        var leading: Int = 0 is writable

        # Compute `leading` depending on `value`.
        #
        # Return the number of spaces found at the beginning of `value`.
        # If `value` contains nothing but spaces, the line is cleared
        # (see `clear`) and 0 is returned.
        fun process_leading: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[count] == ' ' do count += 1
                # The test must use the freshly computed `count`: the `leading`
                # attribute still holds the result of a previous computation
                # and may be unrelated to the current `value`.
                if count == value.length then
                        clear
                        return 0
                end
                return count
        end

        # Number of trailing spaces on this line.
        var trailing: Int = 0 is writable

        # Compute `trailing` depending on `value`.
        #
        # Return the number of spaces found at the end of `value`.
        fun process_trailing: Int do
                var count = 0
                var value = self.value
                # `count < value.length` keeps the index valid on blank values
                while count < value.length and value[value.length - count - 1] == ' ' do
                        count += 1
                end
                return count
        end

        # Count the amount of `ch` in this line.
        # Return a value > 0 if this line only consists of `ch` and spaces.
        fun count_chars(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                                continue
                        end
                        # any other character disqualifies the line
                        count = 0
                        break
                end
                return count
        end

        # Count the amount of `ch` at the start of this line ignoring spaces.
        fun count_chars_start(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                        else
                                break
                        end
                end
                return count
        end

        # Last XML line if any.
        # Set by `check_html` and `read_xml_comment` when they succeed.
        private var xml_end_line: nullable MDLine = null

        # Does `value` contain valid XML markup?
        #
        # On success `xml_end_line` is set to the line where the markup ends.
        # Opened block tags are kept in a stack; the markup is complete once
        # the stack is empty.
        private fun check_html: Bool do
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                # `<!` may start a comment
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        # `hr` stands alone on its line
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        var line: nullable MDLine = self
                        while line != null do
                                # look for the next tag on the current line
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # end of line: a `/` just before the last
                                        # character closes the last opened tag
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                if tag.is_html_block and not tag == "hr" then
                                                        if tmp[1] == '/' then
                                                                # a closing tag must match the last opened one
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end

        # Read a XML comment.
        # Used by `check_html`.
        #
        # `start` is the position of the `<` in `first_line`.
        # When the closing `-->` is found, possibly on a following line,
        # `first_line.xml_end_line` is set to the line that contains it and
        # the position right after it in that line is returned.
        # Return -1 otherwise.
        private fun read_xml_comment(first_line: MDLine, start: Int): Int do
                var line: nullable MDLine = first_line
                if start + 3 < line.value.length then
                        # The `--` of `<!--` is looked up relative to `start`,
                        # so indented comments are recognized too.
                        if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
                                var pos = start + 4
                                while line != null do
                                        while pos < line.value.length and line.value[pos] != '-' do
                                                pos += 1
                                        end
                                        if pos == line.value.length then
                                                line = line.next
                                                pos = 0
                                        else
                                                if pos + 2 < line.value.length then
                                                        if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
                                                                first_line.xml_end_line = line
                                                                return pos + 3
                                                        end
                                                end
                                                pos += 1
                                        end
                                end
                        end
                end
                return -1
        end

        # Extract the text of `self` without leading and trailing.
        fun text: String do
                # `substring` takes a character count, not an end index
                var count = value.length - leading - trailing
                if count < 0 then count = 0
                return value.substring(leading, count)
        end
end
1483
# The kind of a markdown line.
#
# Each kind provides its own `process` to parse lines of that kind.
interface Line

        # Parse the line.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1491
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the current line of `v`.
        redef fun process(v) do
                var current = v.current_line
                v.current_line = current.next
        end
end
1500
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Group the current line and the plain lines that follow it in a new block.
        #
        # The group stops at the first empty line, at the end of the current
        # block, or at a line that starts another construct.
        # The new block is a `BlockNone` or a `BlockParagraph`.
        redef fun process(v) do
                var line = v.current_line
                var was_empty = line.prev_empty
                # go to block end
                while line != null and not line.is_empty do
                        var t = v.line_kind(line)
                        var list_aware = v.in_list or v.ext_mode
                        if list_aware and t isa LineList then break
                        if v.ext_mode and (t isa LineCode or t isa LineFence) then break
                        if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
                           t isa LineHR or t isa LineBlockquote or t isa LineXML then break
                        line = line.next
                end
                # pick the last line of the new block;
                # a block closed by an empty line is always a paragraph
                var last: MDLine
                var may_be_plain: Bool
                if line == null then
                        last = v.current_block.last_line.as(not null)
                        may_be_plain = true
                else if line.is_empty then
                        last = line
                        may_be_plain = false
                else
                        last = line.prev.as(not null)
                        may_be_plain = true
                end
                # build block
                var block = v.current_block.split(last)
                if may_be_plain and v.in_list and not was_empty then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1550
1551 # A line of markdown code.
1552 class LineCode
1553         super Line
1554
1555         # Build a `BlockCode` from the run of code lines found at `v.current_line`.
1556         redef fun process(v) do
1557                 # Skip the whole run; empty lines do not interrupt it.
1558                 var line = v.current_line
1559                 loop
1560                         if line == null then break
1561                         if not line.is_empty and not v.line_kind(line) isa LineCode then break
1562                         line = line.next
1563                 end
1564                 # The block ends right before `line`, or with the current block.
1565                 var last = v.current_block.last_line
1566                 if line != null then last = line.prev
1567                 var block = v.current_block.split(last.as(not null))
1568                 block.kind = new BlockCode(block)
1569                 block.remove_surrounding_empty_lines
1570                 v.current_line = v.current_block.first_line
1571         end
1572 end
1573
1574 # A line of raw XML; the XML block runs up to the `xml_end_line` of its first line.
1575 class LineXML
1576         super Line
1577
1578         redef fun process(v) do
1579                 var first = v.current_line
1580                 var before = first.prev
1581                 if before != null then v.current_block.split(before) # split off the lines before the XML
1582                 var xml = v.current_block.split(first.xml_end_line.as(not null))
1583                 xml.kind = new BlockXML(xml)
1584                 v.current_block.remove_leading_empty_lines
1585                 v.current_line = v.current_block.first_line
1586         end
1587 end
1588
1589 # A markdown blockquote line.
1590 class LineBlockquote
1591         super Line
1592
1593         # Build a `BlockQuote` from the quote at `v.current_line` and process its content.
1594         redef fun process(v) do
1595                 # The quote ends at a non-empty line with no leading that follows an empty line and is no blockquote line.
1596                 var line = v.current_line
1597                 loop
1598                         if line == null then break
1599                         if not line.is_empty and line.prev_empty and line.leading == 0 then
1600                                 if not v.line_kind(line) isa LineBlockquote then break
1601                         end
1602                         line = line.next
1603                 end
1604                 # Split right before that line, or at the end of the current block.
1605                 var last = v.current_block.last_line
1606                 if line != null then last = line.prev
1607                 var block = v.current_block.split(last.as(not null))
1608                 var quote = new BlockQuote(block)
1609                 block.kind = quote
1610                 block.remove_surrounding_empty_lines
1611                 quote.remove_block_quote_prefix(block)
1612                 # Process the content of the quote recursively.
1613                 v.current_line = line
1614                 v.recurse(block, false)
1615                 v.current_line = v.current_block.first_line
1616         end
1617 end
1618
1619 # A markdown ruler line.
1620 class LineHR
1621         super Line
1622         redef fun process(v) do
1623                 var ruler = v.current_line
1624                 var before = ruler.prev
1625                 if before != null then v.current_block.split(before)
1626                 var block = v.current_block.split(ruler.as(not null)) # the ruler is a block on its own
1627                 block.kind = new BlockRuler(block)
1628                 v.current_block.remove_leading_empty_lines
1629                 v.current_line = v.current_block.first_line
1630         end
1631 end
1632
1633 # A markdown fence code line.
1634 class LineFence
1635         super Line
1636
1637         # Build a `BlockFence` from the fence opened at `v.current_line`.
1638         redef fun process(v) do
1639                 # Look for the closing fence, starting after the opening one.
1640                 var line = v.current_line.next
1641                 while line != null and not v.line_kind(line) isa LineFence do
1642                         line = line.next
1643                 end
1644                 # Step over the closing fence so that it belongs to the block.
1645                 if line != null then line = line.next
1646                 # The block ends right before `line`, or with the current block.
1647                 var last = v.current_block.last_line
1648                 if line != null then last = line.prev
1649                 var block = v.current_block.split(last.as(not null))
1650                 # The opening fence provides the meta, then is cleared.
1651                 var opening = block.first_line
1652                 var meta = opening.value.meta_from_fence
1653                 block.kind = new BlockFence(block, meta)
1654                 opening.clear
1655                 # The last line is cleared only if it is a fence: none may have been found.
1656                 var closing = block.last_line
1657                 if closing != null and v.line_kind(closing) isa LineFence then
1658                         closing.clear
1659                 end
1660                 block.remove_surrounding_empty_lines
1661                 # Go on with the line following the fence block, if any.
1662                 v.current_line = line
1663         end
1664 end
1665
1666 # A markdown headline.
1667 class LineHeadline
1668         super Line
1669         # Build a `BlockHeadline` from the single line at `v.current_line`.
1670         redef fun process(v) do
1671                 var title = v.current_line
1672                 var before = title.prev
1673                 if before != null then v.current_block.split(before)
1674                 var block = v.current_block.split(title.as(not null))
1675                 var headline = new BlockHeadline(block)
1676                 block.kind = headline
1677                 headline.transform_headline(block)
1678                 v.current_block.remove_leading_empty_lines
1679                 v.current_line = v.current_block.first_line
1680         end
1681 end
1682
1683 # A markdown headline of level 1.
1684 class LineHeadline1
1685         super LineHeadline
1686
1687         redef fun process(v) do
1688                 var title = v.current_line
1689                 var before = title.prev
1690                 if before != null then v.current_block.split(before)
1691                 title.next.clear # presumably the line marking the headline level (TODO confirm)
1692                 var block = v.current_block.split(title.as(not null))
1693                 var headline = new BlockHeadline(block)
1694                 headline.depth = 1
1695                 headline.transform_headline(block)
1696                 block.kind = headline
1697                 v.current_block.remove_leading_empty_lines
1698                 v.current_line = v.current_block.first_line
1699         end
1700 end
1701
1702 # A markdown headline of level 2.
1703 class LineHeadline2
1704         super LineHeadline
1705
1706         redef fun process(v) do
1707                 var title = v.current_line
1708                 var before = title.prev
1709                 if before != null then v.current_block.split(before)
1710                 title.next.clear # presumably the line marking the headline level (TODO confirm)
1711                 var block = v.current_block.split(title.as(not null))
1712                 var headline = new BlockHeadline(block)
1713                 headline.depth = 2
1714                 headline.transform_headline(block)
1715                 block.kind = headline
1716                 v.current_block.remove_leading_empty_lines
1717                 v.current_line = v.current_block.first_line
1718         end
1719 end
1720
1721 # A markdown list line.
1722 # Mainly used to factorize code between ordered and unordered lists.
1723 class LineList
1724         super Line
1725
1726         # Build the list block starting at `v.current_line`, then process its sub-blocks.
1727         redef fun process(v) do
1728                 # The list ends at a non-empty line with no leading that follows an empty line and is no list line.
1729                 var line = v.current_line
1730                 loop
1731                         if line == null then break
1732                         var t = v.line_kind(line)
1733                         if not line.is_empty and line.prev_empty and line.leading == 0 and not t isa LineList then break
1734                         line = line.next
1735                 end
1736                 var last = v.current_block.last_line
1737                 if line != null then last = line.prev
1738                 var list = v.current_block.split(last.as(not null))
1739                 var kind = block_kind(list)
1740                 list.kind = kind
1741                 # The flags are reset before and after the trimming; presumably
1742                 # because it can change which lines are the first and last ones.
1743                 list.first_line.prev_empty = false
1744                 list.last_line.next_empty = false
1745                 list.remove_surrounding_empty_lines
1746                 list.first_line.prev_empty = false
1747                 list.last_line.next_empty = false
1748                 kind.init_block(v)
1749                 # Each sub-block of the list is unindented then processed recursively.
1750                 var item = list.first_block
1751                 while item != null do
1752                         item.remove_list_indent(v)
1753                         v.recurse(item, true)
1754                         item = item.next
1755                 end
1756                 kind.expand_paragraphs(list)
1757                 v.current_line = line
1758         end
1759
1760         # Create a new block kind based on this line.
1761         protected fun block_kind(block: MDBlock): BlockList is abstract
1762
1763         # Extract string value from `MDLine`.
1764         protected fun extract_value(line: MDLine): String is abstract
1765 end
1766
1767 # An ordered list line.
1768 class LineOList
1769         super LineList
1770         redef fun block_kind(block) do return new BlockOrderedList(block)
1771         # The value starts two characters after the first `.` of the line.
1772         redef fun extract_value(line) do
1773                 var text = line.value
1774                 return text.substring_from(text.index_of('.') + 2)
1775         end
1776 end
1777
1778 # An unordered list line.
1779 class LineUList
1780         super LineList
1781         redef fun block_kind(block) do return new BlockUnorderedList(block)
1782         # The value starts two characters after the `leading` of the line.
1783         redef fun extract_value(line) do
1784                 var skip = line.leading + 2
1785                 return line.value.substring_from(skip)
1786         end
1787 end
1788
1789 # A token represents a character in the markdown input.
1790 # Some tokens have a specific markup behaviour that is handled here.
1791 abstract class Token
1792
1793         # Position of `self` in markdown input.
1794         var pos: Int
1795
1796         # Character found at `pos` in the markdown input.
1797         var char: Char
1798
1799         # Output that token with `v`; by default `char` is added as is, subclasses redefine this for markup.
1800         fun emit(v: MarkdownEmitter) do v.addc char
1801 end
1802
1803 # A token without a specific meaning: it keeps the default `Token::emit`.
1804 class TokenNone
1805         super Token
1806 end
1807
1808 # An emphasis token.
1809 abstract class TokenEm
1810         super Token
1811         # Emit as emphasis the text found by `emit_text_until`, or `char` alone on a non-positive result.
1812         redef fun emit(v) do
1813                 var content = v.push_buffer
1814                 var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
1815                 v.pop_buffer
1816                 if stop <= 0 then
1817                         v.addc char
1818                         return
1819                 end
1820                 v.decorator.add_em(v, content)
1821                 v.current_pos = stop
1822         end
1823 end
1824
1825 # An emphasis star token; all its behaviour comes from `TokenEm`.
1826 class TokenEmStar
1827         super TokenEm
1828 end
1829
1830 # An emphasis underscore token; all its behaviour comes from `TokenEm`.
1831 class TokenEmUnderscore
1832         super TokenEm
1833 end
1834
1835 # A strong token.
1836 abstract class TokenStrong
1837         super Token
1838         # Emit as strong the text found by `emit_text_until`, or `char` alone on a non-positive result.
1839         redef fun emit(v) do
1840                 var content = v.push_buffer
1841                 var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
1842                 v.pop_buffer
1843                 if stop <= 0 then
1844                         v.addc char
1845                         return
1846                 end
1847                 v.decorator.add_strong(v, content)
1848                 v.current_pos = stop + 1
1849         end
1850 end
1851
1852 # A strong star token; all its behaviour comes from `TokenStrong`.
1853 class TokenStrongStar
1854         super TokenStrong
1855 end
1856
1857 # A strong underscore token; all its behaviour comes from `TokenStrong`.
1858 class TokenStrongUnderscore
1859         super TokenStrong
1860 end
1861
1862 # A code token.
1863 # This class is mainly used to factorize work between single and double quoted span codes.
1864 abstract class TokenCode
1865         super Token
1866         redef fun emit(v) do
1867                 var text = v.current_text.as(not null)
1868                 var start = pos + next_pos + 1
1869                 var stop = v.processor.find_token(text, start, self)
1870                 if stop <= 0 then
1871                         v.addc char
1872                         return
1873                 end
1874                 v.current_pos = stop + next_pos
1875                 # Trim the spaces on both sides; nothing is emitted if only spaces are left.
1876                 while start < stop and text[start] == ' ' do start += 1
1877                 if start >= stop then return
1878                 while text[stop - 1] == ' ' do stop -= 1
1879                 v.decorator.add_span_code(v, text, start, stop)
1880         end
1881         # Offset added to the positions around the marker (0 in `TokenCodeSingle`, 1 in `TokenCodeDouble`).
1882         private fun next_pos: Int is abstract
1883 end
1884
1885 # A span code token.
1886 class TokenCodeSingle
1887         super TokenCode
1888         # No extra offset is added by `TokenCode::emit` for this token.
1889         redef fun next_pos do return 0
1890 end
1891
1892 # A doubled span code token.
1893 class TokenCodeDouble
1894         super TokenCode
1895         # One extra position is added by `TokenCode::emit` for this token.
1896         redef fun next_pos do return 1
1897 end
1898
1899 # A link or image token.
1900 # This class is mainly used to factorize work between images and links.
1901 abstract class TokenLinkOrImage
1902         super Token
1903
1904         # Link address
1905         var link: nullable Text = null
1906
1907         # Link text
1908         var name: nullable Text = null
1909
1910         # Link title
1911         var comment: nullable Text = null
1912
1913         # Is the link construct an abbreviation?
1914         var is_abbrev = false
1915
1916         # Emit the link or image if the construct is valid, else emit `char` as is.
1917         redef fun emit(v) do
1918                 var tmp = new FlatBuffer
1919                 var b = check_link(v, tmp, pos, self)
1920                 if b > 0 then
1921                         emit_hyper(v)
1922                         v.current_pos = b
1923                 else
1924                         v.addc char
1925                 end
1926         end
1927
1928         # Emit the hyperlink as link or image.
1929         private fun emit_hyper(v: MarkdownEmitter) is abstract
1930
1931         # Check if the construct starting at `start` is a valid link or image.
1932         # On success, fill `name` and `link` (plus `comment` and `is_abbrev` when known)
1933         # and return the position closing the construct. Return -1 otherwise, including
1934         # when the text ends before the construct is closed. `out` is not used.
1935         private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
1936                 var md = v.current_text
1937                 # Skip the opening marker: one character for links, two otherwise.
1938                 var pos = start + 2
1939                 if token isa TokenLink then pos = start + 1
1940                 var tmp = new FlatBuffer
1941                 pos = md.read_md_link_id(tmp, pos)
1942                 if pos < start then return -1
1943                 name = tmp
1944                 var old_pos = pos
1945                 pos += 1
1946                 pos = md.skip_spaces(pos)
1947                 if pos < start then
1948                         # Nothing but spaces follows: the name must be a known reference.
1949                         var tid = name.write_to_string.to_lower
1950                         if not v.processor.link_refs.has_key(tid) then return -1
1951                         var lr = v.processor.link_refs[tid]
1952                         is_abbrev = lr.is_abbrev
1953                         link = lr.link
1954                         comment = lr.title
1955                         pos = old_pos
1956                 else if md[pos] == '(' then
1957                         # Inline form: the address, then an optional quoted title.
1958                         pos += 1
1959                         pos = md.skip_spaces(pos)
1960                         if pos < start then return -1
1961                         tmp = new FlatBuffer
1962                         var use_lt = md[pos] == '<'
1963                         if use_lt then
1964                                 pos = md.read_until(tmp, pos + 1, '>')
1965                         else
1966                                 pos = md.read_md_link(tmp, pos)
1967                         end
1968                         if pos < start then return -1
1969                         if use_lt then pos += 1
1970                         # The text may end right after the address (eg. `[a](<b>`):
1971                         # there is nothing left to read, so the link is not closed.
1972                         if pos >= md.length then return -1
1973                         link = tmp.write_to_string
1974                         if md[pos] == ' ' then
1975                                 pos = md.skip_spaces(pos)
1976                                 # `skip_spaces` returns -1 when only spaces were left.
1977                                 if pos < start then return -1
1978                                 if md[pos] == '"' then
1979                                         pos += 1
1980                                         tmp = new FlatBuffer
1981                                         pos = md.read_until(tmp, pos, '"')
1982                                         if pos < start then return -1
1983                                         comment = tmp.write_to_string
1984                                         pos += 1
1985                                         pos = md.skip_spaces(pos)
1986                                         if pos == -1 then return -1
1987                                 end
1988                         end
1989                         if md[pos] != ')' then return -1
1990                 else if md[pos] == '[' then
1991                         # Reference form: an empty id means that the name is the id.
1992                         pos += 1
1993                         tmp = new FlatBuffer
1994                         pos = md.read_raw_until(tmp, pos, ']')
1995                         if pos < start then return -1
1996                         var id: nullable Text = name
1997                         if tmp.length > 0 then id = tmp
1998                         var tid = id.write_to_string.to_lower
1999                         if v.processor.link_refs.has_key(tid) then
2000                                 var lr = v.processor.link_refs[tid]
2001                                 link = lr.link
2002                                 comment = lr.title
2003                         end
2004                 else
2005                         # Anything else follows: the name must be a known reference.
2006                         var tid = name.write_to_string.replace("\n", " ").to_lower
2007                         if not v.processor.link_refs.has_key(tid) then return -1
2008                         var lr = v.processor.link_refs[tid]
2009                         link = lr.link
2010                         comment = lr.title
2011                         pos = old_pos
2012                 end
2013                 if link == null then return -1
2014                 return pos
2015         end
2016 end
2017
2018 # A markdown link token.
2019 class TokenLink
2020         super TokenLinkOrImage
2021         # Emit a link, or an abbreviation when the link is one and has a title (`comment`).
2022         redef fun emit_hyper(v) do
2023                 if not is_abbrev or comment == null then
2024                         v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
2025                 else
2026                         v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
2027                 end
2028         end
2029 end
2030
2031 # A markdown image token.
2032 class TokenImage
2033         super TokenLinkOrImage
2034         # Emit the image with `add_image`; `link` and `name` must have been set (see `check_link`).
2035         redef fun emit_hyper(v) do
2036                 v.decorator.add_image(v, link.as(not null), name.as(not null), comment)
2037         end
2038 end
2039
2040 # A HTML/XML token.
2041 class TokenHTML
2042         super Token
2043
2044         # Emit the HTML construct found at the current position, or escape `char` if there is none.
2045         redef fun emit(v) do
2046                 var html = new FlatBuffer
2047                 var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
2048                 if stop <= 0 then
2049                         v.decorator.escape_char(v, char)
2050                         return
2051                 end
2052                 v.add html
2053                 v.current_pos = stop
2054         end
2055
2056         # Check the construct opened at `start` in `md` and return its end position, or -1.
2057         #
2058         # Auto links are emitted at once with `v.decorator`, leaving `out` untouched.
2059         # Inline HTML is read in safe mode into `out`.
2060         # Mailto shortcuts are not handled yet.
2061         private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
2062                 # An auto link starts with a link prefix followed by `:`.
2063                 var tmp = new FlatBuffer
2064                 var pos = md.read_until(tmp, start + 1, ':', ' ', '>', '\n')
2065                 var is_auto_link = pos != -1 and md[pos] == ':' and tmp.is_link_prefix
2066                 if is_auto_link then pos = md.read_until(tmp, pos, '>')
2067                 if is_auto_link and pos != -1 then
2068                         var link = tmp.write_to_string
2069                         v.decorator.add_link(v, link, link, null)
2070                         return pos
2071                 end
2072                 # TODO check for mailto
2073                 if start + 2 >= md.length then return -1
2074                 return md.read_xml(out, start, true)
2075         end
2076 end
2077
2078 # An HTML entity token.
2079 class TokenEntity
2080         super Token
2081
2082         redef fun emit(v) do
2083                 var tmp = new FlatBuffer
2084                 var b = check_entity(tmp, v.current_text.as(not null), pos)
2085                 if b > 0 then
2086                         v.add tmp
2087                         v.current_pos = b
2088                 else
2089                         v.decorator.escape_char(v, char)
2090                 end
2091         end
2092
2093         # Is the entity valid?
2094         #
2095         # Read `md` from `start` up to the next `;` and append what is read, `;` included, to `out`.
2096         # Three forms are accepted: hexadecimal (`&#x2F;`), decimal (`&#47;`)
2097         # and named (`&amp;`); names are only checked to be alphanumeric.
2098         # The first character read (presumably the `&` of the token) is not checked.
2099         # Return the position of the `;` or -1 if the entity is not valid.
2100         private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
2101                 var pos = md.read_until(out, start, ';')
2102                 if pos < 0 or out.length < 3 then return -1
2103                 if out[1] == '#' then
2104                         if out[2] == 'x' or out[2] == 'X' then
2105                                 if out.length < 4 then return -1
2106                                 for i in [3..out.length[ do
2107                                         var c = out[i]
2108                                         # Reject anything that is not an hexadecimal digit.
2109                                         var is_hex = (c >= '0' and c <= '9') or
2110                                                 (c >= 'a' and c <= 'f') or
2111                                                 (c >= 'A' and c <= 'F')
2112                                         if not is_hex then return -1
2113                                 end
2114                         else
2115                                 for i in [2..out.length[ do
2116                                         var c = out[i]
2117                                         if c < '0' or c > '9' then return -1
2118                                 end
2119                         end
2120                 else
2121                         for i in [1..out.length[ do
2122                                 var c = out[i]
2123                                 if not c.is_digit and not c.is_letter then return -1
2124                         end
2125                         # TODO check that the name is a known entity
2126                 end
2127                 out.add ';'
2128                 return pos
2129         end
2130 end
2131
2132 # A markdown escape token: the character at the next position is emitted as is.
2133 class TokenEscape
2134         super Token
2135         redef fun emit(v) do
2136                 var escaped = v.current_pos + 1
2137                 v.current_pos = escaped
2138                 v.addc v.current_text[escaped]
2139         end
2140 end
2141
2142 # A markdown strike token.
2143 # Extended mode only (see `MarkdownProcessor::ext_mode`)
2144 class TokenStrike
2145         super Token
2146
2147         # Emit as strike the text found by `emit_text_until`, or `char` alone on a non-positive result.
2148         redef fun emit(v) do
2149                 var content = v.push_buffer
2150                 var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
2151                 v.pop_buffer
2152                 if stop <= 0 then
2153                         v.addc char
2154                         return
2155                 end
2156                 v.decorator.add_strike(v, content)
2157                 v.current_pos = stop + 1
2158         end
2159 end
2160
2161 redef class Text
2162
2163         # Get the position of the first character, from `start`, that is neither ' ' nor '\n'.
2164         # Return -1 if the end of `self` is reached first.
2165         # A negative `start` is returned unchanged.
2166         private fun skip_spaces(start: Int): Int do
2167                 var pos = start
2168                 while pos > -1 and pos < length and not (self[pos] != ' ' and self[pos] != '\n') do pos += 1
2169                 if pos >= length then return -1
2170                 return pos
2171         end
2172
2173         # Read `self` from `start` until one of the `nend` characters and
2174         # append what is read to the `out` buffer.
2175         #
2176         # A backslash followed by a character is delegated to `escape`, which
2177         # returns the position to go on from.
2178         #
2179         # Return the position of the end character found, or -1 if the end of
2180         # `self` is reached first.
2181         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2182                 var pos = start
2183                 while pos < length do
2184                         var c = self[pos]
2185                         if c == '\\' and pos + 1 < length then
2186                                 pos = escape(out, self[pos + 1], pos)
2187                         else if nend.has(c) then
2188                                 break
2189                         else
2190                                 out.add c
2191                         end
2192                         pos += 1
2193                 end
2194                 if pos == length then return -1
2195                 return pos
2196         end
2197
2198         # Read `self` as raw text from `start` until one of the `nend`
2199         # characters and append what is read to the `out` buffer.
2200         # No escape is made.
2201         #
2202         # Return the position of the end character found, or -1 if the end of
2203         # `self` is reached first.
2204         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2205                 var pos = start
2206                 loop
2207                         if pos >= length then break
2208                         var c = self[pos]
2209                         # End characters are not copied to `out`.
2210                         if nend.has(c) then break
2211                         out.add c
2212                         pos += 1
2213                 end
2214                 # `pos` stopped on an end character unless it ran to the end.
2215                 if pos == length then return -1
2216                 return pos
2217         end
2218
2219         # Read `self` as XML from `from` until one of the `to` characters and
2220         # append what is read to the `out` buffer.
2221         #
2222         # Characters of `to` found inside quoted strings (`"..."` or `'...'`) do
2223         # not stop the reading; in strings, a backslash escapes the next character.
2224         # No escaping of HTML special characters is done.
2225         #
2226         # Return the position of the end character found, or -1 if the end of
2227         # `self` is reached first.
2228         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2229                 var pos = from
2230                 var in_str = false
2231                 var str_char: nullable Char = null
2232                 while pos < length do
2233                         var c = self[pos]
2234                         if in_str then
2235                                 if c == '\\' then
2236                                         out.add c
2237                                         pos += 1
2238                                         # Copy the escaped character itself, not the
2239                                         # backslash a second time.
2240                                         if pos < length then
2241                                                 out.add self[pos]
2242                                                 pos += 1
2243                                         end
2244                                         continue
2245                                 end
2246                                 # The string is closed by the quote that opened it.
2247                                 if c == str_char then
2248                                         in_str = false
2249                                 end
2250                         else if c == '"' or c == '\'' then
2251                                 # Strings are only opened outside of strings, so that the
2252                                 # other kind of quote can be used inside a quoted value.
2253                                 in_str = true
2254                                 str_char = c
2255                         else if to.has(c) then
2256                                 break
2257                         end
2258                         out.add c
2259                         pos += 1
2260                 end
2261                 if pos == length then return -1
2262                 return pos
2263         end
2264
2265         # Read the XML tag starting at `start` in `self` and append it to `out`.
2266         #
2267         # In safe mode, tags that are not valid HTML tags are refused and
2268         # unsafe ones are appended with their `<` and `>` escaped.
2269         #
2270         # Return the position of the closing `>` or -1 if no tag can be read.
2271         # For `<!` constructs, only `<!` is appended and `start + 1` is returned.
2272         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2273                 if start + 1 >= length then return -1
2274                 var marker = self[start + 1]
2275                 if marker == '!' then
2276                         out.append "<!"
2277                         return start + 1
2278                 end
2279                 # The tag name starts after `<`, or after `</` for closing tags.
2280                 var is_close_tag = marker == '/'
2281                 var pos = start + 1
2282                 if is_close_tag then pos = start + 2
2283                 var is_valid = true
2284                 if not safe_mode then
2285                         # The tag name is copied as is.
2286                         out.add '<'
2287                         if is_close_tag then out.add '/'
2288                         pos = read_xml_until(out, pos, ' ', '/', '>')
2289                         if pos == -1 then return -1
2290                 else
2291                         var tmp = new FlatBuffer
2292                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2293                         if pos == -1 then return -1
2294                         var tag = tmp.write_to_string.trim.to_lower
2295                         if not tag.is_valid_html_tag then
2296                                 # NOTE: `out` is modified even though the tag is refused.
2297                                 out.append "&lt;"
2298                                 return -1
2299                         end
2300                         # Unsafe tags are kept, but with escaped angle brackets.
2301                         is_valid = not tag.is_html_unsafe
2302                         if is_valid then out.append "<" else out.append "&lt;"
2303                         if is_close_tag then out.add '/'
2304                         out.append tmp
2305                 end
2306                 # Read the rest of the tag, up to the next `>` or `/`.
2307                 pos = read_xml_until(out, pos, '/', '>')
2308                 if pos == -1 then return -1
2309                 if self[pos] == '/' then
2310                         # A ` /` is appended, then the reading goes on up to `>`.
2311                         out.append " /"
2312                         pos = read_xml_until(out, pos + 1, '>')
2313                         if pos == -1 then return -1
2314                 end
2315                 if self[pos] != '>' then return -1
2316                 if is_valid then
2317                         out.add '>'
2318                 else
2319                         out.append "&gt;"
2320                 end
2321                 return pos
2322         end
2323
# Read a markdown link address from `start` and append it to the `out` buffer.
#
# Reading stops, without copying the stop character, on a space found
# outside nested parentheses or on the `)` that balances the parenthesis
# assumed to be open before `start`. The position of that stop character
# is returned, or `-1` if the end of `self` is reached first.
#
# Backslash sequences are delegated to `escape`.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
        var idx = start
        # Parenthesis depth; starts at 1 for the one assumed open before `start`.
        var depth = 1
        while idx < length do
                var ch = self[idx]
                if ch == '\\' and idx + 1 < length then
                        idx = escape(out, self[idx + 1], idx)
                else if depth == 1 and (ch == ' ' or ch == ')') then
                        return idx
                else
                        if ch == '(' then
                                depth += 1
                        else if ch == ')' then
                                depth -= 1
                        end
                        out.add ch
                end
                idx += 1
        end
        if idx == length then return -1
        return idx
end
2350
# Read a markdown link text from `start` and append it to the `out` buffer.
#
# Nested `[` `]` pairs are copied to `out`. Reading stops on the `]` that
# balances the bracket assumed to be open before `start`; that bracket is
# not copied and its position is returned. Returns `-1` if the end of
# `self` is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
        var idx = start
        # Bracket depth; starts at 1 for the one assumed open before `start`.
        var depth = 1
        while idx < length do
                var ch = self[idx]
                if ch == '[' then
                        depth += 1
                else if ch == ']' then
                        depth -= 1
                        if depth == 0 then return idx
                end
                out.add ch
                idx += 1
        end
        if idx == length then return -1
        return idx
end
2377
# Extract the lowercased tag name from an XML tag.
#
# Scanning starts at index 1, or at index 2 when index 1 holds a `/`,
# and collects letters and digits. The last character of `self` is never
# read.
private fun xml_tag: String do
        var name = new FlatBuffer
        var idx = 1
        if 1 < length and self[1] == '/' then idx = 2
        var limit = length - 1
        while idx < limit do
                var ch = self[idx]
                if not (ch.is_digit or ch.is_letter) then break
                name.add ch
                idx += 1
        end
        return name.write_to_string.to_lower
end
2389
# Is `self` a well-formed HTML tag name?
#
# A name is accepted when it is not empty, starts with a letter and
# contains only letters and digits. Digits must be accepted after the
# first character so that names such as `h1` to `h6`, which are listed
# in `html_block_tags`, are recognized.
private fun is_valid_html_tag: Bool do
        if is_empty then return false
        # A tag name cannot start with a digit.
        if not self[0].is_alpha then return false
        for c in self do
                if not c.is_alpha and not c.is_digit then return false
        end
        return true
end
2397
# Resolve a backslash sequence, `c` being the character given as the one
# that follows the backslash located at `pos`.
#
# If `c` is one of the escapable markdown characters, `c` is added to
# `out` and `pos + 1` is returned. Otherwise a literal backslash is added
# to `out` and `pos` is returned unchanged.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
        var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.',
                '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
        if not escapable.has(c) then
                out.add '\\'
                return pos
        end
        out.add c
        return pos + 1
end
2410
# Extract the string found at the end of a fence opening.
#
# Leading spaces, backticks and tildes are skipped; what remains is
# returned trimmed. Returns `null` when `self` contains nothing else.
private fun meta_from_fence: nullable Text do
        var idx = 0
        var count = chars.length
        while idx < count do
                var ch = chars[idx]
                var is_fence_char = ch == ' ' or ch == '`' or ch == '~'
                if not is_fence_char then return substring_from(idx).trim
                idx += 1
        end
        return null
end
2421
# Is `self` the name of an unsafe HTML element?
#
# True when `self` is listed in `html_unsafe_tags`.
private fun is_html_unsafe: Bool do
        var name = write_to_string
        return html_unsafe_tags.has(name)
end
2424
# Is `self` the name of an HTML block element?
#
# True when `self` is listed in `html_block_tags`.
private fun is_html_block: Bool do
        var name = write_to_string
        return html_block_tags.has(name)
end
2427
# Is `self` a link prefix?
#
# True when `self` is listed in `html_link_prefixes`.
private fun is_link_prefix: Bool do
        var prefix = write_to_string
        return html_link_prefixes.has(prefix)
end
2430
# Names of the HTML elements that `is_html_unsafe` reports as unsafe.
#
# The array is built once and shared between calls.
private fun html_unsafe_tags: Array[String] do
        var tags = once ["applet", "head", "body", "frame",
                "frameset", "iframe", "script", "object"]
        return tags
end
2432
# Names of the HTML elements that `is_html_block` reports as block elements.
#
# The array is built once and shared between calls.
private fun html_block_tags: Array[String] do
        var tags = once ["address", "article", "aside", "audio", "blockquote",
                "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure",
                "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header",
                "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section",
                "table", "tfoot", "ul", "video"]
        return tags
end
2434
# Prefixes that `is_link_prefix` recognizes.
#
# The array is built once and shared between calls.
private fun html_link_prefixes: Array[String] do
        var prefixes = once ["http", "https", "ftp", "ftps"]
        return prefixes
end
2436 end
2437
redef class String

        # Render `self`, read as markdown, with a new `MarkdownProcessor`.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Streamable do return (new MarkdownProcessor).process(self)
end