lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
# Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for ouput.
  34         var emitter: MarkdownEmitter is noinit, protected writable
  35
  36         # Work in extended mode (default).
  37         #
  38         # Behavior changes when using extended mode:
  39         #
  40         # * Lists and code blocks end a paragraph
  41         #
  42         #   In normal markdown the following:
  43         #
  44         # ~~~md
  45         # This is a paragraph
  46         # * and this is not a list
  47         # ~~~
  48         #
  49         #   Will produce:
  50         #
  51         # ~~~html
  52         # <p>This is a paragraph
  53         # * and this is not a list</p>
  54         # ~~~
  55         #
  56         #   When using extended mode this changes to:
  57         #
  58         # ~~~html
  59         # <p>This is a paragraph</p>
  60         # <ul>
  61         # <li>and this is not a list</li>
  62         # </ul>
  63         # ~~~
  64         #
  65         # * Fences code blocks
  66         #
  67         #   If you don't want to indent your all your code with 4 spaces,
  68         #   you can wrap your code in ``` ``` ``` or `~~~`.
  69         #
  70         #   Here's an example:
  71         #
  72         # ~~~md
  73         # fun test do
  74         #    print "Hello World!"
  75         # end
  76         # ~~~
  77         #
  78         # * Code blocks meta
  79         #
  80         #   If you want to use syntax highlighting tools, most of them need to know what kind
  81         #   of language they are highlighting.
  82         #   You can add an optional language identifier after the fence declaration to output
  83         #   it in the HTML render.
  84         #
  85         # ```nit
  86         # import markdown
  87         #
  88         # print "# Hello World!".md_to_html
  89         # ```
  90         #
  91         #   Becomes
  92         #
  93         # ~~~html
  94         # <pre class="nit"><code>import markdown
  95         #
  96         # print "Hello World!".md_to_html
  97         # </code></pre>
  98         # ~~~
  99         #
 100         # * Underscores (Emphasis)
 101         #
 102         #   Underscores in the middle of a word like:
 103         #
 104         # ~~~md
 105         # Con_cat_this
 106         # ~~~
 107         #
 108         #   normally produces this:
 109         #
 110         # ~~~html
 111         # <p>Con<em>cat</em>this</p>
 112         # ~~~
 113         #
 114         #   With extended mode they don't result in emphasis.
 115         #
 116         # ~~~html
 117         # <p>Con_cat_this</p>
 118         # ~~~
 119         #
 120         # * Strikethrough
 121         #
 122         #   Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
 123         #   strikethrought span is marked with `~~`.
 124         #
 125         # ~~~md
 126         # ~~Mistaken text.~~
 127         # ~~~
 128         #
 129         #   becomes
 130         #
 131         # ~~~html
 132         # <del>Mistaken text.</del>
 133         # ~~~
 134         var ext_mode = true
 135
 136         # Disable attaching MDLocation to Tokens
 137         #
 138         # Locations are useful for some tools but they may
 139         # cause an important time and space overhead.
 140         #
 141         # Default = `false`
 142         var no_location = false is writable
 143
 144         init do self.emitter = new MarkdownEmitter(self)
 145
 146         # Process the mardown `input` string and return the processed output.
 147         fun process(input: String): Writable do
 148                 # init processor
 149                 link_refs.clear
 150                 last_link_ref = null
 151                 current_line = null
 152                 current_block = null
 153                 # parse markdown
 154                 var parent = read_lines(input)
 155                 parent.remove_surrounding_empty_lines
 156                 recurse(parent, false)
 157                 # output processed text
 158                 return emitter.emit(parent.kind)
 159         end
 160
 161         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
 162         private fun read_lines(input: String): MDBlock do
 163                 var block = new MDBlock(new MDLocation(1, 1, 1, 1))
 164                 var value = new FlatBuffer
 165                 var i = 0
 166
 167                 var line_pos = 0
 168                 var col_pos = 0
 169
 170                 while i < input.length do
 171                         value.clear
 172                         var pos = 0
 173                         var eol = false
 174                         while not eol and i < input.length do
 175                                 col_pos += 1
 176                                 var c = input[i]
 177                                 if c == '\n' then
 178                                         eol = true
 179                                 else if c == '\r' then
 180                                 else if c == '\t' then
 181                                         var np = pos + (4 - (pos & 3))
 182                                         while pos < np do
 183                                                 value.add ' '
 184                                                 pos += 1
 185                                         end
 186                                 else
 187                                         pos += 1
 188                                         value.add c
 189                                 end
 190                                 i += 1
 191                         end
 192                         line_pos += 1
 193
 194                         var loc = new MDLocation(line_pos, 1, line_pos, col_pos)
 195                         var line = new MDLine(loc, value.write_to_string)
 196                         var is_link_ref = check_link_ref(line)
 197                         # Skip link refs
 198                         if not is_link_ref then block.add_line line
 199                         col_pos = 0
 200                 end
 201                 return block
 202         end
 203
 204         # Check if line is a block link definition.
 205         # Return `true` if line contains a valid link ref and save it into `link_refs`.
 206         private fun check_link_ref(line: MDLine): Bool do
 207                 var md = line.value
 208                 var is_link_ref = false
 209                 var id = new FlatBuffer
 210                 var link = new FlatBuffer
 211                 var comment = new FlatBuffer
 212                 var pos = -1
 213                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
 214                         pos = line.leading + 1
 215                         pos = md.read_until(id, pos, ']')
 216                         if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
 217                                 if line.value[pos + 1] == ':' then
 218                                         pos += 2
 219                                         pos = md.skip_spaces(pos)
 220                                         if pos >= 0 and line.value[pos] == '<' then
 221                                                 pos += 1
 222                                                 pos = md.read_until(link, pos, '>')
 223                                                 pos += 1
 224                                         else if pos >= 0 then
 225                                                 pos = md.read_until(link, pos, ' ', '\n')
 226                                         end
 227                                         if not link.is_empty then
 228                                                 pos = md.skip_spaces(pos)
 229                                                 if pos > 0 and pos < line.value.length then
 230                                                         var c = line.value[pos]
 231                                                         if c == '\"' or c == '\'' or c == '(' then
 232                                                                 pos += 1
 233                                                                 if c == '(' then
 234                                                                         pos = md.read_until(comment, pos, ')')
 235                                                                 else
 236                                                                         pos = md.read_until(comment, pos, c)
 237                                                                 end
 238                                                                 if pos > 0 then is_link_ref = true
 239                                                         end
 240                                                 else
 241                                                         is_link_ref = true
 242                                                 end
 243                                         end
 244                                 end
 245                         end
 246                 end
 247                 if is_link_ref and not id.is_empty and not link.is_empty then
 248                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 249                         add_link_ref(id.write_to_string, lr)
 250                         if comment.is_empty then last_link_ref = lr
 251                         return true
 252                 else
 253                         comment = new FlatBuffer
 254                         if not line.is_empty and last_link_ref != null then
 255                                 pos = line.leading
 256                                 var c = line.value[pos]
 257                                 if c == '\"' or c == '\'' or c ==  '(' then
 258                                         pos += 1
 259                                         if c == '(' then
 260                                                 pos = md.read_until(comment, pos, ')')
 261                                         else
 262                                                 pos = md.read_until(comment, pos, c)
 263                                         end
 264                                 end
 265                                 var last_link_ref = self.last_link_ref
 266                                 if not comment.is_empty and last_link_ref != null then
 267                                         last_link_ref.title = comment.write_to_string
 268                                 end
 269                         end
 270                         if comment.is_empty then return false
 271                         return true
 272                 end
 273         end
 274
 275         # Known link refs
 276         # This list will be needed during output to expand links.
 277         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 278
 279         # Last encountered link ref (for multiline definitions)
 280         #
 281         # Markdown allows link refs to be defined over two lines:
 282         #
 283         # ~~~md
 284         # [id]: http://example.com/longish/path/to/resource/here
 285         #       "Optional Title Here"
 286         # ~~~
 287         #
 288         private var last_link_ref: nullable LinkRef = null
 289
 290         # Add a link ref to the list
 291         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 292
 293         # Recursively split a `block`.
 294         #
 295         # The block is splitted according to the type of lines it contains.
 296         # Some blocks can be splited again recursively like lists.
 297         # The `in_list` mode is used to recurse on list and build
 298         # nested paragraphs or code blocks.
 299         fun recurse(root: MDBlock, in_list: Bool) do
 300                 var old_mode = self.in_list
 301                 var old_root = self.current_block
 302                 self.in_list = in_list
 303
 304                 var line = root.first_line
 305                 while line != null and line.is_empty do
 306                         line = line.next
 307                         if line == null then return
 308                 end
 309
 310                 current_line = line
 311                 current_block = root
 312                 while current_line != null do
 313                         line_kind(current_line.as(not null)).process(self)
 314                 end
 315                 self.in_list = old_mode
 316                 self.current_block = old_root
 317         end
 318
 319         # Currently processed line.
 320         # Used when visiting blocks with `recurse`.
 321         var current_line: nullable MDLine = null is writable
 322
 323         # Currently processed block.
 324         # Used when visiting blocks with `recurse`.
 325         var current_block: nullable MDBlock = null is writable
 326
 327         # Is the current recursion in list mode?
 328         # Used when visiting blocks with `recurse`
 329         private var in_list = false
 330
 331         # The type of line.
 332         # see: `md_line_*`
 333         fun line_kind(md: MDLine): Line do
 334                 var value = md.value
 335                 var leading = md.leading
 336                 var trailing = md.trailing
 337                 if md.is_empty then return new LineEmpty
 338                 if md.leading > 3 then return new LineCode
 339                 if value[leading] == '#' then return new LineHeadline
 340                 if value[leading] == '>' then return new LineBlockquote
 341
 342                 if ext_mode then
 343                         if value.length - leading - trailing > 2 then
 344                                 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 345                                         return new LineFence
 346                                 end
 347                                 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 348                                         return new LineFence
 349                                 end
 350                         end
 351                 end
 352
 353                 if value.length - leading - trailing > 2 and
 354                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 355                    if md.count_chars(value[leading]) >= 3 then
 356                                 return new LineHR
 357                    end
 358                 end
 359
 360                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 361                         var c = value[leading]
 362                         if c == '*' or c == '-' or c == '+' then return new LineUList
 363                 end
 364
 365                 if value.length - leading >= 3 and value[leading].is_digit then
 366                         var i = leading + 1
 367                         while i < value.length and value[i].is_digit do i += 1
 368                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 369                                 return new LineOList
 370                         end
 371                 end
 372
 373                 if value[leading] == '<' and md.check_html then return new LineXML
 374
 375                 var next = md.next
 376                 if next != null and not next.is_empty then
 377                         if next.count_chars('=') > 0 then
 378                                 return new LineHeadline1
 379                         end
 380                         if next.count_chars('-') > 0 then
 381                                 return new LineHeadline2
 382                         end
 383                 end
 384                 return new LineOther
 385         end
 386
 387         # Get the token kind at `pos`.
 388         fun token_at(text: Text, pos: Int): Token do
 389                 var c0: Char
 390                 var c1: Char
 391                 var c2: Char
 392
 393                 if pos > 0 then
 394                         c0 = text[pos - 1]
 395                 else
 396                         c0 = ' '
 397                 end
 398                 var c = text[pos]
 399
 400                 if pos + 1 < text.length then
 401                         c1 = text[pos + 1]
 402                 else
 403                         c1 = ' '
 404                 end
 405                 if pos + 2 < text.length then
 406                         c2 = text[pos + 2]
 407                 else
 408                         c2 = ' '
 409                 end
 410
 411                 var loc
 412                 if no_location then
 413                         loc = null
 414                 else
 415                         loc = new MDLocation(
 416                                 current_loc.line_start,
 417                                 current_loc.column_start + pos,
 418                                 current_loc.line_start,
 419                                 current_loc.column_start + pos)
 420                 end
 421
 422                 if c == '*' then
 423                         if c1 == '*' then
 424                                 if c0 != ' ' or c2 != ' ' then
 425                                         return new TokenStrongStar(loc, pos, c)
 426                                 else
 427                                         return new TokenEmStar(loc, pos, c)
 428                                 end
 429                         end
 430                         if c0 != ' ' or c1 != ' ' then
 431                                 return new TokenEmStar(loc, pos, c)
 432                         else
 433                                 return new TokenNone(loc, pos, c)
 434                         end
 435                 else if c == '_' then
 436                         if c1 == '_' then
 437                                 if c0 != ' ' or c2 != ' ' then
 438                                         return new TokenStrongUnderscore(loc, pos, c)
 439                                 else
 440                                         return new TokenEmUnderscore(loc, pos, c)
 441                                 end
 442                         end
 443                         if ext_mode then
 444                                 if (c0.is_letter or c0.is_digit) and c0 != '_' and
 445                                    (c1.is_letter or c1.is_digit) then
 446                                         return new TokenNone(loc, pos, c)
 447                                 else
 448                                         return new TokenEmUnderscore(loc, pos, c)
 449                                 end
 450                         end
 451                         if c0 != ' ' or c1 != ' ' then
 452                                 return new TokenEmUnderscore(loc, pos, c)
 453                         else
 454                                 return new TokenNone(loc, pos, c)
 455                         end
 456                 else if c == '!' then
 457                         if c1 == '[' then return new TokenImage(loc, pos, c)
 458                         return new TokenNone(loc, pos, c)
 459                 else if c == '[' then
 460                         return new TokenLink(loc, pos, c)
 461                 else if c == ']' then
 462                         return new TokenNone(loc, pos, c)
 463                 else if c == '`' then
 464                         if c1 == '`' then
 465                                 return new TokenCodeDouble(loc, pos, c)
 466                         else
 467                                 return new TokenCodeSingle(loc, pos, c)
 468                         end
 469                 else if c == '\\' then
 470                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 471                                 return new TokenEscape(loc, pos, c)
 472                         else
 473                                 return new TokenNone(loc, pos, c)
 474                         end
 475                 else if c == '<' then
 476                         return new TokenHTML(loc, pos, c)
 477                 else if c == '&' then
 478                         return new TokenEntity(loc, pos, c)
 479                 else
 480                         if ext_mode then
 481                                 if c == '~' and c1 == '~' then
 482                                         return new TokenStrike(loc, pos, c)
 483                                 end
 484                         end
 485                         return new TokenNone(loc, pos, c)
 486                 end
 487         end
 488
 489         # Find the position of a `token` in `self`.
 490         fun find_token(text: Text, start: Int, token: Token): Int do
 491                 var pos = start
 492                 while pos < text.length do
 493                         if token_at(text, pos).is_same_type(token) then
 494                                 return pos
 495                         end
 496                         pos += 1
 497                 end
 498                 return -1
 499         end
 500
 501         # Location used for next parsed token.
 502         #
 503         # This location can be changed by the emitter to adjust with `\n` found
 504         # in the input.
 505         private fun current_loc: MDLocation do return emitter.current_loc
 506 end
 507
 508 # Emit output corresponding to blocks content.
 509 #
 510 # Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
 512 class MarkdownEmitter
 513
 514         # Kind of processor used for parsing.
 515         type PROCESSOR: MarkdownProcessor
 516
 517         # Processor containing link refs.
 518         var processor: PROCESSOR
 519
 520         # Kind of decorator used for decoration.
 521         type DECORATOR: Decorator
 522
 523         # Decorator used for output.
 524         # Default is `HTMLDecorator`
 525         var decorator: DECORATOR is writable, lazy do
 526                 return new HTMLDecorator
 527         end
 528
 529         # Create a new `MarkdownEmitter` using a custom `decorator`.
 530         init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
 531                 init processor
 532                 self.decorator = decorator
 533         end
 534
 535         # Output `block` using `decorator` in the current buffer.
 536         fun emit(block: Block): Text do
 537                 var buffer = push_buffer
 538                 block.emit(self)
 539                 pop_buffer
 540                 return buffer
 541         end
 542
 543         # Output the content of `block`.
 544         fun emit_in(block: Block) do block.emit_in(self)
 545
 546         # Transform and emit mardown text
 547         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 548
 549         # Transform and emit mardown text starting at `start` and
 550         # until a token with the same type as `token` is found.
 551         # Go until the end of `text` if `token` is null.
 552         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 553                 var old_text = current_text
 554                 var old_pos = current_pos
 555                 current_text = text
 556                 current_pos = start
 557                 while current_pos < text.length do
 558                         if text[current_pos] == '\n' then
 559                                 current_loc.line_start += 1
 560                                 current_loc.column_start = -current_pos
 561                         end
 562                         var mt = processor.token_at(text, current_pos)
 563                         if (token != null and not token isa TokenNone) and
 564                         (mt.is_same_type(token) or
 565                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 566                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 567                                 return current_pos
 568                         end
 569                         mt.emit(self)
 570                         current_pos += 1
 571                 end
 572                 current_text = old_text
 573                 current_pos = old_pos
 574                 return -1
 575         end
 576
 577         # Currently processed position in `current_text`.
 578         # Used when visiting inline production with `emit_text_until`.
 579         private var current_pos: Int = -1
 580
 581         # Currently processed text.
 582         # Used when visiting inline production with `emit_text_until`.
 583         private var current_text: nullable Text = null
 584
 585         # Stacked buffers.
 586         private var buffer_stack = new List[FlatBuffer]
 587
 588         # Push a new buffer on the stack.
 589         private fun push_buffer: FlatBuffer do
 590                 var buffer = new FlatBuffer
 591                 buffer_stack.add buffer
 592                 return buffer
 593         end
 594
 595         # Pop the last buffer.
 596         private fun pop_buffer do buffer_stack.pop
 597
 598         # Current output buffer.
 599         private fun current_buffer: FlatBuffer do
 600                 assert not buffer_stack.is_empty
 601                 return buffer_stack.last
 602         end
 603
 604         # Stacked locations.
 605         private var loc_stack = new List[MDLocation]
 606
 607         # Push a new MDLocation on the stack.
 608         private fun push_loc(location: MDLocation) do loc_stack.add location
 609
 610         # Pop the last buffer.
 611         private fun pop_loc: MDLocation do return loc_stack.pop
 612
 613         # Current output buffer.
 614         private fun current_loc: MDLocation do
 615                 assert not loc_stack.is_empty
 616                 return loc_stack.last
 617         end
 618
 619         # Append `e` to current buffer.
 620         fun add(e: Writable) do
 621                 if e isa Text then
 622                         current_buffer.append e
 623                 else
 624                         current_buffer.append e.write_to_string
 625                 end
 626         end
 627
 628         # Append `c` to current buffer.
 629         fun addc(c: Char) do
 630                 current_buffer.add c
 631         end
 632
 633         # Append a "\n" line break.
 634         fun addn do addc '\n'
 635 end
 636
 637 # A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
 639 #
 640 # ~~~raw
 641 # [1]: http://example.com/ "Optional title"
 642 # ~~~
 643 class LinkRef
 644
 645         # Link href
 646         var link: String
 647
 648         # Optional link title
 649         var title: nullable String = null
 650
 651         # Is the link an abreviation?
 652         var is_abbrev = false
 653
 654         # Create a link with a title.
 655         init with_title(link: String, title: nullable String) do
 656                 init(link)
 657                 self.title = title
 658         end
 659 end
 660
# A `Decorator` is used to emit markdown into a specific format.
 662 # Default decorator used is `HTMLDecorator`.
 663 interface Decorator
 664
 665         # Kind of emitter used for decoration.
 666         type EMITTER: MarkdownEmitter
 667
 668         # Render a single plain char.
 669         #
 670         # Redefine this method to add special escaping for plain text.
 671         fun add_char(v: EMITTER, c: Char) do v.addc c
 672
 673         # Render a ruler block.
 674         fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
 675
 676         # Render a headline block with corresponding level.
 677         fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
 678
 679         # Render a paragraph block.
 680         fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
 681
 682         # Render a code or fence block.
 683         fun add_code(v: EMITTER, block: BlockCode) is abstract
 684
 685         # Render a blockquote.
 686         fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
 687
 688         # Render an unordered list.
 689         fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
 690
 691         # Render an ordered list.
 692         fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
 693
 694         # Render a list item.
 695         fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
 696
 697         # Render an emphasis text.
 698         fun add_em(v: EMITTER, text: Text) is abstract
 699
 700         # Render a strong text.
 701         fun add_strong(v: EMITTER, text: Text) is abstract
 702
 703         # Render a strike text.
 704         #
 705         # Extended mode only (see `MarkdownProcessor::ext_mode`)
 706         fun add_strike(v: EMITTER, text: Text) is abstract
 707
 708         # Render a link.
 709         fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 710
 711         # Render an image.
 712         fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
 713
 714         # Render an abbreviation.
 715         fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
 716
 717         # Render a code span reading from a buffer.
 718         fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 719
 720         # Render a text and escape it.
 721         fun append_value(v: EMITTER, value: Text) is abstract
 722
 723         # Render code text from buffer and escape it.
 724         fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
 725
 726         # Render a character escape.
 727         fun escape_char(v: EMITTER, char: Char) is abstract
 728
 729         # Render a line break
 730         fun add_line_break(v: EMITTER) is abstract
 731
 732         # Generate a new html valid id from a `String`.
 733         fun strip_id(txt: String): String is abstract
 734
 735         # Found headlines during the processing labeled by their ids.
 736         fun headlines: ArrayMap[String, HeadLine] is abstract
 737 end
 738
# Class representing a markdown headline.
#
# `HTMLDecorator::add_headline` creates one instance per rendered headline.
class HeadLine
        # Unique identifier of this headline.
        var id: String

        # Text of the headline.
        var title: String

        # Level of this headline.
        #
        # According to the markdown specification, level must be in `[1..6]`.
        var level: Int
end
 752
 753 # `Decorator` that outputs HTML.
 754 class HTMLDecorator
 755         super Decorator
 756
 757         redef var headlines = new ArrayMap[String, HeadLine]
 758
 759         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 760
 761         redef fun add_headline(v, block) do
 762                 # save headline
 763                 var line = block.block.first_line
 764                 if line == null then return
 765                 var txt = line.value
 766                 var id = strip_id(txt)
 767                 var lvl = block.depth
 768                 headlines[id] = new HeadLine(id, txt, lvl)
 769                 # output it
 770                 v.add "<h{lvl} id=\"{id}\">"
 771                 v.emit_in block
 772                 v.add "</h{lvl}>\n"
 773         end
 774
 775         redef fun add_paragraph(v, block) do
 776                 v.add "<p>"
 777                 v.emit_in block
 778                 v.add "</p>\n"
 779         end
 780
 781         redef fun add_code(v, block) do
 782                 var meta = block.meta
 783                 if meta != null then
 784                         v.add "<pre class=\""
 785                         append_value(v, meta)
 786                         v.add "\"><code>"
 787                 else
 788                         v.add "<pre><code>"
 789                 end
 790                 v.emit_in block
 791                 v.add "</code></pre>\n"
 792         end
 793
 794         redef fun add_blockquote(v, block) do
 795                 v.add "<blockquote>\n"
 796                 v.emit_in block
 797                 v.add "</blockquote>\n"
 798         end
 799
 800         redef fun add_unorderedlist(v, block) do
 801                 v.add "<ul>\n"
 802                 v.emit_in block
 803                 v.add "</ul>\n"
 804         end
 805
 806         redef fun add_orderedlist(v, block) do
 807                 v.add "<ol>\n"
 808                 v.emit_in block
 809                 v.add "</ol>\n"
 810         end
 811
 812         redef fun add_listitem(v, block) do
 813                 v.add "<li>"
 814                 v.emit_in block
 815                 v.add "</li>\n"
 816         end
 817
 818         redef fun add_em(v, text) do
 819                 v.add "<em>"
 820                 v.add text
 821                 v.add "</em>"
 822         end
 823
 824         redef fun add_strong(v, text) do
 825                 v.add "<strong>"
 826                 v.add text
 827                 v.add "</strong>"
 828         end
 829
 830         redef fun add_strike(v, text) do
 831                 v.add "<del>"
 832                 v.add text
 833                 v.add "</del>"
 834         end
 835
 836         redef fun add_image(v, link, name, comment) do
 837                 v.add "<img src=\""
 838                 append_value(v, link)
 839                 v.add "\" alt=\""
 840                 append_value(v, name)
 841                 v.add "\""
 842                 if comment != null and not comment.is_empty then
 843                         v.add " title=\""
 844                         append_value(v, comment)
 845                         v.add "\""
 846                 end
 847                 v.add "/>"
 848         end
 849
 850         redef fun add_link(v, link, name, comment) do
 851                 v.add "<a href=\""
 852                 append_value(v, link)
 853                 v.add "\""
 854                 if comment != null and not comment.is_empty then
 855                         v.add " title=\""
 856                         append_value(v, comment)
 857                         v.add "\""
 858                 end
 859                 v.add ">"
 860                 v.emit_text(name)
 861                 v.add "</a>"
 862         end
 863
 864         redef fun add_abbr(v, name, comment) do
 865                 v.add "<abbr title=\""
 866                 append_value(v, comment)
 867                 v.add "\">"
 868                 v.emit_text(name)
 869                 v.add "</abbr>"
 870         end
 871
 872         redef fun add_span_code(v, text, from, to) do
 873                 v.add "<code>"
 874                 append_code(v, text, from, to)
 875                 v.add "</code>"
 876         end
 877
 878         redef fun add_line_break(v) do
 879                 v.add "<br/>"
 880         end
 881
 882         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 883
 884         redef fun escape_char(v, c) do
 885                 if c == '&' then
 886                         v.add "&amp;"
 887                 else if c == '<' then
 888                         v.add "&lt;"
 889                 else if c == '>' then
 890                         v.add "&gt;"
 891                 else if c == '"' then
 892                         v.add "&quot;"
 893                 else if c == '\'' then
 894                         v.add "&apos;"
 895                 else
 896                         v.addc c
 897                 end
 898         end
 899
 900         redef fun append_code(v, buffer, from, to) do
 901                 for i in [from..to[ do
 902                         var c = buffer[i]
 903                         if c == '&' then
 904                                 v.add "&amp;"
 905                         else if c == '<' then
 906                                 v.add "&lt;"
 907                         else if c == '>' then
 908                                 v.add "&gt;"
 909                         else
 910                                 v.addc c
 911                         end
 912                 end
 913         end
 914
 915         redef fun strip_id(txt) do
 916                 # strip id
 917                 var b = new FlatBuffer
 918                 for c in txt do
 919                         if c == ' ' then
 920                                 b.add '_'
 921                         else
 922                                 if not c.is_letter and
 923                                    not c.is_digit and
 924                                    not allowed_id_chars.has(c) then continue
 925                                 b.add c
 926                         end
 927                 end
 928                 var res = b.to_s
 929                 var key = res
 930                 # check for multiple id definitions
 931                 if headlines.has_key(key) then
 932                         var i = 1
 933                         key = "{res}_{i}"
 934                         while headlines.has_key(key) do
 935                                 i += 1
 936                                 key = "{res}_{i}"
 937                         end
 938                 end
 939                 return key
 940         end
 941
 942         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 943 end
 944
 945 # Location in a Markdown input.
 946 class MDLocation
 947
 948         # Starting line number (starting from 1).
 949         var line_start: Int
 950
 951         # Starting column number (starting from 1).
 952         var column_start: Int
 953
 954         # Stopping line number (starting from 1).
 955         var line_end: Int
 956
 957         # Stopping column number (starting from 1).
 958         var column_end: Int
 959
 960         redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
 961
 962         # Return a copy of `self`.
 963         fun copy: MDLocation do
 964                 return new MDLocation(line_start, column_start, line_end, column_end)
 965         end
 966 end
 967
 968 # A block of markdown lines.
 969 # A `MDBlock` can contains lines and/or sub-blocks.
 970 class MDBlock
 971
 972         # Position of `self` in the input.
 973         var location: MDLocation
 974
 975         # Kind of block.
 976         # See `Block`.
 977         var kind: Block = new BlockNone(self) is writable
 978
 979         # First line if any.
 980         var first_line: nullable MDLine = null is writable
 981
 982         # Last line if any.
 983         var last_line: nullable MDLine = null is writable
 984
 985         # First sub-block if any.
 986         var first_block: nullable MDBlock = null is writable
 987
 988         # Last sub-block if any.
 989         var last_block: nullable MDBlock = null is writable
 990
 991         # Previous block if any.
 992         var prev: nullable MDBlock = null is writable
 993
 994         # Next block if any.
 995         var next: nullable MDBlock = null is writable
 996
 997         # Does this block contain subblocks?
 998         fun has_blocks: Bool do return first_block != null
 999
1000         # Count sub-blocks.
1001         fun count_blocks: Int do
1002                 var count = 0
1003                 var block = first_block
1004                 while block != null do
1005                         count += 1
1006                         block = block.next
1007                 end
1008                 return count
1009         end
1010
1011         # Does this block contain lines?
1012         fun has_lines: Bool do return first_line != null
1013
1014         # Count block lines.
1015         fun count_lines: Int do
1016                 var count = 0
1017                 var line = first_line
1018                 while line != null do
1019                         count += 1
1020                         line = line.next
1021                 end
1022                 return count
1023         end
1024
1025         # Split `self` creating a new sub-block having `line` has `last_line`.
1026         fun split(line: MDLine): MDBlock do
1027                 # location for new block
1028                 var new_loc = new MDLocation(
1029                         first_line.as(not null).location.line_start,
1030                         first_line.as(not null).location.column_start,
1031                         line.location.line_end,
1032                         line.location.column_end)
1033                 # create block
1034                 var block = new MDBlock(new_loc)
1035                 block.first_line = first_line
1036                 block.last_line = line
1037                 first_line = line.next
1038                 line.next = null
1039                 if first_line == null then
1040                         last_line = null
1041                 else
1042                         first_line.as(not null).prev = null
1043                         # update current block loc
1044                         location.line_start = first_line.as(not null).location.line_start
1045                         location.column_start = first_line.as(not null).location.column_start
1046                 end
1047                 if first_block == null then
1048                         first_block = block
1049                         last_block = block
1050                 else
1051                         last_block.as(not null).next = block
1052                         last_block = block
1053                 end
1054                 return block
1055         end
1056
1057         # Add a `line` to this block.
1058         fun add_line(line: MDLine) do
1059                 if last_line == null then
1060                         first_line = line
1061                         last_line = line
1062                 else
1063                         last_line.as(not null).next_empty = line.is_empty
1064                         line.prev_empty = last_line.as(not null).is_empty
1065                         line.prev = last_line
1066                         last_line.as(not null).next = line
1067                         last_line = line
1068                 end
1069         end
1070
1071         # Remove `line` from this block.
1072         fun remove_line(line: MDLine) do
1073                 if line.prev == null then
1074                         first_line = line.next
1075                 else
1076                         line.prev.as(not null).next = line.next
1077                 end
1078                 if line.next == null then
1079                         last_line = line.prev
1080                 else
1081                         line.next.as(not null).prev = line.prev
1082                 end
1083                 line.prev = null
1084                 line.next = null
1085         end
1086
1087         # Remove leading empty lines.
1088         fun remove_leading_empty_lines: Bool do
1089                 var was_empty = false
1090                 var line = first_line
1091                 while line != null and line.is_empty do
1092                         remove_line line
1093                         line = first_line
1094                         was_empty = true
1095                 end
1096                 return was_empty
1097         end
1098
1099         # Remove trailing empty lines.
1100         fun remove_trailing_empty_lines: Bool do
1101                 var was_empty = false
1102                 var line = last_line
1103                 while line != null and line.is_empty do
1104                         remove_line line
1105                         line = last_line
1106                         was_empty = true
1107                 end
1108                 return was_empty
1109         end
1110
1111         # Remove leading and trailing empty lines.
1112         fun remove_surrounding_empty_lines: Bool do
1113                 var was_empty = false
1114                 if remove_leading_empty_lines then was_empty = true
1115                 if remove_trailing_empty_lines then was_empty = true
1116                 return was_empty
1117         end
1118
1119         # Remove list markers and up to 4 leading spaces.
1120         # Used to clean nested lists.
1121         fun remove_list_indent(v: MarkdownProcessor) do
1122                 var line = first_line
1123                 while line != null do
1124                         if not line.is_empty then
1125                                 var kind = v.line_kind(line)
1126                                 if kind isa LineList then
1127                                         line.value = kind.extract_value(line)
1128                                 else
1129                                         line.value = line.value.substring_from(line.leading.min(4))
1130                                 end
1131                                 line.leading = line.process_leading
1132                         end
1133                         line = line.next
1134                 end
1135         end
1136
1137         # Collect block line text.
1138         fun text: String do
1139                 var text = new FlatBuffer
1140                 var line = first_line
1141                 while line != null do
1142                         if not line.is_empty then
1143                                 text.append line.text
1144                         end
1145                         text.append "\n"
1146                         line = line.next
1147                 end
1148                 return text.write_to_string
1149         end
1150 end
1151
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

        # The markdown block `self` is related to.
        var block: MDBlock

        # Output `self` using `v.decorator`.
        #
        # By default the content is emitted without any decoration.
        fun emit(v: MarkdownEmitter) do v.emit_in(self)

        # Emit the contents of `self`, lines or blocks.
        #
        # Surrounding empty lines are removed first.
        # Sub-blocks are only emitted when no line remains.
        fun emit_in(v: MarkdownEmitter) do
                block.remove_surrounding_empty_lines
                if block.has_lines then
                        emit_lines(v)
                else
                        emit_blocks(v)
                end
        end

        # Emit lines contained in `block`.
        #
        # Lines are first gathered in a temporary buffer, without their leading and
        # trailing spaces, then the whole buffer is processed by `v.emit_text`.
        fun emit_lines(v: MarkdownEmitter) do
                var tpl = v.push_buffer
                var line = block.first_line
                while line != null do
                        if not line.is_empty then
                                # `substring` expects a length, not an end index:
                                # remove both the leading and the trailing spaces from it.
                                var count = line.value.length - line.trailing - line.leading
                                if count < 0 then count = 0
                                v.add line.value.substring(line.leading, count)
                                # two trailing spaces (or more) ask for a line break
                                if line.trailing >= 2 then v.decorator.add_line_break(v)
                        end
                        if line.next != null then
                                v.addn
                        end
                        line = line.next
                end
                v.pop_buffer
                v.emit_text(tpl)
        end

        # Emit sub-blocks contained in `block`.
        #
        # The location of each sub-block is pushed on `v` while it is emitted.
        fun emit_blocks(v: MarkdownEmitter) do
                var block = self.block.first_block
                while block != null do
                        v.push_loc(block.location)
                        block.kind.emit(v)
                        v.pop_loc
                        block = block.next
                end
        end

        # The raw content of the block as a multi-line string.
        #
        # Outside of a `BlockFence`, lines starting with 4 spaces lose this prefix.
        # Each line contributes a `"\n"`, empty lines contribute nothing else.
        fun raw_content: String do
                var infence = self isa BlockFence
                var text = new FlatBuffer
                var line = self.block.first_line
                while line != null do
                        if not line.is_empty then
                                var str = line.value
                                if not infence and str.has_prefix("    ") then
                                        # NOTE(review): the second argument is a length, so trailing
                                        # spaces are mostly kept here; kept as is, confirm the intent.
                                        text.append str.substring(4, str.length - line.trailing)
                                else
                                        text.append str
                                end
                        end
                        text.append "\n"
                        line = line.next
                end
                return text.write_to_string
        end
end
1221
# A block without any markdown specificities.
#
# It redefines nothing: it is emitted with the default implementation
# inherited from `Block`. It is the default `MDBlock::kind`.
class BlockNone
        super Block
end
1229
# Block holding a markdown blockquote.
class BlockQuote
        super Block

        # Delegate rendering to `Decorator::add_blockquote`.
        redef fun emit(v) do
                v.decorator.add_blockquote(v, self)
        end

        # Strip the `>` marker found at the indentation of each line of `block`.
        #
        # One space following the marker is stripped too.
        # Empty lines and lines without marker are left untouched.
        private fun remove_block_quote_prefix(block: MDBlock) do
                var cur = block.first_line
                while cur != null do
                        if not cur.is_empty and cur.value[cur.leading] == '>' then
                                var txt = cur.value
                                # first character kept: right after the marker...
                                var cut = cur.leading + 1
                                # ...or after the space following it
                                if cut < txt.length and txt[cut] == ' ' then cut += 1
                                cur.value = txt.substring_from(cut)
                                cur.leading = cur.process_leading
                        end
                        cur = cur.next
                end
        end
end
1255
# Block holding markdown code.
class BlockCode
        super Block

        # Any string found after fence token.
        var meta: nullable Text

        # Index of the first character of each line that belongs to the code.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        # Delegate rendering to `Decorator::add_code`.
        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line from `line_start` through `Decorator::append_code`.
        #
        # Every line, empty or not, is followed by a new line.
        redef fun emit_lines(v) do
                var cur = block.first_line
                while cur != null do
                        if not cur.is_empty then
                                var code = cur.value
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        cur = cur.next
                end
        end
end
1281
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`, except that fenced lines
# are read from their first column (see `line_start`).
# `Block::raw_content` also relies on this class to keep lines unchanged.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1292
# A markdown headline.
class BlockHeadline
        super Block

        # Emit the headline through `Decorator::add_headline`.
        #
        # The location pushed on `v` is shifted by the headline marks removed
        # by `transform_headline`.
        redef fun emit(v) do
                var loc = block.location.copy
                loc.column_start += start
                v.push_loc(loc)
                v.decorator.add_headline(v, self)
                v.pop_loc
        end

        # Index of the first title character in the original line.
        private var start = 0

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from lines contained in `self`.
        #
        # The opening `#` (their number gives `depth`, limited to 6), the spaces
        # that follow, and the closing spaces and `#` are removed from the first
        # line of `block`. The line is flagged empty when no title remains.
        #
        # Does nothing if `depth` is already set or if the first line is empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null then return
                if line.is_empty then return
                # skip the opening marks, then the spaces before the title
                var start = line.leading
                while start < line.value.length and line.value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < line.value.length and line.value[start] == ' ' do
                        start += 1
                end
                if start >= line.value.length then
                        line.is_empty = true
                else
                        # walk back over the closing marks, never before the title start
                        var nend = line.value.length - line.trailing - 1
                        while nend >= start and line.value[nend] == '#' do nend -= 1
                        while nend >= start and line.value[nend] == ' ' do nend -= 1
                        if nend < start then
                                # only closing marks were found (e.g. `# ##`): no title
                                line.is_empty = true
                        else
                                line.value = line.value.substring(start, nend - start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                self.start = start
                depth = level.min(6)
        end
end
1339
# Block holding a single item of a markdown list.
class BlockListItem
        super Block

        # Delegate rendering to `Decorator::add_listitem`.
        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1346
# A markdown list block.
# Holds the code shared by ordered and unordered lists.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # A new item starts on each list line, and on each non-empty line
        # without indentation that follows an empty line.
        # `v` is used to know which lines are list lines.
        private fun init_block(v: MarkdownProcessor) do
                var first = block.first_line
                if first == null then return
                var line = first.next
                while line != null do
                        var starts_item = v.line_kind(line) isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                # close the current item just before `line`
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                # the remaining lines form the last item
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # As soon as one item of `block` contains a paragraph, every `BlockNone`
        # found in the items of `block` becomes a `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                # look for a paragraph in any item
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null and not found do
                                        found = child.kind isa BlockParagraph
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # promote the untyped blocks of all the items
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var sub = item.first_block
                                while sub != null do
                                        if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
                                        sub = sub.next
                                end
                        end
                        item = item.next
                end
        end
end
1405
# Block holding a markdown ordered list.
class BlockOrderedList
        super BlockList

        # Delegate rendering to `Decorator::add_orderedlist`.
        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1412
# Block holding a markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Delegate rendering to `Decorator::add_unorderedlist`.
        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1419
# Block holding a markdown paragraph.
class BlockParagraph
        super Block

        # Delegate rendering to `Decorator::add_paragraph`.
        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1426
# Block holding a markdown ruler.
class BlockRuler
        super Block

        # Delegate rendering to `Decorator::add_ruler`.
        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1433
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Output the lines as they are, without any markdown processing.
        #
        # Every line, empty or not, is followed by a new line.
        redef fun emit_lines(v) do
                var cur = block.first_line
                while cur != null do
                        if not cur.is_empty then
                                v.add cur.value
                        end
                        v.addn
                        cur = cur.next
                end
        end
end
1447
1448 # A markdown line.
1449 class MDLine
1450
        # Location of `self` in the original input.
        var location: MDLocation

        # Text contained in this line.
        #
        # Blocks rewrite it when they remove their own markers
        # (list markers, blockquote prefixes, headline marks).
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        #
        # True by default; `init` sets it to false when `value` contains
        # something other than leading spaces.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        #
        # Set by `MDBlock::add_line`, and by `clear` on the neighbor line.
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        #
        # Set by `MDBlock::add_line`, and by `clear` on the neighbor line.
        var next_empty: Bool = false is writable
1472
1473         # Initialize a new MDLine from its string value
1474         init do
1475                 self.leading = process_leading
1476                 if leading != value.length then
1477                         self.is_empty = false
1478                         self.trailing = process_trailing
1479                 end
1480         end
1481
1482         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1483         fun clear do
1484                 value = ""
1485                 leading = 0
1486                 trailing = 0
1487                 is_empty = true
1488                 if prev != null then prev.as(not null).next_empty = true
1489                 if next != null then next.as(not null).prev_empty = true
1490         end
1491
1492         # Number or leading spaces on this line.
1493         var leading: Int = 0 is writable
1494
1495         # Compute `leading` depending on `value`.
1496         fun process_leading: Int do
1497                 var count = 0
1498                 var value = self.value
1499                 while count < value.length and value[count] == ' ' do count += 1
1500                 if leading == value.length then clear
1501                 return count
1502         end
1503
1504         # Number of trailing spaces on this line.
1505         var trailing: Int = 0 is writable
1506
1507         # Compute `trailing` depending on `value`.
1508         fun process_trailing: Int do
1509                 var count = 0
1510                 var value = self.value
1511                 while value[value.length - count - 1] == ' ' do
1512                         count += 1
1513                 end
1514                 return count
1515         end
1516
1517         # Count the amount of `ch` in this line.
1518         # Return A value > 0 if this line only consists of `ch` end spaces.
1519         fun count_chars(ch: Char): Int do
1520                 var count = 0
1521                 for c in value do
1522                         if c == ' ' then
1523                                 continue
1524                         end
1525                         if c == ch then
1526                                 count += 1
1527                                 continue
1528                         end
1529                         count = 0
1530                         break
1531                 end
1532                 return count
1533         end
1534
1535         # Count the amount of `ch` at the start of this line ignoring spaces.
1536         fun count_chars_start(ch: Char): Int do
1537                 var count = 0
1538                 for c in value do
1539                         if c == ' ' then
1540                                 continue
1541                         end
1542                         if c == ch then
1543                                 count += 1
1544                         else
1545                                 break
1546                         end
1547                 end
1548                 return count
1549         end
1550
        # Last XML line if any.
        #
        # Set by `check_html` when it finds the line ending the XML markup.
        private var xml_end_line: nullable MDLine = null
1553
1554         # Does `value` contains valid XML markup?
1555         private fun check_html: Bool do
1556                 var tags = new Array[String]
1557                 var tmp = new FlatBuffer
1558                 var pos = leading
1559                 if pos + 1 < value.length and value[pos + 1] == '!' then
1560                         if read_xml_comment(self, pos) > 0 then return true
1561                 end
1562                 pos = value.read_xml(tmp, pos, false)
1563                 var tag: String
1564                 if pos > -1 then
1565                         tag = tmp.xml_tag
1566                         if not tag.is_html_block then
1567                                 return false
1568                         end
1569                         if tag == "hr" then
1570                                 xml_end_line = self
1571                                 return true
1572                         end
1573                         tags.add tag
1574                         var line: nullable MDLine = self
1575                         while line != null do
1576                                 while pos < line.value.length and line.value[pos] != '<' do
1577                                         pos += 1
1578                                 end
1579                                 if pos >= line.value.length then
1580                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1581                                                 tags.pop
1582                                                 if tags.is_empty then
1583                                                         xml_end_line = line
1584                                                         break
1585                                                 end
1586                                         end
1587                                         line = line.next
1588                                         pos = 0
1589                                 else
1590                                         tmp = new FlatBuffer
1591                                         var new_pos = line.value.read_xml(tmp, pos, false)
1592                                         if new_pos > 0 then
1593                                                 tag = tmp.xml_tag
1594                                                 if tag.is_html_block and not tag == "hr" then
1595                                                         if tmp[1] == '/' then
1596                                                                 if tags.last != tag then
1597                                                                         return false
1598                                                                 end
1599                                                                 tags.pop
1600                                                         else
1601                                                                 tags.add tag
1602                                                         end
1603                                                 end
1604                                                 if tags.is_empty then
1605                                                         xml_end_line = line
1606                                                         break
1607                                                 end
1608                                                 pos = new_pos
1609                                         else
1610                                                 pos += 1
1611                                         end
1612                                 end
1613                         end
1614                         return tags.is_empty
1615                 end
1616                 return false
1617         end
1618
1619         # Read a XML comment.
1620         # Used by `check_html`.
1621         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1622                 var line: nullable MDLine = first_line
1623                 if start + 3 < line.as(not null).value.length then
1624                         if line.as(not null).value[2] == '-' and line.as(not null).value[3] == '-' then
1625                                 var pos = start + 4
1626                                 while line != null do
1627                                         while pos < line.value.length and line.value[pos] != '-' do
1628                                                 pos += 1
1629                                         end
1630                                         if pos == line.value.length then
1631                                                 line = line.next
1632                                                 pos = 0
1633                                         else
1634                                                 if pos + 2 < line.value.length then
1635                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1636                                                                 first_line.xml_end_line = line
1637                                                                 return pos + 3
1638                                                         end
1639                                                 end
1640                                                 pos += 1
1641                                         end
1642                                 end
1643                         end
1644                 end
1645                 return -1
1646         end
1647
1648         # Extract the text of `self` without leading and trailing.
1649         fun text: String do return value.substring(leading, value.length - trailing)
1650 end
1651
# A markdown line.
#
# Each kind of line implements `process` to consume the line(s) it starts.
interface Line

        # Parse the line.
        # See `MarkdownProcessor::recurse`.
        #
        # Implementations work from `v.current_line` and update it once the
        # line is consumed.
        fun process(v: MarkdownProcessor) is abstract
end
1659
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the line: `v.current_line` is only moved to the next line.
        redef fun process(v) do
                var current = v.current_line.as(not null)
                v.current_line = current.next
        end
end
1668
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the following lines in a paragraph block (or in a `BlockNone`
        # for unspaced list content).
        #
        # The block stops at the first empty line, at the end of the current
        # block or at a line starting another construct.
        redef fun process(v) do
                var first = v.current_line.as(not null)
                var was_empty = first.prev_empty
                # go to block end
                var stop: nullable MDLine = first
                while stop != null and not stop.is_empty do
                        var kind = v.line_kind(stop)
                        # Constructs that always interrupt the block.
                        var interrupts = kind isa LineHeadline or kind isa LineHeadline1 or
                                kind isa LineHeadline2 or kind isa LineHR or
                                kind isa LineBlockquote or kind isa LineXML
                        # Lists interrupt inside a list or in extended mode.
                        if not interrupts and kind isa LineList then
                                interrupts = v.in_list or v.ext_mode
                        end
                        # Code and fences interrupt in extended mode only.
                        if not interrupts and v.ext_mode then
                                interrupts = kind isa LineCode or kind isa LineFence
                        end
                        if interrupts then break
                        stop = stop.next
                end
                # build block
                var parent = v.current_block.as(not null)
                var block: MDBlock
                var unwrapped: Bool
                if stop != null and not stop.is_empty then
                        # Interrupted by another construct: stop just before it.
                        block = parent.split(stop.prev.as(not null))
                        unwrapped = v.in_list and not was_empty
                else if stop != null then
                        # Stopped on an empty line: it goes with the block,
                        # which is then always a paragraph.
                        block = parent.split(stop)
                        unwrapped = false
                else
                        # No more line: take everything that is left.
                        block = parent.split(parent.last_line.as(not null))
                        unwrapped = v.in_list and not was_empty
                end
                if unwrapped then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1719
# A line of markdown code.
class LineCode
        super Line

        # Gather the consecutive code lines (and the empty lines between
        # them) in a `BlockCode`.
        redef fun process(v) do
                # lookup block end
                var stop = v.current_line
                while stop != null do
                        if not stop.is_empty and not v.line_kind(stop) isa LineCode then break
                        stop = stop.next
                end
                # split at block end line
                var parent = v.current_block.as(not null)
                var split_at: nullable MDLine
                if stop != null then
                        split_at = stop.prev
                else
                        split_at = parent.last_line
                end
                var code = parent.split(split_at.as(not null))
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = parent.first_line
        end
end
1743
# A line of raw XML.
class LineXML
        super Line

        # Put the lines from the current one up to its `xml_end_line` in a
        # `BlockXML`.
        redef fun process(v) do
                var start = v.current_line
                if start == null then return
                var parent = v.current_block.as(not null)
                # Whatever precedes the markup is split apart first.
                var before = start.prev
                if before != null then parent.split(before)
                var last = start.xml_end_line.as(not null)
                var xml = parent.split(last)
                xml.kind = new BlockXML(xml)
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1760
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the quote in a `BlockQuote`, remove the quote prefix and
        # parse the quoted lines recursively.
        redef fun process(v) do
                var parent = v.current_block.as(not null)
                # go to bquote end
                var stop = v.current_line
                while stop != null do
                        # The quote ends at an unindented line that follows an
                        # empty line and is not itself a blockquote line.
                        if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
                           not v.line_kind(stop) isa LineBlockquote then break
                        stop = stop.next
                end
                # build sub block
                var quote: MDBlock
                if stop == null then
                        quote = parent.split(parent.last_line.as(not null))
                else
                        quote = parent.split(stop.prev.as(not null))
                end
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                v.current_line = stop
                v.recurse(quote, false)
                v.current_line = parent.first_line
        end
end
1791
# A markdown ruler line.
class LineHR
        super Line

        # Put the current line alone in a `BlockRuler`.
        redef fun process(v) do
                var ruler_line = v.current_line
                if ruler_line == null then return
                var parent = v.current_block.as(not null)
                # Whatever precedes the ruler is split apart first.
                var before = ruler_line.prev
                if before != null then parent.split(before)
                var ruler = parent.split(ruler_line)
                ruler.kind = new BlockRuler(ruler)
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1807
# A markdown fence code line.
class LineFence
        super Line

        # Gather the lines from the opening fence to the closing one (or to
        # the end of the current block) in a `BlockFence`.
        #
        # The meta is read from the opening fence line, then the fence lines
        # are cleared.
        redef fun process(v) do
                var parent = v.current_block.as(not null)
                # go to fence end
                var stop = v.current_line.as(not null).next
                while stop != null and not v.line_kind(stop) isa LineFence do
                        stop = stop.next
                end
                # The closing fence belongs to the block: step over it.
                if stop != null then stop = stop.next
                # build fence block
                var fence: MDBlock
                if stop == null then
                        fence = parent.split(parent.last_line.as(not null))
                else
                        fence = parent.split(stop.prev.as(not null))
                end
                fence.remove_surrounding_empty_lines
                var opening = fence.first_line.as(not null)
                var meta = opening.value.meta_from_fence
                fence.kind = new BlockFence(fence, meta)
                opening.clear
                # The last line is cleared only if it really is a fence.
                var closing = fence.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = stop
        end
end
1842
# A markdown headline.
class LineHeadline
        super Line

        # Put the current line alone in a `BlockHeadline` and let the block
        # kind transform it.
        redef fun process(v) do
                var title = v.current_line
                if title == null then return
                var parent = v.current_block.as(not null)
                # Whatever precedes the headline is split apart first.
                var before = title.prev
                if before != null then parent.split(before)
                var headline = parent.split(title)
                var kind = new BlockHeadline(headline)
                headline.kind = kind
                kind.transform_headline(headline)
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1861
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Build a `BlockHeadline` of depth 1 from the current line.
        #
        # The line following the title is cleared.
        redef fun process(v) do
                var title = v.current_line
                if title == null then return
                var parent = v.current_block.as(not null)
                # Whatever precedes the headline is split apart first.
                var before = title.prev
                if before != null then parent.split(before)
                var below = title.next.as(not null)
                below.clear
                var headline = parent.split(title)
                var kind = new BlockHeadline(headline)
                kind.depth = 1
                kind.transform_headline(headline)
                headline.kind = kind
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1882
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Build a `BlockHeadline` of depth 2 from the current line.
        #
        # The line following the title is cleared.
        redef fun process(v) do
                var title = v.current_line
                if title == null then return
                var parent = v.current_block.as(not null)
                # Whatever precedes the headline is split apart first.
                var before = title.prev
                if before != null then parent.split(before)
                var below = title.next.as(not null)
                below.clear
                var headline = parent.split(title)
                var kind = new BlockHeadline(headline)
                kind.depth = 2
                kind.transform_headline(headline)
                headline.kind = kind
                parent.remove_leading_empty_lines
                v.current_line = parent.first_line
        end
end
1903
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
        super Line

        # Gather the whole list in a block of kind `block_kind`, then parse
        # each of its sub-blocks recursively.
        redef fun process(v) do
                # go to list end
                var stop = v.current_line
                while stop != null do
                        var kind_of_line = v.line_kind(stop)
                        # The list ends at an unindented line that follows an
                        # empty line and is not itself a list line.
                        var leaves_list = not stop.is_empty and stop.prev_empty and
                                stop.leading == 0 and not kind_of_line isa LineList
                        if leaves_list then break
                        stop = stop.next
                end
                # build list block
                var parent = v.current_block.as(not null)
                var list: MDBlock
                if stop == null then
                        list = parent.split(parent.last_line.as(not null))
                else
                        list = parent.split(stop.prev.as(not null))
                end
                var kind = block_kind(list)
                list.kind = kind
                # The boundary flags are reset before and after the trimming
                # since the first and last lines may change in between.
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.as(not null).prev_empty = false
                list.last_line.as(not null).next_empty = false
                kind.init_block(v)
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = stop
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1950
# An ordered list line.
class LineOList
        super LineList

        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var dot = line.value.index_of('.')
                return line.value.substring_from(dot + 2)
        end
end
1961
# An unordered list line.
class LineUList
        super LineList

        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading spaces.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1972
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Location of `self` in the original input.
        var location: nullable MDLocation

        # Position of `self` in the input, independent from lines.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output that token using `MarkdownEmitter::decorator`.
        #
        # By default `char` is given to `add_char` of the decorator.
        fun emit(v: MarkdownEmitter) do
                var decorator = v.decorator
                decorator.add_char(v, char)
        end
end
1989
# A token without a specific meaning.
#
# It only inherits `Token::emit`: its character is given to
# `v.decorator.add_char`.
class TokenNone
        super Token
end
1994
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        #
        # If no matching token is found, only `char` is output.
        redef fun emit(v) do
                # The emphasized content is rendered in a separate buffer.
                var content = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = closing
        end
end
2011
# An emphasis star token.
#
# Marker subclass of `TokenEm`: it adds no behaviour of its own.
class TokenEmStar
        super TokenEm
end
2016
# An emphasis underscore token.
#
# Marker subclass of `TokenEm`: it adds no behaviour of its own.
class TokenEmUnderscore
        super TokenEm
end
2021
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        #
        # The delimiter is two characters wide: the content starts at
        # `pos + 2` and the output resumes one character after the position
        # of the matching token. If no matching token is found, only `char`
        # is output.
        redef fun emit(v) do
                # The content is rendered in a separate buffer.
                var content = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = closing + 1
        end
end
2038
# A strong star token.
#
# Marker subclass of `TokenStrong`: it adds no behaviour of its own.
class TokenStrongStar
        super TokenStrong
end
2043
# A strong underscore token.
#
# Marker subclass of `TokenStrong`: it adds no behaviour of its own.
class TokenStrongUnderscore
        super TokenStrong
end
2048
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span of code.
        #
        # Spaces around the content are not part of the span and nothing is
        # emitted for a content made of spaces only. If no matching token is
        # found, only `char` is output.
        redef fun emit(v) do
                var text = v.current_text.as(not null)
                var first = pos + next_pos + 1
                var limit = v.processor.find_token(text, first, self)
                if limit <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = limit + next_pos
                # Trim the spaces on both sides of the content.
                while first < limit and text[first] == ' ' do first += 1
                if first >= limit then return
                while text[limit - 1] == ' ' do limit -= 1
                v.decorator.add_span_code(v, text, first, limit)
        end

        # Number of extra delimiter characters to skip after `pos`.
        private fun next_pos: Int is abstract
end
2072
# A span code token.
class TokenCodeSingle
        super TokenCode

        # No extra delimiter character: `TokenCode::emit` starts the content
        # right after `pos`.
        redef fun next_pos do return 0
end
2079
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One extra delimiter character: `TokenCode::emit` skips it on both
        # sides of the content.
        redef fun next_pos do return 1
end
2086
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or the image if the construct is valid,
        # only output `char` otherwise.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # The construct is read from `v.current_text` starting at `start`.
        # Three forms are handled after the `[name]` part:
        #
        # * `(link "title")`, where the link can be written `<link>`,
        # * `[id]`, resolved with `v.processor.link_refs` (`name` is used as
        #   id when `id` is empty),
        # * nothing, `name` is then resolved with `v.processor.link_refs`.
        #
        # On success `name`, `link`, `comment` and `is_abbrev` are updated and
        # the position used by `emit` to continue the output is returned.
        # Return -1 if the construct is not valid.
        #
        # `out` is not used.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                if md == null then return -1
                # Images have one more leading character than links.
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # Nothing follows the name: it must be a known reference.
                        var tid = name.as(not null).write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline form: `(link "title")`.
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                if pos > start and md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if pos < start then return -1
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference form: `[id]`, an empty id means `name`.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.as(not null).write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # The name is followed by something else: it must be a
                        # known reference, exactly as when nothing follows it.
                        var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                # Keep the abbreviation flag, otherwise an
                                # abbreviation is emitted as a link unless it
                                # ends the text.
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
2207
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the reference is one and has a title,
        # a link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                else
                        v.decorator.add_link(v, link.as(not null), name.as(not null), title)
                end
        end
end
2220
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with `link` as source, `name` as text and `comment`
        # as optional title.
        redef fun emit_hyper(v) do
                var source = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, source, text, comment)
        end
end
2229
# A HTML/XML token.
class TokenHTML
        super Token

        # Output the auto link or the inline markup found at the current
        # position, or the escaped `char` when nothing is recognized.
        redef fun emit(v) do
                var markup = new FlatBuffer
                var resume = check_html(v, markup, v.current_text.as(not null), v.current_pos)
                if resume <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add markup
                v.current_pos = resume
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # An auto link is emitted directly with the decorator and nothing is
        # written in `out`; inline markup is read in `out`.
        # Return the position used by `emit` to continue the output,
        # or -1 if nothing is recognized.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var target = new FlatBuffer
                var pos = md.read_until(target, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and target.is_link_prefix then
                        # Read the rest of the address up to the closing `>`.
                        pos = md.read_until(target, pos, '>')
                        if pos != -1 then
                                var url = target.write_to_string
                                v.decorator.add_link(v, url, url, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
2267
# An HTML entity token.
class TokenEntity
        super Token

        # Output the entity found at `pos` unchanged, or the escaped `char`
        # if it does not start a valid entity.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # The entity is read from `md` at `start` up to the next `;` and is
        # appended to `out`. Three forms are accepted:
        #
        # * hexadecimal: `#x` or `#X` followed by hexadecimal digits,
        # * decimal: `#` followed by decimal digits,
        # * named: letters and digits only.
        #
        # Return the position of the `;`, or -1 if the entity is not valid
        # (`out` may then contain a partial read).
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then
                        return -1
                end
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        # Reject what is outside the three ranges of
                                        # hexadecimal digits (each range test needs
                                        # `or`: no character is both before `a` and
                                        # after `f`).
                                        if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
                                                return -1
                                        end
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                # `read_until` stops before the `;`: put it back.
                out.add ';'
                return pos
        end
end
2321
# A markdown escape token.
#
# Skips the escaping character and emits the character that follows it.
class TokenEscape
        super Token

        redef fun emit(v) do
                # Step over the escape marker so the emitter resumes after the escaped char.
                var escaped_pos = v.current_pos + 1
                v.current_pos = escaped_pos
                var text = v.current_text.as(not null)
                v.addc text[escaped_pos]
        end
end
2331
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
        super Token

        redef fun emit(v) do
                # Render the struck content in its own buffer, starting after the
                # two-character opening marker.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var closing = v.emit_text_until(text, pos + 2, self)
                v.pop_buffer
                if closing <= 0 then
                        # No closing marker found: output the marker character literally.
                        v.addc char
                        return
                end
                v.decorator.add_strike(v, content)
                v.current_pos = closing + 1
        end
end
2350
redef class Text

        # Get the position of the next character that is neither a space nor a new line.
        #
        # Scanning starts at `start`.
        # Returns `-1` if the end of `self` is reached first.
        private fun skip_spaces(start: Int): Int do
                var pos = start
                while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
                        pos += 1
                end
                if pos < length then return pos
                return -1
        end

        # Read `self` from `start` until one of `nend` and append it to the `out` buffer.
        #
        # Backslash escapes are resolved with `escape`: a backslash followed by a
        # markdown special char is replaced by that char alone.
        #
        # Returns the position of the terminator found,
        # or `-1` if the end of `self` is reached first.
        private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if nend.has(c) then break
                                out.add c
                        end
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as raw text until one of `nend` and append it to the `out` buffer.
        # No escape is made.
        #
        # Returns the position of the terminator found,
        # or `-1` if the end of `self` is reached first.
        private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if nend.has(c) then break
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as XML until one of `to` and append it to the `out` buffer.
        #
        # Characters are copied unchanged.
        # Terminators found inside a single or double quoted string are ignored,
        # and so is a quote preceded by a backslash inside such a string.
        #
        # Returns the position of the terminator found,
        # or `-1` if the end of `self` is reached first.
        private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
                var pos = from
                var in_str = false
                var str_char: nullable Char = null
                while pos < length do
                        var c = self[pos]
                        if in_str then
                                if c == '\\' then
                                        # Copy the backslash and the char it escapes so that an
                                        # escaped quote does not close the string.
                                        out.add c
                                        pos += 1
                                        if pos < length then
                                                out.add self[pos]
                                                pos += 1
                                        end
                                        continue
                                end
                                # Only the quote that opened the string can close it.
                                if c == str_char then in_str = false
                        else if c == '"' or c == '\'' then
                                in_str = true
                                str_char = c
                        else if to.has(c) then
                                break
                        end
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read `self` as an XML tag opening at `start` and append it to the `out` buffer.
        #
        # When `safe_mode` is set, the tag name is checked first:
        #
        # * a name rejected by `is_valid_html_tag` makes the read fail
        # * a name flagged by `is_html_unsafe` is output with `<` and `>`
        #   replaced by `&lt;` and `&gt;`
        #
        # Returns the position of the closing `>`, or `-1` on failure.
        # For a `<!` opening, only `<!` is appended and `start + 1` is returned.
        private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
                if start + 1 >= length then return -1
                var marker = self[start + 1]
                if marker == '!' then
                        out.append "<!"
                        return start + 1
                end
                var is_close_tag = marker == '/'
                var is_valid = true
                # Skip the `<` and the `/` of a closing tag.
                var pos = start + 1
                if is_close_tag then pos += 1
                if safe_mode then
                        var tmp = new FlatBuffer
                        pos = read_xml_until(tmp, pos, ' ', '/', '>')
                        if pos == -1 then return -1
                        var tag = tmp.write_to_string.trim.to_lower
                        if not tag.is_valid_html_tag then
                                out.append "&lt;"
                                pos = -1
                        else if tag.is_html_unsafe then
                                is_valid = false
                                out.append "&lt;"
                                if is_close_tag then out.add '/'
                                out.append tmp
                        else
                                out.append "<"
                                if is_close_tag then out.add '/'
                                out.append tmp
                        end
                else
                        out.add '<'
                        if is_close_tag then out.add '/'
                        pos = read_xml_until(out, pos, ' ', '/', '>')
                end
                if pos == -1 then return -1
                # Copy what follows the tag name up to `/` or `>`.
                pos = read_xml_until(out, pos, '/', '>')
                if pos == -1 then return -1
                if self[pos] == '/' then
                        out.append " /"
                        pos = self.read_xml_until(out, pos + 1, '>')
                        if pos == -1 then return -1
                end
                if self[pos] == '>' then
                        if is_valid then
                                out.add '>'
                        else
                                out.append "&gt;"
                        end
                        return pos
                end
                return -1
        end

        # Read a markdown link address and append it to the `out` buffer.
        #
        # Reading stops at the first space found outside nested parentheses
        # or at the `)` that balances the parenthesis assumed open before `start`.
        # Backslash escapes are resolved with `escape`.
        #
        # Returns the position where reading stopped,
        # or `-1` if the end of `self` is reached first.
        private fun read_md_link(out: FlatBuffer, start: Int): Int do
                var pos = start
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if c == '(' then
                                        counter += 1
                                else if c == ' ' then
                                        if counter == 1 then break
                                else if c == ')' then
                                        counter -= 1
                                        if counter == 0 then break
                                end
                                out.add c
                        end
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Read a markdown link text and append it to the `out` buffer.
        #
        # Reading stops at the `]` that balances the bracket assumed open
        # before `start`; nested brackets are copied to `out`.
        #
        # Returns the position of that `]`,
        # or `-1` if the end of `self` is reached first.
        private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
                var pos = start
                var counter = 1
                while pos < length do
                        var c = self[pos]
                        if c == '[' then
                                counter += 1
                        else if c == ']' then
                                counter -= 1
                                if counter == 0 then break
                        end
                        out.add c
                        pos += 1
                end
                if pos == length then return -1
                return pos
        end

        # Extract the XML tag name from a XML tag.
        #
        # The first char (and a `/` following it) is skipped, then letters and
        # digits are collected. The name is returned in lower case.
        #
        # NOTE(review): the last char of `self` is never read; this presumably
        # relies on `self` ending with `>`, to be confirmed against callers.
        private fun xml_tag: String do
                var tpl = new FlatBuffer
                var pos = 1
                if pos < length and self[1] == '/' then pos += 1
                while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
                        tpl.add self[pos]
                        pos += 1
                end
                return tpl.write_to_string.to_lower
        end

        # Is `self` a non-empty name made only of alphabetic chars?
        #
        # NOTE(review): names containing digits such as `h1` are rejected;
        # confirm this is intended.
        private fun is_valid_html_tag: Bool do
                if is_empty then return false
                for c in self do
                        if not c.is_alpha then return false
                end
                return true
        end

        # Resolve the backslash escape found at `pos`, where `c` is the char after the backslash.
        #
        # If `c` is a markdown special char, only `c` is appended to `out` and
        # `pos + 1` is returned so the caller steps over both chars.
        # Otherwise the backslash itself is appended and `pos` is returned.
        private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
                if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
                   c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
                   c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
                   c == '`' or c == '~' or c == '^' then
                        out.add c
                        return pos + 1
                end
                out.add '\\'
                return pos
        end

        # Extract string found at end of fence opening.
        #
        # Leading spaces, backticks and tildes are skipped and the remaining
        # text is returned trimmed.
        # Returns `null` if `self` contains nothing else.
        private fun meta_from_fence: nullable Text do
                for i in [0..chars.length[ do
                        var c = chars[i]
                        if c != ' ' and c != '`' and c != '~' then
                                return substring_from(i).trim
                        end
                end
                return null
        end

        # Is `self` an unsafe HTML element?
        private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

        # Is `self` a HTML block element?
        private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

        # Is `self` a link prefix?
        private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

        # Names of the HTML elements considered unsafe.
        private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

        # Names of the HTML block elements.
        private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

        # Prefixes recognized as the beginning of a link.
        private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2620
redef class String

        # Render `self`, read as markdown, with a new `MarkdownProcessor`.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end