lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
             # Emitter used by `process` to render the parsed blocks.
             # Left unset at declaration (`noinit`) and assigned by `init`.
  33         var emitter: MarkdownEmitter is noinit
  34
             # Bind a new `MarkdownEmitter` to this processor.
  35         init do self.emitter = new MarkdownEmitter(self)
  36
  37         # Render the markdown `input` string and return the emitted output.
  38         fun process(input: String): Streamable do
  39                 # Forget everything left over from a previous run.
  40                 current_block = null
  41                 current_line = null
  42                 last_link_ref = null
  43                 link_refs.clear
  44                 # Build the root block, then split it recursively.
  45                 var root = read_lines(input)
  46                 root.remove_surrounding_empty_lines
  47                 recurse(root, false)
  48                 # Render the root block through the emitter.
  49                 return emitter.emit(root.kind)
  50         end
  51
  52         # Cut `input` into `MDLine`s, expanding tabs, and gather them in a new root `MDBlock`.
  53         private fun read_lines(input: String): MDBlock do
  54                 var root = new MDBlock
  55                 var buf = new FlatBuffer
  56                 var len = input.length
  57                 var i = 0
  58                 while i < len do
  59                         buf.clear
  60                         var col = 0
  61                         var eol = false
  62                         while not eol and i < len do
  63                                 var c = input[i]
  64                                 i += 1
  65                                 if c == '\n' then
  66                                         eol = true
  67                                 else if c == '\t' then
  68                                         # A tab pads with spaces up to the next multiple of 4 columns.
  69                                         var stop = col + (4 - (col.bin_and(3)))
  70                                         while col < stop do
  71                                                 buf.add ' '
  72                                                 col += 1
  73                                         end
  74                                 else
  75                                         buf.add c
  76                                         col += 1
  77                                 end
  78                         end
  79
  80                         var line = new MDLine(buf.write_to_string)
  81                         # Lines consumed by `check_link_ref` are recorded there, not kept as content.
  82                         var consumed = check_link_ref(line)
  83                         if not consumed then root.add_line line
  84                 end
  85                 return root
  86         end
  87
  88         # Check if `line` is a block link definition (`[id]: link "title"`).
  89         # Return `true` if `line` was consumed: it either holds a valid link ref, saved into
  90         # `link_refs`, or it is the title completing the title-less ref of the previous line.
  91         private fun check_link_ref(line: MDLine): Bool do
  92                 var md = line.value
  93                 var is_link_ref = false
  94                 var id = new FlatBuffer
  95                 var link = new FlatBuffer
  96                 var comment = new FlatBuffer
  97                 var pos = -1
  98                 if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
  99                         pos = md.read_until(id, line.leading + 1, ']')
 100                         if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
 101                                 pos = md.skip_spaces(pos + 2)
 102                                 # When only spaces follow the colon there is no valid position left:
 103                                 # do not index `md` with it, just leave `link` empty.
 104                                 var in_range = pos >= 0 and pos < md.length
 105                                 if in_range and md[pos] == '<' then
 106                                         pos = md.read_until(link, pos + 1, '>')
 107                                         pos += 1
 108                                 else if in_range then
 109                                         pos = md.read_until(link, pos, ' ', '\n')
 110                                 end
 111                                 if not link.is_empty then
 112                                         pos = md.skip_spaces(pos)
 113                                         if pos > 0 and pos < md.length then
 114                                                 var c = md[pos]
 115                                                 if c == '\"' or c == '\'' or c == '(' then
 116                                                         pos += 1
 117                                                         if c == '(' then
 118                                                                 pos = md.read_until(comment, pos, ')')
 119                                                         else
 120                                                                 pos = md.read_until(comment, pos, c)
 121                                                         end
 122                                                         if pos > 0 then is_link_ref = true
 123                                                 end
 124                                         else
 125                                                 is_link_ref = true
 126                                         end
 127                                 end
 128                         end
 129                 end
 130                 if is_link_ref and not id.is_empty and not link.is_empty then
 131                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 132                         add_link_ref(id.write_to_string, lr)
 133                         # Only a definition without title may be completed by the next line.
 134                         last_link_ref = null
 135                         if comment.is_empty then last_link_ref = lr
 136                         return true
 137                 end
 138                 # Not a definition: maybe the title of the definition found on the previous line.
 139                 # A title is only accepted on the line directly following its definition, so the
 140                 # pending ref is dropped here whatever this line turns out to be.
 141                 var pending = last_link_ref
 142                 last_link_ref = null
 143                 if pending == null then return false
 144                 if line.is_empty then return false
 145                 var opener = md[line.leading]
 146                 if opener != '\"' and opener != '\'' and opener != '(' then return false
 147                 var closer = opener
 148                 if opener == '(' then closer = ')'
 149                 comment.clear
 150                 md.read_until(comment, line.leading + 1, closer)
 151                 if comment.is_empty then return false
 152                 pending.title = comment.write_to_string
 153                 return true
 154         end
 155
 156         # Known link refs, indexed by id (`add_link_ref` lower-cases the key).
 157         # This map will be needed during output to expand links.
 158         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 159
 160         # Last link ref defined without a title (kept for multiline definitions).
 161         #
 162         # Markdown allows link refs to be defined over two lines:
 163         #
 164         #       [id]: http://example.com/longish/path/to/resource/here
 165         #               "Optional Title Here"
 166         #
 167         private var last_link_ref: nullable LinkRef = null
 168
 169         # Store `ref` in `link_refs` under the lower-cased `key`.
 170         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 171
 172         # Recursively split the `root` block.
 173         #
 174         # The block is split according to the kind of lines it contains.
 175         # Some blocks, like lists, can be split again recursively.
 176         # `in_list` is the list mode in effect while `root` is processed; the previous mode
 177         # and `current_block` are restored before returning (`current_line` is not).
 178         fun recurse(root: MDBlock, in_list: Bool) do
 179                 # Skip leading empty lines first: the early return must not leak `in_list`.
 180                 var line = root.first_line
 181                 while line != null and line.is_empty do
 182                         line = line.next
 183                         if line == null then return
 184                 end
 185
 186                 var old_mode = self.in_list
 187                 var old_root = self.current_block
 188                 self.in_list = in_list
 189                 current_line = line
 190                 current_block = root
 191                 while current_line != null do
 192                         current_line.kind(self).process(self)
 193                 end
 194                 self.in_list = old_mode
 195                 self.current_block = old_root
 196         end
 197
 198         # Currently processed line.
 199         # Used when visiting blocks with `recurse`, which loops until it becomes null.
 200         var current_line: nullable MDLine = null is writable
 201
 202         # Currently processed block.
 203         # Set to the visited root by `recurse`.
 204         var current_block: nullable MDBlock = null is writable
 205
 206         # Is the current recursion in list mode?
 207         # Set from the `in_list` argument of `recurse`.
 208         private var in_list = false
 209 end
 210
 211 # Emit output corresponding to blocks content.
 212 #
 213 # Blocks are created by a previous pass in `MarkdownProcessor`.
 214 # The emitter uses a `Decorator` to select the output format.
 215 class MarkdownEmitter
 216
 217         # Processor this emitter was created for; it holds the link refs.
 218         var processor: MarkdownProcessor
 219
 220         # Decorator used for output.
 221         # Default is a new `HTMLDecorator`; it can be replaced (`writable`).
 222         var decorator: Decorator = new HTMLDecorator is writable
 223
 224         # Create a new `MarkdownEmitter` for `processor` using the default `HTMLDecorator`.
 225         init(processor: MarkdownProcessor) do
 226                 self.processor = processor
 227         end
 228
 229         # Create a new `MarkdownEmitter` for `processor` using a custom `decorator`.
 230         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 231                 init processor
 232                 self.decorator = decorator
 233         end
 234
 235         # Render `block` into a fresh buffer pushed for the occasion and return that buffer.
 236         fun emit(block: Block): Text do
 237                 var out = push_buffer
 238                 block.emit(self)
 239                 pop_buffer
 240                 return out
 241         end
 242
 243         # Output the content of `block` (delegates to `Block::emit_in`).
 244         fun emit_in(block: Block) do block.emit_in(self)
 245
 246         # Transform and emit the whole markdown `text`, from index 0 to its end.
 247         fun emit_text(text: Text) do
 248                 emit_text_until(text, 0, null)
 249         end
 250
 251         # Transform and emit markdown `text` starting at index `start`, stopping at the first token of the same type as
 252         # `token` (or a strong token when `token` is the matching emphasis). Goes to the end if `token` is null or a `TokenNone`.
 253         # Return the stop position, or -1 at end of text. NOTE(review): only the -1 path restores `current_text`/`current_pos` - confirm callers expect this.
 254         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 255                 var old_text = current_text
 256                 var old_pos = current_pos
 257                 current_text = text
 258                 current_pos = start
 259                 while current_pos < text.length do
 260                         var mt = text.token_at(current_pos)
 261                         if (token != null and not token isa TokenNone) and
 262                         (mt.is_same_type(token) or
 263                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 264                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 265                                 return current_pos
 266                         end
 267                         mt.emit(self)
 268                         current_pos += 1
 269                 end
 270                 current_text = old_text
 271                 current_pos = old_pos
 272                 return -1
 273         end
 274
 275         # Currently processed position in `current_text`.
 276         # Used when visiting inline productions with `emit_text_until`.
 277         private var current_pos: Int = -1
 278
 279         # Currently processed text.
 280         # Used when visiting inline productions with `emit_text_until`.
 281         private var current_text: nullable Text = null
 282
 283         # Stack of output buffers; the last one is the current output target (see `current_buffer`).
 284         private var buffer_stack = new List[FlatBuffer]
 285
 286         # Push a new empty buffer on the stack and return it; it becomes the current buffer.
 287         private fun push_buffer: FlatBuffer do
 288                 var buffer = new FlatBuffer
 289                 buffer_stack.add buffer
 290                 return buffer
 291         end
 292
 293         # Pop the last buffer (the popped buffer is not returned).
 294         private fun pop_buffer do buffer_stack.pop
 295
 296         # Current output buffer: the last pushed one. The stack must not be empty.
 297         private fun current_buffer: FlatBuffer do
 298                 assert not buffer_stack.is_empty
 299                 return buffer_stack.last
 300         end
 301
 302         # Append `e` to the current buffer (a non-`Text` `e` is first rendered with `write_to_string`).
 303         fun add(e: Streamable) do
 304                 if e isa Text then
 305                         current_buffer.append e
 306                 else
 307                         current_buffer.append e.write_to_string
 308                 end
 309         end
 310
 311         # Append the single character `c` to the current buffer.
 312         fun addc(c: Char) do current_buffer.add c
 313
 314         # Append a "\n" line break to the current buffer.
 315         fun addn do current_buffer.add '\n'
 316 end
 317
 318 # A Link Reference.
 319 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
 320 #
 321 # Example:
 322 #
 323 #    [1]: http://example.com/ "Optional title"
 324 class LinkRef
 325
 326         # Link href
 327         var link: String
 328
 329         # Optional link title
 330         var title: nullable String = null
 331
 332         # Is the link an abbreviation?
 333         var is_abbrev = false
 334
             # Create a link ref to `link` with the given `title` (may be null).
 335         init with_title(link: String, title: nullable String) do
 336                 self.link = link
 337                 self.title = title
 338         end
 339 end
 340
 341 # A `Decorator` is used to emit markdown into a specific format.
 342 # Default decorator used is `HTMLDecorator`.
 343 interface Decorator
 344
 345         # Render a ruler block.
 346         fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
 347
 348         # Render a headline block with corresponding level.
 349         fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
 350
 351         # Render a paragraph block.
 352         fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
 353
 354         # Render a code or fence block.
 355         fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
 356
 357         # Render a blockquote.
 358         fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
 359
 360         # Render an unordered list.
 361         fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
 362
 363         # Render an ordered list.
 364         fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
 365
 366         # Render a list item.
 367         fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
 368
 369         # Render an emphasized `text`.
 370         fun add_em(v: MarkdownEmitter, text: Text) is abstract
 371
 372         # Render a strong `text`.
 373         fun add_strong(v: MarkdownEmitter, text: Text) is abstract
 374
 375         # Render a superscript `text`.
 376         fun add_super(v: MarkdownEmitter, text: Text) is abstract
 377
 378         # Render a link to `link` labelled `name`, with an optional title `comment`.
 379         fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 380
 381         # Render an image located at `link` described by `name`, with an optional title `comment`.
 382         fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 383
 384         # Render the abbreviation `name` explained by `comment`.
 385         fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
 386
 387         # Render a code span read from `buffer` between the indexes `from` and `to`.
 388         fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 389
 390         # Render the text `value` and escape it.
 391         fun append_value(v: MarkdownEmitter, value: Text) is abstract
 392
 393         # Render code text read from `buffer` between `from` and `to` and escape it.
 394         fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 395
 396         # Render the character `char`, escaped if the output format requires it.
 397         fun escape_char(v: MarkdownEmitter, char: Char) is abstract
 398
 399         # Render a line break
 400         fun add_line_break(v: MarkdownEmitter) is abstract
 401 end
 402
 403 # `Decorator` that outputs HTML.
 404 class HTMLDecorator
 405         super Decorator
 406
 407         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 408
             # The heading level comes from `block.depth`.
 409         redef fun add_headline(v, block) do
 410                 v.add "<h{block.depth}>"
 411                 v.emit_in block
 412                 v.add "</h{block.depth}>\n"
 413         end
 414
 415         redef fun add_paragraph(v, block) do
 416                 v.add "<p>"
 417                 v.emit_in block
 418                 v.add "</p>\n"
 419         end
 420
 421         redef fun add_code(v, block) do
 422                 v.add "<pre><code>"
 423                 v.emit_in block
 424                 v.add "</code></pre>\n"
 425         end
 426
 427         redef fun add_blockquote(v, block) do
 428                 v.add "<blockquote>\n"
 429                 v.emit_in block
 430                 v.add "</blockquote>\n"
 431         end
 432
 433         redef fun add_unorderedlist(v, block) do
 434                 v.add "<ul>\n"
 435                 v.emit_in block
 436                 v.add "</ul>\n"
 437         end
 438
 439         redef fun add_orderedlist(v, block) do
 440                 v.add "<ol>\n"
 441                 v.emit_in block
 442                 v.add "</ol>\n"
 443         end
 444
 445         redef fun add_listitem(v, block) do
 446                 v.add "<li>"
 447                 v.emit_in block
 448                 v.add "</li>\n"
 449         end
 450
             # `text` is added as is: it is not escaped here.
 451         redef fun add_em(v, text) do
 452                 v.add "<em>"
 453                 v.add text
 454                 v.add "</em>"
 455         end
 456
 457         redef fun add_strong(v, text) do
 458                 v.add "<strong>"
 459                 v.add text
 460                 v.add "</strong>"
 461         end
 462
 463         redef fun add_super(v, text) do
 464                 v.add "<sup>"
 465                 v.add text
 466                 v.add "</sup>"
 467         end
 468
             # `link`, `name` and `comment` are all escaped with `append_value`;
             # the `title` attribute is only written for a non-empty `comment`.
 469         redef fun add_image(v, link, name, comment) do
 470                 v.add "<img src=\""
 471                 append_value(v, link)
 472                 v.add "\" alt=\""
 473                 append_value(v, name)
 474                 v.add "\""
 475                 if comment != null and not comment.is_empty then
 476                         v.add " title=\""
 477                         append_value(v, comment)
 478                         v.add "\""
 479                 end
 480                 v.add "/>"
 481         end
 482
             # `link` and `comment` are escaped with `append_value`, while `name` goes
             # through `v.emit_text` so that inline markdown in the link text is rendered.
 483         redef fun add_link(v, link, name, comment) do
 484                 v.add "<a href=\""
 485                 append_value(v, link)
 486                 v.add "\""
 487                 if comment != null and not comment.is_empty then
 488                         v.add " title=\""
 489                         append_value(v, comment)
 490                         v.add "\""
 491                 end
 492                 v.add ">"
 493                 v.emit_text(name)
 494                 v.add "</a>"
 495         end
 496
             # `comment` is escaped into the `title` attribute; `name` goes through `v.emit_text`.
 497         redef fun add_abbr(v, name, comment) do
 498                 v.add "<abbr title=\""
 499                 append_value(v, comment)
 500                 v.add "\">"
 501                 v.emit_text(name)
 502                 v.add "</abbr>"
 503         end
 504
 505         redef fun add_span_code(v, text, from, to) do
 506                 v.add "<code>"
 507                 append_code(v, text, from, to)
 508                 v.add "</code>"
 509         end
 510
 511         redef fun add_line_break(v) do
 512                 v.add "<br/>"
 513         end
 514
             # Escape `text` character by character with `escape_char`.
 515         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 516
             # Replace `&`, `<`, `>`, `"` and `'` by their entity; any other character is added as is.
 517         redef fun escape_char(v, c) do
 518                 if c == '&' then
 519                         v.add "&amp;"
 520                 else if c == '<' then
 521                         v.add "&lt;"
 522                 else if c == '>' then
 523                         v.add "&gt;"
 524                 else if c == '"' then
 525                         v.add "&quot;"
 526                 else if c == '\'' then
 527                         v.add "&apos;"
 528                 else
 529                         v.addc c
 530                 end
 531         end
 532
             # Emit the characters of `buffer` at indexes `[from..to[` (`to` excluded).
             # Only `&`, `<` and `>` are escaped here: quotes are kept as is, unlike `escape_char`.
 533         redef fun append_code(v, buffer, from, to) do
 534                 for i in [from..to[ do
 535                         var c = buffer[i]
 536                         if c == '&' then
 537                                 v.add "&amp;"
 538                         else if c == '<' then
 539                                 v.add "&lt;"
 540                         else if c == '>' then
 541                                 v.add "&gt;"
 542                         else
 543                                 v.addc c
 544                         end
 545                 end
 546         end
 547 end
 548
 549 # A block of markdown lines.
 550 # An `MDBlock` can contain lines and/or sub-blocks.
 551 class MDBlock
 552         # Kind of block (a `BlockNone` until another kind is assigned).
 553         # See `Block`.
 554         var kind: Block = new BlockNone(self) is writable
 555
 556         # First line if any.
 557         var first_line: nullable MDLine = null is writable
 558
 559         # Last line if any.
 560         var last_line: nullable MDLine = null is writable
 561
 562         # First sub-block if any.
 563         var first_block: nullable MDBlock = null is writable
 564
 565         # Last sub-block if any.
 566         var last_block: nullable MDBlock = null is writable
 567
 568         # Previous sibling block if any.
 569         var prev: nullable MDBlock = null is writable
 570
 571         # Next sibling block if any.
 572         var next: nullable MDBlock = null is writable
 573
 574         # Is there at least one sub-block?
 575         fun has_blocks: Bool do return first_block != null
 576
 577         # Number of sub-blocks, counted by walking the `next` chain from `first_block`.
 578         fun count_blocks: Int do
 579                 var n = 0
 580                 var cur = first_block
 581                 while cur != null do
 582                         cur = cur.next
 583                         n += 1
 584                 end
 585                 return n
 586         end
 587
 588         # Is there at least one line?
 589         fun has_lines: Bool do return first_line != null
 590
 591         # Number of lines, counted by walking the `next` chain from `first_line`.
 592         fun count_lines: Int do
 593                 var n = 0
 594                 var cur = first_line
 595                 while cur != null do
 596                         cur = cur.next
 597                         n += 1
 598                 end
 599                 return n
 600         end
 601
 602         # Split `self`: move the lines from `first_line` to `line` (included) into a new sub-block, append it to the sub-blocks and return it.
 603         fun split(line: MDLine): MDBlock do
 604                 var block = new MDBlock
 605                 block.first_line = first_line
 606                 block.last_line = line
 607                 first_line = line.next
 608                 line.next = null
 609                 if first_line == null then
 610                         last_line = null
 611                 else
 612                         first_line.prev = null
 613                 end
 614                 if first_block == null then
 615                         first_block = block
 616                 else
 617                         block.prev = last_block # keep the sub-block chain linked both ways
 618                         last_block.next = block
 619                 end
 620                 last_block = block
 621                 return block
 622         end
 623
 624         # Append `line` at the end of this block, updating the `next_empty`/`prev_empty` flags of the two neighbours.
 625         fun add_line(line: MDLine) do
 626                 if last_line == null then
 627                         first_line = line
 628                         last_line = line
 629                 else
 630                         last_line.next_empty = line.is_empty
 631                         line.prev_empty = last_line.is_empty
 632                         line.prev = last_line
 633                         last_line.next = line
 634                         last_line = line
 635                 end
 636         end
 637
 638         # Unlink `line` from this block and clear its own `prev`/`next`. NOTE(review): the neighbours' `prev_empty`/`next_empty` flags are not refreshed here.
 639         fun remove_line(line: MDLine) do
 640                 if line.prev == null then
 641                         first_line = line.next
 642                 else
 643                         line.prev.next = line.next
 644                 end
 645                 if line.next == null then
 646                         last_line = line.prev
 647                 else
 648                         line.next.prev = line.prev
 649                 end
 650                 line.prev = null
 651                 line.next = null
 652         end
 653
 654         # Drop the empty lines at the start of the block; return `true` if at least one was dropped.
 655         fun remove_leading_empty_lines: Bool do
 656                 var head = first_line
 657                 var removed = false
 658                 while head != null and head.is_empty do
 659                         removed = true
 660                         remove_line head
 661                         head = first_line
 662                 end
 663                 return removed
 664         end
 665
 666         # Drop the empty lines at the end of the block; return `true` if at least one was dropped.
 667         fun remove_trailing_empty_lines: Bool do
 668                 var tail = last_line
 669                 var removed = false
 670                 while tail != null and tail.is_empty do
 671                         removed = true
 672                         remove_line tail
 673                         tail = last_line
 674                 end
 675                 return removed
 676         end
 677
 678         # Drop the empty lines at both ends of the block; return `true` if at least one was dropped.
 679         fun remove_surrounding_empty_lines: Bool do
 680                 # Both ends are always cleaned, whatever the first call returns.
 681                 var lead = remove_leading_empty_lines
 682                 var trail = remove_trailing_empty_lines
 683                 return lead or trail
 684         end
 685
 686         # Remove list markers (for `LineList` lines) or up to 4 leading spaces (for other non-empty lines), then recompute `leading`.
 687         # Used to clean nested lists. Empty lines are left untouched.
 688         fun remove_list_indent(v: MarkdownProcessor) do
 689                 var line = first_line
 690                 while line != null do
 691                         if not line.is_empty then
 692                                 var kind = line.kind(v)
 693                                 if kind isa LineList then
 694                                         line.value = kind.extract_value(line)
 695                                 else
 696                                         line.value = line.value.substring_from(line.leading.min(4))
 697                                 end
 698                                 line.leading = line.process_leading
 699                         end
 700                         line = line.next
 701                 end
 702         end
 703
 704         # Concatenate the `text` of every line of this block.
 705         # Each line, empty or not, is followed by a "\n";
 706         # an empty line contributes nothing but that "\n".
 707         fun text: String do
 708                 var buf = new FlatBuffer
 709                 var cur = first_line
 710                 while cur != null do
 711                         if not cur.is_empty then buf.append cur.text
 712                         buf.add '\n'
 713                         cur = cur.next
 714                 end
 715                 return buf.write_to_string
 716         end
 717 end
 718
 719 # Representation of a markdown block in the AST.
 720 # Each `Block` is linked to a `MDBlock` that contains markdown code.
 721 abstract class Block
 722
 723         # The markdown block `self` is related to.
 724         var block: MDBlock
 725
 726         # Output `self`. This default only emits the content with `v.emit_in`.
 727         fun emit(v: MarkdownEmitter) do v.emit_in(self)
 728
 729         # Emit the contents of `self`: its lines if any remain once surrounding empty lines are removed, else its sub-blocks.
 730         fun emit_in(v: MarkdownEmitter) do
 731                 block.remove_surrounding_empty_lines
 732                 if block.has_lines then
 733                         emit_lines(v)
 734                 else
 735                         emit_blocks(v)
 736                 end
 737         end
 738
 739         # Emit lines contained in `block`: gather their trimmed text (plus a line break after 2+ trailing spaces) in a scratch buffer, then `emit_text` it.
 740         fun emit_lines(v: MarkdownEmitter) do
 741                 var tpl = v.push_buffer
 742                 var line = block.first_line
 743                 while line != null do
 744                         if not line.is_empty then
 745                                 # `substring` takes a count, not an end index: remove both margins from the length.
 746                                 var count = line.value.length - line.leading - line.trailing
 747                                 v.add line.value.substring(line.leading, count)
 748                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
 749                         end
 750                         if line.next != null then v.addn
 751                         line = line.next
 752                 end
 753                 v.pop_buffer
 754                 v.emit_text(tpl)
 755         end
 756
 757         # Emit every sub-block of `block`, in order, through its own `kind`.
 758         fun emit_blocks(v: MarkdownEmitter) do
 759                 var sub = self.block.first_block
 760                 while sub != null do
 761                         sub.kind.emit(v)
 762                         sub = sub.next
 763                 end
 764         end
 765 end
 766
 767 # A block without any markdown specificities.
 768 #
 769 # It redefines nothing and so uses the default `Block` implementation;
 770 # this class is only used for typing purposes.
 771 class BlockNone
 772         super Block
 773 end
 774
 775 # A markdown blockquote, rendered with `Decorator::add_blockquote`.
 776 class BlockQuote
 777         super Block
 778
 779         redef fun emit(v) do v.decorator.add_blockquote(v, self)
 780
 781         # Strip the `>` marker, and one space directly following it, from the lines of `block`.
 782         # Empty lines and lines not starting with `>` are left untouched.
 783         private fun remove_block_quote_prefix(block: MDBlock) do
 784                 var cur = block.first_line
 785                 while cur != null do
 786                         if not cur.is_empty and cur.value[cur.leading] == '>' then
 787                                 # `cut` is the index of the first character that is kept.
 788                                 var text = cur.value
 789                                 var cut = cur.leading + 1
 790                                 if cut < text.length and text[cut] == ' ' then
 791                                         cut += 1
 792                                 end
 793                                 cur.value = text.substring_from(cut)
 794                                 cur.leading = cur.process_leading
 795                         end
 796                         cur = cur.next
 797                 end
 798         end
 799 end
 800
 801 # A markdown code block.
 802 class BlockCode
 803         super Block
 804
 805         redef fun emit(v) do v.decorator.add_code(v, self)
 806
 807         redef fun emit_lines(v) do
 808                 var line = block.first_line
 809                 while line != null do
 810                         if not line.is_empty then
 811                                 v.decorator.append_code(v, line.value, 4, line.value.length)
 812                         end
 813                         v.addn
 814                         line = line.next
 815                 end
 816         end
 817 end
 818
 819 # A markdown code-fence block.
 820 #
 821 # Actually use the same implementation than `BlockCode`,
 822 # this class is only used for typing purposes.
 823 class BlockFence
 824         super BlockCode
 825 end
 826
 827 # A markdown headline.
 828 class BlockHeadline
 829         super Block
 830
 831         redef fun emit(v) do v.decorator.add_headline(v, self)
 832
 833         # Depth of the headline used to determine the headline level.
 834         var depth = 0
 835
 836         # Remove healine marks from lines contained in `self`.
 837         private fun transform_headline(block: MDBlock) do
 838                 if depth > 0 then return
 839                 var level = 0
 840                 var line = block.first_line
 841                 if line.is_empty then return
 842                 var start = line.leading
 843                 while start < line.value.length and line.value[start] == '#' do
 844                         level += 1
 845                         start += 1
 846                 end
 847                 while start < line.value.length and line.value[start] == ' ' do
 848                         start += 1
 849                 end
 850                 if start >= line.value.length then
 851                         line.is_empty = true
 852                 else
 853                         var nend = line.value.length - line.trailing - 1
 854                         while line.value[nend] == '#' do nend -= 1
 855                         while line.value[nend] == ' ' do nend -= 1
 856                         line.value = line.value.substring(start, nend - start + 1)
 857                         line.leading = 0
 858                         line.trailing = 0
 859                 end
 860                 depth = level.min(6)
 861         end
 862 end
 863
 864 # A markdown list item block.
 865 class BlockListItem
 866         super Block
 867
 868         redef fun emit(v) do v.decorator.add_listitem(v, self)
 869 end
 870
 871 # A markdown list block.
 872 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
 873 abstract class BlockList
 874         super Block
 875
 876         # Split list block into list items sub-blocks.
 877         private fun init_block(v: MarkdownProcessor) do
 878                 var line = block.first_line
 879                 line = line.next
 880                 while line != null do
 881                         var t = line.kind(v)
 882                         if t isa LineList or
 883                            (not line.is_empty and (line.prev_empty and line.leading == 0 and
 884                            not (t isa LineList))) then
 885                                    var sblock = block.split(line.prev.as(not null))
 886                                    sblock.kind = new BlockListItem(sblock)
 887                         end
 888                         line = line.next
 889                 end
 890                 var sblock = block.split(block.last_line.as(not null))
 891                 sblock.kind = new BlockListItem(sblock)
 892         end
 893
 894         # Expand list items as paragraphs if needed.
 895         private fun expand_paragraphs(block: MDBlock) do
 896                 var outer = block.first_block
 897                 var inner: nullable MDBlock
 898                 var has_paragraph = false
 899                 while outer != null and not has_paragraph do
 900                         if outer.kind isa BlockListItem then
 901                                 inner = outer.first_block
 902                                 while inner != null and not has_paragraph do
 903                                         if inner.kind isa BlockParagraph then
 904                                                 has_paragraph = true
 905                                         end
 906                                         inner = inner.next
 907                                 end
 908                         end
 909                         outer = outer.next
 910                 end
 911                 if has_paragraph then
 912                         outer = block.first_block
 913                         while outer != null do
 914                                 if outer.kind isa BlockListItem then
 915                                         inner = outer.first_block
 916                                         while inner != null do
 917                                                 if inner.kind isa BlockNone then
 918                                                         inner.kind = new BlockParagraph(inner)
 919                                                 end
 920                                                 inner = inner.next
 921                                         end
 922                                 end
 923                                 outer = outer.next
 924                         end
 925                 end
 926         end
 927 end
 928
 929 # A markdown ordered list.
 930 class BlockOrderedList
 931         super BlockList
 932
 933         redef fun emit(v) do v.decorator.add_orderedlist(v, self)
 934 end
 935
 936 # A markdown unordred list.
 937 class BlockUnorderedList
 938         super BlockList
 939
 940         redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
 941 end
 942
 943 # A markdown paragraph block.
 944 class BlockParagraph
 945         super Block
 946
 947         redef fun emit(v) do v.decorator.add_paragraph(v, self)
 948 end
 949
 950 # A markdown ruler.
 951 class BlockRuler
 952         super Block
 953
 954         redef fun emit(v) do v.decorator.add_ruler(v, self)
 955 end
 956
 957 # Xml blocks that can be found in markdown markup.
 958 class BlockXML
 959         super Block
 960
 961         redef fun emit_lines(v) do
 962                 var line = block.first_line
 963                 while line != null do
 964                         if not line.is_empty then v.add line.value
 965                         v.addn
 966                         line = line.next
 967                 end
 968         end
 969 end
 970
 971 # A markdown line.
 972 class MDLine
 973
 974         # Text contained in this line.
 975         var value: String is writable
 976
 977         # Is this line empty?
 978         # Lines containing only spaces are considered empty.
 979         var is_empty: Bool = true is writable
 980
 981         # Previous line in `MDBlock` or null if first line.
 982         var prev: nullable MDLine = null is writable
 983
 984         # Next line in `MDBlock` or null if last line.
 985         var next: nullable MDLine = null is writable
 986
 987         # Is the previous line empty?
 988         var prev_empty: Bool = false is writable
 989
 990         # Is the next line empty?
 991         var next_empty: Bool = false is writable
 992
 993         init(value: String) do
 994                 self.value = value
 995                 self.leading = process_leading
 996                 if leading != value.length then
 997                         self.is_empty = false
 998                         self.trailing = process_trailing
 999                 end
1000         end
1001
1002         # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1003         fun clear do
1004                 value = ""
1005                 leading = 0
1006                 trailing = 0
1007                 is_empty = true
1008                 if prev != null then prev.next_empty = true
1009                 if next != null then next.prev_empty = true
1010         end
1011
1012         # The type of line.
1013         # see `md_line_*`
1014         fun kind(v: MarkdownProcessor): Line do
1015                 var value = self.value
1016                 if is_empty then return new LineEmpty
1017                 if leading > 3 then return new LineCode
1018                 if value[leading] == '#' then return new LineHeadline
1019                 if value[leading] == '>' then return new LineBlockquote
1020
1021                 if value.length - leading - trailing > 2 then
1022                         if value[leading] == '`' and count_chars_start('`') >= 3 then
1023                                 return new LineFence
1024                         end
1025                         if value[leading] == '~' and count_chars_start('~') >= 3 then
1026                                 return new LineFence
1027                         end
1028                 end
1029
1030                 if value.length - leading - trailing > 2 and
1031                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
1032                    if count_chars(value[leading]) >= 3 then
1033                                 return new LineHR
1034                    end
1035                 end
1036
1037                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
1038                         var c = value[leading]
1039                         if c == '*' or c == '-' or c == '+' then return new LineUList
1040                 end
1041
1042                 if value.length - leading >= 3 and value[leading].is_digit then
1043                         var i = leading + 1
1044                         while i < value.length and value[i].is_digit do i += 1
1045                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
1046                                 return new LineOList
1047                         end
1048                 end
1049
1050                 if value[leading] == '<' and check_html then return new LineXML
1051
1052                 if next != null and not next.is_empty then
1053                         if next.count_chars('=') > 0 then
1054                                 return new LineHeadline1
1055                         end
1056                         if next.count_chars('-') > 0 then
1057                                 return new LineHeadline2
1058                         end
1059                 end
1060                 return new LineOther
1061         end
1062
1063         # Number or leading spaces on this line.
1064         var leading: Int = 0 is writable
1065
1066         # Compute `leading` depending on `value`.
1067         fun process_leading: Int do
1068                 var count = 0
1069                 var value = self.value
1070                 while count < value.length and value[count] == ' ' do count += 1
1071                 if leading == value.length then clear
1072                 return count
1073         end
1074
1075         # Number of trailing spaces on this line.
1076         var trailing: Int = 0 is writable
1077
1078         # Compute `trailing` depending on `value`.
1079         fun process_trailing: Int do
1080                 var count = 0
1081                 var value = self.value
1082                 while value[value.length - count - 1] == ' ' do
1083                         count += 1
1084                 end
1085                 return count
1086         end
1087
1088         # Count the amount of `ch` in this line.
1089         # Return A value > 0 if this line only consists of `ch` end spaces.
1090         fun count_chars(ch: Char): Int do
1091                 var count = 0
1092                 for c in value do
1093                         if c == ' ' then
1094                                 continue
1095                         end
1096                         if c == ch then
1097                                 count += 1
1098                                 continue
1099                         end
1100                         count = 0
1101                         break
1102                 end
1103                 return count
1104         end
1105
1106         # Count the amount of `ch` at the start of this line ignoring spaces.
1107         fun count_chars_start(ch: Char): Int do
1108                 var count = 0
1109                 for c in value do
1110                         if c == ' ' then
1111                                 continue
1112                         end
1113                         if c == ch then
1114                                 count += 1
1115                         else
1116                                 break
1117                         end
1118                 end
1119                 return count
1120         end
1121
1122         # Last XML line if any.
1123         private var xml_end_line: nullable MDLine = null
1124
1125         # Does `value` contains valid XML markup?
1126         private fun check_html: Bool do
1127                 var tags = new Array[String]
1128                 var tmp = new FlatBuffer
1129                 var pos = leading
1130                 if pos + 1 < value.length and value[pos + 1] == '!' then
1131                         if read_xml_comment(self, pos) > 0 then return true
1132                 end
1133                 pos = value.read_xml(tmp, pos, false)
1134                 var tag: String
1135                 if pos > -1 then
1136                         tag = tmp.xml_tag
1137                         if not tag.is_html_block then
1138                                 return false
1139                         end
1140                         if tag == "hr" then
1141                                 xml_end_line = self
1142                                 return true
1143                         end
1144                         tags.add tag
1145                         var line: nullable MDLine = self
1146                         while line != null do
1147                                 while pos < line.value.length and line.value[pos] != '<' do
1148                                         pos += 1
1149                                 end
1150                                 if pos >= line.value.length then
1151                                         if line.value[pos - 2] == '/' then
1152                                                 tags.pop
1153                                                 if tags.is_empty then
1154                                                         xml_end_line = line
1155                                                         break
1156                                                 end
1157                                         end
1158                                         line = line.next
1159                                         pos = 0
1160                                 else
1161                                         tmp = new FlatBuffer
1162                                         var new_pos = line.value.read_xml(tmp, pos, false)
1163                                         if new_pos > 0 then
1164                                                 tag = tmp.xml_tag
1165                                                 if tag.is_html_block and not tag == "hr" then
1166                                                         if tmp[1] == '/' then
1167                                                                 if tags.last != tag then
1168                                                                         return false
1169                                                                 end
1170                                                                 tags.pop
1171                                                         else
1172                                                                 tags.add tag
1173                                                         end
1174                                                 end
1175                                                 if tags.is_empty then
1176                                                         xml_end_line = line
1177                                                         break
1178                                                 end
1179                                                 pos = new_pos
1180                                         else
1181                                                 pos += 1
1182                                         end
1183                                 end
1184                         end
1185                         return tags.is_empty
1186                 end
1187                 return false
1188         end
1189
1190         # Read a XML comment.
1191         # Used by `check_html`.
1192         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1193                 var line: nullable MDLine = first_line
1194                 if start + 3 < line.value.length then
1195                         if line.value[2] == '-' and line.value[3] == '-' then
1196                                 var pos = start + 4
1197                                 while line != null do
1198                                         while pos < line.value.length and line.value[pos] != '-' do
1199                                                 pos += 1
1200                                         end
1201                                         if pos == line.value.length then
1202                                                 line = line.next
1203                                                 pos = 0
1204                                         else
1205                                                 if pos + 2 < line.value.length then
1206                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1207                                                                 first_line.xml_end_line = line
1208                                                                 return pos + 3
1209                                                         end
1210                                                 end
1211                                                 pos += 1
1212                                         end
1213                                 end
1214                         end
1215                 end
1216                 return -1
1217         end
1218
1219         # Extract the text of `self` without leading and trailing.
1220         fun text: String do return value.substring(leading, value.length - trailing)
1221 end
1222
# A kind of markdown line, as returned by `MDLine::kind`.
interface Line

        # Parse the line.
        #
        # The implementations visible in this module read `v.current_line`,
        # usually split `v.current_block` and then update `v.current_line`.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1230
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the line: only move `current_line` to the next one.
        redef fun process(v) do
                var skipped = v.current_line
                v.current_line = skipped.next
        end
end
1239
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines up to the next empty line or the next line starting
        # another construct, and split them as a `BlockParagraph`
        # (or as a `BlockNone` for tight content inside a list).
        redef fun process(v) do
                var line = v.current_line
                var was_empty = line.prev_empty
                # go to block end
                loop
                        if line == null or line.is_empty then break
                        var t = line.kind(v)
                        if v.in_list and t isa LineList then break
                        if t isa LineCode or t isa LineFence then break
                        if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 then break
                        if t isa LineHR or t isa LineBlockquote or t isa LineXML then break
                        line = line.next
                end
                # find the last line of the block
                var split_at: MDLine
                if line == null then
                        # no more lines: take everything
                        split_at = v.current_block.last_line.as(not null)
                else if line.is_empty then
                        # the empty line is part of the block
                        split_at = line
                else
                        # stopped on another construct: it stays out of the block
                        split_at = line.prev.as(not null)
                end
                # build block
                var block = v.current_block.split(split_at)
                var tight = v.in_list and (line == null or not line.is_empty) and not was_empty
                if tight then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1290
# A line of markdown code.
class LineCode
        super Line

        # Gather the following code and empty lines in a `BlockCode`.
        redef fun process(v) do
                # lookup block end: first line that is neither empty nor code
                var stop = v.current_line
                loop
                        if stop == null then break
                        if not stop.is_empty and not (stop.kind(v) isa LineCode) then break
                        stop = stop.next
                end
                # split at block end line
                var last: MDLine
                if stop == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = stop.prev.as(not null)
                end
                var code = v.current_block.split(last)
                code.kind = new BlockCode(code)
                code.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1313
# A line of raw XML.
class LineXML
        super Line

        # Split the lines from the current one to its `xml_end_line` as a `BlockXML`.
        redef fun process(v) do
                var first = v.current_line
                var before = first.prev
                # lines found before the markup are split apart
                if before != null then v.current_block.split(before)
                var xml = v.current_block.split(first.xml_end_line.as(not null))
                xml.kind = new BlockXML(xml)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1328
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the quoted lines in a `BlockQuote`, remove the quote markers
        # and process the content of the quote recursively.
        redef fun process(v) do
                # go to bquote end: a non-empty, non-indented line that follows
                # an empty line and is not a blockquote line itself
                var stop = v.current_line
                loop
                        if stop == null then break
                        if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
                           not (stop.kind(v) isa LineBlockquote) then break
                        stop = stop.next
                end
                # build sub block
                var last: MDLine
                if stop == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = stop.prev.as(not null)
                end
                var quote = v.current_block.split(last)
                var kind = new BlockQuote(quote)
                quote.kind = kind
                quote.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(quote)
                v.current_line = stop
                v.recurse(quote, false)
                v.current_line = v.current_block.first_line
        end
end
1358
# A markdown ruler line.
class LineHR
        super Line

        # Split the current line alone as a `BlockRuler`.
        redef fun process(v) do
                var ruler_line = v.current_line
                # lines found before the ruler are split apart
                if ruler_line.prev != null then
                        v.current_block.split(ruler_line.prev.as(not null))
                end
                var ruler = v.current_block.split(ruler_line.as(not null))
                ruler.kind = new BlockRuler(ruler)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1372
# A markdown fence code line.
class LineFence
        super Line

        # Gather the lines up to the closing fence (or up to the last line
        # if the fence is never closed) in a `BlockFence`.
        redef fun process(v) do
                # go to fence end
                var closing = v.current_line.next
                while closing != null and not (closing.kind(v) isa LineFence) do
                        closing = closing.next
                end
                # first line following the fenced block, if any
                var after: nullable MDLine = null
                if closing != null then after = closing.next
                # build fence block
                var last: MDLine
                if after == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = after.prev.as(not null)
                end
                var fence = v.current_block.split(last)
                fence.kind = new BlockFence(fence)
                # the fence markers are not part of the code
                fence.first_line.clear
                if fence.last_line.kind(v) isa LineFence then
                        fence.last_line.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = after
        end
end
1403
# A markdown headline.
class LineHeadline
        super Line

        # Split the current line alone as a `BlockHeadline` and remove its marks.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                # lines found before the headline are split apart
                if before != null then v.current_block.split(before)
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                # TODO block ID
                # block.id = block.first_line.strip_id
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1422
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Split the current line as a `BlockHeadline` of depth 1.
        # The next line (the underline) is cleared.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                # lines found before the headline are split apart
                if before != null then v.current_block.split(before)
                title.next.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                # TODO block ID
                # block.id = block.first_line.strip_id
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1443
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Split the current line as a `BlockHeadline` of depth 2.
        # The next line (the underline) is cleared.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                # lines found before the headline are split apart
                if before != null then v.current_block.split(before)
                title.next.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                # TODO block ID
                # block.id = block.first_line.strip_id
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1464
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
        super Line

        # Gather the lines of the list in a block of kind `block_kind`,
        # split it into list items and process each item recursively.
        redef fun process(v) do
                # go to list end: a non-empty, non-indented line that follows
                # an empty line and is not a list line
                var stop = v.current_line
                loop
                        if stop == null then break
                        var t = stop.kind(v)
                        if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
                           not (t isa LineList) then break
                        stop = stop.next
                end
                # build list block
                var last: MDLine
                if stop == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = stop.prev.as(not null)
                end
                var list = v.current_block.split(last)
                var kind = block_kind(list)
                list.kind = kind
                # the flags are reset before and after the cleaning of the list
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # process each list item
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = stop
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract a value from `line`, depending on the kind of list.
        protected fun extract_value(line: MDLine): String is abstract
end
1509
# An ordered list line.
class LineOList
        super LineList

        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # Text starting two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var text = line.value
                var from = text.index_of('.') + 2
                return text.substring_from(from)
        end
end
1520
# A line that belongs to an unordered list.
class LineUList
        super LineList

        # Unordered list lines are grouped in a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # Return what follows the leading spaces and the next two characters of the line.
        redef fun extract_value(line) do
                var skipped = line.leading + 2
                return line.value.substring_from(skipped)
        end
end
1531
# A token stands for one character of the markdown input.
# Subclasses attach a specific markup behaviour to that character.
abstract class Token

        # Index of `self` in the markdown input.
        var pos: Int

        # The input character located at `pos`.
        var char: Char

        # Write `self` to the emitter `v`.
        # The default behaviour is to output `char` unchanged.
        fun emit(v: MarkdownEmitter) do
                v.addc(char)
        end
end
1545
1546 # A token without markup meaning; it only inherits `Token::emit`.
1547 class TokenNone
1548         super Token
1549 end
1550
# An emphasis token.
abstract class TokenEm
        super Token

        # Render the text up to the matching token as an emphasis.
        # When no matching token is found, only `char` is output.
        redef fun emit(v) do
                # the emphasised content is rendered in a separate buffer
                var content = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = closing
        end
end
1567
1568 # An emphasis token created by `Text::token_at` for a `*` character.
1569 class TokenEmStar
1570         super TokenEm
1571 end
1572
1573 # An emphasis token created by `Text::token_at` for a `_` character.
1574 class TokenEmUnderscore
1575         super TokenEm
1576 end
1577
# A strong token.
abstract class TokenStrong
        super Token

        # Render the text up to the matching token as a strong span.
        # When no matching token is found, only `char` is output.
        redef fun emit(v) do
                # the content starts after the two characters of the opening marker
                var content = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                # move one position further than the returned one
                v.current_pos = closing + 1
        end
end
1594
1595 # A strong token created by `Text::token_at` for a `**` sequence.
1596 class TokenStrongStar
1597         super TokenStrong
1598 end
1599
1600 # A strong token created by `Text::token_at` for a `__` sequence.
1601 class TokenStrongUnderscore
1602         super TokenStrong
1603 end
1604
# A code token.
# Factorizes the work shared by single and double quoted span codes.
abstract class TokenCode
        super Token

        # Render the text up to the next token of the same type as a span code.
        # Spaces around the code are not part of the rendered span.
        # When no matching token is found, only `char` is output.
        redef fun emit(v) do
                var first = pos + next_pos + 1
                var last = v.current_text.find_token(first, self)
                if last <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = last + next_pos
                # skip the spaces that follow the opening marker
                while first < last and v.current_text[first] == ' ' do first += 1
                # nothing but spaces: no span is produced
                if first >= last then return
                # skip the spaces that precede the closing marker
                while v.current_text[last - 1] == ' ' do last -= 1
                v.decorator.add_span_code(v, v.current_text.as(not null), first, last)
        end

        # Offset added to `pos` to step over the marker (0 for single, 1 for double).
        private fun next_pos: Int is abstract
end
1627
# A span code token made of a single backquote.
class TokenCodeSingle
        super TokenCode

        # A single backquote needs no extra offset.
        redef fun next_pos do
                return 0
        end
end
1634
# A span code token made of two backquotes.
class TokenCodeDouble
        super TokenCode

        # The second backquote adds one to the offset.
        redef fun next_pos do
                return 1
        end
end
1641
# A link or image token.
# Factorizes the work shared by images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Output the link or image when the construct is valid, `char` otherwise.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the construct starting at `start` is a valid link.
        #
        # On success `name`, `link`, `comment` and `is_abbrev` are filled and the
        # position of the last character of the construct is returned.
        # Return `-1` when the construct is not a valid link.
        #
        # `out` is not used; it is kept so the signature stays unchanged.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text.as(not null)
                # a link starts with `[`, an image with `![`
                var pos = start + 2
                if token isa TokenLink then pos = start + 1
                var label = new FlatBuffer
                pos = md.read_md_link_id(label, pos)
                if pos < start then return -1
                name = label
                var old_pos = pos
                pos = md.skip_spaces(pos + 1)
                if pos < start then
                        # end of text after `[name]`: `name` must be a known reference
                        var tid = label.write_to_string.to_lower
                        if not v.processor.link_refs.has_key(tid) then return -1
                        var lr = v.processor.link_refs[tid]
                        is_abbrev = lr.is_abbrev
                        link = lr.link
                        comment = lr.title
                        pos = old_pos
                else if md[pos] == '(' then
                        # inline form: `[name](address "title")`
                        pos = md.skip_spaces(pos + 1)
                        if pos < start then return -1
                        var address = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(address, pos + 1, '>')
                        else
                                pos = md.read_md_link(address, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # the closing `>` may be the last character of the text
                        if pos >= md.length then return -1
                        link = address.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # only spaces up to the end of the text: `)` is missing
                                if pos < start then return -1
                                if md[pos] == '"' then
                                        var title = new FlatBuffer
                                        pos = md.read_until(title, pos + 1, '"')
                                        if pos < start then return -1
                                        comment = title.write_to_string
                                        pos = md.skip_spaces(pos + 1)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # reference form: `[name][id]`, an empty `id` means `name`
                        var ref = new FlatBuffer
                        pos = md.read_raw_until(ref, pos + 1, ']')
                        if pos < start then return -1
                        var id: Text = label
                        if ref.length > 0 then id = ref
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # shortcut form: `[name]` followed by something else
                        var tid = label.write_to_string.replace("\n", " ").to_lower
                        if not v.processor.link_refs.has_key(tid) then return -1
                        var lr = v.processor.link_refs[tid]
                        link = lr.link
                        comment = lr.title
                        pos = old_pos
                end
                if link == null then return -1
                return pos
        end
end
1760
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Output an abbreviation when the link is one and has a title,
        # a regular link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                        return
                end
                v.decorator.add_link(v, link.as(not null), name.as(not null), title)
        end
end
1773
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Output the image with its address, its text and its optional title.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, address, text, comment)
        end
end
1782
# A HTML/XML token.
class TokenHTML
        super Token

        # Output the HTML found at the current position.
        # When nothing valid is found, `char` is escaped.
        redef fun emit(v) do
                var html = new FlatBuffer
                var last = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if last <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = last
        end

        # Is the HTML valid?
        # Also take care of link shortcuts like `<http://example.com>`.
        #
        # Auto links are output directly through the decorator, inline HTML is
        # appended to `out`.
        # Return the position of the last character read or `-1`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var buf = new FlatBuffer
                var pos = md.read_until(buf, start + 1, ':', ' ', '>', '\n')
                var is_auto_link = pos != -1 and md[pos] == ':' and buf.is_link_prefix
                if is_auto_link then
                        pos = md.read_until(buf, pos, '>')
                        if pos != -1 then
                                var link = buf.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
1820
# An HTML entity token.
class TokenEntity
        super Token

        # Output the entity found at `pos` unchanged.
        # When there is no valid entity, `char` is escaped.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` and append the entity,
        # including the final `;`, to `out`.
        # Three forms are accepted: `&name;`, `&#123;` and `&#x1F;`.
        #
        # Return the position of the `;` or `-1` when the entity is not valid.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # hexadecimal form: at least one digit is needed after `&#x`
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                     (c >= 'a' and c <= 'f') or
                                                     (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                # decimal form
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        # named form
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
1874
# A markdown escape token.
class TokenEscape
        super Token

        # Step over the current character and output the one that follows it.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
1884
# A markdown super token.
class TokenSuper
        super Token

        # Render the text up to the matching token through `add_super`.
        # When no matching token is found, only `char` is output.
        redef fun emit(v) do
                # the content is rendered in a separate buffer
                var content = v.push_buffer
                var closing = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if closing <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_super(v, content)
                v.current_pos = closing
        end
end
1901
redef class Text

        # Get the token kind at `pos`.
        #
        # The kind depends on the character at `pos`, on the one before it and
        # on the two after it. Characters outside of `self` count as spaces.
        private fun token_at(pos: Int): Token do
                var c0 = ' '
                if pos > 0 then c0 = self[pos - 1]
                var c = self[pos]
                var c1 = ' '
                if pos + 1 < length then c1 = self[pos + 1]
                var c2 = ' '
                if pos + 2 < length then c2 = self[pos + 2]

                if c == '*' then
                        if c1 == '*' then
                                if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
                                return new TokenEmStar(pos, c)
                        end
                        if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
                        return new TokenNone(pos, c)
                else if c == '_' then
                        if c1 == '_' then
                                if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
                                return new TokenEmUnderscore(pos, c)
                        end
                        if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
                        return new TokenNone(pos, c)
                else if c == '!' then
                        if c1 == '[' then return new TokenImage(pos, c)
                        return new TokenNone(pos, c)
                else if c == '[' then
                        return new TokenLink(pos, c)
                else if c == ']' then
                        return new TokenNone(pos, c)
                else if c == '`' then
                        if c1 == '`' then return new TokenCodeDouble(pos, c)
                        return new TokenCodeSingle(pos, c)
                else if c == '\\' then
                        # a backslash is an escape only before these characters
                        if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or
                           c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or
                           c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
                           c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or
                           c1 == '^' then
                                return new TokenEscape(pos, c)
                        end
                        return new TokenNone(pos, c)
                else if c == '<' then
                        return new TokenHTML(pos, c)
                else if c == '&' then
                        return new TokenEntity(pos, c)
                else if c == '^' then
                        if c0 == '^' or c1 == '^' then return new TokenNone(pos, c)
                        return new TokenSuper(pos, c)
                end
                return new TokenNone(pos, c)
        end

        # Find the position of the first token of the same type as `token`.
        # The search begins at `start`; `-1` is returned when nothing is found.
        private fun find_token(start: Int, token: Token): Int do
                for pos in [start..length[ do
                        if token_at(pos).is_same_type(token) then return pos
                end
                return -1
        end

        # Get the position of the next character that is neither a space nor a new line.
        # Return `-1` when the end of `self` is reached.
        private fun skip_spaces(start: Int): Int do
                var pos = start
                loop
                        if pos < 0 or pos >= length then break
                        var c = self[pos]
                        if c != ' ' and c != '\n' then break
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read `self` until one of `nend` and append what was read to the `out` buffer.
        # Backslash sequences are handled by `escape`.
        #
        # Return the position of the end character or `-1` when none is found.
        private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos)
                        else
                                if nend.has(c) then break
                                out.add c
                        end
                        pos += 1
                end
                # `>=` also rejects a `start` located after the end of `self`
                if pos >= length then return -1
                return pos
        end

        # Read `self` as raw text until one of `nend` and append it to the `out` buffer.
        # No escape is made.
        #
        # Return the position of the end character or `-1` when none is found.
        private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
                var pos = start
                while pos < length do
                        var c = self[pos]
                        if nend.has(c) then break
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read `self` as XML until one of `to` and append it to the `out` buffer.
        #
        # The characters of `to` are ignored inside single or double quoted
        # strings, where a backslash protects the character that follows it.
        #
        # Return the position of the end character or `-1` when none is found.
        private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
                var pos = from
                var in_str = false
                var str_char: nullable Char = null
                while pos < length do
                        var c = self[pos]
                        if in_str then
                                if c == '\\' then
                                        # copy the backslash then the character it protects
                                        out.add c
                                        pos += 1
                                        if pos < length then
                                                out.add self[pos]
                                                pos += 1
                                        end
                                        continue
                                end
                                # only the quote that opened the string can close it
                                if c == str_char then in_str = false
                        else if c == '"' or c == '\'' then
                                in_str = true
                                str_char = c
                        else if to.has(c) then
                                break
                        end
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read `self` as XML and append it to the `out` buffer.
        #
        # In `safe_mode` the opening `<` of the tags listed in `html_unsafe_tags`
        # is output as `&lt;`.
        #
        # Return the position of the closing `>` or `-1` when no tag can be read.
        # For a `<!` sequence, only `<!` is output and `start + 1` is returned.
        private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
                if start + 1 >= length then return -1
                var marker = self[start + 1]
                if marker == '!' then
                        out.append "<!"
                        return start + 1
                end
                var is_close_tag = marker == '/'
                var pos = start + 1
                if is_close_tag then pos += 1
                if safe_mode then
                        # read the tag name apart to check it before output
                        var tmp = new FlatBuffer
                        pos = read_xml_until(tmp, pos, ' ', '/', '>')
                        if pos == -1 then return -1
                        var tag = tmp.write_to_string.trim.to_lower
                        if tag.is_html_unsafe then
                                out.append "&lt;"
                        else
                                out.append "<"
                        end
                        if is_close_tag then out.add '/'
                        out.append tmp
                else
                        out.add '<'
                        if is_close_tag then out.add '/'
                        pos = read_xml_until(out, pos, ' ', '/', '>')
                        if pos == -1 then return -1
                end
                # read the attributes
                pos = read_xml_until(out, pos, '/', '>')
                if pos == -1 then return -1
                if self[pos] == '/' then
                        out.append " /"
                        pos = read_xml_until(out, pos + 1, '>')
                        if pos == -1 then return -1
                end
                if self[pos] == '>' then
                        out.add '>'
                        return pos
                end
                return -1
        end

        # Read a markdown link address and append it to the `out` buffer.
        #
        # The address stops at the first space found outside of nested
        # parentheses or at the `)` that closes the link.
        # Return the position of that character or `-1` when none is found.
        private fun read_md_link(out: FlatBuffer, start: Int): Int do
                var pos = start
                # the `(` that opens the address is already counted
                var depth = 1
                while pos < length do
                        var c = self[pos]
                        if c == '\\' and pos + 1 < length then
                                pos = escape(out, self[pos + 1], pos) + 1
                                continue
                        end
                        if c == '(' then
                                depth += 1
                        else if c == ' ' then
                                if depth == 1 then break
                        else if c == ')' then
                                depth -= 1
                                if depth == 0 then break
                        end
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Read a markdown link text and append it to the `out` buffer.
        #
        # Nested brackets are kept in the text.
        # Return the position of the closing `]` or `-1` when none is found.
        private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
                var pos = start
                # the `[` that opens the text is already counted
                var depth = 1
                while pos < length do
                        var c = self[pos]
                        if c == '[' then
                                depth += 1
                        else if c == ']' then
                                depth -= 1
                                if depth == 0 then break
                        end
                        out.add c
                        pos += 1
                end
                if pos >= length then return -1
                return pos
        end

        # Extract the XML tag name from a XML tag.
        #
        # The first character and an optional `/` after it are skipped.
        # The name is returned in lower case; the last character of `self`
        # is never part of it.
        private fun xml_tag: String do
                var tpl = new FlatBuffer
                var pos = 1
                if pos < length and self[1] == '/' then pos += 1
                var last = length - 1
                while pos < last do
                        var c = self[pos]
                        if not c.is_digit and not c.is_letter then break
                        tpl.add c
                        pos += 1
                end
                return tpl.write_to_string.to_lower
        end

        # Handle the backslash found at `pos` and followed by `c`.
        #
        # When `c` is a markdown special character, it is appended to `out` and
        # `pos + 1` is returned. Otherwise the backslash itself is appended to
        # `out` and `pos` is returned.
        private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
                var is_special = c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or
                                 c == '{' or c == '}' or c == '#' or c == '"' or c == '\'' or
                                 c == '.' or c == '<' or c == '>' or c == '*' or c == '+' or
                                 c == '-' or c == '_' or c == '!' or c == '`' or c == '~' or
                                 c == '^'
                if is_special then
                        out.add c
                        return pos + 1
                end
                out.add '\\'
                return pos
        end

        # Is `self` an unsafe HTML element?
        private fun is_html_unsafe: Bool do
                return html_unsafe_tags.has(self.write_to_string)
        end

        # Is `self` a HTML block element?
        private fun is_html_block: Bool do
                return html_block_tags.has(self.write_to_string)
        end

        # Is `self` a link prefix?
        private fun is_link_prefix: Bool do
                return html_link_prefixes.has(self.write_to_string)
        end

        # Names of the HTML elements considered as unsafe.
        private fun html_unsafe_tags: Array[String] do
                return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
        end

        # Names of the HTML block elements.
        private fun html_block_tags: Array[String] do
                return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
        end

        # Prefixes that introduce a link.
        private fun html_link_prefixes: Array[String] do
                return once ["http", "https", "ftp", "ftps"]
        end
end
2252
redef class String

        # Render `self`, taken as markdown, to its HTML representation.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Streamable do
                return (new MarkdownProcessor).process(self)
        end
end