lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         var emitter: MarkdownEmitter is noinit
  34
  35         init do self.emitter = new MarkdownEmitter(self)
  36
  37         # Process the mardown `input` string and return the processed output.
  38         fun process(input: String): Streamable do
  39                 # init processor
  40                 link_refs.clear
  41                 last_link_ref = null
  42                 current_line = null
  43                 current_block = null
  44                 # parse markdown
  45                 var parent = read_lines(input)
  46                 parent.remove_surrounding_empty_lines
  47                 recurse(parent, false)
  48                 # output processed text
  49                 return emitter.emit(parent.kind)
  50         end
  51
  52         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
  53         private fun read_lines(input: String): MDBlock do
  54                 var block = new MDBlock
  55                 var value = new FlatBuffer
  56                 var i = 0
  57                 while i < input.length do
  58                         value.clear
  59                         var pos = 0
  60                         var eol = false
  61                         while not eol and i < input.length do
  62                                 var c = input[i]
  63                                 if c == '\n' then
  64                                         i += 1
  65                                         eol = true
  66                                 else if c == '\t' then
  67                                         var np = pos + (4 - (pos.bin_and(3)))
  68                                         while pos < np do
  69                                                 value.add ' '
  70                                                 pos += 1
  71                                         end
  72                                         i += 1
  73                                 else
  74                                         pos += 1
  75                                         value.add c
  76                                         i += 1
  77                                 end
  78                         end
  79
  80                         var line = new MDLine(value.write_to_string)
  81                         var is_link_ref = check_link_ref(line)
  82                         # Skip link refs
  83                         if not is_link_ref then block.add_line line
  84                 end
  85                 return block
  86         end
  87
  88         # Check if line is a block link definition.
  89         # Return `true` if line contains a valid link ref and save it into `link_refs`.
  90         private fun check_link_ref(line: MDLine): Bool do
  91                 var md = line.value
  92                 var is_link_ref = false
  93                 var id = new FlatBuffer
  94                 var link = new FlatBuffer
  95                 var comment = new FlatBuffer
  96                 var pos = -1
  97                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
  98                         pos = line.leading + 1
  99                         pos = md.read_until(id, pos, ']')
 100                         if not id.is_empty and pos + 2 < line.value.length then
 101                                 if line.value[pos + 1] == ':' then
 102                                         pos += 2
 103                                         pos = md.skip_spaces(pos)
 104                                         if line.value[pos] == '<' then
 105                                                 pos += 1
 106                                                 pos = md.read_until(link, pos, '>')
 107                                                 pos += 1
 108                                         else
 109                                                 pos = md.read_until(link, pos, ' ', '\n')
 110                                         end
 111                                         if not link.is_empty then
 112                                                 pos = md.skip_spaces(pos)
 113                                                 if pos > 0 and pos < line.value.length then
 114                                                         var c = line.value[pos]
 115                                                         if c == '\"' or c == '\'' or c == '(' then
 116                                                                 pos += 1
 117                                                                 if c == '(' then
 118                                                                         pos = md.read_until(comment, pos, ')')
 119                                                                 else
 120                                                                         pos = md.read_until(comment, pos, c)
 121                                                                 end
 122                                                                 if pos > 0 then is_link_ref = true
 123                                                         end
 124                                                 else
 125                                                         is_link_ref = true
 126                                                 end
 127                                         end
 128                                 end
 129                         end
 130                 end
 131                 if is_link_ref and not id.is_empty and not link.is_empty then
 132                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 133                         add_link_ref(id.write_to_string, lr)
 134                         if comment.is_empty then last_link_ref = lr
 135                         return true
 136                 else
 137                         comment = new FlatBuffer
 138                         if not line.is_empty and last_link_ref != null then
 139                                 pos = line.leading
 140                                 var c = line.value[pos]
 141                                 if c == '\"' or c == '\'' or c ==  '(' then
 142                                         pos += 1
 143                                         if c == '(' then
 144                                                 pos = md.read_until(comment, pos, ')')
 145                                         else
 146                                                 pos = md.read_until(comment, pos, c)
 147                                         end
 148                                 end
 149                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 150                         end
 151                         if comment.is_empty then return false
 152                         return true
 153                 end
 154         end
 155
 156         # Known link refs
 157         # This list will be needed during output to expand links.
 158         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 159
 160         # Last encountered link ref (for multiline definitions)
 161         #
 162         # Markdown allows link refs to be defined over two lines:
 163         #
 164         #       [id]: http://example.com/longish/path/to/resource/here
 165         #               "Optional Title Here"
 166         #
 167         private var last_link_ref: nullable LinkRef = null
 168
 169         # Add a link ref to the list
 170         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 171
 172         # Recursively split a `block`.
 173         #
 174         # The block is splitted according to the type of lines it contains.
 175         # Some blocks can be splited again recursively like lists.
 176         # The `in_list` mode is used to recurse on list and build
 177         # nested paragraphs or code blocks.
 178         fun recurse(root: MDBlock, in_list: Bool) do
 179                 var old_mode = self.in_list
 180                 var old_root = self.current_block
 181                 self.in_list = in_list
 182
 183                 var line = root.first_line
 184                 while line != null and line.is_empty do
 185                         line = line.next
 186                         if line == null then return
 187                 end
 188
 189                 current_line = line
 190                 current_block = root
 191                 while current_line != null do
 192                         line_kind(current_line.as(not null)).process(self)
 193                 end
 194                 self.in_list = old_mode
 195                 self.current_block = old_root
 196         end
 197
 198         # Currently processed line.
 199         # Used when visiting blocks with `recurse`.
 200         var current_line: nullable MDLine = null is writable
 201
 202         # Currently processed block.
 203         # Used when visiting blocks with `recurse`.
 204         var current_block: nullable MDBlock = null is writable
 205
 206         # Is the current recursion in list mode?
 207         # Used when visiting blocks with `recurse`
 208         private var in_list = false
 209
 210         # The type of line.
 211         # see: `md_line_*`
 212         fun line_kind(md: MDLine): Line do
 213                 var value = md.value
 214                 var leading = md.leading
 215                 var trailing = md.trailing
 216                 if md.is_empty then return new LineEmpty
 217                 if md.leading > 3 then return new LineCode
 218                 if value[leading] == '#' then return new LineHeadline
 219                 if value[leading] == '>' then return new LineBlockquote
 220
 221                 if value.length - leading - trailing > 2 then
 222                         if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 223                                 return new LineFence
 224                         end
 225                         if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 226                                 return new LineFence
 227                         end
 228                 end
 229
 230                 if value.length - leading - trailing > 2 and
 231                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 232                    if md.count_chars(value[leading]) >= 3 then
 233                                 return new LineHR
 234                    end
 235                 end
 236
 237                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 238                         var c = value[leading]
 239                         if c == '*' or c == '-' or c == '+' then return new LineUList
 240                 end
 241
 242                 if value.length - leading >= 3 and value[leading].is_digit then
 243                         var i = leading + 1
 244                         while i < value.length and value[i].is_digit do i += 1
 245                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 246                                 return new LineOList
 247                         end
 248                 end
 249
 250                 if value[leading] == '<' and md.check_html then return new LineXML
 251
 252                 var next = md.next
 253                 if next != null and not next.is_empty then
 254                         if next.count_chars('=') > 0 then
 255                                 return new LineHeadline1
 256                         end
 257                         if next.count_chars('-') > 0 then
 258                                 return new LineHeadline2
 259                         end
 260                 end
 261                 return new LineOther
 262         end
 263
 264 end
 265
 266 # Emit output corresponding to blocks content.
 267 #
 268 # Blocks are created by a previous pass in `MarkdownProcessor`.
 269 # The emitter uses a `Decorator` to select the output format.
 270 class MarkdownEmitter
 271
 272         # Processor containing link refs.
 273         var processor: MarkdownProcessor
 274
 275         # Decorator used for output.
 276         # Default is `HTMLDecorator`
 277         var decorator: Decorator = new HTMLDecorator is writable
 278
 279         # Create a new `MardownEmitter` using the default `HTMLDecorator`
 280         init(processor: MarkdownProcessor) do
 281                 self.processor = processor
 282         end
 283
 284         # Create a new `MarkdownEmitter` using a custom `decorator`.
 285         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 286                 init processor
 287                 self.decorator = decorator
 288         end
 289
 290         # Output `block` using `decorator` in the current buffer.
 291         fun emit(block: Block): Text do
 292                 var buffer = push_buffer
 293                 block.emit(self)
 294                 pop_buffer
 295                 return buffer
 296         end
 297
 298         # Output the content of `block`.
 299         fun emit_in(block: Block) do block.emit_in(self)
 300
 301         # Transform and emit mardown text
 302         fun emit_text(text: Text) do
 303                 emit_text_until(text, 0, null)
 304         end
 305
 306         # Transform and emit mardown text starting at `from` and
 307         # until a token with the same type as `token` is found.
 308         # Go until the end of text if `token` is null.
 309         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 310                 var old_text = current_text
 311                 var old_pos = current_pos
 312                 current_text = text
 313                 current_pos = start
 314                 while current_pos < text.length do
 315                         var mt = text.token_at(current_pos)
 316                         if (token != null and not token isa TokenNone) and
 317                         (mt.is_same_type(token) or
 318                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 319                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 320                                 return current_pos
 321                         end
 322                         mt.emit(self)
 323                         current_pos += 1
 324                 end
 325                 current_text = old_text
 326                 current_pos = old_pos
 327                 return -1
 328         end
 329
 330         # Currently processed position in `current_text`.
 331         # Used when visiting inline production with `emit_text_until`.
 332         private var current_pos: Int = -1
 333
 334         # Currently processed text.
 335         # Used when visiting inline production with `emit_text_until`.
 336         private var current_text: nullable Text = null
 337
 338         # Stacked buffers.
 339         private var buffer_stack = new List[FlatBuffer]
 340
 341         # Push a new buffer on the stack.
 342         private fun push_buffer: FlatBuffer do
 343                 var buffer = new FlatBuffer
 344                 buffer_stack.add buffer
 345                 return buffer
 346         end
 347
 348         # Pop the last buffer.
 349         private fun pop_buffer do buffer_stack.pop
 350
 351         # Current output buffer.
 352         private fun current_buffer: FlatBuffer do
 353                 assert not buffer_stack.is_empty
 354                 return buffer_stack.last
 355         end
 356
 357         # Append `e` to current buffer.
 358         fun add(e: Streamable) do
 359                 if e isa Text then
 360                         current_buffer.append e
 361                 else
 362                         current_buffer.append e.write_to_string
 363                 end
 364         end
 365
 366         # Append `c` to current buffer.
 367         fun addc(c: Char) do current_buffer.add c
 368
 369         # Append a "\n" line break.
 370         fun addn do current_buffer.add '\n'
 371 end
 372
 373 # A Link Reference.
 374 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 375 #
 376 # Example:
 377 #
 378 #    [1]: http://example.com/ "Optional title"
 379 class LinkRef
 380
 381         # Link href
 382         var link: String
 383
 384         # Optional link title
 385         var title: nullable String = null
 386
 387         # Is the link an abreviation?
 388         var is_abbrev = false
 389
 390         # Create a link with a title.
 391         init with_title(link: String, title: nullable String) do
 392                 self.link = link
 393                 self.title = title
 394         end
 395 end
 396
 397 # A `Decorator` is used to emit mardown into a specific format.
 398 # Default decorator used is `HTMLDecorator`.
 399 interface Decorator
 400
 401         # Render a ruler block.
 402         fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
 403
 404         # Render a headline block with corresponding level.
 405         fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
 406
 407         # Render a paragraph block.
 408         fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
 409
 410         # Render a code or fence block.
 411         fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
 412
 413         # Render a blockquote.
 414         fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
 415
 416         # Render an unordered list.
 417         fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
 418
 419         # Render an ordered list.
 420         fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
 421
 422         # Render a list item.
 423         fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
 424
 425         # Render an emphasis text.
 426         fun add_em(v: MarkdownEmitter, text: Text) is abstract
 427
 428         # Render a strong text.
 429         fun add_strong(v: MarkdownEmitter, text: Text) is abstract
 430
 431         # Render a super text.
 432         fun add_super(v: MarkdownEmitter, text: Text) is abstract
 433
 434         # Render a link.
 435         fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 436
 437         # Render an image.
 438         fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 439
 440         # Render an abbreviation.
 441         fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
 442
 443         # Render a code span reading from a buffer.
 444         fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 445
 446         # Render a text and escape it.
 447         fun append_value(v: MarkdownEmitter, value: Text) is abstract
 448
 449         # Render code text from buffer and escape it.
 450         fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 451
 452         # Render a character escape.
 453         fun escape_char(v: MarkdownEmitter, char: Char) is abstract
 454
 455         # Render a line break
 456         fun add_line_break(v: MarkdownEmitter) is abstract
 457
 458         # Generate a new html valid id from a `String`.
 459         fun strip_id(txt: String): String is abstract
 460
 461         # Found headlines during the processing labeled by their ids.
 462         fun headlines: ArrayMap[String, HeadLine] is abstract
 463 end
 464
 465 # Class representing a markdown headline.
 466 class HeadLine
 467         # Unique identifier of this headline.
 468         var id: String
 469
 470         # Text of the headline.
 471         var title: String
 472
 473         # Level of this headline.
 474         #
 475         # According toe the markdown specification, level must be in `[1..6]`.
 476         var level: Int
 477 end
 478
 479 # `Decorator` that outputs HTML.
 480 class HTMLDecorator
 481         super Decorator
 482
 483         redef var headlines = new ArrayMap[String, HeadLine]
 484
 485         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 486
 487         redef fun add_headline(v, block) do
 488                 # save headline
 489                 var txt = block.block.first_line.value
 490                 var id = strip_id(txt)
 491                 var lvl = block.depth
 492                 headlines[id] = new HeadLine(id, txt, lvl)
 493                 # output it
 494                 v.add "<h{lvl} id=\"{id}\">"
 495                 v.emit_in block
 496                 v.add "</h{lvl}>\n"
 497         end
 498
 499         redef fun add_paragraph(v, block) do
 500                 v.add "<p>"
 501                 v.emit_in block
 502                 v.add "</p>\n"
 503         end
 504
 505         redef fun add_code(v, block) do
 506                 v.add "<pre><code>"
 507                 v.emit_in block
 508                 v.add "</code></pre>\n"
 509         end
 510
 511         redef fun add_blockquote(v, block) do
 512                 v.add "<blockquote>\n"
 513                 v.emit_in block
 514                 v.add "</blockquote>\n"
 515         end
 516
 517         redef fun add_unorderedlist(v, block) do
 518                 v.add "<ul>\n"
 519                 v.emit_in block
 520                 v.add "</ul>\n"
 521         end
 522
 523         redef fun add_orderedlist(v, block) do
 524                 v.add "<ol>\n"
 525                 v.emit_in block
 526                 v.add "</ol>\n"
 527         end
 528
 529         redef fun add_listitem(v, block) do
 530                 v.add "<li>"
 531                 v.emit_in block
 532                 v.add "</li>\n"
 533         end
 534
 535         redef fun add_em(v, text) do
 536                 v.add "<em>"
 537                 v.add text
 538                 v.add "</em>"
 539         end
 540
 541         redef fun add_strong(v, text) do
 542                 v.add "<strong>"
 543                 v.add text
 544                 v.add "</strong>"
 545         end
 546
 547         redef fun add_super(v, text) do
 548                 v.add "<sup>"
 549                 v.add text
 550                 v.add "</sup>"
 551         end
 552
 553         redef fun add_image(v, link, name, comment) do
 554                 v.add "<img src=\""
 555                 append_value(v, link)
 556                 v.add "\" alt=\""
 557                 append_value(v, name)
 558                 v.add "\""
 559                 if comment != null and not comment.is_empty then
 560                         v.add " title=\""
 561                         append_value(v, comment)
 562                         v.add "\""
 563                 end
 564                 v.add "/>"
 565         end
 566
 567         redef fun add_link(v, link, name, comment) do
 568                 v.add "<a href=\""
 569                 append_value(v, link)
 570                 v.add "\""
 571                 if comment != null and not comment.is_empty then
 572                         v.add " title=\""
 573                         append_value(v, comment)
 574                         v.add "\""
 575                 end
 576                 v.add ">"
 577                 v.emit_text(name)
 578                 v.add "</a>"
 579         end
 580
 581         redef fun add_abbr(v, name, comment) do
 582                 v.add "<abbr title=\""
 583                 append_value(v, comment)
 584                 v.add "\">"
 585                 v.emit_text(name)
 586                 v.add "</abbr>"
 587         end
 588
 589         redef fun add_span_code(v, text, from, to) do
 590                 v.add "<code>"
 591                 append_code(v, text, from, to)
 592                 v.add "</code>"
 593         end
 594
 595         redef fun add_line_break(v) do
 596                 v.add "<br/>"
 597         end
 598
 599         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 600
 601         redef fun escape_char(v, c) do
 602                 if c == '&' then
 603                         v.add "&amp;"
 604                 else if c == '<' then
 605                         v.add "&lt;"
 606                 else if c == '>' then
 607                         v.add "&gt;"
 608                 else if c == '"' then
 609                         v.add "&quot;"
 610                 else if c == '\'' then
 611                         v.add "&apos;"
 612                 else
 613                         v.addc c
 614                 end
 615         end
 616
 617         redef fun append_code(v, buffer, from, to) do
 618                 for i in [from..to[ do
 619                         var c = buffer[i]
 620                         if c == '&' then
 621                                 v.add "&amp;"
 622                         else if c == '<' then
 623                                 v.add "&lt;"
 624                         else if c == '>' then
 625                                 v.add "&gt;"
 626                         else
 627                                 v.addc c
 628                         end
 629                 end
 630         end
 631
 632         redef fun strip_id(txt) do
 633                 # strip id
 634                 var b = new FlatBuffer
 635                 for c in txt do
 636                         if c == ' ' then
 637                                 b.add '_'
 638                         else
 639                                 if not c.is_letter and
 640                                    not c.is_digit and
 641                                    not allowed_id_chars.has(c) then continue
 642                                 b.add c
 643                         end
 644                 end
 645                 var res = b.to_s
 646                 var key = res
 647                 # check for multiple id definitions
 648                 if headlines.has_key(key) then
 649                         var i = 1
 650                         key = "{res}_{i}"
 651                         while headlines.has_key(key) do
 652                                 i += 1
 653                                 key = "{res}_{i}"
 654                         end
 655                 end
 656                 return key
 657         end
 658
 659         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 660 end
 661
 662 # A block of markdown lines.
 663 # An `MDBlock` can contain lines and/or sub-blocks.
 664 class MDBlock
 665         # Kind of block.
 666         # See `Block`.
 667         var kind: Block = new BlockNone(self) is writable
 668
 669         # First line if any.
 670         var first_line: nullable MDLine = null is writable
 671
 672         # Last line if any.
 673         var last_line: nullable MDLine = null is writable
 674
 675         # First sub-block if any.
 676         var first_block: nullable MDBlock = null is writable
 677
 678         # Last sub-block if any.
 679         var last_block: nullable MDBlock = null is writable
 680
 681         # Previous block if any.
 682         var prev: nullable MDBlock = null is writable
 683
 684         # Next block if any.
 685         var next: nullable MDBlock = null is writable
 686
 687         # Does this block contain subblocks?
 688         fun has_blocks: Bool do return first_block != null
 689
 690         # Count sub-blocks.
 691         fun count_blocks: Int do
 692                 var count = 0
 693                 var block = first_block
 694                 while block != null do
 695                         count += 1
 696                         block = block.next
 697                 end
 698                 return count
 699         end
 700
 701         # Does this block contain lines?
 702         fun has_lines: Bool do return first_line != null
 703
 704         # Count block lines.
 705         fun count_lines: Int do
 706                 var count = 0
 707                 var line = first_line
 708                 while line != null do
 709                         count += 1
 710                         line = line.next
 711                 end
 712                 return count
 713         end
 714
 715         # Split `self` creating a new sub-block having `line` has `last_line`.
 716         fun split(line: MDLine): MDBlock do
 717                 var block = new MDBlock
 718                 block.first_line = first_line
 719                 block.last_line = line
 720                 first_line = line.next
 721                 line.next = null
 722                 if first_line == null then
 723                         last_line = null
 724                 else
 725                         first_line.prev = null
 726                 end
 727                 if first_block == null then
 728                         first_block = block
 729                         last_block = block
 730                 else
 731                         last_block.next = block
 732                         last_block = block
 733                 end
 734                 return block
 735         end
 736
 737         # Add a `line` to this block.
 738         fun add_line(line: MDLine) do
 739                 if last_line == null then
 740                         first_line = line
 741                         last_line = line
 742                 else
 743                         last_line.next_empty = line.is_empty
 744                         line.prev_empty = last_line.is_empty
 745                         line.prev = last_line
 746                         last_line.next = line
 747                         last_line = line
 748                 end
 749         end
 750
 751         # Remove `line` from this block.
 752         fun remove_line(line: MDLine) do
 753                 if line.prev == null then
 754                         first_line = line.next
 755                 else
 756                         line.prev.next = line.next
 757                 end
 758                 if line.next == null then
 759                         last_line = line.prev
 760                 else
 761                         line.next.prev = line.prev
 762                 end
 763                 line.prev = null
 764                 line.next = null
 765         end
 766
 767         # Remove leading empty lines.
 768         fun remove_leading_empty_lines: Bool do
 769                 var was_empty = false
 770                 var line = first_line
 771                 while line != null and line.is_empty do
 772                         remove_line line
 773                         line = first_line
 774                         was_empty = true
 775                 end
 776                 return was_empty
 777         end
 778
 779         # Remove trailing empty lines.
 780         fun remove_trailing_empty_lines: Bool do
 781                 var was_empty = false
 782                 var line = last_line
 783                 while line != null and line.is_empty do
 784                         remove_line line
 785                         line = last_line
 786                         was_empty = true
 787                 end
 788                 return was_empty
 789         end
 790
 791         # Remove leading and trailing empty lines.
 792         fun remove_surrounding_empty_lines: Bool do
 793                 var was_empty = false
 794                 if remove_leading_empty_lines then was_empty = true
 795                 if remove_trailing_empty_lines then was_empty = true
 796                 return was_empty
 797         end
 798
 799         # Remove list markers and up to 4 leading spaces.
 800         # Used to clean nested lists.
 801         fun remove_list_indent(v: MarkdownProcessor) do
 802                 var line = first_line
 803                 while line != null do
 804                         if not line.is_empty then
 805                                 var kind = v.line_kind(line)
 806                                 if kind isa LineList then
 807                                         line.value = kind.extract_value(line)
 808                                 else
 809                                         line.value = line.value.substring_from(line.leading.min(4))
 810                                 end
 811                                 line.leading = line.process_leading
 812                         end
 813                         line = line.next
 814                 end
 815         end
 816
 817         # Collect block line text.
 818         fun text: String do
 819                 var text = new FlatBuffer
 820                 var line = first_line
 821                 while line != null do
 822                         if not line.is_empty then
 823                                 text.append line.text
 824                         end
 825                         text.append "\n"
 826                         line = line.next
 827                 end
 828                 return text.write_to_string
 829         end
 830 end
 831
 832 # Representation of a markdown block in the AST.
 833 # Each `Block` is linked to a `MDBlock` that contains mardown code.
 834 abstract class Block
 835
 836         # The markdown block `self` is related to.
 837         var block: MDBlock
 838
 839         # Output `self` using `v.decorator`.
 840         fun emit(v: MarkdownEmitter) do v.emit_in(self)
 841
 842         # Emit the containts of `self`, lines or blocks.
 843         fun emit_in(v: MarkdownEmitter) do
 844                 block.remove_surrounding_empty_lines
 845                 if block.has_lines then
 846                         emit_lines(v)
 847                 else
 848                         emit_blocks(v)
 849                 end
 850         end
 851
 852         # Emit lines contained in `block`.
 853         fun emit_lines(v: MarkdownEmitter) do
 854                 var tpl = v.push_buffer
 855                 var line = block.first_line
 856                 while line != null do
 857                         if not line.is_empty then
 858                                 v.add line.value.substring(line.leading, line.value.length - line.trailing)
 859                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
 860                         end
 861                         if line.next != null then
 862                                 v.addn
 863                         end
 864                         line = line.next
 865                 end
 866                 v.pop_buffer
 867                 v.emit_text(tpl)
 868         end
 869
 870         # Emit sub-blocks contained in `block`.
 871         fun emit_blocks(v: MarkdownEmitter) do
 872                 var block = self.block.first_block
 873                 while block != null do
 874                         block.kind.emit(v)
 875                         block = block.next
 876                 end
 877         end
 878 end
 879
# A block without any markdown specificities.
#
# Nothing is redefined here: the default `Block` implementation is used as is.
# This class is only used for typing purposes.
class BlockNone
        super Block
end
 887
 888 # A markdown blockquote.
 889 class BlockQuote
 890         super Block
 891
 892         redef fun emit(v) do v.decorator.add_blockquote(v, self)
 893
 894         # Remove blockquote markers.
 895         private fun remove_block_quote_prefix(block: MDBlock) do
 896                 var line = block.first_line
 897                 while line != null do
 898                         if not line.is_empty then
 899                                 if line.value[line.leading] == '>' then
 900                                         var rem = line.leading + 1
 901                                         if line.leading + 1 < line.value.length and
 902                                            line.value[line.leading + 1] == ' ' then
 903                                                 rem += 1
 904                                         end
 905                                         line.value = line.value.substring_from(rem)
 906                                         line.leading = line.process_leading
 907                                 end
 908                         end
 909                         line = line.next
 910                 end
 911         end
 912 end
 913
 914 # A markdown code block.
 915 class BlockCode
 916         super Block
 917
 918         # Number of char to skip at the beginning of the line.
 919         #
 920         # Block code lines start at 4 spaces.
 921         protected var line_start = 4
 922
 923         redef fun emit(v) do v.decorator.add_code(v, self)
 924
 925         redef fun emit_lines(v) do
 926                 var line = block.first_line
 927                 while line != null do
 928                         if not line.is_empty then
 929                                 v.decorator.append_code(v, line.value, line_start, line.value.length)
 930                         end
 931                         v.addn
 932                         line = line.next
 933                 end
 934         end
 935 end
 936
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode` except for `line_start`,
# this class is mainly used for typing purposes.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
 947
 948 # A markdown headline.
 949 class BlockHeadline
 950         super Block
 951
 952         redef fun emit(v) do v.decorator.add_headline(v, self)
 953
 954         # Depth of the headline used to determine the headline level.
 955         var depth = 0
 956
 957         # Remove healine marks from lines contained in `self`.
 958         private fun transform_headline(block: MDBlock) do
 959                 if depth > 0 then return
 960                 var level = 0
 961                 var line = block.first_line
 962                 if line.is_empty then return
 963                 var start = line.leading
 964                 while start < line.value.length and line.value[start] == '#' do
 965                         level += 1
 966                         start += 1
 967                 end
 968                 while start < line.value.length and line.value[start] == ' ' do
 969                         start += 1
 970                 end
 971                 if start >= line.value.length then
 972                         line.is_empty = true
 973                 else
 974                         var nend = line.value.length - line.trailing - 1
 975                         while line.value[nend] == '#' do nend -= 1
 976                         while line.value[nend] == ' ' do nend -= 1
 977                         line.value = line.value.substring(start, nend - start + 1)
 978                         line.leading = 0
 979                         line.trailing = 0
 980                 end
 981                 depth = level.min(6)
 982         end
 983 end
 984
# A markdown list item block.
class BlockListItem
        super Block

        # Rendering is delegated to `v.decorator.add_listitem`.
        redef fun emit(v) do v.decorator.add_listitem(v, self)
end
 991
 992 # A markdown list block.
 993 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
 994 abstract class BlockList
 995         super Block
 996
 997         # Split list block into list items sub-blocks.
 998         private fun init_block(v: MarkdownProcessor) do
 999                 var line = block.first_line
1000                 line = line.next
1001                 while line != null do
1002                         var t = v.line_kind(line)
1003                         if t isa LineList or
1004                            (not line.is_empty and (line.prev_empty and line.leading == 0 and
1005                            not (t isa LineList))) then
1006                                    var sblock = block.split(line.prev.as(not null))
1007                                    sblock.kind = new BlockListItem(sblock)
1008                         end
1009                         line = line.next
1010                 end
1011                 var sblock = block.split(block.last_line.as(not null))
1012                 sblock.kind = new BlockListItem(sblock)
1013         end
1014
1015         # Expand list items as paragraphs if needed.
1016         private fun expand_paragraphs(block: MDBlock) do
1017                 var outer = block.first_block
1018                 var inner: nullable MDBlock
1019                 var has_paragraph = false
1020                 while outer != null and not has_paragraph do
1021                         if outer.kind isa BlockListItem then
1022                                 inner = outer.first_block
1023                                 while inner != null and not has_paragraph do
1024                                         if inner.kind isa BlockParagraph then
1025                                                 has_paragraph = true
1026                                         end
1027                                         inner = inner.next
1028                                 end
1029                         end
1030                         outer = outer.next
1031                 end
1032                 if has_paragraph then
1033                         outer = block.first_block
1034                         while outer != null do
1035                                 if outer.kind isa BlockListItem then
1036                                         inner = outer.first_block
1037                                         while inner != null do
1038                                                 if inner.kind isa BlockNone then
1039                                                         inner.kind = new BlockParagraph(inner)
1040                                                 end
1041                                                 inner = inner.next
1042                                         end
1043                                 end
1044                                 outer = outer.next
1045                         end
1046                 end
1047         end
1048 end
1049
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        # Rendering is delegated to `v.decorator.add_orderedlist`.
        redef fun emit(v) do v.decorator.add_orderedlist(v, self)
end
1056
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        # Rendering is delegated to `v.decorator.add_unorderedlist`.
        redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
end
1063
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Rendering is delegated to `v.decorator.add_paragraph`.
        redef fun emit(v) do v.decorator.add_paragraph(v, self)
end
1070
# A markdown ruler.
class BlockRuler
        super Block

        # Rendering is delegated to `v.decorator.add_ruler`.
        redef fun emit(v) do v.decorator.add_ruler(v, self)
end
1077
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Lines are emitted verbatim (no trimming), each followed by a new line.
        # Empty lines only produce the new line.
        redef fun emit_lines(v) do
                var cur = block.first_line
                while cur != null do
                        if not cur.is_empty then
                                v.add cur.value
                        end
                        v.addn
                        cur = cur.next
                end
        end
end
1091
# A markdown line.
class MDLine

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        var next_empty: Bool = false is writable

        # Initialize a new MDLine from its string value.
        #
        # `leading`, `trailing` and `is_empty` are computed from `value`.
        # A `value` made only of spaces is cleared (see `clear`).
        init(value: String) do
                self.value = value
                self.leading = process_leading
                # `process_leading` resets `self.value` to "" for blank lines,
                # so the attribute (not the parameter) must be checked here.
                if self.value.length > 0 then
                        self.is_empty = false
                        self.trailing = process_trailing
                end
        end

        # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
        #
        # The `next_empty` / `prev_empty` flags of the neighbours are updated too.
        fun clear do
                value = ""
                leading = 0
                trailing = 0
                is_empty = true
                if prev != null then prev.next_empty = true
                if next != null then next.prev_empty = true
        end

        # Number of leading spaces on this line.
        var leading: Int = 0 is writable

        # Compute `leading` depending on `value`.
        #
        # If `value` only contains spaces (or nothing), the line is cleared
        # and `0` is returned so the result stays consistent with the new `value`.
        fun process_leading: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[count] == ' ' do count += 1
                # Compare the freshly computed `count`, not the stale `leading`
                # attribute: it still describes the previous `value`.
                if count == value.length then
                        clear
                        return 0
                end
                return count
        end

        # Number of trailing spaces on this line.
        var trailing: Int = 0 is writable

        # Compute `trailing` depending on `value`.
        #
        # Return `value.length` for a `value` made only of spaces.
        fun process_trailing: Int do
                var count = 0
                var value = self.value
                # bounded so blank values cannot be indexed at `-1`
                while count < value.length and value[value.length - count - 1] == ' ' do
                        count += 1
                end
                return count
        end

        # Count the amount of `ch` in this line.
        # Return a value > 0 if this line only consists of `ch` and spaces.
        fun count_chars(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                                continue
                        end
                        # any other char disqualifies the whole line
                        count = 0
                        break
                end
                return count
        end

        # Count the amount of `ch` at the start of this line ignoring spaces.
        fun count_chars_start(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                        else
                                break
                        end
                end
                return count
        end

        # Last XML line if any.
        private var xml_end_line: nullable MDLine = null

        # Does `value` contain valid XML markup?
        #
        # On success `xml_end_line` is set to the line closing the markup.
        # XML comments are handled by `read_xml_comment`; other markup must
        # start with a tag accepted by `is_html_block`, and opened block tags
        # are tracked on a stack over this line and the following ones.
        private fun check_html: Bool do
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        # `hr` is handled as a single-line block
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        var line: nullable MDLine = self
                        while line != null do
                                # look for the next tag opening on the current line
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # end of line: a `/` two chars before the end closes the current tag
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                if tag.is_html_block and tag != "hr" then
                                                        if tmp[1] == '/' then
                                                                # closing tag: must match the last opened one
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end

        # Read a XML comment.
        # Used by `check_html`.
        #
        # `start` is the position of the opening `<` in `first_line`.
        # Return the position following the closing `-->` and set
        # `first_line.xml_end_line`, or return `-1` if no complete comment is found.
        private fun read_xml_comment(first_line: MDLine, start: Int): Int do
                var line: nullable MDLine = first_line
                if start + 3 < line.value.length then
                        # The dashes of `<!--` are relative to `start`,
                        # the comment may be preceded by leading spaces.
                        if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
                                var pos = start + 4
                                while line != null do
                                        while pos < line.value.length and line.value[pos] != '-' do
                                                pos += 1
                                        end
                                        if pos == line.value.length then
                                                line = line.next
                                                pos = 0
                                        else
                                                if pos + 2 < line.value.length then
                                                        if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
                                                                first_line.xml_end_line = line
                                                                return pos + 3
                                                        end
                                                end
                                                pos += 1
                                        end
                                end
                        end
                end
                return -1
        end

        # Extract the text of `self` without leading and trailing spaces.
        fun text: String do
                # `substring` expects a number of characters, not an end index.
                var count = (value.length - leading - trailing).max(0)
                return value.substring(leading, count)
        end
end
1293
# A kind of markdown line.
#
# Each implementation knows how to process the line `v.current_line`.
interface Line

        # Parse the line.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1301
# An empty markdown line.
class LineEmpty
        super Line

        # Empty lines produce no block: just move to the next line.
        redef fun process(v) do
                v.current_line = v.current_line.next
        end
end
1310
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines up to the next empty line or block-starting line,
        # split them from `v.current_block` and type the new block as a
        # `BlockParagraph` (or a `BlockNone` inside tight lists).
        redef fun process(v) do
                var cursor = v.current_line
                var was_empty = cursor.prev_empty
                # go to block end
                while cursor != null and not cursor.is_empty do
                        var t = v.line_kind(cursor)
                        if (v.in_list and t isa LineList) or
                           t isa LineCode or t isa LineFence or
                           t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
                           t isa LineHR or t isa LineBlockquote or t isa LineXML then
                                break
                        end
                        cursor = cursor.next
                end
                # build block
                var block: MDBlock
                var plain: Bool
                if cursor == null then
                        # ran out of lines: take everything left
                        block = v.current_block.split(v.current_block.last_line.as(not null))
                        plain = v.in_list and not was_empty
                else if cursor.is_empty then
                        # stopped on an empty line: it belongs to the block, always a paragraph
                        block = v.current_block.split(cursor)
                        plain = false
                else
                        # stopped on another construct: it stays in the current block
                        block = v.current_block.split(cursor.prev.as(not null))
                        plain = v.in_list and not was_empty
                end
                if plain then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1361
# A line of markdown code.
class LineCode
        super Line

        # Gather the following code lines (and the empty lines between them)
        # into a `BlockCode` split from `v.current_block`.
        redef fun process(v) do
                # lookup block end: first non-empty line that is not code
                var stop = v.current_line
                loop
                        if stop == null then break
                        if not stop.is_empty and not (v.line_kind(stop) isa LineCode) then break
                        stop = stop.next
                end
                # split at block end line
                var last: MDLine
                if stop != null then
                        last = stop.prev.as(not null)
                else
                        last = v.current_block.last_line.as(not null)
                end
                var block = v.current_block.split(last)
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1384
# A line of raw XML.
class LineXML
        super Line

        # Split the lines from the current one up to its `xml_end_line`
        # into a `BlockXML`. Lines located before are split apart first.
        redef fun process(v) do
                var first = v.current_line
                var before = first.prev
                if before != null then
                        v.current_block.split(before)
                end
                var last = first.xml_end_line.as(not null)
                var block = v.current_block.split(last)
                block.kind = new BlockXML(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1399
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the lines of the quote into a `BlockQuote`, strip the quote
        # markers and process the quoted content recursively.
        redef fun process(v) do
                var line = v.current_line
                # go to bquote end: a non-empty, non-indented line following an
                # empty one that is not itself a blockquote line
                while line != null do
                        if not line.is_empty and line.prev_empty and line.leading == 0 then
                                if not (v.line_kind(line) isa LineBlockquote) then break
                        end
                        line = line.next
                end
                # build sub block
                var last: MDLine
                if line != null then
                        last = line.prev.as(not null)
                else
                        last = v.current_block.last_line.as(not null)
                end
                var block = v.current_block.split(last)
                var kind = new BlockQuote(block)
                block.kind = kind
                block.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(block)
                v.current_line = line
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1429
# A markdown ruler line.
class LineHR
        super Line

        # Isolate the current line into a `BlockRuler`.
        # Lines located before are split apart first.
        redef fun process(v) do
                var line = v.current_line
                var before = line.prev
                if before != null then
                        v.current_block.split(before)
                end
                var block = v.current_block.split(line.as(not null))
                block.kind = new BlockRuler(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1443
# A markdown fence code line.
class LineFence
        super Line

        # Gather the lines up to the closing fence (or the end of the block)
        # into a `BlockFence`. The fence lines themselves are cleared.
        redef fun process(v) do
                # go to fence end
                var after = v.current_line.next
                while after != null and not (v.line_kind(after) isa LineFence) do
                        after = after.next
                end
                # step over the closing fence, if any
                if after != null then after = after.next
                # build fence block
                var last: MDLine
                if after != null then
                        last = after.prev.as(not null)
                else
                        last = v.current_block.last_line.as(not null)
                end
                var block = v.current_block.split(last)
                block.kind = new BlockFence(block)
                # drop the opening fence
                block.first_line.clear
                # drop the closing fence, which is missing when the block ended first
                var closing = block.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                block.remove_surrounding_empty_lines
                v.current_line = after
        end
end
1475
# A markdown headline.
class LineHeadline
        super Line

        # Isolate the current line into a `BlockHeadline` and strip its marks.
        # Lines located before are split apart first.
        redef fun process(v) do
                var line = v.current_line
                var before = line.prev
                if before != null then
                        v.current_block.split(before)
                end
                var block = v.current_block.split(line.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1492
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Isolate the current line into a `BlockHeadline` of depth 1.
        # The line that follows the title is cleared.
        redef fun process(v) do
                var line = v.current_line
                var before = line.prev
                if before != null then
                        v.current_block.split(before)
                end
                # drop the line following the title
                line.next.clear
                var block = v.current_block.split(line.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1511
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Isolate the current line into a `BlockHeadline` of depth 2.
        # The line that follows the title is cleared.
        redef fun process(v) do
                var line = v.current_line
                var before = line.prev
                if before != null then
                        v.current_block.split(before)
                end
                # drop the line following the title
                line.next.clear
                var block = v.current_block.split(line.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1530
# A markdown list line.
#
# Factorizes the processing shared by ordered and unordered lists.
class LineList
        super Line

        redef fun process(v) do
                # Walk forward to the first line that ends the list: a non-empty,
                # non-indented line that follows an empty line and is not itself
                # a list line.
                var stop = v.current_line
                while stop != null do
                        var t = v.line_kind(stop)
                        if not stop.is_empty then
                                if stop.prev_empty and stop.leading == 0 and not (t isa LineList) then break
                        end
                        stop = stop.next
                end
                # Split the list lines in a block of their own.
                var list: MDBlock
                if stop == null then
                        # The list runs until the end of the current block.
                        list = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        list = v.current_block.split(stop.prev.as(not null))
                end
                var lkind = block_kind(list)
                list.kind = lkind
                # The flags are reset before and after the trimming since the
                # first and last lines may change once empty lines are removed.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                lkind.init_block(v)
                # Process each sub-block of the list recursively.
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                lkind.expand_paragraphs(list)
                v.current_line = stop
        end

        # Create the block kind matching this kind of list line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract the string value of `line`.
        protected fun extract_value(line: MDLine): String is abstract
end
1576
# An ordered list line.
class LineOList
        super LineList

        # Ordered list lines produce `BlockOrderedList` blocks.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var dot = line.value.index_of('.')
                return line.value.substring_from(dot + 2)
        end
end
1587
# An unordered list line.
class LineUList
        super LineList

        # Unordered list lines produce `BlockUnorderedList` blocks.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading part of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1598
# A token represents a character of the markdown input.
#
# Subclasses redefine `emit` for the characters that carry a markup behaviour.
abstract class Token

        # Position of `self` in markdown input.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output this token with `v`.
        #
        # By default, the character is written as is.
        fun emit(v: MarkdownEmitter) do
                v.addc char
        end
end
1612
# A token without a specific meaning.
#
# `emit` is not redefined here, so the inherited `Token::emit` applies and
# the character is output as is.
class TokenNone
        super Token
end
1617
# An emphasis token.
#
# The text found up to the matching token is emitted through
# `Decorator::add_em`; when no match is found the character is output as is.
abstract class TokenEm
        super Token

        redef fun emit(v) do
                # Emit what follows in a separate buffer until a token of the
                # same kind is met.
                var content = v.push_buffer
                var close = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if close <= 0 then
                        # No matching token: this is a plain character.
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = close
        end
end
1634
# An emphasis star token.
#
# Created by `Text::token_at` for a `*` character.
class TokenEmStar
        super TokenEm
end
1639
# An emphasis underscore token.
#
# Created by `Text::token_at` for a `_` character.
class TokenEmUnderscore
        super TokenEm
end
1644
# A strong token.
#
# The text found up to the matching token is emitted through
# `Decorator::add_strong`; when no match is found the character is output as is.
abstract class TokenStrong
        super Token

        redef fun emit(v) do
                # The opening marker is two characters wide, hence `pos + 2`.
                var content = v.push_buffer
                var close = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if close <= 0 then
                        # No matching token: this is a plain character.
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                # Step over the second character of the closing marker too.
                v.current_pos = close + 1
        end
end
1661
# A strong star token.
#
# Created by `Text::token_at` for a `*` character followed by another `*`.
class TokenStrongStar
        super TokenStrong
end
1666
# A strong underscore token.
#
# Created by `Text::token_at` for a `_` character followed by another `_`.
class TokenStrongUnderscore
        super TokenStrong
end
1671
# A code token.
#
# Factorizes the work shared by single and double quoted span codes.
abstract class TokenCode
        super Token

        redef fun emit(v) do
                var lo = pos + next_pos + 1
                var hi = v.current_text.find_token(lo, self)
                if hi <= 0 then
                        # No closing token: this is a plain character.
                        v.addc char
                        return
                end
                v.current_pos = hi + next_pos
                # Ignore the spaces surrounding the code.
                while lo < hi and v.current_text[lo] == ' ' do lo += 1
                # Nothing is emitted for a span made of spaces only.
                if lo >= hi then return
                while v.current_text[hi - 1] == ' ' do hi -= 1
                v.decorator.add_span_code(v, v.current_text.as(not null), lo, hi)
        end

        # Number of extra characters in the marker (0 for a single one).
        private fun next_pos: Int is abstract
end
1694
# A span code token made of a single marker character.
class TokenCodeSingle
        super TokenCode

        redef fun next_pos do
                return 0
        end
end
1701
# A span code token made of a doubled marker character.
class TokenCodeDouble
        super TokenCode

        redef fun next_pos do
                return 1
        end
end
1708
# A link or image token.
#
# Factorizes the work shared by images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        # Not a valid link: output the character as is.
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the construct starting at `start` is a valid link.
        #
        # On success `name`, `link` and `comment` (and `is_abbrev` in the
        # end-of-text reference case) are set and the position of the last
        # character of the construct is returned.
        # Return `-1` if the construct is not a valid link.
        #
        # `out` is not used by the current implementation.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                # Skip the opening marker: one character for links, two for images.
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # End of text reached: only a link reference is possible.
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline link: `(address "title")`.
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # The `>` may be the last character of the text.
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # `skip_spaces` returns -1 at the end of the text.
                                if pos < start then return -1
                                if md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference link: `[name][id]`, `id` defaults to `name`.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Implicit reference: `[name]` alone.
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
1827
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Abbreviations that come with a title are emitted with
        # `Decorator::add_abbr`, anything else with `Decorator::add_link`.
        redef fun emit_hyper(v) do
                if not is_abbrev or comment == null then
                        v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
                else
                        v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
                end
        end
end
1840
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Images are emitted with `Decorator::add_image`.
        redef fun emit_hyper(v) do
                var target = link.as(not null)
                var label = name.as(not null)
                v.decorator.add_image(v, target, label, comment)
        end
end
1849
# A HTML/XML token.
class TokenHTML
        super Token

        redef fun emit(v) do
                var html = new FlatBuffer
                var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        # Not a valid HTML construct: escape the character.
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = stop
        end

        # Is the HTML valid?
        #
        # Auto links are emitted directly with `Decorator::add_link`;
        # inline HTML is appended to `out`.
        # Return the position of the end of the construct, or `-1`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var url = new FlatBuffer
                var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and url.is_link_prefix then
                        # Read the remainder of the address in the same buffer.
                        pos = md.read_until(url, pos, '>')
                        if pos != -1 then
                                var link = url.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
1887
# An HTML entity token.
class TokenEntity
        super Token

        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        # Not a valid entity: escape the character.
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` until the next `;` and check that the text read
        # is a decimal (`&#123`), hexadecimal (`&#x1F`) or named (`&amp`) entity.
        # On success the entity, `;` included, is in `out` and the position of
        # the `;` is returned. Return `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # At least one digit is expected after the `x`.
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
1941
# A markdown escape token.
class TokenEscape
        super Token

        # Skip the escape character and output the character that follows it.
        redef fun emit(v) do
                var escaped_at = v.current_pos + 1
                v.current_pos = escaped_at
                v.addc v.current_text[escaped_at]
        end
end
1951
# A markdown super token.
#
# The text found up to the matching token is emitted through
# `Decorator::add_super`; when no match is found the character is output as is.
class TokenSuper
        super Token

        redef fun emit(v) do
                # Emit what follows in a separate buffer until a token of the
                # same kind is met.
                var content = v.push_buffer
                var close = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if close <= 0 then
                        # No matching token: this is a plain character.
                        v.addc char
                        return
                end
                v.decorator.add_super(v, content)
                v.current_pos = close
        end
end
1968
1969 redef class Text
1970
1971         # Get the token kind at `pos`.
1972         private fun token_at(pos: Int): Token do
1973                 var c0: Char
1974                 var c1: Char
1975                 var c2: Char
1976                 var c3: Char
1977
1978                 if pos > 0 then
1979                         c0 = self[pos - 1]
1980                 else
1981                         c0 = ' '
1982                 end
1983                 var c = self[pos]
1984
1985                 if pos + 1 < length then
1986                         c1 = self[pos + 1]
1987                 else
1988                         c1 = ' '
1989                 end
1990                 if pos + 2 < length then
1991                         c2 = self[pos + 2]
1992                 else
1993                         c2 = ' '
1994                 end
1995                 if pos + 3 < length then
1996                         c3 = self[pos + 3]
1997                 else
1998                         c3 = ' '
1999                 end
2000
2001                 if c == '*' then
2002                         if c1 == '*' then
2003                                 if c0 != ' ' or c2 != ' ' then
2004                                         return new TokenStrongStar(pos, c)
2005                                 else
2006                                         return new TokenEmStar(pos, c)
2007                                 end
2008                         end
2009                         if c0 != ' ' or c1 != ' ' then
2010                                 return new TokenEmStar(pos, c)
2011                         else
2012                                 return new TokenNone(pos, c)
2013                         end
2014                 else if c == '_' then
2015                         if c1 == '_' then
2016                                 if c0 != ' ' or c2 != ' 'then
2017                                         return new TokenStrongUnderscore(pos, c)
2018                                 else
2019                                         return new TokenEmUnderscore(pos, c)
2020                                 end
2021                         end
2022                         if c0 != ' ' or c1 != ' ' then
2023                                 return new TokenEmUnderscore(pos, c)
2024                         else
2025                                 return new TokenNone(pos, c)
2026                         end
2027                 else if c == '!' then
2028                         if c1 == '[' then return new TokenImage(pos, c)
2029                         return new TokenNone(pos, c)
2030                 else if c == '[' then
2031                         return new TokenLink(pos, c)
2032                 else if c == ']' then
2033                         return new TokenNone(pos, c)
2034                 else if c == '`' then
2035                         if c1 == '`' then
2036                                 return new TokenCodeDouble(pos, c)
2037                         else
2038                                 return new TokenCodeSingle(pos, c)
2039                         end
2040                 else if c == '\\' then
2041                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
2042                                 return new TokenEscape(pos, c)
2043                         else
2044                                 return new TokenNone(pos, c)
2045                         end
2046                 else if c == '<' then
2047                         return new TokenHTML(pos, c)
2048                 else if c == '&' then
2049                         return new TokenEntity(pos, c)
2050                 else if c == '^' then
2051                         if c0 == '^' or c1 == '^' then
2052                                 return new TokenNone(pos, c)
2053                         else
2054                                 return new TokenSuper(pos, c)
2055                         end
2056                 else
2057                         return new TokenNone(pos, c)
2058                 end
2059         end
2060
2061         # Find the position of a `token` in `self`.
2062         private fun find_token(start: Int, token: Token): Int do
2063                 var pos = start
2064                 while pos < length do
2065                         if token_at(pos).is_same_type(token) then
2066                                 return pos
2067                         end
2068                         pos += 1
2069                 end
2070                 return -1
2071         end
2072
2073         # Get the position of the next non-space character.
2074         private fun skip_spaces(start: Int): Int do
2075                 var pos = start
2076                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2077                         pos += 1
2078                 end
2079                 if pos < length then return pos
2080                 return -1
2081         end
2082
2083         # Read `self` until `nend` and append it to the `out` buffer.
2084         # Escape markdown special chars.
2085         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2086                 var pos = start
2087                 while pos < length do
2088                         var c = self[pos]
2089                         if c == '\\' and pos + 1 < length then
2090                                 pos = escape(out, self[pos + 1], pos)
2091                         else
2092                                 var end_reached = false
2093                                 for n in nend do
2094                                         if c == n then
2095                                                 end_reached = true
2096                                                 break
2097                                         end
2098                                 end
2099                                 if end_reached then break
2100                                 out.add c
2101                         end
2102                         pos += 1
2103                 end
2104                 if pos == length then return -1
2105                 return pos
2106         end
2107
2108         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2109         # No escape is made.
2110         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2111                 var pos = start
2112                 while pos < length do
2113                         var c = self[pos]
2114                         var end_reached = false
2115                         for n in nend do
2116                                 if c == n then
2117                                         end_reached = true
2118                                         break
2119                                 end
2120                         end
2121                         if end_reached then break
2122                         out.add c
2123                         pos += 1
2124                 end
2125                 if pos == length then return -1
2126                 return pos
2127         end
2128
2129         # Read `self` as XML until `to` and append it to the `out` buffer.
2130         # Escape HTML special chars.
2131         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2132                 var pos = from
2133                 var in_str = false
2134                 var str_char: nullable Char = null
2135                 while pos < length do
2136                         var c = self[pos]
2137                         if in_str then
2138                                 if c == '\\' then
2139                                         out.add c
2140                                         pos += 1
2141                                         if pos < length then
2142                                                 out.add c
2143                                                 pos += 1
2144                                         end
2145                                         continue
2146                                 end
2147                                 if c == str_char then
2148                                         in_str = false
2149                                         out.add c
2150                                         pos += 1
2151                                         continue
2152                                 end
2153                         end
2154                         if c == '"' or c == '\'' then
2155                                 in_str = true
2156                                 str_char = c
2157                         end
2158                         if not in_str then
2159                                 var end_reached = false
2160                                 for n in [0..to.length[ do
2161                                         if c == to[n] then
2162                                                 end_reached = true
2163                                                 break
2164                                         end
2165                                 end
2166                                 if end_reached then break
2167                         end
2168                         out.add c
2169                         pos += 1
2170                 end
2171                 if pos == length then return -1
2172                 return pos
2173         end
2174
2175         # Read `self` as XML and append it to the `out` buffer.
2176         # Safe mode can be activated to limit reading to valid xml.
2177         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2178                 var pos = 0
2179                 var is_close_tag = false
2180                 if start + 1 >= length then return -1
2181                 if self[start + 1] == '/' then
2182                         is_close_tag = true
2183                         pos = start + 2
2184                 else if self[start + 1] == '!' then
2185                         out.append "<!"
2186                         return start + 1
2187                 else
2188                         is_close_tag = false
2189                         pos = start + 1
2190                 end
2191                 if safe_mode then
2192                         var tmp = new FlatBuffer
2193                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2194                         if pos == -1 then return -1
2195                         var tag = tmp.write_to_string.trim.to_lower
2196                         if tag.is_html_unsafe then
2197                                 out.append "&lt;"
2198                                 if is_close_tag then out.add '/'
2199                                 out.append tmp
2200                         else
2201                                 out.append "<"
2202                                 if is_close_tag then out.add '/'
2203                                 out.append tmp
2204                         end
2205                 else
2206                         out.add '<'
2207                         if is_close_tag then out.add '/'
2208                         pos = read_xml_until(out, pos, ' ', '/', '>')
2209                 end
2210                 if pos == -1 then return -1
2211                 pos = read_xml_until(out, pos, '/', '>')
2212                 if pos == -1 then return -1
2213                 if self[pos] == '/' then
2214                         out.append " /"
2215                         pos = self.read_xml_until(out, pos + 1, '>')
2216                         if pos == -1 then return -1
2217                 end
2218                 if self[pos] == '>' then
2219                         out.add '>'
2220                         return pos
2221                 end
2222                 return -1
2223         end
2224
2225         # Read a markdown link address and append it to the `out` buffer.
2226         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2227                 var pos = start
2228                 var counter = 1
2229                 while pos < length do
2230                         var c = self[pos]
2231                         if c == '\\' and pos + 1 < length then
2232                                 pos = escape(out, self[pos + 1], pos)
2233                         else
2234                                 var end_reached = false
2235                                 if c == '(' then
2236                                         counter += 1
2237                                 else if c == ' ' then
2238                                         if counter == 1 then end_reached = true
2239                                 else if c == ')' then
2240                                         counter -= 1
2241                                         if counter == 0 then end_reached = true
2242                                 end
2243                                 if end_reached then break
2244                                 out.add c
2245                         end
2246                         pos += 1
2247                 end
2248                 if pos == length then return -1
2249                 return pos
2250         end
2251
2252         # Read a markdown link text and append it to the `out` buffer.
2253         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2254                 var pos = start
2255                 var counter = 1
2256                 while pos < length do
2257                         var c = self[pos]
2258                         var end_reached = false
2259                         if c == '[' then
2260                                 counter += 1
2261                                 out.add c
2262                         else if c == ']' then
2263                                 counter -= 1
2264                                 if counter == 0 then
2265                                         end_reached = true
2266                                 else
2267                                         out.add c
2268                                 end
2269                         else
2270                                 out.add c
2271                         end
2272                         if end_reached then break
2273                         pos += 1
2274                 end
2275                 if pos == length then return -1
2276                 return pos
2277         end
2278
2279         # Extract the XML tag name from a XML tag.
2280         private fun xml_tag: String do
2281                 var tpl = new FlatBuffer
2282                 var pos = 1
2283                 if pos < length and self[1] == '/' then pos += 1
2284                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2285                         tpl.add self[pos]
2286                         pos += 1
2287                 end
2288                 return tpl.write_to_string.to_lower
2289         end
2290
2291         # Read and escape the markdown contained in `self`.
2292         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2293                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2294                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2295                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2296                    c == '`' or c == '~' or c == '^' then
2297                         out.add c
2298                         return pos + 1
2299                 end
2300                 out.add '\\'
2301                 return pos
2302         end
2303
2304         # Is `self` an unsafe HTML element?
2305         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2306
2307         # Is `self` a HRML block element?
2308         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2309
2310         # Is `self` a link prefix?
2311         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2312
2313         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2314
2315         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2316
2317         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2318 end
2319
redef class String

        # Parse `self` as markdown and return its HTML representation.
        #
        # A new `MarkdownProcessor` is created for each call.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Streamable do return (new MarkdownProcessor).process(self)
end