lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
   22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         var emitter: MarkdownEmitter is noinit
  34
  35         init do self.emitter = new MarkdownEmitter(self)
  36
  37         # Process the mardown `input` string and return the processed output.
  38         fun process(input: String): Streamable do
  39                 # init processor
  40                 link_refs.clear
  41                 last_link_ref = null
  42                 current_line = null
  43                 current_block = null
  44                 # parse markdown
  45                 var parent = read_lines(input)
  46                 parent.remove_surrounding_empty_lines
  47                 recurse(parent, false)
  48                 # output processed text
  49                 return emitter.emit(parent.kind)
  50         end
  51
  52         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
  53         private fun read_lines(input: String): MDBlock do
  54                 var block = new MDBlock
  55                 var value = new FlatBuffer
  56                 var i = 0
  57                 while i < input.length do
  58                         value.clear
  59                         var pos = 0
  60                         var eol = false
  61                         while not eol and i < input.length do
  62                                 var c = input[i]
  63                                 if c == '\n' then
  64                                         i += 1
  65                                         eol = true
  66                                 else if c == '\t' then
  67                                         var np = pos + (4 - (pos.bin_and(3)))
  68                                         while pos < np do
  69                                                 value.add ' '
  70                                                 pos += 1
  71                                         end
  72                                         i += 1
  73                                 else
  74                                         pos += 1
  75                                         value.add c
  76                                         i += 1
  77                                 end
  78                         end
  79
  80                         var line = new MDLine(value.write_to_string)
  81                         var is_link_ref = check_link_ref(line)
  82                         # Skip link refs
  83                         if not is_link_ref then block.add_line line
  84                 end
  85                 return block
  86         end
  87
  88         # Check if line is a block link definition.
  89         # Return `true` if line contains a valid link ref and save it into `link_refs`.
  90         private fun check_link_ref(line: MDLine): Bool do
  91                 var md = line.value
  92                 var is_link_ref = false
  93                 var id = new FlatBuffer
  94                 var link = new FlatBuffer
  95                 var comment = new FlatBuffer
  96                 var pos = -1
  97                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
  98                         pos = line.leading + 1
  99                         pos = md.read_until(id, pos, ']')
 100                         if not id.is_empty and pos + 2 < line.value.length then
 101                                 if line.value[pos + 1] == ':' then
 102                                         pos += 2
 103                                         pos = md.skip_spaces(pos)
 104                                         if line.value[pos] == '<' then
 105                                                 pos += 1
 106                                                 pos = md.read_until(link, pos, '>')
 107                                                 pos += 1
 108                                         else
 109                                                 pos = md.read_until(link, pos, ' ', '\n')
 110                                         end
 111                                         if not link.is_empty then
 112                                                 pos = md.skip_spaces(pos)
 113                                                 if pos > 0 and pos < line.value.length then
 114                                                         var c = line.value[pos]
 115                                                         if c == '\"' or c == '\'' or c == '(' then
 116                                                                 pos += 1
 117                                                                 if c == '(' then
 118                                                                         pos = md.read_until(comment, pos, ')')
 119                                                                 else
 120                                                                         pos = md.read_until(comment, pos, c)
 121                                                                 end
 122                                                                 if pos > 0 then is_link_ref = true
 123                                                         end
 124                                                 else
 125                                                         is_link_ref = true
 126                                                 end
 127                                         end
 128                                 end
 129                         end
 130                 end
 131                 if is_link_ref and not id.is_empty and not link.is_empty then
 132                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 133                         add_link_ref(id.write_to_string, lr)
 134                         if comment.is_empty then last_link_ref = lr
 135                         return true
 136                 else
 137                         comment = new FlatBuffer
 138                         if not line.is_empty and last_link_ref != null then
 139                                 pos = line.leading
 140                                 var c = line.value[pos]
 141                                 if c == '\"' or c == '\'' or c ==  '(' then
 142                                         pos += 1
 143                                         if c == '(' then
 144                                                 pos = md.read_until(comment, pos, ')')
 145                                         else
 146                                                 pos = md.read_until(comment, pos, c)
 147                                         end
 148                                 end
 149                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 150                         end
 151                         if comment.is_empty then return false
 152                         return true
 153                 end
 154         end
 155
 156         # Known link refs
 157         # This list will be needed during output to expand links.
 158         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 159
 160         # Last encountered link ref (for multiline definitions)
 161         #
 162         # Markdown allows link refs to be defined over two lines:
 163         #
 164         #       [id]: http://example.com/longish/path/to/resource/here
 165         #               "Optional Title Here"
 166         #
 167         private var last_link_ref: nullable LinkRef = null
 168
 169         # Add a link ref to the list
 170         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 171
 172         # Recursively split a `block`.
 173         #
 174         # The block is splitted according to the type of lines it contains.
 175         # Some blocks can be splited again recursively like lists.
 176         # The `in_list` mode is used to recurse on list and build
 177         # nested paragraphs or code blocks.
 178         fun recurse(root: MDBlock, in_list: Bool) do
 179                 var old_mode = self.in_list
 180                 var old_root = self.current_block
 181                 self.in_list = in_list
 182
 183                 var line = root.first_line
 184                 while line != null and line.is_empty do
 185                         line = line.next
 186                         if line == null then return
 187                 end
 188
 189                 current_line = line
 190                 current_block = root
 191                 while current_line != null do
 192                         current_line.kind(self).process(self)
 193                 end
 194                 self.in_list = old_mode
 195                 self.current_block = old_root
 196         end
 197
 198         # Currently processed line.
 199         # Used when visiting blocks with `recurse`.
 200         var current_line: nullable MDLine = null is writable
 201
 202         # Currently processed block.
 203         # Used when visiting blocks with `recurse`.
 204         var current_block: nullable MDBlock = null is writable
 205
 206         # Is the current recursion in list mode?
 207         # Used when visiting blocks with `recurse`
 208         private var in_list = false
 209 end
 210
 211 # Emit output corresponding to blocks content.
 212 #
 213 # Blocks are created by a previous pass in `MarkdownProcessor`.
 214 # The emitter use a `Decorator` to select the output format.
 215 class MarkdownEmitter
 216
 217         # Processor containing link refs.
 218         var processor: MarkdownProcessor
 219
 220         # Decorator used for output.
 221         # Default is `HTMLDecorator`
 222         var decorator: Decorator = new HTMLDecorator is writable
 223
 224         # Create a new `MardownEmitter` using the default `HTMLDecorator`
 225         init(processor: MarkdownProcessor) do
 226                 self.processor = processor
 227         end
 228
 229         # Create a new `MarkdownEmitter` using a custom `decorator`.
 230         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 231                 init processor
 232                 self.decorator = decorator
 233         end
 234
 235         # Output `block` using `decorator` in the current buffer.
 236         fun emit(block: Block): Text do
 237                 var buffer = push_buffer
 238                 block.emit(self)
 239                 pop_buffer
 240                 return buffer
 241         end
 242
 243         # Output the content of `block`.
 244         fun emit_in(block: Block) do block.emit_in(self)
 245
 246         # Transform and emit mardown text
 247         fun emit_text(text: Text) do
 248                 emit_text_until(text, 0, null)
 249         end
 250
 251         # Transform and emit mardown text starting at `from` and
 252         # until a token with the same type as `token` is found.
 253         # Go until the end of text if `token` is null.
 254         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 255                 var old_text = current_text
 256                 var old_pos = current_pos
 257                 current_text = text
 258                 current_pos = start
 259                 while current_pos < text.length do
 260                         var mt = text.token_at(current_pos)
 261                         if (token != null and not token isa TokenNone) and
 262                         (mt.is_same_type(token) or
 263                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 264                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 265                                 return current_pos
 266                         end
 267                         mt.emit(self)
 268                         current_pos += 1
 269                 end
 270                 current_text = old_text
 271                 current_pos = old_pos
 272                 return -1
 273         end
 274
 275         # Currently processed position in `current_text`.
 276         # Used when visiting inline production with `emit_text_until`.
 277         private var current_pos: Int = -1
 278
 279         # Currently processed text.
 280         # Used when visiting inline production with `emit_text_until`.
 281         private var current_text: nullable Text = null
 282
 283         # Stacked buffers.
 284         private var buffer_stack = new List[FlatBuffer]
 285
 286         # Push a new buffer on the stack.
 287         private fun push_buffer: FlatBuffer do
 288                 var buffer = new FlatBuffer
 289                 buffer_stack.add buffer
 290                 return buffer
 291         end
 292
 293         # Pop the last buffer.
 294         private fun pop_buffer do buffer_stack.pop
 295
 296         # Current output buffer.
 297         private fun current_buffer: FlatBuffer do
 298                 assert not buffer_stack.is_empty
 299                 return buffer_stack.last
 300         end
 301
 302         # Append `e` to current buffer.
 303         fun add(e: Streamable) do
 304                 if e isa Text then
 305                         current_buffer.append e
 306                 else
 307                         current_buffer.append e.write_to_string
 308                 end
 309         end
 310
 311         # Append `c` to current buffer.
 312         fun addc(c: Char) do current_buffer.add c
 313
 314         # Append a "\n" line break.
 315         fun addn do current_buffer.add '\n'
 316 end
 317
 318 # A Link Reference.
 319 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 320 #
 321 # Example:
 322 #
 323 #    [1]: http://example.com/ "Optional title"
 324 class LinkRef
 325
 326         # Link href
 327         var link: String
 328
 329         # Optional link title
 330         var title: nullable String = null
 331
 332         # Is the link an abreviation?
 333         var is_abbrev = false
 334
 335         init with_title(link: String, title: nullable String) do
 336                 self.link = link
 337                 self.title = title
 338         end
 339 end
 340
  341 # A `Decorator` is used to emit markdown into a specific output format.
  342 # `MarkdownEmitter` uses an `HTMLDecorator` by default.
  343 interface Decorator
  344
  345         # Render the ruler `block` with the emitter `v`.
  346         fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
  347
  348         # Render the headline `block` with its corresponding level.
  349         fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
  350
  351         # Render the paragraph `block`.
  352         fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
  353
  354         # Render the code or fence `block`.
  355         fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
  356
  357         # Render the blockquote `block`.
  358         fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
  359
  360         # Render the unordered list `block`.
  361         fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
  362
  363         # Render the ordered list `block`.
  364         fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
  365
  366         # Render the list item `block`.
  367         fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
  368
  369         # Render `text` as an emphasis.
  370         fun add_em(v: MarkdownEmitter, text: Text) is abstract
  371
  372         # Render `text` as a strong emphasis.
  373         fun add_strong(v: MarkdownEmitter, text: Text) is abstract
  374
  375         # Render `text` as a superscript.
  376         fun add_super(v: MarkdownEmitter, text: Text) is abstract
  377
  378         # Render a link to `link` labeled `name`, with `comment` as optional title.
  379         fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
  380
  381         # Render the image at `link` described by `name`, with `comment` as optional title.
  382         fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
  383
  384         # Render the abbreviation `name` explained by `comment`.
  385         fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
  386
  387         # Render a code span with the chars of `buffer` between `from` and `to`.
  388         fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
  389
  390         # Render the text `value` and escape it.
  391         fun append_value(v: MarkdownEmitter, value: Text) is abstract
  392
  393         # Render the code chars of `buffer` between `from` and `to` and escape them.
  394         fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
  395
  396         # Render `char`, escaped when the output format requires it.
  397         fun escape_char(v: MarkdownEmitter, char: Char) is abstract
  398
  399         # Render a line break.
  400         fun add_line_break(v: MarkdownEmitter) is abstract
  401
  402         # Generate a new valid HTML id from `txt`.
  403         fun strip_id(txt: String): String is abstract
  404
  405         # Headlines found during the processing, indexed by their ids.
  406         fun headlines: ArrayMap[String, HeadLine] is abstract
  407 end
 408
  409 # A markdown headline, as recorded by a `Decorator` in its `headlines`.
  410 class HeadLine
  411         # Unique identifier of this headline.
  412         var id: String
  413
  414         # Text of the headline.
  415         var title: String
  416
  417         # Level of this headline.
  418         #
  419         # According to the markdown specification, level must be in `[1..6]`.
  420         var level: Int
  421 end
 422
 423 # `Decorator` that outputs HTML.
 424 class HTMLDecorator
 425         super Decorator
 426
 427         redef var headlines = new ArrayMap[String, HeadLine]
 428
 429         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 430
 431         redef fun add_headline(v, block) do
 432                 # save headline
 433                 var txt = block.block.first_line.value
 434                 var id = strip_id(txt)
 435                 var lvl = block.depth
 436                 headlines[id] = new HeadLine(id, txt, lvl)
 437                 # output it
 438                 v.add "<h{lvl} id=\"{id}\">"
 439                 v.emit_in block
 440                 v.add "</h{lvl}>\n"
 441         end
 442
 443         redef fun add_paragraph(v, block) do
 444                 v.add "<p>"
 445                 v.emit_in block
 446                 v.add "</p>\n"
 447         end
 448
 449         redef fun add_code(v, block) do
 450                 v.add "<pre><code>"
 451                 v.emit_in block
 452                 v.add "</code></pre>\n"
 453         end
 454
 455         redef fun add_blockquote(v, block) do
 456                 v.add "<blockquote>\n"
 457                 v.emit_in block
 458                 v.add "</blockquote>\n"
 459         end
 460
 461         redef fun add_unorderedlist(v, block) do
 462                 v.add "<ul>\n"
 463                 v.emit_in block
 464                 v.add "</ul>\n"
 465         end
 466
 467         redef fun add_orderedlist(v, block) do
 468                 v.add "<ol>\n"
 469                 v.emit_in block
 470                 v.add "</ol>\n"
 471         end
 472
 473         redef fun add_listitem(v, block) do
 474                 v.add "<li>"
 475                 v.emit_in block
 476                 v.add "</li>\n"
 477         end
 478
 479         redef fun add_em(v, text) do
 480                 v.add "<em>"
 481                 v.add text
 482                 v.add "</em>"
 483         end
 484
 485         redef fun add_strong(v, text) do
 486                 v.add "<strong>"
 487                 v.add text
 488                 v.add "</strong>"
 489         end
 490
 491         redef fun add_super(v, text) do
 492                 v.add "<sup>"
 493                 v.add text
 494                 v.add "</sup>"
 495         end
 496
 497         redef fun add_image(v, link, name, comment) do
 498                 v.add "<img src=\""
 499                 append_value(v, link)
 500                 v.add "\" alt=\""
 501                 append_value(v, name)
 502                 v.add "\""
 503                 if comment != null and not comment.is_empty then
 504                         v.add " title=\""
 505                         append_value(v, comment)
 506                         v.add "\""
 507                 end
 508                 v.add "/>"
 509         end
 510
 511         redef fun add_link(v, link, name, comment) do
 512                 v.add "<a href=\""
 513                 append_value(v, link)
 514                 v.add "\""
 515                 if comment != null and not comment.is_empty then
 516                         v.add " title=\""
 517                         append_value(v, comment)
 518                         v.add "\""
 519                 end
 520                 v.add ">"
 521                 v.emit_text(name)
 522                 v.add "</a>"
 523         end
 524
 525         redef fun add_abbr(v, name, comment) do
 526                 v.add "<abbr title=\""
 527                 append_value(v, comment)
 528                 v.add "\">"
 529                 v.emit_text(name)
 530                 v.add "</abbr>"
 531         end
 532
 533         redef fun add_span_code(v, text, from, to) do
 534                 v.add "<code>"
 535                 append_code(v, text, from, to)
 536                 v.add "</code>"
 537         end
 538
 539         redef fun add_line_break(v) do
 540                 v.add "<br/>"
 541         end
 542
 543         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 544
 545         redef fun escape_char(v, c) do
 546                 if c == '&' then
 547                         v.add "&amp;"
 548                 else if c == '<' then
 549                         v.add "&lt;"
 550                 else if c == '>' then
 551                         v.add "&gt;"
 552                 else if c == '"' then
 553                         v.add "&quot;"
 554                 else if c == '\'' then
 555                         v.add "&apos;"
 556                 else
 557                         v.addc c
 558                 end
 559         end
 560
 561         redef fun append_code(v, buffer, from, to) do
 562                 for i in [from..to[ do
 563                         var c = buffer[i]
 564                         if c == '&' then
 565                                 v.add "&amp;"
 566                         else if c == '<' then
 567                                 v.add "&lt;"
 568                         else if c == '>' then
 569                                 v.add "&gt;"
 570                         else
 571                                 v.addc c
 572                         end
 573                 end
 574         end
 575
 576         redef fun strip_id(txt) do
 577                 # strip id
 578                 var b = new FlatBuffer
 579                 for c in txt do
 580                         if c == ' ' then
 581                                 b.add '_'
 582                         else
 583                                 if not c.is_letter and
 584                                    not c.is_digit and
 585                                    not allowed_id_chars.has(c) then continue
 586                                 b.add c
 587                         end
 588                 end
 589                 var res = b.to_s
 590                 var key = res
 591                 # check for multiple id definitions
 592                 if headlines.has_key(key) then
 593                         var i = 1
 594                         key = "{res}_{i}"
 595                         while headlines.has_key(key) do
 596                                 i += 1
 597                                 key = "{res}_{i}"
 598                         end
 599                 end
 600                 return key
 601         end
 602
 603         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 604 end
 605
 606 # A block of markdown lines.
 607 # A `MDBlock` can contains lines and/or sub-blocks.
 608 class MDBlock
 609         # Kind of block.
 610         # See `Block`.
 611         var kind: Block = new BlockNone(self) is writable
 612
 613         # First line if any.
 614         var first_line: nullable MDLine = null is writable
 615
 616         # Last line if any.
 617         var last_line: nullable MDLine = null is writable
 618
 619         # First sub-block if any.
 620         var first_block: nullable MDBlock = null is writable
 621
 622         # Last sub-block if any.
 623         var last_block: nullable MDBlock = null is writable
 624
 625         # Previous block if any.
 626         var prev: nullable MDBlock = null is writable
 627
 628         # Next block if any.
 629         var next: nullable MDBlock = null is writable
 630
 631         # Does this block contain subblocks?
 632         fun has_blocks: Bool do return first_block != null
 633
 634         # Count sub-blocks.
 635         fun count_blocks: Int do
 636                 var count = 0
 637                 var block = first_block
 638                 while block != null do
 639                         count += 1
 640                         block = block.next
 641                 end
 642                 return count
 643         end
 644
 645         # Does this block contain lines?
 646         fun has_lines: Bool do return first_line != null
 647
 648         # Count block lines.
 649         fun count_lines: Int do
 650                 var count = 0
 651                 var line = first_line
 652                 while line != null do
 653                         count += 1
 654                         line = line.next
 655                 end
 656                 return count
 657         end
 658
 659         # Split `self` creating a new sub-block having `line` has `last_line`.
 660         fun split(line: MDLine): MDBlock do
 661                 var block = new MDBlock
 662                 block.first_line = first_line
 663                 block.last_line = line
 664                 first_line = line.next
 665                 line.next = null
 666                 if first_line == null then
 667                         last_line = null
 668                 else
 669                         first_line.prev = null
 670                 end
 671                 if first_block == null then
 672                         first_block = block
 673                         last_block = block
 674                 else
 675                         last_block.next = block
 676                         last_block = block
 677                 end
 678                 return block
 679         end
 680
 681         # Add a `line` to this block.
 682         fun add_line(line: MDLine) do
 683                 if last_line == null then
 684                         first_line = line
 685                         last_line = line
 686                 else
 687                         last_line.next_empty = line.is_empty
 688                         line.prev_empty = last_line.is_empty
 689                         line.prev = last_line
 690                         last_line.next = line
 691                         last_line = line
 692                 end
 693         end
 694
 695         # Remove `line` from this block.
 696         fun remove_line(line: MDLine) do
 697                 if line.prev == null then
 698                         first_line = line.next
 699                 else
 700                         line.prev.next = line.next
 701                 end
 702                 if line.next == null then
 703                         last_line = line.prev
 704                 else
 705                         line.next.prev = line.prev
 706                 end
 707                 line.prev = null
 708                 line.next = null
 709         end
 710
 711         # Remove leading empty lines.
 712         fun remove_leading_empty_lines: Bool do
 713                 var was_empty = false
 714                 var line = first_line
 715                 while line != null and line.is_empty do
 716                         remove_line line
 717                         line = first_line
 718                         was_empty = true
 719                 end
 720                 return was_empty
 721         end
 722
 723         # Remove trailing empty lines.
 724         fun remove_trailing_empty_lines: Bool do
 725                 var was_empty = false
 726                 var line = last_line
 727                 while line != null and line.is_empty do
 728                         remove_line line
 729                         line = last_line
 730                         was_empty = true
 731                 end
 732                 return was_empty
 733         end
 734
 735         # Remove leading and trailing empty lines.
 736         fun remove_surrounding_empty_lines: Bool do
 737                 var was_empty = false
 738                 if remove_leading_empty_lines then was_empty = true
 739                 if remove_trailing_empty_lines then was_empty = true
 740                 return was_empty
 741         end
 742
 743         # Remove list markers and up to 4 leading spaces.
 744         # Used to clean nested lists.
 745         fun remove_list_indent(v: MarkdownProcessor) do
 746                 var line = first_line
 747                 while line != null do
 748                         if not line.is_empty then
 749                                 var kind = line.kind(v)
 750                                 if kind isa LineList then
 751                                         line.value = kind.extract_value(line)
 752                                 else
 753                                         line.value = line.value.substring_from(line.leading.min(4))
 754                                 end
 755                                 line.leading = line.process_leading
 756                         end
 757                         line = line.next
 758                 end
 759         end
 760
 761         # Collect block line text.
 762         fun text: String do
 763                 var text = new FlatBuffer
 764                 var line = first_line
 765                 while line != null do
 766                         if not line.is_empty then
 767                                 text.append line.text
 768                         end
 769                         text.append "\n"
 770                         line = line.next
 771                 end
 772                 return text.write_to_string
 773         end
 774 end
 775
 776 # Representation of a markdown block in the AST.
 777 # Each `Block` is linked to a `MDBlock` that contains mardown code.
 778 abstract class Block
 779
 780         # The markdown block `self` is related to.
 781         var block: MDBlock
 782
 783         # Output `self` using `v.decorator`.
 784         fun emit(v: MarkdownEmitter) do v.emit_in(self)
 785
 786         # Emit the containts of `self`, lines or blocks.
 787         fun emit_in(v: MarkdownEmitter) do
 788                 block.remove_surrounding_empty_lines
 789                 if block.has_lines then
 790                         emit_lines(v)
 791                 else
 792                         emit_blocks(v)
 793                 end
 794         end
 795
 796         # Emit lines contained in `block`.
 797         fun emit_lines(v: MarkdownEmitter) do
 798                 var tpl = v.push_buffer
 799                 var line = block.first_line
 800                 while line != null do
 801                         if not line.is_empty then
 802                                 v.add line.value.substring(line.leading, line.value.length - line.trailing)
 803                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
 804                         end
 805                         if line.next != null then
 806                                 v.addn
 807                         end
 808                         line = line.next
 809                 end
 810                 v.pop_buffer
 811                 v.emit_text(tpl)
 812         end
 813
 814         # Emit sub-blocks contained in `block`.
 815         fun emit_blocks(v: MarkdownEmitter) do
 816                 var block = self.block.first_block
 817                 while block != null do
 818                         block.kind.emit(v)
 819                         block = block.next
 820                 end
 821         end
 822 end
 823
 824 # A block without any markdown specificities.
 825 #
 826 # Actually use the same implementation than `BlockCode`,
 827 # this class is only used for typing purposes.
 828 class BlockNone
 829         super Block
 830 end
 831
 832 # A markdown blockquote.
 833 class BlockQuote
 834         super Block
 835
 836         redef fun emit(v) do v.decorator.add_blockquote(v, self)
 837
 838         # Remove blockquote markers.
 839         private fun remove_block_quote_prefix(block: MDBlock) do
 840                 var line = block.first_line
 841                 while line != null do
 842                         if not line.is_empty then
 843                                 if line.value[line.leading] == '>' then
 844                                         var rem = line.leading + 1
 845                                         if line.leading + 1 < line.value.length and
 846                                            line.value[line.leading + 1] == ' ' then
 847                                                 rem += 1
 848                                         end
 849                                         line.value = line.value.substring_from(rem)
 850                                         line.leading = line.process_leading
 851                                 end
 852                         end
 853                         line = line.next
 854                 end
 855         end
 856 end
 857
 858 # A markdown code block.
 859 class BlockCode
 860         super Block
 861
 862         redef fun emit(v) do v.decorator.add_code(v, self)
 863
 864         redef fun emit_lines(v) do
 865                 var line = block.first_line
 866                 while line != null do
 867                         if not line.is_empty then
 868                                 v.decorator.append_code(v, line.value, 4, line.value.length)
 869                         end
 870                         v.addn
 871                         line = line.next
 872                 end
 873         end
 874 end
 875
 876 # A markdown code-fence block.
 877 #
 878 # Actually use the same implementation than `BlockCode`,
 879 # this class is only used for typing purposes.
 880 class BlockFence
 881         super BlockCode
 882 end
 883
 884 # A markdown headline.
 885 class BlockHeadline
 886         super Block
 887
 888         redef fun emit(v) do v.decorator.add_headline(v, self)
 889
 890         # Depth of the headline used to determine the headline level.
 891         var depth = 0
 892
 893         # Remove healine marks from lines contained in `self`.
 894         private fun transform_headline(block: MDBlock) do
 895                 if depth > 0 then return
 896                 var level = 0
 897                 var line = block.first_line
 898                 if line.is_empty then return
 899                 var start = line.leading
 900                 while start < line.value.length and line.value[start] == '#' do
 901                         level += 1
 902                         start += 1
 903                 end
 904                 while start < line.value.length and line.value[start] == ' ' do
 905                         start += 1
 906                 end
 907                 if start >= line.value.length then
 908                         line.is_empty = true
 909                 else
 910                         var nend = line.value.length - line.trailing - 1
 911                         while line.value[nend] == '#' do nend -= 1
 912                         while line.value[nend] == ' ' do nend -= 1
 913                         line.value = line.value.substring(start, nend - start + 1)
 914                         line.leading = 0
 915                         line.trailing = 0
 916                 end
 917                 depth = level.min(6)
 918         end
 919 end
 920
 921 # A markdown list item block.
 922 class BlockListItem
 923         super Block
 924
 925         redef fun emit(v) do v.decorator.add_listitem(v, self)
 926 end
 927
 928 # A markdown list block.
 929 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
 930 abstract class BlockList
 931         super Block
 932
 933         # Split list block into list items sub-blocks.
 934         private fun init_block(v: MarkdownProcessor) do
 935                 var line = block.first_line
 936                 line = line.next
 937                 while line != null do
 938                         var t = line.kind(v)
 939                         if t isa LineList or
 940                            (not line.is_empty and (line.prev_empty and line.leading == 0 and
 941                            not (t isa LineList))) then
 942                                    var sblock = block.split(line.prev.as(not null))
 943                                    sblock.kind = new BlockListItem(sblock)
 944                         end
 945                         line = line.next
 946                 end
 947                 var sblock = block.split(block.last_line.as(not null))
 948                 sblock.kind = new BlockListItem(sblock)
 949         end
 950
 951         # Expand list items as paragraphs if needed.
 952         private fun expand_paragraphs(block: MDBlock) do
 953                 var outer = block.first_block
 954                 var inner: nullable MDBlock
 955                 var has_paragraph = false
 956                 while outer != null and not has_paragraph do
 957                         if outer.kind isa BlockListItem then
 958                                 inner = outer.first_block
 959                                 while inner != null and not has_paragraph do
 960                                         if inner.kind isa BlockParagraph then
 961                                                 has_paragraph = true
 962                                         end
 963                                         inner = inner.next
 964                                 end
 965                         end
 966                         outer = outer.next
 967                 end
 968                 if has_paragraph then
 969                         outer = block.first_block
 970                         while outer != null do
 971                                 if outer.kind isa BlockListItem then
 972                                         inner = outer.first_block
 973                                         while inner != null do
 974                                                 if inner.kind isa BlockNone then
 975                                                         inner.kind = new BlockParagraph(inner)
 976                                                 end
 977                                                 inner = inner.next
 978                                         end
 979                                 end
 980                                 outer = outer.next
 981                         end
 982                 end
 983         end
 984 end
 985
 986 # A markdown ordered list.
 987 class BlockOrderedList
 988         super BlockList
 989
 990         redef fun emit(v) do v.decorator.add_orderedlist(v, self)
 991 end
 992
 993 # A markdown unordred list.
 994 class BlockUnorderedList
 995         super BlockList
 996
 997         redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
 998 end
 999
# A markdown paragraph block.
class BlockParagraph
        super Block

        # Delegate the output of `self` to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1006
# A markdown ruler.
class BlockRuler
        super Block

        # Delegate the output of `self` to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1013
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the raw value of each non-empty line.
        #
        # Every line, empty or not, is followed by a new line.
        redef fun emit_lines(v) do
                var current = block.first_line
                while current != null do
                        if not current.is_empty then
                                v.add current.value
                        end
                        v.addn
                        current = current.next
                end
        end
end
1027
# A markdown line.
#
# Lines are chained through `prev` and `next`; `leading` and `trailing` cache
# the number of spaces found at both ends of `value`.
class MDLine

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        var next_empty: Bool = false is writable

        # Create a line from `value` and compute `leading`, `trailing` and `is_empty`.
        init(value: String) do
                self.value = value
                self.leading = process_leading
                # `process_leading` clears blank lines, hence the comparison with
                # the stored value rather than with the parameter.
                if leading != self.value.length then
                        self.is_empty = false
                        self.trailing = process_trailing
                end
        end

        # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
        #
        # The neighbour lines, if any, are told that they now sit next to an empty line.
        fun clear do
                value = ""
                leading = 0
                trailing = 0
                is_empty = true
                if prev != null then prev.next_empty = true
                if next != null then next.prev_empty = true
        end

        # The type of line.
        #
        # A new `Line` instance is returned at each call. Checks are done in this order:
        # empty line, code (more than 3 leading spaces), `#` headline, `>` blockquote,
        # code fence, ruler, unordered list, ordered list, XML block, then headline
        # of level 1 or 2 according to the next line. `LineOther` is the fallback.
        fun kind(v: MarkdownProcessor): Line do
                var value = self.value
                if is_empty then return new LineEmpty
                if leading > 3 then return new LineCode
                if value[leading] == '#' then return new LineHeadline
                if value[leading] == '>' then return new LineBlockquote

                if value.length - leading - trailing > 2 then
                        if value[leading] == '`' and count_chars_start('`') >= 3 then
                                return new LineFence
                        end
                        if value[leading] == '~' and count_chars_start('~') >= 3 then
                                return new LineFence
                        end
                end

                if value.length - leading - trailing > 2 and
                   (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
                   if count_chars(value[leading]) >= 3 then
                                return new LineHR
                   end
                end

                if value.length - leading >= 2 and value[leading + 1] == ' ' then
                        var c = value[leading]
                        if c == '*' or c == '-' or c == '+' then return new LineUList
                end

                if value.length - leading >= 3 and value[leading].is_digit then
                        var i = leading + 1
                        while i < value.length and value[i].is_digit do i += 1
                        if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
                                return new LineOList
                        end
                end

                if value[leading] == '<' and check_html then return new LineXML

                # a following line made of `=` or `-` underlines a headline
                if next != null and not next.is_empty then
                        if next.count_chars('=') > 0 then
                                return new LineHeadline1
                        end
                        if next.count_chars('-') > 0 then
                                return new LineHeadline2
                        end
                end
                return new LineOther
        end

        # Number of leading spaces on this line.
        var leading: Int = 0 is writable

        # Compute `leading` depending on `value`.
        #
        # If `value` is empty or only made of spaces, the line is cleared
        # (see `clear`) and 0 is returned.
        fun process_leading: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[count] == ' ' do count += 1
                # Compare the fresh count, not the `leading` attribute that still
                # holds the value computed for the previous content of the line.
                if count == value.length then
                        clear
                        return 0
                end
                return count
        end

        # Number of trailing spaces on this line.
        var trailing: Int = 0 is writable

        # Compute `trailing` depending on `value`.
        #
        # Returns the length of `value` if it only contains spaces.
        fun process_trailing: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[value.length - count - 1] == ' ' do
                        count += 1
                end
                return count
        end

        # Count the amount of `ch` in this line.
        # Return a value > 0 if this line only consists of `ch` and spaces.
        fun count_chars(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                                continue
                        end
                        count = 0
                        break
                end
                return count
        end

        # Count the amount of `ch` at the start of this line ignoring spaces.
        #
        # Counting stops at the first character that is neither `ch` nor a space.
        fun count_chars_start(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                        else
                                break
                        end
                end
                return count
        end

        # Last XML line if any.
        #
        # Set by `check_html` and `read_xml_comment` when an XML block is recognized.
        private var xml_end_line: nullable MDLine = null

        # Does `value` contains valid XML markup?
        #
        # The markup may span the following lines. On success, `xml_end_line`
        # holds the line where the XML block ends.
        private fun check_html: Bool do
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        if tag == "hr" then
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        # walk the lines until every opened block tag is closed
                        var line: nullable MDLine = self
                        while line != null do
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                if tag.is_html_block and not tag == "hr" then
                                                        if tmp[1] == '/' then
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end

        # Read a XML comment.
        # Used by `check_html`.
        #
        # `start` is the position of the opening `<` in `first_line`.
        # Returns the position that follows the closing `-->`, in the line stored
        # in `first_line.xml_end_line`, or -1 if no complete comment is found.
        private fun read_xml_comment(first_line: MDLine, start: Int): Int do
                var line: nullable MDLine = first_line
                if start + 3 < line.value.length then
                        # the two dashes of `<!--` are looked up relatively to `start`
                        # so that comments preceded by leading spaces are recognized
                        if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
                                var pos = start + 4
                                while line != null do
                                        while pos < line.value.length and line.value[pos] != '-' do
                                                pos += 1
                                        end
                                        if pos == line.value.length then
                                                line = line.next
                                                pos = 0
                                        else
                                                if pos + 2 < line.value.length then
                                                        if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
                                                                first_line.xml_end_line = line
                                                                return pos + 3
                                                        end
                                                end
                                                pos += 1
                                        end
                                end
                        end
                end
                return -1
        end

        # Extract the text of `self` without leading and trailing.
        fun text: String do
                # `substring` expects a count, so both ends must be subtracted
                var count = value.length - leading - trailing
                if count <= 0 then return ""
                return value.substring(leading, count)
        end
end
1279
# The kind of a markdown line, as returned by `MDLine::kind`.
interface Line

        # Process the current line of `v` according to this kind of line.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1287
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the empty line: `v` moves on to the line that follows.
        redef fun process(v) do
                var following = v.current_line.next
                v.current_line = following
        end
end
1296
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines of a paragraph and split them off the current block.
        #
        # The paragraph stops at the first empty line, or before a line that
        # starts another construct: code, fence, headline, ruler, blockquote,
        # XML, or a list line when `v` is processing a list.
        #
        # The new block is a `BlockNone` when `v` is in a list, the first line
        # does not follow an empty line and the paragraph is not ended by an
        # empty line. It is a `BlockParagraph` otherwise.
        redef fun process(v) do
                var first = v.current_line
                var after_blank = first.prev_empty
                # look for the line that ends the paragraph
                var stop = first
                while stop != null and not stop.is_empty do
                        var t = stop.kind(v)
                        if v.in_list and t isa LineList then break
                        if t isa LineCode or t isa LineFence then break
                        if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 then break
                        if t isa LineHR or t isa LineBlockquote or t isa LineXML then break
                        stop = stop.next
                end
                # split the paragraph and choose its kind
                var block: MDBlock
                var plain: Bool
                if stop != null and not stop.is_empty then
                        # stopped by another construct: it stays in the current block
                        block = v.current_block.split(stop.prev.as(not null))
                        plain = v.in_list and not after_blank
                else
                        if stop != null then
                                # stopped by an empty line: it goes with the paragraph
                                block = v.current_block.split(stop)
                        else
                                block = v.current_block.split(v.current_block.last_line.as(not null))
                        end
                        plain = v.in_list and (stop == null or not stop.is_empty) and not after_blank
                end
                if plain then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1347
# A line of markdown code.
class LineCode
        super Line

        # Split a code block off the current block.
        #
        # The code block spans every following line that is either empty or a
        # code line. Its surrounding empty lines are then removed.
        redef fun process(v) do
                # look for the first line that is not part of the code block
                var stop = v.current_line
                while stop != null do
                        if not stop.is_empty and not stop.kind(v) isa LineCode then break
                        stop = stop.next
                end
                # split right before it, or at the end of the current block
                var last: nullable MDLine
                if stop != null then
                        last = stop.prev
                else
                        last = v.current_block.last_line
                end
                var block = v.current_block.split(last.as(not null))
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1370
# A line of raw XML.
class LineXML
        super Line

        # Split an XML block off the current block.
        #
        # Lines that precede the current line are split off first, then the
        # block runs from the current line to its `xml_end_line`.
        redef fun process(v) do
                var first = v.current_line
                var before = first.prev
                if before != null then
                        v.current_block.split(before)
                end
                var last = first.xml_end_line.as(not null)
                var block = v.current_block.split(last)
                block.kind = new BlockXML(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1385
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Split a blockquote off the current block and process its content.
        #
        # The quote ends before the first non-empty, unindented line that
        # follows an empty line and is not itself a blockquote line.
        # Quote markers are removed and the quoted lines are parsed recursively.
        redef fun process(v) do
                # go to bquote end
                var stop = v.current_line
                while stop != null do
                        if not stop.is_empty and stop.prev_empty and stop.leading == 0 then
                                if not stop.kind(v) isa LineBlockquote then break
                        end
                        stop = stop.next
                end
                # build sub block
                var last: nullable MDLine
                if stop != null then
                        last = stop.prev
                else
                        last = v.current_block.last_line
                end
                var block = v.current_block.split(last.as(not null))
                var quote = new BlockQuote(block)
                block.kind = quote
                block.remove_surrounding_empty_lines
                quote.remove_block_quote_prefix(block)
                v.current_line = stop
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1415
# A markdown ruler line.
class LineHR
        super Line

        # Split the ruler line off the current block as a `BlockRuler`.
        #
        # Lines that precede the ruler are split off first.
        redef fun process(v) do
                var ruler = v.current_line
                var before = ruler.prev
                if before != null then
                        v.current_block.split(before)
                end
                var block = v.current_block.split(ruler.as(not null))
                block.kind = new BlockRuler(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1429
# A markdown fence code line.
class LineFence
        super Line

        # Split a fenced code block off the current block.
        #
        # The block runs from the current line to the next fence line, or to
        # the end of the current block if the fence is never closed.
        # The fence lines themselves are cleared.
        redef fun process(v) do
                # go to fence end
                var closing = v.current_line.next
                while closing != null and not closing.kind(v) isa LineFence do
                        closing = closing.next
                end
                # the line that follows the closing fence, if any
                var after: nullable MDLine = null
                if closing != null then after = closing.next
                # build fence block
                var last: nullable MDLine
                if after != null then
                        last = after.prev
                else
                        last = v.current_block.last_line
                end
                var block = v.current_block.split(last.as(not null))
                block.kind = new BlockFence(block)
                block.first_line.clear
                if block.last_line.kind(v) isa LineFence then block.last_line.clear
                block.remove_surrounding_empty_lines
                v.current_line = after
        end
end
1460
# A markdown headline.
class LineHeadline
        super Line

        # Split the headline line off the current block as a `BlockHeadline`.
        #
        # Lines that precede the headline are split off first; the headline
        # marks are then removed from the line.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                if before != null then
                        v.current_block.split(before)
                end
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1477
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Split the headline line off the current block as a `BlockHeadline` of depth 1.
        #
        # Lines that precede the headline are split off first and the line
        # that follows the headline is cleared.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                if before != null then
                        v.current_block.split(before)
                end
                # blank the line under the title
                title.next.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1496
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Split the headline line off the current block as a `BlockHeadline` of depth 2.
        #
        # Lines that precede the headline are split off first and the line
        # that follows the headline is cleared.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                if before != null then
                        v.current_block.split(before)
                end
                # blank the line under the title
                title.next.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1515
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
        super Line

        # Split a list off the current block and process its items.
        #
        # The list ends before the first non-empty, unindented line that
        # follows an empty line and is not a list line.
        # The list is then split into items; each item is cleaned from its
        # list indentation and parsed recursively.
        redef fun process(v) do
                # go to list end
                var stop = v.current_line
                while stop != null do
                        var t = stop.kind(v)
                        var unindented = not stop.is_empty and stop.prev_empty and stop.leading == 0
                        if unindented and not t isa LineList then break
                        stop = stop.next
                end
                # build list block
                var last: nullable MDLine
                if stop != null then
                        last = stop.prev
                else
                        last = v.current_block.last_line
                end
                var list = v.current_block.split(last.as(not null))
                var kind = block_kind(list)
                list.kind = kind
                # reset the boundary flags before and after trimming the list
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                # split and process the items
                kind.init_block(v)
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = stop
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Return the value of `line` to keep once the list marker is removed.
        protected fun extract_value(line: MDLine): String is abstract
end
1560
1561 # An ordered list line.
1562 class LineOList
1563         super LineList
1564
             # Ordered list lines produce a `BlockOrderedList`.
1565         redef fun block_kind(block) do
                     return new BlockOrderedList(block)
             end
1566
             # Return what follows the first `.` of the line and the character after it.
1567         redef fun extract_value(line) do
1568                 var text = line.value
                     var from = text.index_of('.') + 2
                     return text.substring_from(from)
1569         end
1570 end
1571
1572 # An unordered list line.
1573 class LineUList
1574         super LineList
1575
             # Unordered list lines produce a `BlockUnorderedList`.
1576         redef fun block_kind(block) do
                     return new BlockUnorderedList(block)
             end
1577
             # Return the line value without its first `leading + 2` characters.
1578         redef fun extract_value(line) do
1579                 var text = line.value
                     var from = line.leading + 2
                     return text.substring_from(from)
1580         end
1581 end
1582
1583 # A token represents a character in the markdown input.
1584 # Some tokens have a specific markup behaviour that is handled here.
1585 abstract class Token
1586
1587         # Position of `self` in markdown input.
1588         var pos: Int
1589
1590         # Character found at `pos` in the markdown input.
1591         var char: Char
1592
1593         # Output `self` on `v`.
             #
             # The default implementation appends `char` unchanged with `addc`;
             # subclasses redefine it to apply their markup.
1594         fun emit(v: MarkdownEmitter) do
                     v.addc char
             end
1595 end
1596
1597 # A token without a specific meaning.
     #
     # It inherits `Token::emit` unchanged.
1598 class TokenNone
1599         super Token
1600 end
1601
1602 # An emphasis token.
     #
     # The text up to the matching token is rendered in a separate buffer and
     # handed to the decorator through `add_em`.
1603 abstract class TokenEm
1604         super Token
1605
1606         redef fun emit(v) do
1607                 var buf = v.push_buffer
1608                 var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
1609                 v.pop_buffer
1610                 if stop <= 0 then
                             # Nothing matched: output the character as is.
                             v.addc char
                             return
                     end
1611                 v.decorator.add_em(v, buf)
1612                 v.current_pos = stop
1616         end
1617 end
1618
1619 # An emphasis star token (`*`).
1620 class TokenEmStar
1621         super TokenEm
1622 end
1623
1624 # An emphasis underscore token (`_`).
1625 class TokenEmUnderscore
1626         super TokenEm
1627 end
1628
1629 # A strong token.
     #
     # The text up to the matching token is rendered in a separate buffer and
     # handed to the decorator through `add_strong`.
1630 abstract class TokenStrong
1631         super Token
1632
1633         redef fun emit(v) do
1634                 var buf = v.push_buffer
                     # The content starts after the two characters of the token.
1635                 var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
1636                 v.pop_buffer
1637                 if stop <= 0 then
                             # Nothing matched: output the character as is.
                             v.addc char
                             return
                     end
1638                 v.decorator.add_strong(v, buf)
1639                 v.current_pos = stop + 1
1643         end
1644 end
1645
1646 # A strong star token (`**`).
1647 class TokenStrongStar
1648         super TokenStrong
1649 end
1650
1651 # A strong underscore token (`__`).
1652 class TokenStrongUnderscore
1653         super TokenStrong
1654 end
1655
1656 # A code token.
1657 # This class is mainly used to factorize work between single and double quoted span codes.
1658 abstract class TokenCode
1659         super Token
1660
             # Emit the span of code delimited by `self` and the next token of the same type.
             #
             # Spaces surrounding the code are not passed to the decorator.
1661         redef fun emit(v) do
1662                 var from = pos + next_pos + 1
1663                 var to = v.current_text.find_token(from, self)
1664                 if to <= 0 then
                             # No closing token: output the character as is.
                             v.addc char
                             return
                     end
1665                 v.current_pos = to + next_pos
                     # Skip the leading spaces.
1666                 while from < to and v.current_text[from] == ' ' do from += 1
                     # Nothing but spaces between the tokens: emit nothing.
1667                 if from >= to then return
                     # Skip the trailing spaces.
1668                 while v.current_text[to - 1] == ' ' do to -= 1
1669                 v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
1674         end
1675
             # Number of extra characters in the token (0 for a single one, 1 for a doubled one).
1676         private fun next_pos: Int is abstract
1677 end
1678
1679 # A span code token (a single backquote).
1680 class TokenCodeSingle
1681         super TokenCode
1682
             # The token is one character long: no extra character to skip.
1683         redef fun next_pos do return 0
1684 end
1685
1686 # A doubled span code token (two backquotes).
1687 class TokenCodeDouble
1688         super TokenCode
1689
             # The token is two characters long: one extra character to skip.
1690         redef fun next_pos do return 1
1691 end
1692
1693 # A link or image token.
1694 # This class is mainly used to factorize work between images and links.
1695 abstract class TokenLinkOrImage
1696         super Token
1697
1698         # Link address
1699         var link: nullable Text = null
1700
1701         # Link text
1702         var name: nullable Text = null
1703
1704         # Link title
1705         var comment: nullable Text = null
1706
1707         # Is the link construct an abbreviation?
1708         var is_abbrev = false
1709
             # Emit the link or image if `check_link` accepts it, else output `char` as is.
1710         redef fun emit(v) do
1711                 var tmp = new FlatBuffer
1712                 var b = check_link(v, tmp, pos, self)
1713                 if b > 0 then
1714                         emit_hyper(v)
1715                         v.current_pos = b
1716                 else
1717                         v.addc char
1718                 end
1719         end
1720
1721         # Emit the hyperlink as link or image.
1722         private fun emit_hyper(v: MarkdownEmitter) is abstract
1723
1724         # Check if the link is a valid link.
             #
             # On success `name`, `link` and (when available) `comment` are set and the
             # position of the last character of the construct is returned.
             # `-1` is returned when the text at `start` is not a valid link.
             #
             # Three forms are recognized after the `[name]` part:
             #
             # * `(address "title")`: inline link, the address may be written `<address>`
             # * `[id]`: reference link, an empty `id` means that `name` is the id
             # * anything else: `name` itself is looked up in the link references
             #
             # NOTE(review): `out` is not used by this method.
1725         private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
1726                 var md = v.current_text
1727                 var pos
                     # Skip `[` for a link, `![` for an image.
1728                 if token isa TokenLink then
1729                         pos = start + 1
1730                 else
1731                         pos = start + 2
1732                 end
1733                 var tmp = new FlatBuffer
1734                 pos = md.read_md_link_id(tmp, pos)
1735                 if pos < start then return -1
1736                 name = tmp
                     # Position of the `]` that closes the name.
1737                 var old_pos = pos
1738                 pos += 1
1739                 pos = md.skip_spaces(pos)
1740                 if pos < start then
                             # Only spaces (or nothing) after the name: `name` must be a reference.
1741                         var tid = name.write_to_string.to_lower
1742                         if v.processor.link_refs.has_key(tid) then
1743                                 var lr = v.processor.link_refs[tid]
1744                                 is_abbrev = lr.is_abbrev
1745                                 link = lr.link
1746                                 comment = lr.title
1747                                 pos = old_pos
1748                         else
1749                                 return -1
1750                         end
1751                 else if md[pos] == '(' then
1752                         pos += 1
1753                         pos = md.skip_spaces(pos)
1754                         if pos < start then return -1
1755                         tmp = new FlatBuffer
1756                         var use_lt = md[pos] == '<'
1757                         if use_lt then
1758                                 pos = md.read_until(tmp, pos + 1, '>')
1759                         else
1760                                 pos = md.read_md_link(tmp, pos)
1761                         end
1762                         if pos < start then return -1
1763                         if use_lt then pos += 1
                             # The `>` may be the last character: nothing is left to close the link.
                             if pos >= md.length then return -1
1764                         link = tmp.write_to_string
1765                         if md[pos] == ' ' then
1766                                 pos = md.skip_spaces(pos)
                                     # Only spaces remain: the closing `)` is missing.
                                     if pos < start then return -1
1767                                 if md[pos] == '"' then
1768                                         pos += 1
1769                                         tmp = new FlatBuffer
1770                                         pos = md.read_until(tmp, pos, '"')
1771                                         if pos < start then return -1
1772                                         comment = tmp.write_to_string
1773                                         pos += 1
1774                                         pos = md.skip_spaces(pos)
1775                                         if pos == -1 then return -1
1776                                 end
1777                         end
1778                         if md[pos] != ')' then return -1
1779                 else if md[pos] == '[' then
1780                         pos += 1
1781                         tmp = new FlatBuffer
1782                         pos = md.read_raw_until(tmp, pos, ']')
1783                         if pos < start then return -1
                             # An empty id (`[name][]`) means that the name is the id.
1784                         var id
1785                         if tmp.length > 0 then
1786                                 id = tmp
1787                         else
1788                                 id = name
1789                         end
1790                         var tid = id.write_to_string.to_lower
1791                         if v.processor.link_refs.has_key(tid) then
1792                                 var lr = v.processor.link_refs[tid]
1793                                 link = lr.link
1794                                 comment = lr.title
1795                         end
1796                 else
                             # NOTE(review): unlike the first branch, `is_abbrev` is not
                             # copied from the reference here - confirm this is intended.
1797                         var tid = name.write_to_string.replace("\n", " ").to_lower
1798                         if v.processor.link_refs.has_key(tid) then
1799                                 var lr = v.processor.link_refs[tid]
1800                                 link = lr.link
1801                                 comment = lr.title
1802                                 pos = old_pos
1803                         else
1804                                 return -1
1805                         end
1806                 end
1807                 if link == null then return -1
1808                 return pos
1809         end
1810 end
1811
1812 # A markdown link token.
1813 class TokenLink
1814         super TokenLinkOrImage
1815
             # Emit an abbreviation when the link is one and has a title, else a regular link.
1816         redef fun emit_hyper(v) do
                     var title = comment
1817                 if is_abbrev and title != null then
1818                         v.decorator.add_abbr(v, name.as(not null), title)
1819                 else
1820                         v.decorator.add_link(v, link.as(not null), name.as(not null), title)
1821                 end
1822         end
1823 end
1824
1825 # A markdown image token.
1826 class TokenImage
1827         super TokenLinkOrImage
1828
             # Emit the image through the decorator, with `name` and the optional `comment`.
1829         redef fun emit_hyper(v) do
1830                 v.decorator.add_image(v, link.as(not null), name.as(not null), comment)
1831         end
1832 end
1833
1834 # A HTML/XML token.
1835 class TokenHTML
1836         super Token
1837
             # Output the markup found at the current position, or the escaped `char`
             # when `check_html` rejects it.
1838         redef fun emit(v) do
1839                 var html = new FlatBuffer
1840                 var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
1841                 if stop <= 0 then
                             v.decorator.escape_char(v, char)
                             return
                     end
1842                 v.add html
1843                 v.current_pos = stop
1847         end
1848
1849         # Is the HTML valid?
             #
             # Auto links (`<prefix:...>` where `prefix` is a link prefix) are emitted
             # directly through the decorator. Inline HTML is read in safe mode and
             # appended to `out`. The mailto shortcut is not handled yet.
             #
             # Return the position of the closing `>`, or `-1` when nothing valid is found.
1851         private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
1852                 # check for auto links
1853                 var url = new FlatBuffer
1854                 var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
1855                 if pos != -1 and md[pos] == ':' and url.is_link_prefix then
                             # Append the rest of the address after the prefix.
1856                         pos = md.read_until(url, pos, '>')
1857                         if pos != -1 then
1858                                 var address = url.write_to_string
1859                                 v.decorator.add_link(v, address, address, null)
1860                                 return pos
1861                         end
1862                 end
1863                 # TODO check for mailto
1864                 # check for inline html
1865                 if start + 2 >= md.length then return -1
1866                 return md.read_xml(out, start, true)
1869         end
1870 end
1871
1872 # An HTML entity token.
1873 class TokenEntity
1874         super Token
1875
             # Output the entity starting at `pos` when it is well formed, else the
             # escaped `char`.
1876         redef fun emit(v) do
1877                 var tmp = new FlatBuffer
1878                 var b = check_entity(tmp, v.current_text.as(not null), pos)
1879                 if b > 0 then
1880                         v.add tmp
1881                         v.current_pos = b
1882                 else
1883                         v.decorator.escape_char(v, char)
1884                 end
1885         end
1886
1887         # Is the entity valid?
             #
             # Read `md` from `start` up to the next `;` into `out` and check that it
             # looks like a character reference:
             #
             # * `&#x` followed by at least one hexadecimal digit
             # * `&#` followed by decimal digits
             # * `&` followed by letters and digits (the name itself is not checked)
             #
             # On success `out` holds the entity with its final `;` and the position
             # of that `;` in `md` is returned. `-1` is returned otherwise.
1888         private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
1889                 var pos = md.read_until(out, start, ';')
1890                 if pos < 0 or out.length < 3 then
1891                         return -1
1892                 end
1893                 if out[1] == '#' then
1894                         if out[2] == 'x' or out[2] == 'X' then
1895                                 if out.length < 4 then return -1
1896                                 for i in [3..out.length[ do
1897                                         var c = out[i]
                                             # Reject anything outside of 0-9, a-f and A-F.
1898                                         if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
1899                                                 return -1
1900                                         end
1901                                 end
1902                         else
1903                                 for i in [2..out.length[ do
1904                                         var c = out[i]
1905                                         if c < '0' or c > '9' then return -1
1906                                 end
1907                         end
1909                 else
1910                         for i in [1..out.length[ do
1911                                 var c = out[i]
1912                                 if not c.is_digit and not c.is_letter then return -1
1913                         end
1915                         # TODO check entity is valid
1921                 end
                     # `read_until` stopped on the `;` without copying it.
                     out.add ';'
1922                 return pos
1923         end
1924 end
1925
1926 # A markdown escape token.
1927 class TokenEscape
1928         super Token
1929
             # Skip the backslash and output the character that follows it.
1930         redef fun emit(v) do
1931                 var escaped = v.current_pos + 1
                     v.current_pos = escaped
1932                 v.addc v.current_text[escaped]
1933         end
1934 end
1935
1936 # A markdown super token.
     #
     # The text up to the matching token is rendered in a separate buffer and
     # handed to the decorator through `add_super`.
1937 class TokenSuper
1938         super Token
1939
1940         redef fun emit(v) do
1941                 var buf = v.push_buffer
1942                 var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
1943                 v.pop_buffer
1944                 if stop <= 0 then
                             # Nothing matched: output the character as is.
                             v.addc char
                             return
                     end
1945                 v.decorator.add_super(v, buf)
1946                 v.current_pos = stop
1950         end
1951 end
1952
1953 redef class Text
1954
1955         # Get the token kind at `pos`.
             #
             # The kind is decided from the character at `pos` (`c`), the one before
             # it (`c0`) and the two after it (`c1`, `c2`). A space stands for a
             # neighbour that is out of bounds.
1956         private fun token_at(pos: Int): Token do
1957                 var c0: Char
1958                 var c1: Char
1959                 var c2: Char
1961
1962                 if pos > 0 then
1963                         c0 = self[pos - 1]
1964                 else
1965                         c0 = ' '
1966                 end
1967                 var c = self[pos]
1968
1969                 if pos + 1 < length then
1970                         c1 = self[pos + 1]
1971                 else
1972                         c1 = ' '
1973                 end
1974                 if pos + 2 < length then
1975                         c2 = self[pos + 2]
1976                 else
1977                         c2 = ' '
1978                 end
1984
1985                 if c == '*' then
1986                         if c1 == '*' then
                                     # `**` surrounded by spaces is not a strong delimiter.
1987                                 if c0 != ' ' or c2 != ' ' then
1988                                         return new TokenStrongStar(pos, c)
1989                                 else
1990                                         return new TokenEmStar(pos, c)
1991                                 end
1992                         end
                             # A `*` surrounded by spaces is a plain character.
1993                         if c0 != ' ' or c1 != ' ' then
1994                                 return new TokenEmStar(pos, c)
1995                         else
1996                                 return new TokenNone(pos, c)
1997                         end
1998                 else if c == '_' then
1999                         if c1 == '_' then
2000                                 if c0 != ' ' or c2 != ' ' then
2001                                         return new TokenStrongUnderscore(pos, c)
2002                                 else
2003                                         return new TokenEmUnderscore(pos, c)
2004                                 end
2005                         end
2006                         if c0 != ' ' or c1 != ' ' then
2007                                 return new TokenEmUnderscore(pos, c)
2008                         else
2009                                 return new TokenNone(pos, c)
2010                         end
2011                 else if c == '!' then
2012                         if c1 == '[' then return new TokenImage(pos, c)
2013                         return new TokenNone(pos, c)
2014                 else if c == '[' then
2015                         return new TokenLink(pos, c)
2016                 else if c == ']' then
2017                         return new TokenNone(pos, c)
2018                 else if c == '`' then
2019                         if c1 == '`' then
2020                                 return new TokenCodeDouble(pos, c)
2021                         else
2022                                 return new TokenCodeSingle(pos, c)
2023                         end
2024                 else if c == '\\' then
                             # Only a backslash followed by an escapable character is an escape.
2025                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
2026                                 return new TokenEscape(pos, c)
2027                         else
2028                                 return new TokenNone(pos, c)
2029                         end
2030                 else if c == '<' then
2031                         return new TokenHTML(pos, c)
2032                 else if c == '&' then
2033                         return new TokenEntity(pos, c)
2034                 else if c == '^' then
                             # A `^` next to another `^` is a plain character.
2035                         if c0 == '^' or c1 == '^' then
2036                                 return new TokenNone(pos, c)
2037                         else
2038                                 return new TokenSuper(pos, c)
2039                         end
2040                 else
2041                         return new TokenNone(pos, c)
2042                 end
2043         end
2044
2045         # Find the position of a `token` in `self`.
             #
             # Return the first position at or after `start` whose token has the same
             # type as `token`, or `-1` when there is none.
2046         private fun find_token(start: Int, token: Token): Int do
2047                 for at in [start..length[ do
2049                         if token_at(at).is_same_type(token) then return at
2053                 end
2054                 return -1
2055         end
2056
2057         # Get the position of the next non-space character.
             #
             # Spaces and new lines are skipped from `start`.
             # Return `-1` when the end of `self` is reached.
2058         private fun skip_spaces(start: Int): Int do
2059                 var at = start
                     loop
                             if at <= -1 or at >= length then break
                             var c = self[at]
                             if c != ' ' and c != '\n' then break
                             at += 1
                     end
2063                 if at >= length then return -1
2064                 return at
2065         end
2066
2067         # Read `self` until `nend` and append it to the `out` buffer.
2068         # Escape markdown special chars.
             #
             # Return the position of the first character found in `nend`, or `-1`
             # when the end of `self` is reached first.
2069         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2070                 var at = start
2071                 while at < length do
2072                         var c = self[at]
2073                         if c == '\\' and at + 1 < length then
                                     # Let `escape` handle the backslash and the next character.
2074                                 at = escape(out, self[at + 1], at)
2075                         else
                                     if nend.has(c) then break
2084                                 out.add c
2085                         end
2086                         at += 1
2087                 end
2088                 if at == length then return -1
2089                 return at
2090         end
2091
2092         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2093         # No escape is made.
             #
             # Return the position of the first character found in `nend`, or `-1`
             # when the end of `self` is reached first.
2094         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2095                 var at = start
2096                 while at < length do
2097                         var c = self[at]
                             if nend.has(c) then break
2106                         out.add c
2107                         at += 1
2108                 end
2109                 if at == length then return -1
2110                 return at
2111         end
2112
2113         # Read `self` as XML until `to` and append it to the `out` buffer.
             #
             # Characters of `to` found inside a quoted string (`"..."` or `'...'`) do
             # not stop the reading. Inside a string, a backslash and the character
             # that follows it are copied as is.
             #
             # Return the position of the first character of `to` found outside of a
             # string, or `-1` when the end of `self` is reached first.
2115         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2116                 var pos = from
2117                 var in_str = false
2118                 var str_char: nullable Char = null
2119                 while pos < length do
2120                         var c = self[pos]
2121                         if in_str then
2122                                 if c == '\\' then
2123                                         out.add c
2124                                         pos += 1
2125                                         if pos < length then
                                                     # Copy the escaped character, not the backslash again.
2126                                                 out.add self[pos]
2127                                                 pos += 1
2128                                         end
2129                                         continue
2130                                 end
2131                                 if c == str_char then
2132                                         in_str = false
2133                                         out.add c
2134                                         pos += 1
2135                                         continue
2136                                 end
2137                         end
                             # A quote opens a string only outside of one, so that the other
                             # kind of quote can appear inside a string.
2138                         if not in_str and (c == '"' or c == '\'') then
2139                                 in_str = true
2140                                 str_char = c
2141                         end
2142                         if not in_str then
2143                                 var end_reached = false
2144                                 for n in [0..to.length[ do
2145                                         if c == to[n] then
2146                                                 end_reached = true
2147                                                 break
2148                                         end
2149                                 end
2150                                 if end_reached then break
2151                         end
2152                         out.add c
2153                         pos += 1
2154                 end
2155                 if pos == length then return -1
2156                 return pos
2157         end
2158
2159         # Read `self` as XML and append it to the `out` buffer.
             #
             # `start` is expected to be the position of the opening `<`.
             # In safe mode, the `<` of a tag listed in `html_unsafe_tags` is
             # written as `&lt;`.
             #
             # Return the position of the closing `>`, or `-1` when the tag is not
             # terminated. A `<!` sequence is copied as is and `start + 1` is returned.
2161         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2162                 var pos = 0
2163                 var is_close_tag = false
2164                 if start + 1 >= length then return -1
2165                 if self[start + 1] == '/' then
2166                         is_close_tag = true
2167                         pos = start + 2
2168                 else if self[start + 1] == '!' then
2169                         out.append "<!"
2170                         return start + 1
2171                 else
2172                         is_close_tag = false
2173                         pos = start + 1
2174                 end
2175                 if safe_mode then
                             # Read the tag name apart, so it can be checked before it is output.
2176                         var tmp = new FlatBuffer
2177                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2178                         if pos == -1 then return -1
2179                         var tag = tmp.write_to_string.trim.to_lower
2180                         if tag.is_html_unsafe then
2181                                 out.append "&lt;"
2182                                 if is_close_tag then out.add '/'
2183                                 out.append tmp
2184                         else
2185                                 out.append "<"
2186                                 if is_close_tag then out.add '/'
2187                                 out.append tmp
2188                         end
2189                 else
2190                         out.add '<'
2191                         if is_close_tag then out.add '/'
2192                         pos = read_xml_until(out, pos, ' ', '/', '>')
2193                 end
2194                 if pos == -1 then return -1
                     # Copy what follows the tag name, up to `/` or `>`.
2195                 pos = read_xml_until(out, pos, '/', '>')
2196                 if pos == -1 then return -1
2197                 if self[pos] == '/' then
2198                         out.append " /"
2199                         pos = self.read_xml_until(out, pos + 1, '>')
2200                         if pos == -1 then return -1
2201                 end
2202                 if self[pos] == '>' then
2203                         out.add '>'
2204                         return pos
2205                 end
2206                 return -1
2207         end
2208
2209         # Read a markdown link address and append it to the `out` buffer.
             #
             # The reading stops on the `)` that closes the link or on a space found
             # outside of nested parentheses.
             # Return the position of that character, or `-1` when the end of `self`
             # is reached first.
2210         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2211                 var at = start
                     # Nesting depth of the parentheses, the opening one included.
2212                 var depth = 1
2213                 while at < length do
2214                         var c = self[at]
2215                         if c == '\\' and at + 1 < length then
2216                                 at = escape(out, self[at + 1], at)
                                     at += 1
                                     continue
                             end
2219                         if c == '(' then
2220                                 depth += 1
2223                         else if c == ')' then
2224                                 depth -= 1
2225                                 if depth == 0 then break
                             else if c == ' ' and depth == 1 then
                                     break
2226                         end
2228                         out.add c
2230                         at += 1
2231                 end
2232                 if at == length then return -1
2233                 return at
2234         end
2235
2236         # Read a markdown link text and append it to the `out` buffer.
             #
             # Nested brackets are copied to `out`.
             # Return the position of the `]` that closes the text, or `-1` when the
             # end of `self` is reached first.
2237         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2238                 var at = start
                     # Nesting depth of the brackets, the opening one included.
2239                 var depth = 1
2240                 while at < length do
2241                         var c = self[at]
2243                         if c == '[' then
2244                                 depth += 1
2246                         else if c == ']' then
2247                                 depth -= 1
2248                                 if depth == 0 then break
2255                         end
                             out.add c
2257                         at += 1
2258                 end
2259                 if at == length then return -1
2260                 return at
2261         end
2262
2263         # Extract the XML tag name from a XML tag.
             #
             # The first character and an optional `/` after it are skipped, then
             # letters and digits are collected. The last character of `self` is
             # never read. The name is returned in lower case.
2264         private fun xml_tag: String do
2265                 var name = new FlatBuffer
2266                 var at = 1
2267                 if length > 1 and self[1] == '/' then at = 2
                     loop
                             if at >= length - 1 then break
                             var c = self[at]
                             if not c.is_digit and not c.is_letter then break
                             name.add c
                             at += 1
                     end
2272                 return name.write_to_string.to_lower
2273         end
2274
2275         # Read and escape the markdown contained in `self`.
             #
             # `c` is the character that follows a backslash located at `pos`.
             # When `c` can be escaped, it is appended to `out` and `pos + 1` is
             # returned. Otherwise a backslash is appended and `pos` is returned.
2276         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2277                 var escapable = c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2278                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2279                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2280                    c == '`' or c == '~' or c == '^'
                     if not escapable then
                             out.add '\\'
                             return pos
                     end
2281                 out.add c
2282                 return pos + 1
2286         end
2287
2288         # Is `self` an unsafe HTML element?
2289         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2290
2291         # Is `self` a HRML block element?
2292         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2293
2294         # Is `self` a link prefix?
2295         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2296
2297         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2298
2299         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2300
2301         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2302 end
2303
redef class String

        # Render `self`, read as markdown, into its HTML representation.
        #
        # A new `MarkdownProcessor` is created for each call.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Streamable do return (new MarkdownProcessor).process(self)
end