lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
# Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
        # Emitter used by `process` to render the parsed blocks.
        # It is not set by a constructor argument: `init` creates it.
        var emitter: MarkdownEmitter is noinit

        # Create the processor with a new `MarkdownEmitter` bound to `self`.
        init do self.emitter = new MarkdownEmitter(self)
  36
  37         # Process the mardown `input` string and return the processed output.
  38         fun process(input: String): Streamable do
  39                 # init processor
  40                 link_refs.clear
  41                 last_link_ref = null
  42                 current_line = null
  43                 current_block = null
  44                 # parse markdown
  45                 var parent = read_lines(input)
  46                 parent.remove_surrounding_empty_lines
  47                 recurse(parent, false)
  48                 # output processed text
  49                 return emitter.emit(parent.kind)
  50         end
  51
  52         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
  53         private fun read_lines(input: String): MDBlock do
  54                 var block = new MDBlock
  55                 var value = new FlatBuffer
  56                 var i = 0
  57                 while i < input.length do
  58                         value.clear
  59                         var pos = 0
  60                         var eol = false
  61                         while not eol and i < input.length do
  62                                 var c = input[i]
  63                                 if c == '\n' then
  64                                         i += 1
  65                                         eol = true
  66                                 else if c == '\t' then
  67                                         var np = pos + (4 - (pos.bin_and(3)))
  68                                         while pos < np do
  69                                                 value.add ' '
  70                                                 pos += 1
  71                                         end
  72                                         i += 1
  73                                 else
  74                                         pos += 1
  75                                         value.add c
  76                                         i += 1
  77                                 end
  78                         end
  79
  80                         var line = new MDLine(value.write_to_string)
  81                         var is_link_ref = check_link_ref(line)
  82                         # Skip link refs
  83                         if not is_link_ref then block.add_line line
  84                 end
  85                 return block
  86         end
  87
  88         # Check if line is a block link definition.
  89         # Return `true` if line contains a valid link ref and save it into `link_refs`.
  90         private fun check_link_ref(line: MDLine): Bool do
  91                 var md = line.value
  92                 var is_link_ref = false
  93                 var id = new FlatBuffer
  94                 var link = new FlatBuffer
  95                 var comment = new FlatBuffer
  96                 var pos = -1
  97                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
  98                         pos = line.leading + 1
  99                         pos = md.read_until(id, pos, ']')
 100                         if not id.is_empty and pos + 2 < line.value.length then
 101                                 if line.value[pos + 1] == ':' then
 102                                         pos += 2
 103                                         pos = md.skip_spaces(pos)
 104                                         if line.value[pos] == '<' then
 105                                                 pos += 1
 106                                                 pos = md.read_until(link, pos, '>')
 107                                                 pos += 1
 108                                         else
 109                                                 pos = md.read_until(link, pos, ' ', '\n')
 110                                         end
 111                                         if not link.is_empty then
 112                                                 pos = md.skip_spaces(pos)
 113                                                 if pos > 0 and pos < line.value.length then
 114                                                         var c = line.value[pos]
 115                                                         if c == '\"' or c == '\'' or c == '(' then
 116                                                                 pos += 1
 117                                                                 if c == '(' then
 118                                                                         pos = md.read_until(comment, pos, ')')
 119                                                                 else
 120                                                                         pos = md.read_until(comment, pos, c)
 121                                                                 end
 122                                                                 if pos > 0 then is_link_ref = true
 123                                                         end
 124                                                 else
 125                                                         is_link_ref = true
 126                                                 end
 127                                         end
 128                                 end
 129                         end
 130                 end
 131                 if is_link_ref and not id.is_empty and not link.is_empty then
 132                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 133                         add_link_ref(id.write_to_string, lr)
 134                         if comment.is_empty then last_link_ref = lr
 135                         return true
 136                 else
 137                         comment = new FlatBuffer
 138                         if not line.is_empty and last_link_ref != null then
 139                                 pos = line.leading
 140                                 var c = line.value[pos]
 141                                 if c == '\"' or c == '\'' or c ==  '(' then
 142                                         pos += 1
 143                                         if c == '(' then
 144                                                 pos = md.read_until(comment, pos, ')')
 145                                         else
 146                                                 pos = md.read_until(comment, pos, c)
 147                                         end
 148                                 end
 149                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 150                         end
 151                         if comment.is_empty then return false
 152                         return true
 153                 end
 154         end
 155
        # Known link refs, stored under their lowercased id (see `add_link_ref`).
        # This map will be needed during output to expand links.
        var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

        # Last encountered link ref defined without title (for multiline definitions)
        #
        # Markdown allows link refs to be defined over two lines:
        #
        #       [id]: http://example.com/longish/path/to/resource/here
        #               "Optional Title Here"
        #
        private var last_link_ref: nullable LinkRef = null
 168
 169         # Add a link ref to the list
 170         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 171
 172         # Recursively split a `block`.
 173         #
 174         # The block is splitted according to the type of lines it contains.
 175         # Some blocks can be splited again recursively like lists.
 176         # The `in_list` mode is used to recurse on list and build
 177         # nested paragraphs or code blocks.
 178         fun recurse(root: MDBlock, in_list: Bool) do
 179                 var old_mode = self.in_list
 180                 var old_root = self.current_block
 181                 self.in_list = in_list
 182
 183                 var line = root.first_line
 184                 while line != null and line.is_empty do
 185                         line = line.next
 186                         if line == null then return
 187                 end
 188
 189                 current_line = line
 190                 current_block = root
 191                 while current_line != null do
 192                         line_kind(current_line.as(not null)).process(self)
 193                 end
 194                 self.in_list = old_mode
 195                 self.current_block = old_root
 196         end
 197
        # Currently processed line.
        # Set by `recurse` to the first non-empty line of the visited block.
        var current_line: nullable MDLine = null is writable

        # Currently processed block.
        # Set by `recurse` while a block is visited, then restored.
        var current_block: nullable MDBlock = null is writable

        # Is the current recursion in list mode?
        # Set by `recurse` from its `in_list` argument.
        private var in_list = false
 209
 210         # The type of line.
 211         # see: `md_line_*`
 212         fun line_kind(md: MDLine): Line do
 213                 var value = md.value
 214                 var leading = md.leading
 215                 var trailing = md.trailing
 216                 if md.is_empty then return new LineEmpty
 217                 if md.leading > 3 then return new LineCode
 218                 if value[leading] == '#' then return new LineHeadline
 219                 if value[leading] == '>' then return new LineBlockquote
 220
 221                 if value.length - leading - trailing > 2 then
 222                         if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 223                                 return new LineFence
 224                         end
 225                         if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 226                                 return new LineFence
 227                         end
 228                 end
 229
 230                 if value.length - leading - trailing > 2 and
 231                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 232                    if md.count_chars(value[leading]) >= 3 then
 233                                 return new LineHR
 234                    end
 235                 end
 236
 237                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 238                         var c = value[leading]
 239                         if c == '*' or c == '-' or c == '+' then return new LineUList
 240                 end
 241
 242                 if value.length - leading >= 3 and value[leading].is_digit then
 243                         var i = leading + 1
 244                         while i < value.length and value[i].is_digit do i += 1
 245                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 246                                 return new LineOList
 247                         end
 248                 end
 249
 250                 if value[leading] == '<' and md.check_html then return new LineXML
 251
 252                 var next = md.next
 253                 if next != null and not next.is_empty then
 254                         if next.count_chars('=') > 0 then
 255                                 return new LineHeadline1
 256                         end
 257                         if next.count_chars('-') > 0 then
 258                                 return new LineHeadline2
 259                         end
 260                 end
 261                 return new LineOther
 262         end
 263
 264 end
 265
 266 # Emit output corresponding to blocks content.
 267 #
 268 # Blocks are created by a previous pass in `MarkdownProcessor`.
 269 # The emitter use a `Decorator` to select the output format.
 270 class MarkdownEmitter
 271
 272         # Processor containing link refs.
 273         var processor: MarkdownProcessor
 274
 275         # Decorator used for output.
 276         # Default is `HTMLDecorator`
 277         var decorator: Decorator = new HTMLDecorator is writable
 278
 279         # Create a new `MardownEmitter` using the default `HTMLDecorator`
 280         init(processor: MarkdownProcessor) do
 281                 self.processor = processor
 282         end
 283
 284         # Create a new `MarkdownEmitter` using a custom `decorator`.
 285         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 286                 init processor
 287                 self.decorator = decorator
 288         end
 289
 290         # Output `block` using `decorator` in the current buffer.
 291         fun emit(block: Block): Text do
 292                 var buffer = push_buffer
 293                 block.emit(self)
 294                 pop_buffer
 295                 return buffer
 296         end
 297
 298         # Output the content of `block`.
 299         fun emit_in(block: Block) do block.emit_in(self)
 300
 301         # Transform and emit mardown text
 302         fun emit_text(text: Text) do
 303                 emit_text_until(text, 0, null)
 304         end
 305
 306         # Transform and emit mardown text starting at `from` and
 307         # until a token with the same type as `token` is found.
 308         # Go until the end of text if `token` is null.
 309         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 310                 var old_text = current_text
 311                 var old_pos = current_pos
 312                 current_text = text
 313                 current_pos = start
 314                 while current_pos < text.length do
 315                         var mt = text.token_at(current_pos)
 316                         if (token != null and not token isa TokenNone) and
 317                         (mt.is_same_type(token) or
 318                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 319                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 320                                 return current_pos
 321                         end
 322                         mt.emit(self)
 323                         current_pos += 1
 324                 end
 325                 current_text = old_text
 326                 current_pos = old_pos
 327                 return -1
 328         end
 329
 330         # Currently processed position in `current_text`.
 331         # Used when visiting inline production with `emit_text_until`.
 332         private var current_pos: Int = -1
 333
 334         # Currently processed text.
 335         # Used when visiting inline production with `emit_text_until`.
 336         private var current_text: nullable Text = null
 337
 338         # Stacked buffers.
 339         private var buffer_stack = new List[FlatBuffer]
 340
 341         # Push a new buffer on the stack.
 342         private fun push_buffer: FlatBuffer do
 343                 var buffer = new FlatBuffer
 344                 buffer_stack.add buffer
 345                 return buffer
 346         end
 347
 348         # Pop the last buffer.
 349         private fun pop_buffer do buffer_stack.pop
 350
 351         # Current output buffer.
 352         private fun current_buffer: FlatBuffer do
 353                 assert not buffer_stack.is_empty
 354                 return buffer_stack.last
 355         end
 356
 357         # Append `e` to current buffer.
 358         fun add(e: Streamable) do
 359                 if e isa Text then
 360                         current_buffer.append e
 361                 else
 362                         current_buffer.append e.write_to_string
 363                 end
 364         end
 365
 366         # Append `c` to current buffer.
 367         fun addc(c: Char) do current_buffer.add c
 368
 369         # Append a "\n" line break.
 370         fun addn do current_buffer.add '\n'
 371 end
 372
 373 # A Link Reference.
 374 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 375 #
 376 # Example:
 377 #
 378 #    [1]: http://example.com/ "Optional title"
 379 class LinkRef
 380
 381         # Link href
 382         var link: String
 383
 384         # Optional link title
 385         var title: nullable String = null
 386
 387         # Is the link an abreviation?
 388         var is_abbrev = false
 389
 390         init with_title(link: String, title: nullable String) do
 391                 self.link = link
 392                 self.title = title
 393         end
 394 end
 395
# A `Decorator` is used to emit markdown into a specific format.
# The default decorator is `HTMLDecorator`.
#
# Every service is abstract here and receives the `MarkdownEmitter` `v`
# that is producing the output.
interface Decorator

        # Render a ruler block.
        fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

        # Render a headline block with corresponding level.
        fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

        # Render a paragraph block.
        fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

        # Render a code or fence block.
        fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

        # Render a blockquote.
        fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

        # Render an unordered list.
        fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

        # Render an ordered list.
        fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

        # Render a list item.
        fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

        # Render an emphasized text.
        fun add_em(v: MarkdownEmitter, text: Text) is abstract

        # Render a strong text.
        fun add_strong(v: MarkdownEmitter, text: Text) is abstract

        # Render a superscript text.
        fun add_super(v: MarkdownEmitter, text: Text) is abstract

        # Render a link to `link` labeled by `name`, with an optional `comment`.
        fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

        # Render an image located at `link` named `name`, with an optional `comment`.
        fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

        # Render an abbreviation `name` explained by `comment`.
        fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

        # Render a code span reading from a buffer.
        fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

        # Render a text and escape it.
        fun append_value(v: MarkdownEmitter, value: Text) is abstract

        # Render code text from buffer and escape it.
        fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

        # Render a character escape.
        fun escape_char(v: MarkdownEmitter, char: Char) is abstract

        # Render a line break
        fun add_line_break(v: MarkdownEmitter) is abstract

        # Generate a new html valid id from a `String`.
        fun strip_id(txt: String): String is abstract

        # Headlines found during the processing, labeled by their ids.
        fun headlines: ArrayMap[String, HeadLine] is abstract
end
 463
# Class representing a markdown headline.
class HeadLine
        # Unique identifier of this headline.
        var id: String

        # Text of the headline.
        var title: String

        # Level of this headline.
        #
        # According to the markdown specification, level must be in `[1..6]`.
        var level: Int
end
 477
 478 # `Decorator` that outputs HTML.
 479 class HTMLDecorator
 480         super Decorator
 481
 482         redef var headlines = new ArrayMap[String, HeadLine]
 483
 484         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 485
 486         redef fun add_headline(v, block) do
 487                 # save headline
 488                 var txt = block.block.first_line.value
 489                 var id = strip_id(txt)
 490                 var lvl = block.depth
 491                 headlines[id] = new HeadLine(id, txt, lvl)
 492                 # output it
 493                 v.add "<h{lvl} id=\"{id}\">"
 494                 v.emit_in block
 495                 v.add "</h{lvl}>\n"
 496         end
 497
 498         redef fun add_paragraph(v, block) do
 499                 v.add "<p>"
 500                 v.emit_in block
 501                 v.add "</p>\n"
 502         end
 503
 504         redef fun add_code(v, block) do
 505                 v.add "<pre><code>"
 506                 v.emit_in block
 507                 v.add "</code></pre>\n"
 508         end
 509
 510         redef fun add_blockquote(v, block) do
 511                 v.add "<blockquote>\n"
 512                 v.emit_in block
 513                 v.add "</blockquote>\n"
 514         end
 515
 516         redef fun add_unorderedlist(v, block) do
 517                 v.add "<ul>\n"
 518                 v.emit_in block
 519                 v.add "</ul>\n"
 520         end
 521
 522         redef fun add_orderedlist(v, block) do
 523                 v.add "<ol>\n"
 524                 v.emit_in block
 525                 v.add "</ol>\n"
 526         end
 527
 528         redef fun add_listitem(v, block) do
 529                 v.add "<li>"
 530                 v.emit_in block
 531                 v.add "</li>\n"
 532         end
 533
 534         redef fun add_em(v, text) do
 535                 v.add "<em>"
 536                 v.add text
 537                 v.add "</em>"
 538         end
 539
 540         redef fun add_strong(v, text) do
 541                 v.add "<strong>"
 542                 v.add text
 543                 v.add "</strong>"
 544         end
 545
 546         redef fun add_super(v, text) do
 547                 v.add "<sup>"
 548                 v.add text
 549                 v.add "</sup>"
 550         end
 551
 552         redef fun add_image(v, link, name, comment) do
 553                 v.add "<img src=\""
 554                 append_value(v, link)
 555                 v.add "\" alt=\""
 556                 append_value(v, name)
 557                 v.add "\""
 558                 if comment != null and not comment.is_empty then
 559                         v.add " title=\""
 560                         append_value(v, comment)
 561                         v.add "\""
 562                 end
 563                 v.add "/>"
 564         end
 565
 566         redef fun add_link(v, link, name, comment) do
 567                 v.add "<a href=\""
 568                 append_value(v, link)
 569                 v.add "\""
 570                 if comment != null and not comment.is_empty then
 571                         v.add " title=\""
 572                         append_value(v, comment)
 573                         v.add "\""
 574                 end
 575                 v.add ">"
 576                 v.emit_text(name)
 577                 v.add "</a>"
 578         end
 579
 580         redef fun add_abbr(v, name, comment) do
 581                 v.add "<abbr title=\""
 582                 append_value(v, comment)
 583                 v.add "\">"
 584                 v.emit_text(name)
 585                 v.add "</abbr>"
 586         end
 587
 588         redef fun add_span_code(v, text, from, to) do
 589                 v.add "<code>"
 590                 append_code(v, text, from, to)
 591                 v.add "</code>"
 592         end
 593
 594         redef fun add_line_break(v) do
 595                 v.add "<br/>"
 596         end
 597
 598         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 599
 600         redef fun escape_char(v, c) do
 601                 if c == '&' then
 602                         v.add "&amp;"
 603                 else if c == '<' then
 604                         v.add "&lt;"
 605                 else if c == '>' then
 606                         v.add "&gt;"
 607                 else if c == '"' then
 608                         v.add "&quot;"
 609                 else if c == '\'' then
 610                         v.add "&apos;"
 611                 else
 612                         v.addc c
 613                 end
 614         end
 615
 616         redef fun append_code(v, buffer, from, to) do
 617                 for i in [from..to[ do
 618                         var c = buffer[i]
 619                         if c == '&' then
 620                                 v.add "&amp;"
 621                         else if c == '<' then
 622                                 v.add "&lt;"
 623                         else if c == '>' then
 624                                 v.add "&gt;"
 625                         else
 626                                 v.addc c
 627                         end
 628                 end
 629         end
 630
 631         redef fun strip_id(txt) do
 632                 # strip id
 633                 var b = new FlatBuffer
 634                 for c in txt do
 635                         if c == ' ' then
 636                                 b.add '_'
 637                         else
 638                                 if not c.is_letter and
 639                                    not c.is_digit and
 640                                    not allowed_id_chars.has(c) then continue
 641                                 b.add c
 642                         end
 643                 end
 644                 var res = b.to_s
 645                 var key = res
 646                 # check for multiple id definitions
 647                 if headlines.has_key(key) then
 648                         var i = 1
 649                         key = "{res}_{i}"
 650                         while headlines.has_key(key) do
 651                                 i += 1
 652                                 key = "{res}_{i}"
 653                         end
 654                 end
 655                 return key
 656         end
 657
 658         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 659 end
 660
 661 # A block of markdown lines.
# An `MDBlock` can contain lines and/or sub-blocks.
 663 class MDBlock
        # Kind of block.
        # A new block starts as a `BlockNone`.
        # See `Block`.
        var kind: Block = new BlockNone(self) is writable

        # First line if any.
        var first_line: nullable MDLine = null is writable

        # Last line if any.
        var last_line: nullable MDLine = null is writable

        # First sub-block if any.
        var first_block: nullable MDBlock = null is writable

        # Last sub-block if any.
        var last_block: nullable MDBlock = null is writable

        # Previous block if any.
        var prev: nullable MDBlock = null is writable

        # Next block if any.
        var next: nullable MDBlock = null is writable
 685
 686         # Does this block contain subblocks?
 687         fun has_blocks: Bool do return first_block != null
 688
 689         # Count sub-blocks.
 690         fun count_blocks: Int do
 691                 var count = 0
 692                 var block = first_block
 693                 while block != null do
 694                         count += 1
 695                         block = block.next
 696                 end
 697                 return count
 698         end
 699
 700         # Does this block contain lines?
 701         fun has_lines: Bool do return first_line != null
 702
 703         # Count block lines.
 704         fun count_lines: Int do
 705                 var count = 0
 706                 var line = first_line
 707                 while line != null do
 708                         count += 1
 709                         line = line.next
 710                 end
 711                 return count
 712         end
 713
 714         # Split `self` creating a new sub-block having `line` has `last_line`.
 715         fun split(line: MDLine): MDBlock do
 716                 var block = new MDBlock
 717                 block.first_line = first_line
 718                 block.last_line = line
 719                 first_line = line.next
 720                 line.next = null
 721                 if first_line == null then
 722                         last_line = null
 723                 else
 724                         first_line.prev = null
 725                 end
 726                 if first_block == null then
 727                         first_block = block
 728                         last_block = block
 729                 else
 730                         last_block.next = block
 731                         last_block = block
 732                 end
 733                 return block
 734         end
 735
 736         # Add a `line` to this block.
 737         fun add_line(line: MDLine) do
 738                 if last_line == null then
 739                         first_line = line
 740                         last_line = line
 741                 else
 742                         last_line.next_empty = line.is_empty
 743                         line.prev_empty = last_line.is_empty
 744                         line.prev = last_line
 745                         last_line.next = line
 746                         last_line = line
 747                 end
 748         end
 749
 750         # Remove `line` from this block.
 751         fun remove_line(line: MDLine) do
 752                 if line.prev == null then
 753                         first_line = line.next
 754                 else
 755                         line.prev.next = line.next
 756                 end
 757                 if line.next == null then
 758                         last_line = line.prev
 759                 else
 760                         line.next.prev = line.prev
 761                 end
 762                 line.prev = null
 763                 line.next = null
 764         end
 765
 766         # Remove the empty lines at the start; true if at least one was removed.
 767         fun remove_leading_empty_lines: Bool do
 768                 var removed = false
 769                 loop
 770                         var head = first_line
 771                         if head == null then return removed
 772                         if not head.is_empty then return removed
 773                         remove_line(head)
 774                         removed = true
 775                 end
 776         end
 777
 778         # Remove the empty lines at the end; true if at least one was removed.
 779         fun remove_trailing_empty_lines: Bool do
 780                 var removed = false
 781                 loop
 782                         var tail = last_line
 783                         if tail == null then return removed
 784                         if not tail.is_empty then return removed
 785                         remove_line(tail)
 786                         removed = true
 787                 end
 788         end
 789
 790         # Strip empty lines at both ends; true if at least one line was removed.
 791         fun remove_surrounding_empty_lines: Bool do
 792                 # both sides are always trimmed, so no lazy `or` between the two calls
 793                 var head_trimmed = remove_leading_empty_lines
 794                 var tail_trimmed = remove_trailing_empty_lines
 795                 return head_trimmed or tail_trimmed
 796         end
 797
 798         # Strip the list marker, or up to 4 leading spaces, from each non-empty line.
 799         # Used to clean nested lists.
 800         fun remove_list_indent(v: MarkdownProcessor) do
 801                 var cur = first_line
 802                 while cur != null do
 803                         if cur.is_empty then
 804                                 cur = cur.next
 805                                 continue
 806                         end
 807                         var kind = v.line_kind(cur)
 808                         var stripped = cur.value.substring_from(cur.leading.min(4))
 809                         if kind isa LineList then stripped = kind.extract_value(cur)
 810                         cur.value = stripped
 811                         cur.leading = cur.process_leading
 812                         cur = cur.next
 813                 end
 814         end
 815
 816         # Concatenate the text of all the lines of this block.
 817         # Each line, empty or not, is followed by a "\n".
 818         fun text: String do
 819                 var buf = new FlatBuffer
 820                 var cur = first_line
 821                 while cur != null do
 822                         # empty lines only contribute their newline
 823                         if not cur.is_empty then buf.append(cur.text)
 824                         buf.append("\n")
 825                         cur = cur.next
 826                 end
 827                 return buf.write_to_string
 828         end
 829 end
 830
 831 # Representation of a markdown block in the AST.
 832 # Each `Block` is linked to a `MDBlock` that contains markdown code.
 833 abstract class Block
 834
 835         # The markdown block `self` is related to.
 836         var block: MDBlock
 837
 838         # Output `self`; the default implementation calls `v.emit_in`.
 839         fun emit(v: MarkdownEmitter) do v.emit_in(self)
 840
 841         # Emit the contents of `self`: lines when `block.has_lines`, sub-blocks otherwise.
 842         fun emit_in(v: MarkdownEmitter) do
 843                 block.remove_surrounding_empty_lines
 844                 if block.has_lines then
 845                         emit_lines(v)
 846                 else
 847                         emit_blocks(v)
 848                 end
 849         end
 850
 851         # Emit lines contained in `block`, without their leading and trailing spaces.
 852         fun emit_lines(v: MarkdownEmitter) do
 853                 var tpl = v.push_buffer
 854                 var line = block.first_line
 855                 while line != null do
 856                         if not line.is_empty then
 857                                 # `substring` expects a length, so `leading` is subtracted too
 858                                 var count = line.value.length - line.leading - line.trailing
 859                                 v.add line.value.substring(line.leading, count.max(0))
 860                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
 861                         end
 862                         if line.next != null then v.addn
 863                         line = line.next
 864                 end
 865                 v.pop_buffer
 866                 v.emit_text(tpl)
 867         end
 868
 869         # Emit sub-blocks contained in `block`.
 870         fun emit_blocks(v: MarkdownEmitter) do
 871                 var block = self.block.first_block
 872                 while block != null do
 873                         block.kind.emit(v)
 874                         block = block.next
 875                 end
 876         end
 877 end
 878
 879 # A block without any markdown specificities.
 880 #
 881 # Adds nothing to `Block`: the inherited implementation is used as is,
 882 # this class is only used for typing purposes.
 883 class BlockNone
 884         super Block
 885 end
 886
 887 # A markdown blockquote.
 888 class BlockQuote
 889         super Block
 890
 891         redef fun emit(v) do v.decorator.add_blockquote(v, self)
 892
 893         # Strip the `>` marker, and one space following it, from the lines of `block`.
 894         # Lines that are empty, or that do not start with `>` after their
 895         # leading spaces, are left untouched.
 896         private fun remove_block_quote_prefix(block: MDBlock) do
 897                 var cur = block.first_line
 898                 while cur != null do
 899                         var txt = cur.value
 900                         var mark = cur.leading
 901                         if not cur.is_empty and txt[mark] == '>' then
 902                                 # first character kept: right after `>` and its optional space
 903                                 var cut = mark + 1
 904                                 if cut < txt.length and txt[cut] == ' ' then cut += 1
 905                                 cur.value = txt.substring_from(cut)
 906                                 cur.leading = cur.process_leading
 907                         end
 908                         cur = cur.next
 909                 end
 910         end
 911 end
 912
 913 # A markdown code block.
 914 class BlockCode
 915         super Block
 916
 917         redef fun emit(v) do v.decorator.add_code(v, self)
 918
 919         # Send non-empty lines to `append_code`; every line is followed by a newline.
 920         redef fun emit_lines(v) do
 921                 var cur = block.first_line
 922                 while cur != null do
 923                         var src = cur.value
 924                         if not cur.is_empty then v.decorator.append_code(v, src, 4, src.length)
 925                         v.addn
 926                         cur = cur.next
 927                 end
 928         end
 929 end
 930
 931 # A markdown code-fence block.
 932 #
 933 # Uses the same implementation as `BlockCode` (nothing is redefined),
 934 # this class is only used for typing purposes.
 935 class BlockFence
 936         super BlockCode
 937 end
 938
 939 # A markdown headline.
 940 class BlockHeadline
 941         super Block
 942
 943         redef fun emit(v) do v.decorator.add_headline(v, self)
 944
 945         # Depth of the headline used to determine the headline level.
 946         var depth = 0
 947
 948         # Remove headline marks from the first line of `block` and set `depth`.
 949         # Does nothing when `depth` is already set (> 0).
 950         private fun transform_headline(block: MDBlock) do
 951                 if depth > 0 then return
 952                 var level = 0
 953                 var line = block.first_line
 954                 if line == null or line.is_empty then return
 955                 var start = line.leading
 956                 while start < line.value.length and line.value[start] == '#' do
 957                         level += 1
 958                         start += 1
 959                 end
 960                 while start < line.value.length and line.value[start] == ' ' do start += 1
 961                 # strip closing marks, never walking back over the opening ones
 962                 var nend = line.value.length - line.trailing - 1
 963                 while nend >= start and line.value[nend] == '#' do nend -= 1
 964                 while nend >= start and line.value[nend] == ' ' do nend -= 1
 965                 if nend < start then
 966                         line.is_empty = true
 967                 else
 968                         line.value = line.value.substring(start, nend - start + 1)
 969                         line.leading = 0
 970                         line.trailing = 0
 971                 end
 972                 depth = level.min(6)
 973         end
 974 end
 975
 976 # A markdown list item block, emitted through `v.decorator.add_listitem`.
 977 class BlockListItem
 978         super Block
 979
 980         redef fun emit(v) do v.decorator.add_listitem(v, self)
 981 end
 982
 983 # A markdown list block.
 984 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
 985 abstract class BlockList
 986         super Block
 987
 988         # Split the list block into one `BlockListItem` sub-block per item.
 989         private fun init_block(v: MarkdownProcessor) do
 990                 var cur = block.first_line.next
 991                 while cur != null do
 992                         # a list marker always opens a new item...
 993                         var starts_item = v.line_kind(cur) isa LineList
 994                         # ...and so does unindented text that follows an empty line
 995                         if not starts_item then
 996                                 starts_item = not cur.is_empty and cur.prev_empty and cur.leading == 0
 997                         end
 998                         if starts_item then
 999                                 var item = block.split(cur.prev.as(not null))
1000                                 item.kind = new BlockListItem(item)
1001                         end
1002                         cur = cur.next
1003                 end
1004                 var tail = block.split(block.last_line.as(not null))
1005                 tail.kind = new BlockListItem(tail)
1006         end
1007
1008         # Expand list items as paragraphs if needed.
1009         # If a sub-block of any list item is a `BlockParagraph`, the `BlockNone`
1010         # sub-blocks of every list item are turned into `BlockParagraph` too.
1011         private fun expand_paragraphs(block: MDBlock) do
1012                 # first pass: look for a paragraph in any item
1013                 var has_paragraph = false
1014                 var item = block.first_block
1015                 while item != null and not has_paragraph do
1016                         if item.kind isa BlockListItem then
1017                                 var sub = item.first_block
1018                                 while sub != null and not has_paragraph do
1019                                         has_paragraph = sub.kind isa BlockParagraph
1020                                         sub = sub.next
1021                                 end
1022                         end
1023                         item = item.next
1024                 end
1025                 if not has_paragraph then return
1026                 # second pass: promote the plain sub-blocks
1027                 item = block.first_block
1028                 while item != null do
1029                         if item.kind isa BlockListItem then
1030                                 var sub = item.first_block
1031                                 while sub != null do
1032                                         if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
1033                                         sub = sub.next
1034                                 end
1035                         end
1036                         item = item.next
1037                 end
1038         end
1039 end
1040
1041 # A markdown ordered list, emitted through `v.decorator.add_orderedlist`.
1042 class BlockOrderedList
1043         super BlockList
1044
1045         redef fun emit(v) do v.decorator.add_orderedlist(v, self)
1046 end
1047
1048 # A markdown unordered list, emitted through `v.decorator.add_unorderedlist`.
1049 class BlockUnorderedList
1050         super BlockList
1051
1052         redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
1053 end
1054
1055 # A markdown paragraph block, emitted through `v.decorator.add_paragraph`.
1056 class BlockParagraph
1057         super Block
1058
1059         redef fun emit(v) do v.decorator.add_paragraph(v, self)
1060 end
1061
1062 # A markdown ruler, emitted through `v.decorator.add_ruler`.
1063 class BlockRuler
1064         super Block
1065
1066         redef fun emit(v) do v.decorator.add_ruler(v, self)
1067 end
1068
1069 # XML blocks that can be found in markdown markup.
1070 class BlockXML
1071         super Block
1072         # Output the `value` of each non-empty line as is; every line ends with a newline.
1073         redef fun emit_lines(v) do
1074                 var cur = block.first_line
1075                 while cur != null do
1076                         if not cur.is_empty then v.add(cur.value)
1077                         v.addn
1078                         cur = cur.next
1079                 end
1080         end
1081 end
1082
1083 # A markdown line.
1084 class MDLine
1085
1086         # Text contained in this line.
1087         var value: String is writable
1088
1089         # Is this line empty?
1090         # Lines containing only spaces are considered empty.
1091         var is_empty: Bool = true is writable
1092
1093         # Previous line in `MDBlock` or null if first line.
1094         var prev: nullable MDLine = null is writable
1095
1096         # Next line in `MDBlock` or null if last line.
1097         var next: nullable MDLine = null is writable
1098
1099         # Is the previous line empty?
1100         var prev_empty: Bool = false is writable
1101
1102         # Is the next line empty?
1103         var next_empty: Bool = false is writable
1104
1105         # Create a line from `value` and compute `leading`, `trailing` and `is_empty`.
1106         init(value: String) do
1107                 self.value = value
1108                 self.leading = process_leading
1109                 # `value` is the parameter here: `self.value` may have been reset by `clear`
1110                 if leading != value.length then
1111                         self.is_empty = false
1112                         self.trailing = process_trailing
1113                 end
1114         end
1115
1116         # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
1117         # The `next_empty` and `prev_empty` flags of the neighbour lines are set too.
1118         fun clear do
1119                 value = ""
1120                 leading = 0
1121                 trailing = 0
1122                 is_empty = true
1123                 if prev != null then prev.next_empty = true
1124                 if next != null then next.prev_empty = true
1125         end
1126
1127         # Number of leading spaces on this line.
1128         var leading: Int = 0 is writable
1129
1130         # Count the leading spaces of `value`.
1131         # The line is cleared (see `clear`) when `value` only contains spaces.
1132         # The count is returned, not stored: callers assign it to `leading`.
1133         fun process_leading: Int do
1134                 var count = 0
1135                 var value = self.value
1136                 while count < value.length and value[count] == ' ' do count += 1
1137                 # test the fresh `count`: `leading` still holds the previous result
1138                 if count == value.length then clear
1139                 return count
1140         end
1141
1142         # Number of trailing spaces on this line.
1143         var trailing: Int = 0 is writable
1144
1145         # Count the trailing spaces of `value`.
1146         fun process_trailing: Int do
1147                 var count = 0
1148                 var value = self.value
1149                 # bounded so that a `value` made only of spaces cannot index below 0
1150                 while count < value.length and value[value.length - count - 1] == ' ' do
1151                         count += 1
1152                 end
1153                 return count
1154         end
1155
1156         # Count the amount of `ch` in this line.
1157         # Return a value > 0 if this line only consists of `ch` and spaces.
1158         fun count_chars(ch: Char): Int do
1159                 var count = 0
1160                 for c in value do
1161                         if c == ' ' then continue
1162                         if c == ch then
1163                                 count += 1
1164                                 continue
1165                         end
1166                         count = 0
1167                         break
1168                 end
1169                 return count
1170         end
1171
1172         # Count the amount of `ch` at the start of this line ignoring spaces.
1173         fun count_chars_start(ch: Char): Int do
1174                 var count = 0
1175                 for c in value do
1176                         if c == ' ' then continue
1177                         if c != ch then break
1178                         count += 1
1179                 end
1180                 return count
1181         end
1182
1183         # Last XML line if any.
1184         private var xml_end_line: nullable MDLine = null
1185
1186         # Does `value` start valid XML markup? On success `xml_end_line` is set to its last line.
1187         private fun check_html: Bool do
1188                 var tags = new Array[String]
1189                 var tmp = new FlatBuffer
1190                 var pos = leading
1191                 if pos + 1 < value.length and value[pos + 1] == '!' then
1192                         if read_xml_comment(self, pos) > 0 then return true
1193                 end
1194                 pos = value.read_xml(tmp, pos, false)
1195                 var tag: String
1196                 if pos > -1 then
1197                         tag = tmp.xml_tag
1198                         if not tag.is_html_block then
1199                                 return false
1200                         end
1201                         if tag == "hr" then
1202                                 xml_end_line = self
1203                                 return true
1204                         end
1205                         tags.add tag
1206                         var line: nullable MDLine = self
1207                         while line != null do
1208                                 while pos < line.value.length and line.value[pos] != '<' do
1209                                         pos += 1
1210                                 end
1211                                 if pos >= line.value.length then
1212                                         if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1213                                                 tags.pop
1214                                                 if tags.is_empty then
1215                                                         xml_end_line = line
1216                                                         break
1217                                                 end
1218                                         end
1219                                         line = line.next
1220                                         pos = 0
1221                                 else
1222                                         tmp = new FlatBuffer
1223                                         var new_pos = line.value.read_xml(tmp, pos, false)
1224                                         if new_pos > 0 then
1225                                                 tag = tmp.xml_tag
1226                                                 if tag.is_html_block and not tag == "hr" then
1227                                                         if tmp[1] == '/' then
1228                                                                 if tags.last != tag then
1229                                                                         return false
1230                                                                 end
1231                                                                 tags.pop
1232                                                         else
1233                                                                 tags.add tag
1234                                                         end
1235                                                 end
1236                                                 if tags.is_empty then
1237                                                         xml_end_line = line
1238                                                         break
1239                                                 end
1240                                                 pos = new_pos
1241                                         else
1242                                                 pos += 1
1243                                         end
1244                                 end
1245                         end
1246                         return tags.is_empty
1247                 end
1248                 return false
1249         end
1250
1251         # Read a XML comment opening at index `start` of `first_line` (the position of `<`).
1252         # Used by `check_html`. Return the index after `-->`, or -1; sets `xml_end_line` on success.
1253         private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1254                 var line: nullable MDLine = first_line
1255                 if start + 3 < line.value.length then
1256                         if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
1257                                 var pos = start + 4
1258                                 while line != null do
1259                                         while pos < line.value.length and line.value[pos] != '-' do
1260                                                 pos += 1
1261                                         end
1262                                         if pos == line.value.length then
1263                                                 line = line.next
1264                                                 pos = 0
1265                                         else
1266                                                 if pos + 2 < line.value.length then
1267                                                         if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1268                                                                 first_line.xml_end_line = line
1269                                                                 return pos + 3
1270                                                         end
1271                                                 end
1272                                                 pos += 1
1273                                         end
1274                                 end
1275                         end
1276                 end
1277                 return -1
1278         end
1279
1280         # Extract the text of `self` without leading and trailing spaces (`substring` takes a length).
1281         fun text: String do return value.substring(leading, (value.length - leading - trailing).max(0))
1282 end
1283
1284 # A kind of markdown line (empty, code, headline, list...).
1285 interface Line
1286
1287         # Parse the line(s) starting at `v.current_line`.
1288         # See `MarkdownProcessor::recurse`.
1289         fun process(v: MarkdownProcessor) is abstract
1290 end
1291
1292 # An empty markdown line: processing it only moves `v.current_line` to the next line.
1293 class LineEmpty
1294         super Line
1295
1296         redef fun process(v) do
1297                 v.current_line = v.current_line.next
1298         end
1299 end
1300
1301 # A non-specific markdown construction.
1302 # Mainly used as part of another line construct such as paragraphs or lists.
1303 class LineOther
1304         super Line
1305
1306         # Gather the lines up to the next empty line, or up to the start of
1307         # another construct, into a new block split from `v.current_block`.
1308         # The block is a `BlockParagraph`, or a `BlockNone` for some list content.
1309         redef fun process(v) do
1310                 var line = v.current_line
1311                 var was_empty = line.prev_empty
1312                 # go to block end
1313                 while line != null and not line.is_empty do
1314                         var t = v.line_kind(line)
1315                         # a list marker only ends the block inside a list
1316                         if v.in_list and t isa LineList then break
1317                         # code and fences
1318                         if t isa LineCode or t isa LineFence then break
1319                         # the other constructs that end the block
1320                         if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
1321                            t isa LineHR or t isa LineBlockquote or t isa LineXML then break
1322                         line = line.next
1323                 end
1324                 # find the last line of the block; inside a list, a block that
1325                 # does not follow an empty line is kept as plain text (`BlockNone`)
1326                 var last: MDLine
1327                 var plain = v.in_list and not was_empty
1328                 if line == null then
1329                         # no stop line: take everything that remains
1330                         last = v.current_block.last_line.as(not null)
1331                 else if line.is_empty then
1332                         # stopped on an empty line: it closes the block,
1333                         # which is then always a paragraph
1334                         last = line
1335                         plain = false
1336                 else
1337                         # stopped on another construct: end right before it
1338                         last = line.prev.as(not null)
1339                 end
1340                 var block = v.current_block.split(last)
1341                 if plain then
1342                         block.kind = new BlockNone(block)
1343                 else
1344                         block.kind = new BlockParagraph(block)
1345                 end
1346                 # restart from what is left in the current block
1347                 v.current_block.remove_leading_empty_lines
1348                 v.current_line = v.current_block.first_line
1349         end
1350 end
1351
1352 # A line of markdown code.
1353 class LineCode
1354         super Line
1355
1356         # Build a `BlockCode` from the run of code lines starting at the current line.
1357         redef fun process(v) do
1358                 # walk over code lines and the empty lines mixed with them
1359                 var stop = v.current_line
1360                 loop
1361                         if stop == null then break
1362                         if not stop.is_empty and not v.line_kind(stop) isa LineCode then break
1363                         stop = stop.next
1364                 end
1365                 # the block ends right before `stop`, or on the last line if none
1366                 var last = v.current_block.last_line
1367                 if stop != null then last = stop.prev
1368                 var block = v.current_block.split(last.as(not null))
1369                 block.kind = new BlockCode(block)
1370                 block.remove_surrounding_empty_lines
1371                 v.current_line = v.current_block.first_line
1372         end
1373 end
1374
1375 # A line of raw XML.
1376 class LineXML
1377         super Line
1378
1379         # Wrap the lines up to the `xml_end_line` of the current line in a `BlockXML`.
1380         redef fun process(v) do
1381                 var first = v.current_line
1382                 if first.prev != null then v.current_block.split(first.prev.as(not null))
1383                 var xml = v.current_block.split(first.xml_end_line.as(not null))
1384                 xml.kind = new BlockXML(xml)
1385                 v.current_block.remove_leading_empty_lines
1386                 v.current_line = v.current_block.first_line
1387         end
1388 end
1389
1390 # A markdown blockquote line.
1391 class LineBlockquote
1392         super Line
1393
1394         # Build a `BlockQuote` from the quoted lines, then parse its content.
1395         redef fun process(v) do
1396                 # go to bquote end: unindented non-quote text after an empty line
1397                 var stop = v.current_line
1398                 loop
1399                         if stop == null then break
1400                         if not stop.is_empty and stop.prev_empty and stop.leading == 0 then
1401                                 if not v.line_kind(stop) isa LineBlockquote then break
1402                         end
1403                         stop = stop.next
1404                 end
1405                 # build sub block
1406                 var last = v.current_block.last_line
1407                 if stop != null then last = stop.prev
1408                 var block = v.current_block.split(last.as(not null))
1409                 var quote = new BlockQuote(block)
1410                 block.kind = quote
1411                 block.remove_surrounding_empty_lines
1412                 quote.remove_block_quote_prefix(block)
1413                 # recurse into the quote, then restart from the current block
1414                 v.current_line = stop
1415                 v.recurse(block, false)
1416                 v.current_line = v.current_block.first_line
1417         end
1418 end
1419
1420 # A markdown ruler line, stored alone in a `BlockRuler`.
1421 class LineHR
1422         super Line
1423
1424         redef fun process(v) do
1425                 var cur = v.current_line.as(not null)
1426                 if cur.prev != null then v.current_block.split(cur.prev.as(not null))
1427                 var ruler = v.current_block.split(cur)
1428                 ruler.kind = new BlockRuler(ruler)
1429                 v.current_block.remove_leading_empty_lines
1430                 v.current_line = v.current_block.first_line
1431         end
1432 end
1433
1434 # A markdown fence code line.
1435 class LineFence
1436         super Line
1437
1438         # Build a `BlockFence` from the current line up to the next fence line,
1439         # or up to the last line of the block when there is none.
1440         redef fun process(v) do
1441                 # go to fence end, searching after the opening fence
1442                 var close = v.current_line.next
1443                 while close != null and not v.line_kind(close) isa LineFence do
1444                         close = close.next
1445                 end
1446                 # `rest` is the first line after the closing fence, if any
1447                 var rest: nullable MDLine = null
1448                 if close != null then rest = close.next
1449                 # build fence block
1450                 var last = v.current_block.last_line
1451                 if rest != null then last = rest.prev
1452                 var block = v.current_block.split(last.as(not null))
1453                 block.kind = new BlockFence(block)
1454                 # blank the opening fence, and the closing one when present
1455                 block.first_line.clear
1456                 var tail = block.last_line
1457                 if tail != null and v.line_kind(tail) isa LineFence then
1458                         tail.clear
1459                 end
1460                 block.remove_surrounding_empty_lines
1461                 # processing resumes after the fence block
1462                 v.current_line = rest
1463         end
1464 end
1465
1466 # A markdown headline line, stored alone in a `BlockHeadline`.
1467 class LineHeadline
1468         super Line
1469
1470         redef fun process(v) do
1471                 var cur = v.current_line.as(not null)
1472                 if cur.prev != null then v.current_block.split(cur.prev.as(not null))
1473                 var block = v.current_block.split(cur)
1474                 # the kind strips the `#` marks and computes its own depth
1475                 var headline = new BlockHeadline(block)
1476                 block.kind = headline
1477                 headline.transform_headline(block)
1478                 v.current_block.remove_leading_empty_lines
1479                 v.current_line = v.current_block.first_line
1480         end
1481 end
1482
1483 # A markdown headline of level 1.
1484 class LineHeadline1
1485         super LineHeadline
1486
1487         redef fun process(v) do
1488                 var cur = v.current_line.as(not null)
1489                 if cur.prev != null then v.current_block.split(cur.prev.as(not null))
1490                 # clear the next line (presumably the underline) while it is still linked
1491                 cur.next.clear
1492                 var block = v.current_block.split(cur)
1493                 var headline = new BlockHeadline(block)
1494                 headline.depth = 1
1495                 headline.transform_headline(block)
1496                 block.kind = headline
1497                 v.current_block.remove_leading_empty_lines
1498                 v.current_line = v.current_block.first_line
1499         end
1500 end
1501
1502 # A markdown headline of level 2.
1503 class LineHeadline2
1504         super LineHeadline
1505
1506         redef fun process(v) do
1507                 var cur = v.current_line.as(not null)
1508                 if cur.prev != null then v.current_block.split(cur.prev.as(not null))
1509                 # clear the next line (presumably the underline) while it is still linked
1510                 cur.next.clear
1511                 var block = v.current_block.split(cur)
1512                 var headline = new BlockHeadline(block)
1513                 headline.depth = 2
1514                 headline.transform_headline(block)
1515                 block.kind = headline
1516                 v.current_block.remove_leading_empty_lines
1517                 v.current_line = v.current_block.first_line
1518         end
1519 end
1520
# A markdown list line.
# Factorizes the processing shared by ordered and unordered lists.
class LineList
        super Line

        redef fun process(v) do
                # Look for the first line that no longer belongs to the list:
                # a non-empty, non-indented, non-list line preceded by an empty line.
                var stop = v.current_line
                while stop != null do
                        var t = v.line_kind(stop)
                        if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
                           not t isa LineList then
                                break
                        end
                        stop = stop.next
                end
                # Cut the list out of the current block.
                # Without a stop line, the list runs to the end of the block.
                var last: nullable MDLine
                if stop != null then
                        last = stop.prev
                else
                        last = v.current_block.last_line
                end
                var list = v.current_block.split(last.as(not null))
                var kind = block_kind(list)
                list.kind = kind
                # The boundary flags are reset both before and after the trimming,
                # since trimming may change the first and last lines.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # Process the content of each sub-block of the list.
                var item = list.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                # Resume processing at the line that ended the list (if any).
                v.current_line = stop
        end

        # Create the kind of list block that matches this kind of line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract the content of `line`, skipping its list marker
        # (see the subclasses for the exact rule).
        protected fun extract_value(line: MDLine): String is abstract
end
1565
# An ordered list line.
class LineOList
        super LineList

        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The content starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var text = line.value
                var from = text.index_of('.') + 2
                return text.substring_from(from)
        end
end
1576
# An unordered list line.
class LineUList
        super LineList

        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The content starts two characters after the leading spaces of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1587
# A token represents a character of the markdown input.
# Tokens with a specific markup behaviour redefine `emit` to implement it.
abstract class Token

        # Position of `self` in the markdown input.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output this token through the emitter `v`.
        #
        # By default, the character is written as is.
        fun emit(v: MarkdownEmitter) do
                v.addc char
        end
end
1601
# A token without any markup meaning.
# It is emitted with the default behaviour inherited from `Token`.
class TokenNone
        super Token
end
1606
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        # When no matching token is found, only the character itself is written.
        redef fun emit(v) do
                # The emphasized text is rendered in a temporary buffer.
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, inner)
                v.current_pos = stop
        end
end
1623
# An emphasis token written with a star (`*`).
class TokenEmStar
        super TokenEm
end
1628
# An emphasis token written with an underscore (`_`).
class TokenEmUnderscore
        super TokenEm
end
1633
# A strong token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        # When no matching token is found, only the character itself is written.
        redef fun emit(v) do
                # The content starts after the two characters of the opening marker.
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, inner)
                # Also skip the second character of the closing marker.
                v.current_pos = stop + 1
        end
end
1650
# A strong token written with stars (`**`).
class TokenStrongStar
        super TokenStrong
end
1655
# A strong token written with underscores (`__`).
class TokenStrongUnderscore
        super TokenStrong
end
1660
# A code token.
# Factorizes the work shared by the single and double back-quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span of code.
        # When no matching token is found, only the character itself is written.
        redef fun emit(v) do
                var text = v.current_text
                var from = pos + next_pos + 1
                var to = text.find_token(from, self)
                if to <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = to + next_pos
                # Trim the spaces around the code; a blank span outputs nothing.
                while from < to and text[from] == ' ' do from += 1
                if from >= to then return
                while text[to - 1] == ' ' do to -= 1
                v.decorator.add_span_code(v, text.as(not null), from, to)
        end

        # Number of marker characters that follow the first one.
        private fun next_pos: Int is abstract
end
1683
# A span code token made of a single back-quote.
class TokenCodeSingle
        super TokenCode

        # The marker is a single character.
        redef fun next_pos do
                return 0
        end
end
1690
# A span code token made of two back-quotes.
class TokenCodeDouble
        super TokenCode

        # The marker has one character after the first one.
        redef fun next_pos do
                return 1
        end
end
1697
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image if the construct is valid.
        # Otherwise only the character itself is written.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # On success, `name`, `link`, `comment` (and `is_abbrev` for plain
        # references) are set and the position of the last character of the
        # construct is returned. Return -1 if the construct is not a valid link.
        #
        # Three forms are recognized after the `[name]` part:
        #
        # * `(address "title")`: inline address, optionally between `<` and `>`;
        # * `[id]`: reference to a registered link (`name` is used if `id` is empty);
        # * nothing: `name` itself is looked up in the registered link references.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                # Skip `[` for links and `![` for images.
                var pos
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                # Position of the `]` that closes the name.
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # End of text reached: the name must be a registered reference.
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # A `<...>` address may end the text: nothing left to close the link.
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # `skip_spaces` returns -1 when only blanks remain.
                                if pos < start then return -1
                                if md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        # An empty id means that the name is the id.
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # The name is followed by other text: it must be a registered reference.
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                # Keep the abbreviation flag, as in the end-of-text case.
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
1816
# A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the link is one and has a title,
        # a regular link otherwise.
        redef fun emit_hyper(v) do
                if not is_abbrev or comment == null then
                        v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
                else
                        v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
                end
        end
end
1829
# A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with its address, its text and its optional title.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, address, text, comment)
        end
end
1838
# A HTML/XML token.
class TokenHTML
        super Token

        # Emit the tag starting at the current position if it is valid.
        # Otherwise the character is escaped.
        redef fun emit(v) do
                var html = new FlatBuffer
                var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link shortcuts (`<http://...>`).
        #
        # Return the position of the closing `>` or -1.
        # Link shortcuts are emitted directly through the decorator of `v`,
        # inline HTML is appended to `out`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var url = new FlatBuffer
                var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and url.is_link_prefix then
                        # Read the rest of the address after the prefix.
                        pos = md.read_until(url, pos, '>')
                        if pos != -1 then
                                var link = url.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
1876
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity starting at `pos` if it is valid.
        # Otherwise the character is escaped.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` and check that the text is
        # either a decimal (`&#123;`), a hexadecimal (`&#x1F;`) or a named
        # (`&amp;`) entity.
        #
        # On success, the whole entity is appended to `out` and the position
        # of the `;` is returned. Return -1 otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # Hexadecimal entity: at least one digit is required.
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                # Decimal entity.
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        # Named entity.
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check entity is valid
                end
                out.add ';'
                return pos
        end
end
1930
# A markdown escape token.
class TokenEscape
        super Token

        # Skip the escape character and write the character that follows it as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
1940
# A markdown super token.
class TokenSuper
        super Token

        # Emit the text up to the matching token as a superscript.
        # When no matching token is found, only the character itself is written.
        redef fun emit(v) do
                # The superscript text is rendered in a temporary buffer.
                var inner = v.push_buffer
                var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_super(v, inner)
                v.current_pos = stop
        end
end
1957
1958 redef class Text
1959
1960         # Get the token kind at `pos`.
1961         private fun token_at(pos: Int): Token do
1962                 var c0: Char
1963                 var c1: Char
1964                 var c2: Char
1965                 var c3: Char
1966
1967                 if pos > 0 then
1968                         c0 = self[pos - 1]
1969                 else
1970                         c0 = ' '
1971                 end
1972                 var c = self[pos]
1973
1974                 if pos + 1 < length then
1975                         c1 = self[pos + 1]
1976                 else
1977                         c1 = ' '
1978                 end
1979                 if pos + 2 < length then
1980                         c2 = self[pos + 2]
1981                 else
1982                         c2 = ' '
1983                 end
1984                 if pos + 3 < length then
1985                         c3 = self[pos + 3]
1986                 else
1987                         c3 = ' '
1988                 end
1989
1990                 if c == '*' then
1991                         if c1 == '*' then
1992                                 if c0 != ' ' or c2 != ' ' then
1993                                         return new TokenStrongStar(pos, c)
1994                                 else
1995                                         return new TokenEmStar(pos, c)
1996                                 end
1997                         end
1998                         if c0 != ' ' or c1 != ' ' then
1999                                 return new TokenEmStar(pos, c)
2000                         else
2001                                 return new TokenNone(pos, c)
2002                         end
2003                 else if c == '_' then
2004                         if c1 == '_' then
2005                                 if c0 != ' ' or c2 != ' 'then
2006                                         return new TokenStrongUnderscore(pos, c)
2007                                 else
2008                                         return new TokenEmUnderscore(pos, c)
2009                                 end
2010                         end
2011                         if c0 != ' ' or c1 != ' ' then
2012                                 return new TokenEmUnderscore(pos, c)
2013                         else
2014                                 return new TokenNone(pos, c)
2015                         end
2016                 else if c == '!' then
2017                         if c1 == '[' then return new TokenImage(pos, c)
2018                         return new TokenNone(pos, c)
2019                 else if c == '[' then
2020                         return new TokenLink(pos, c)
2021                 else if c == ']' then
2022                         return new TokenNone(pos, c)
2023                 else if c == '`' then
2024                         if c1 == '`' then
2025                                 return new TokenCodeDouble(pos, c)
2026                         else
2027                                 return new TokenCodeSingle(pos, c)
2028                         end
2029                 else if c == '\\' then
2030                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
2031                                 return new TokenEscape(pos, c)
2032                         else
2033                                 return new TokenNone(pos, c)
2034                         end
2035                 else if c == '<' then
2036                         return new TokenHTML(pos, c)
2037                 else if c == '&' then
2038                         return new TokenEntity(pos, c)
2039                 else if c == '^' then
2040                         if c0 == '^' or c1 == '^' then
2041                                 return new TokenNone(pos, c)
2042                         else
2043                                 return new TokenSuper(pos, c)
2044                         end
2045                 else
2046                         return new TokenNone(pos, c)
2047                 end
2048         end
2049
2050         # Find the position of a `token` in `self`.
2051         private fun find_token(start: Int, token: Token): Int do
2052                 var pos = start
2053                 while pos < length do
2054                         if token_at(pos).is_same_type(token) then
2055                                 return pos
2056                         end
2057                         pos += 1
2058                 end
2059                 return -1
2060         end
2061
2062         # Get the position of the next non-space character.
2063         private fun skip_spaces(start: Int): Int do
2064                 var pos = start
2065                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2066                         pos += 1
2067                 end
2068                 if pos < length then return pos
2069                 return -1
2070         end
2071
2072         # Read `self` until `nend` and append it to the `out` buffer.
2073         # Escape markdown special chars.
2074         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2075                 var pos = start
2076                 while pos < length do
2077                         var c = self[pos]
2078                         if c == '\\' and pos + 1 < length then
2079                                 pos = escape(out, self[pos + 1], pos)
2080                         else
2081                                 var end_reached = false
2082                                 for n in nend do
2083                                         if c == n then
2084                                                 end_reached = true
2085                                                 break
2086                                         end
2087                                 end
2088                                 if end_reached then break
2089                                 out.add c
2090                         end
2091                         pos += 1
2092                 end
2093                 if pos == length then return -1
2094                 return pos
2095         end
2096
2097         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2098         # No escape is made.
2099         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2100                 var pos = start
2101                 while pos < length do
2102                         var c = self[pos]
2103                         var end_reached = false
2104                         for n in nend do
2105                                 if c == n then
2106                                         end_reached = true
2107                                         break
2108                                 end
2109                         end
2110                         if end_reached then break
2111                         out.add c
2112                         pos += 1
2113                 end
2114                 if pos == length then return -1
2115                 return pos
2116         end
2117
2118         # Read `self` as XML until `to` and append it to the `out` buffer.
2119         # Escape HTML special chars.
2120         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2121                 var pos = from
2122                 var in_str = false
2123                 var str_char: nullable Char = null
2124                 while pos < length do
2125                         var c = self[pos]
2126                         if in_str then
2127                                 if c == '\\' then
2128                                         out.add c
2129                                         pos += 1
2130                                         if pos < length then
2131                                                 out.add c
2132                                                 pos += 1
2133                                         end
2134                                         continue
2135                                 end
2136                                 if c == str_char then
2137                                         in_str = false
2138                                         out.add c
2139                                         pos += 1
2140                                         continue
2141                                 end
2142                         end
2143                         if c == '"' or c == '\'' then
2144                                 in_str = true
2145                                 str_char = c
2146                         end
2147                         if not in_str then
2148                                 var end_reached = false
2149                                 for n in [0..to.length[ do
2150                                         if c == to[n] then
2151                                                 end_reached = true
2152                                                 break
2153                                         end
2154                                 end
2155                                 if end_reached then break
2156                         end
2157                         out.add c
2158                         pos += 1
2159                 end
2160                 if pos == length then return -1
2161                 return pos
2162         end
2163
2164         # Read `self` as XML and append it to the `out` buffer.
2165         # Safe mode can be activated to limit reading to valid xml.
2166         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2167                 var pos = 0
2168                 var is_close_tag = false
2169                 if start + 1 >= length then return -1
2170                 if self[start + 1] == '/' then
2171                         is_close_tag = true
2172                         pos = start + 2
2173                 else if self[start + 1] == '!' then
2174                         out.append "<!"
2175                         return start + 1
2176                 else
2177                         is_close_tag = false
2178                         pos = start + 1
2179                 end
2180                 if safe_mode then
2181                         var tmp = new FlatBuffer
2182                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2183                         if pos == -1 then return -1
2184                         var tag = tmp.write_to_string.trim.to_lower
2185                         if tag.is_html_unsafe then
2186                                 out.append "&lt;"
2187                                 if is_close_tag then out.add '/'
2188                                 out.append tmp
2189                         else
2190                                 out.append "<"
2191                                 if is_close_tag then out.add '/'
2192                                 out.append tmp
2193                         end
2194                 else
2195                         out.add '<'
2196                         if is_close_tag then out.add '/'
2197                         pos = read_xml_until(out, pos, ' ', '/', '>')
2198                 end
2199                 if pos == -1 then return -1
2200                 pos = read_xml_until(out, pos, '/', '>')
2201                 if pos == -1 then return -1
2202                 if self[pos] == '/' then
2203                         out.append " /"
2204                         pos = self.read_xml_until(out, pos + 1, '>')
2205                         if pos == -1 then return -1
2206                 end
2207                 if self[pos] == '>' then
2208                         out.add '>'
2209                         return pos
2210                 end
2211                 return -1
2212         end
2213
2214         # Read a markdown link address and append it to the `out` buffer.
2215         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2216                 var pos = start
2217                 var counter = 1
2218                 while pos < length do
2219                         var c = self[pos]
2220                         if c == '\\' and pos + 1 < length then
2221                                 pos = escape(out, self[pos + 1], pos)
2222                         else
2223                                 var end_reached = false
2224                                 if c == '(' then
2225                                         counter += 1
2226                                 else if c == ' ' then
2227                                         if counter == 1 then end_reached = true
2228                                 else if c == ')' then
2229                                         counter -= 1
2230                                         if counter == 0 then end_reached = true
2231                                 end
2232                                 if end_reached then break
2233                                 out.add c
2234                         end
2235                         pos += 1
2236                 end
2237                 if pos == length then return -1
2238                 return pos
2239         end
2240
2241         # Read a markdown link text and append it to the `out` buffer.
2242         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2243                 var pos = start
2244                 var counter = 1
2245                 while pos < length do
2246                         var c = self[pos]
2247                         var end_reached = false
2248                         if c == '[' then
2249                                 counter += 1
2250                                 out.add c
2251                         else if c == ']' then
2252                                 counter -= 1
2253                                 if counter == 0 then
2254                                         end_reached = true
2255                                 else
2256                                         out.add c
2257                                 end
2258                         else
2259                                 out.add c
2260                         end
2261                         if end_reached then break
2262                         pos += 1
2263                 end
2264                 if pos == length then return -1
2265                 return pos
2266         end
2267
2268         # Extract the lowercased tag name from a XML tag such as `<div>` or `</div>`.
2269         # The first char and an optional `/` are skipped; the last char is never read.
2270         private fun xml_tag: String do
2271                 var name = new FlatBuffer
2272                 var i = if length > 1 and self[1] == '/' then 2 else 1
2273                 while i < length - 1 and (self[i].is_letter or self[i].is_digit) do
2274                         name.add self[i]
2275                         i += 1
2276                 end
2277                 return name.write_to_string.to_lower
2278         end
2279
2280         # Return `pos + 1` after adding an escapable markdown char `c` to `out`;
2281         # otherwise add a `\` to `out` and return `pos` unchanged. `self` is not read.
2282         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2283                 if c != '\\' and c != '[' and c != ']' and c != '(' and c != ')' and c != '{' and c != '}' and
2284                    c != '#' and c != '"' and c != '\'' and c != '.' and c != '<' and c != '>' and c != '*' and
2285                    c != '+' and c != '-' and c != '_' and c != '!' and c != '`' and c != '~' and c != '^' then
2286                         out.add '\\'
2287                         return pos
2288                 end
2289                 out.add c
2290                 return pos + 1
2291         end
2292
2293         # Does `self` name one of the unsafe HTML elements listed in `html_unsafe_tags`?
2294         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(write_to_string)
2295
2296         # Does `self` name one of the HTML block elements listed in `html_block_tags`?
2297         private fun is_html_block: Bool do return html_block_tags.has(write_to_string)
2298
2299         # Is `self` one of the link prefixes listed in `html_link_prefixes`?
2300         private fun is_link_prefix: Bool do return html_link_prefixes.has(write_to_string)
2301
2302         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"] # Tag names tested by `is_html_unsafe`; the array is built once and reused.
2303
2304         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"] # Tag names tested by `is_html_block`; the array is built once and reused.
2305
2306         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"] # Prefixes tested by `is_link_prefix`; the array is built once and reused.
2307 end
2308
2309 redef class String
2310 
2311         # Process `self` as markdown with a new `MarkdownProcessor` and return its output.
2312         #
2313         #    var md = "**Hello World!**"
2314         #    var html = md.md_to_html
2315         #    assert html == "<p><strong>Hello World!</strong></p>\n"
2316         fun md_to_html: Streamable do
2317                 # A new processor is created for each call.
2318                 return (new MarkdownProcessor).process(self)
2319         end
2320 end