lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then outputed by an `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for ouput.
  34         var emitter: MarkdownEmitter is noinit
  35
  36         init do self.emitter = new MarkdownEmitter(self)
  37
  38         # Process the mardown `input` string and return the processed output.
  39         fun process(input: String): Streamable do
  40                 # init processor
  41                 link_refs.clear
  42                 last_link_ref = null
  43                 current_line = null
  44                 current_block = null
  45                 # parse markdown
  46                 var parent = read_lines(input)
  47                 parent.remove_surrounding_empty_lines
  48                 recurse(parent, false)
  49                 # output processed text
  50                 return emitter.emit(parent.kind)
  51         end
  52
  53         # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
  54         private fun read_lines(input: String): MDBlock do
  55                 var block = new MDBlock
  56                 var value = new FlatBuffer
  57                 var i = 0
  58                 while i < input.length do
  59                         value.clear
  60                         var pos = 0
  61                         var eol = false
  62                         while not eol and i < input.length do
  63                                 var c = input[i]
  64                                 if c == '\n' then
  65                                         i += 1
  66                                         eol = true
  67                                 else if c == '\t' then
  68                                         var np = pos + (4 - (pos.bin_and(3)))
  69                                         while pos < np do
  70                                                 value.add ' '
  71                                                 pos += 1
  72                                         end
  73                                         i += 1
  74                                 else
  75                                         pos += 1
  76                                         value.add c
  77                                         i += 1
  78                                 end
  79                         end
  80
  81                         var line = new MDLine(value.write_to_string)
  82                         var is_link_ref = check_link_ref(line)
  83                         # Skip link refs
  84                         if not is_link_ref then block.add_line line
  85                 end
  86                 return block
  87         end
  88
  89         # Check if line is a block link definition.
  90         # Return `true` if line contains a valid link ref and save it into `link_refs`.
  91         private fun check_link_ref(line: MDLine): Bool do
  92                 var md = line.value
  93                 var is_link_ref = false
  94                 var id = new FlatBuffer
  95                 var link = new FlatBuffer
  96                 var comment = new FlatBuffer
  97                 var pos = -1
  98                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
  99                         pos = line.leading + 1
 100                         pos = md.read_until(id, pos, ']')
 101                         if not id.is_empty and pos + 2 < line.value.length then
 102                                 if line.value[pos + 1] == ':' then
 103                                         pos += 2
 104                                         pos = md.skip_spaces(pos)
 105                                         if line.value[pos] == '<' then
 106                                                 pos += 1
 107                                                 pos = md.read_until(link, pos, '>')
 108                                                 pos += 1
 109                                         else
 110                                                 pos = md.read_until(link, pos, ' ', '\n')
 111                                         end
 112                                         if not link.is_empty then
 113                                                 pos = md.skip_spaces(pos)
 114                                                 if pos > 0 and pos < line.value.length then
 115                                                         var c = line.value[pos]
 116                                                         if c == '\"' or c == '\'' or c == '(' then
 117                                                                 pos += 1
 118                                                                 if c == '(' then
 119                                                                         pos = md.read_until(comment, pos, ')')
 120                                                                 else
 121                                                                         pos = md.read_until(comment, pos, c)
 122                                                                 end
 123                                                                 if pos > 0 then is_link_ref = true
 124                                                         end
 125                                                 else
 126                                                         is_link_ref = true
 127                                                 end
 128                                         end
 129                                 end
 130                         end
 131                 end
 132                 if is_link_ref and not id.is_empty and not link.is_empty then
 133                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 134                         add_link_ref(id.write_to_string, lr)
 135                         if comment.is_empty then last_link_ref = lr
 136                         return true
 137                 else
 138                         comment = new FlatBuffer
 139                         if not line.is_empty and last_link_ref != null then
 140                                 pos = line.leading
 141                                 var c = line.value[pos]
 142                                 if c == '\"' or c == '\'' or c ==  '(' then
 143                                         pos += 1
 144                                         if c == '(' then
 145                                                 pos = md.read_until(comment, pos, ')')
 146                                         else
 147                                                 pos = md.read_until(comment, pos, c)
 148                                         end
 149                                 end
 150                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 151                         end
 152                         if comment.is_empty then return false
 153                         return true
 154                 end
 155         end
 156
 157         # Known link refs
 158         # This list will be needed during output to expand links.
 159         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 160
 161         # Last encountered link ref (for multiline definitions)
 162         #
 163         # Markdown allows link refs to be defined over two lines:
 164         #
 165         #       [id]: http://example.com/longish/path/to/resource/here
 166         #               "Optional Title Here"
 167         #
 168         private var last_link_ref: nullable LinkRef = null
 169
 170         # Add a link ref to the list
 171         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 172
 173         # Recursively split a `block`.
 174         #
 175         # The block is splitted according to the type of lines it contains.
 176         # Some blocks can be splited again recursively like lists.
 177         # The `in_list` mode is used to recurse on list and build
 178         # nested paragraphs or code blocks.
 179         fun recurse(root: MDBlock, in_list: Bool) do
 180                 var old_mode = self.in_list
 181                 var old_root = self.current_block
 182                 self.in_list = in_list
 183
 184                 var line = root.first_line
 185                 while line != null and line.is_empty do
 186                         line = line.next
 187                         if line == null then return
 188                 end
 189
 190                 current_line = line
 191                 current_block = root
 192                 while current_line != null do
 193                         line_kind(current_line.as(not null)).process(self)
 194                 end
 195                 self.in_list = old_mode
 196                 self.current_block = old_root
 197         end
 198
 199         # Currently processed line.
 200         # Used when visiting blocks with `recurse`.
 201         var current_line: nullable MDLine = null is writable
 202
 203         # Currently processed block.
 204         # Used when visiting blocks with `recurse`.
 205         var current_block: nullable MDBlock = null is writable
 206
 207         # Is the current recursion in list mode?
 208         # Used when visiting blocks with `recurse`
 209         private var in_list = false
 210
 211         # The type of line.
 212         # see: `md_line_*`
 213         fun line_kind(md: MDLine): Line do
 214                 var value = md.value
 215                 var leading = md.leading
 216                 var trailing = md.trailing
 217                 if md.is_empty then return new LineEmpty
 218                 if md.leading > 3 then return new LineCode
 219                 if value[leading] == '#' then return new LineHeadline
 220                 if value[leading] == '>' then return new LineBlockquote
 221
 222                 if value.length - leading - trailing > 2 then
 223                         if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 224                                 return new LineFence
 225                         end
 226                         if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 227                                 return new LineFence
 228                         end
 229                 end
 230
 231                 if value.length - leading - trailing > 2 and
 232                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 233                    if md.count_chars(value[leading]) >= 3 then
 234                                 return new LineHR
 235                    end
 236                 end
 237
 238                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 239                         var c = value[leading]
 240                         if c == '*' or c == '-' or c == '+' then return new LineUList
 241                 end
 242
 243                 if value.length - leading >= 3 and value[leading].is_digit then
 244                         var i = leading + 1
 245                         while i < value.length and value[i].is_digit do i += 1
 246                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 247                                 return new LineOList
 248                         end
 249                 end
 250
 251                 if value[leading] == '<' and md.check_html then return new LineXML
 252
 253                 var next = md.next
 254                 if next != null and not next.is_empty then
 255                         if next.count_chars('=') > 0 then
 256                                 return new LineHeadline1
 257                         end
 258                         if next.count_chars('-') > 0 then
 259                                 return new LineHeadline2
 260                         end
 261                 end
 262                 return new LineOther
 263         end
 264
 265         # Get the token kind at `pos`.
 266         fun token_at(text: Text, pos: Int): Token do
 267                 var c0: Char
 268                 var c1: Char
 269                 var c2: Char
 270
 271                 if pos > 0 then
 272                         c0 = text[pos - 1]
 273                 else
 274                         c0 = ' '
 275                 end
 276                 var c = text[pos]
 277
 278                 if pos + 1 < text.length then
 279                         c1 = text[pos + 1]
 280                 else
 281                         c1 = ' '
 282                 end
 283                 if pos + 2 < text.length then
 284                         c2 = text[pos + 2]
 285                 else
 286                         c2 = ' '
 287                 end
 288
 289                 if c == '*' then
 290                         if c1 == '*' then
 291                                 if c0 != ' ' or c2 != ' ' then
 292                                         return new TokenStrongStar(pos, c)
 293                                 else
 294                                         return new TokenEmStar(pos, c)
 295                                 end
 296                         end
 297                         if c0 != ' ' or c1 != ' ' then
 298                                 return new TokenEmStar(pos, c)
 299                         else
 300                                 return new TokenNone(pos, c)
 301                         end
 302                 else if c == '_' then
 303                         if c1 == '_' then
 304                                 if c0 != ' ' or c2 != ' 'then
 305                                         return new TokenStrongUnderscore(pos, c)
 306                                 else
 307                                         return new TokenEmUnderscore(pos, c)
 308                                 end
 309                         end
 310                         if c0 != ' ' or c1 != ' ' then
 311                                 return new TokenEmUnderscore(pos, c)
 312                         else
 313                                 return new TokenNone(pos, c)
 314                         end
 315                 else if c == '!' then
 316                         if c1 == '[' then return new TokenImage(pos, c)
 317                         return new TokenNone(pos, c)
 318                 else if c == '[' then
 319                         return new TokenLink(pos, c)
 320                 else if c == ']' then
 321                         return new TokenNone(pos, c)
 322                 else if c == '`' then
 323                         if c1 == '`' then
 324                                 return new TokenCodeDouble(pos, c)
 325                         else
 326                                 return new TokenCodeSingle(pos, c)
 327                         end
 328                 else if c == '\\' then
 329                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 330                                 return new TokenEscape(pos, c)
 331                         else
 332                                 return new TokenNone(pos, c)
 333                         end
 334                 else if c == '<' then
 335                         return new TokenHTML(pos, c)
 336                 else if c == '&' then
 337                         return new TokenEntity(pos, c)
 338                 else if c == '^' then
 339                         if c0 == '^' or c1 == '^' then
 340                                 return new TokenNone(pos, c)
 341                         else
 342                                 return new TokenSuper(pos, c)
 343                         end
 344                 else
 345                         return new TokenNone(pos, c)
 346                 end
 347         end
 348
 349         # Find the position of a `token` in `self`.
 350         fun find_token(text: Text, start: Int, token: Token): Int do
 351                 var pos = start
 352                 while pos < text.length do
 353                         if token_at(text, pos).is_same_type(token) then
 354                                 return pos
 355                         end
 356                         pos += 1
 357                 end
 358                 return -1
 359         end
 360 end
 361
 362 # Emit output corresponding to blocks content.
 363 #
 364 # Blocks are created by a previous pass in `MarkdownProcessor`.
 365 # The emitter use a `Decorator` to select the output format.
 366 class MarkdownEmitter
 367
 368         # Processor containing link refs.
 369         var processor: MarkdownProcessor
 370
 371         # Decorator used for output.
 372         # Default is `HTMLDecorator`
 373         var decorator: Decorator = new HTMLDecorator is writable
 374
 375         # Create a new `MarkdownEmitter` using a custom `decorator`.
 376         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 377                 init processor
 378                 self.decorator = decorator
 379         end
 380
 381         # Output `block` using `decorator` in the current buffer.
 382         fun emit(block: Block): Text do
 383                 var buffer = push_buffer
 384                 block.emit(self)
 385                 pop_buffer
 386                 return buffer
 387         end
 388
 389         # Output the content of `block`.
 390         fun emit_in(block: Block) do block.emit_in(self)
 391
 392         # Transform and emit mardown text
 393         fun emit_text(text: Text) do
 394                 emit_text_until(text, 0, null)
 395         end
 396
 397         # Transform and emit mardown text starting at `from` and
 398         # until a token with the same type as `token` is found.
 399         # Go until the end of text if `token` is null.
 400         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 401                 var old_text = current_text
 402                 var old_pos = current_pos
 403                 current_text = text
 404                 current_pos = start
 405                 while current_pos < text.length do
 406                         var mt = processor.token_at(text, current_pos)
 407                         if (token != null and not token isa TokenNone) and
 408                         (mt.is_same_type(token) or
 409                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 410                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 411                                 return current_pos
 412                         end
 413                         mt.emit(self)
 414                         current_pos += 1
 415                 end
 416                 current_text = old_text
 417                 current_pos = old_pos
 418                 return -1
 419         end
 420
 421         # Currently processed position in `current_text`.
 422         # Used when visiting inline production with `emit_text_until`.
 423         private var current_pos: Int = -1
 424
 425         # Currently processed text.
 426         # Used when visiting inline production with `emit_text_until`.
 427         private var current_text: nullable Text = null
 428
 429         # Stacked buffers.
 430         private var buffer_stack = new List[FlatBuffer]
 431
 432         # Push a new buffer on the stack.
 433         private fun push_buffer: FlatBuffer do
 434                 var buffer = new FlatBuffer
 435                 buffer_stack.add buffer
 436                 return buffer
 437         end
 438
 439         # Pop the last buffer.
 440         private fun pop_buffer do buffer_stack.pop
 441
 442         # Current output buffer.
 443         private fun current_buffer: FlatBuffer do
 444                 assert not buffer_stack.is_empty
 445                 return buffer_stack.last
 446         end
 447
 448         # Append `e` to current buffer.
 449         fun add(e: Streamable) do
 450                 if e isa Text then
 451                         current_buffer.append e
 452                 else
 453                         current_buffer.append e.write_to_string
 454                 end
 455         end
 456
 457         # Append `c` to current buffer.
 458         fun addc(c: Char) do current_buffer.add c
 459
 460         # Append a "\n" line break.
 461         fun addn do current_buffer.add '\n'
 462 end
 463
 464 # A Link Reference.
 465 # Links that are specified somewhere in the mardown document to be reused as shortcuts.
 466 #
 467 # Example:
 468 #
 469 #    [1]: http://example.com/ "Optional title"
 470 class LinkRef
 471
 472         # Link href
 473         var link: String
 474
 475         # Optional link title
 476         var title: nullable String = null
 477
 478         # Is the link an abreviation?
 479         var is_abbrev = false
 480
 481         # Create a link with a title.
 482         init with_title(link: String, title: nullable String) do
 483                 self.link = link
 484                 self.title = title
 485         end
 486 end
 487
 488 # A `Decorator` is used to emit mardown into a specific format.
 489 # Default decorator used is `HTMLDecorator`.
 490 interface Decorator
 491
 492         # Render a ruler block.
 493         fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
 494
 495         # Render a headline block with corresponding level.
 496         fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
 497
 498         # Render a paragraph block.
 499         fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
 500
 501         # Render a code or fence block.
 502         fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
 503
 504         # Render a blockquote.
 505         fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
 506
 507         # Render an unordered list.
 508         fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
 509
 510         # Render an ordered list.
 511         fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
 512
 513         # Render a list item.
 514         fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
 515
 516         # Render an emphasis text.
 517         fun add_em(v: MarkdownEmitter, text: Text) is abstract
 518
 519         # Render a strong text.
 520         fun add_strong(v: MarkdownEmitter, text: Text) is abstract
 521
 522         # Render a super text.
 523         fun add_super(v: MarkdownEmitter, text: Text) is abstract
 524
 525         # Render a link.
 526         fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 527
 528         # Render an image.
 529         fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 530
 531         # Render an abbreviation.
 532         fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
 533
 534         # Render a code span reading from a buffer.
 535         fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 536
 537         # Render a text and escape it.
 538         fun append_value(v: MarkdownEmitter, value: Text) is abstract
 539
 540         # Render code text from buffer and escape it.
 541         fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 542
 543         # Render a character escape.
 544         fun escape_char(v: MarkdownEmitter, char: Char) is abstract
 545
 546         # Render a line break
 547         fun add_line_break(v: MarkdownEmitter) is abstract
 548
 549         # Generate a new html valid id from a `String`.
 550         fun strip_id(txt: String): String is abstract
 551
 552         # Found headlines during the processing labeled by their ids.
 553         fun headlines: ArrayMap[String, HeadLine] is abstract
 554 end
 555
# Class representing a markdown headline.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
 569
 570 # `Decorator` that outputs HTML.
 571 class HTMLDecorator
 572         super Decorator
 573
 574         redef var headlines = new ArrayMap[String, HeadLine]
 575
 576         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 577
 578         redef fun add_headline(v, block) do
 579                 # save headline
 580                 var txt = block.block.first_line.value
 581                 var id = strip_id(txt)
 582                 var lvl = block.depth
 583                 headlines[id] = new HeadLine(id, txt, lvl)
 584                 # output it
 585                 v.add "<h{lvl} id=\"{id}\">"
 586                 v.emit_in block
 587                 v.add "</h{lvl}>\n"
 588         end
 589
 590         redef fun add_paragraph(v, block) do
 591                 v.add "<p>"
 592                 v.emit_in block
 593                 v.add "</p>\n"
 594         end
 595
 596         redef fun add_code(v, block) do
 597                 v.add "<pre><code>"
 598                 v.emit_in block
 599                 v.add "</code></pre>\n"
 600         end
 601
 602         redef fun add_blockquote(v, block) do
 603                 v.add "<blockquote>\n"
 604                 v.emit_in block
 605                 v.add "</blockquote>\n"
 606         end
 607
 608         redef fun add_unorderedlist(v, block) do
 609                 v.add "<ul>\n"
 610                 v.emit_in block
 611                 v.add "</ul>\n"
 612         end
 613
 614         redef fun add_orderedlist(v, block) do
 615                 v.add "<ol>\n"
 616                 v.emit_in block
 617                 v.add "</ol>\n"
 618         end
 619
 620         redef fun add_listitem(v, block) do
 621                 v.add "<li>"
 622                 v.emit_in block
 623                 v.add "</li>\n"
 624         end
 625
 626         redef fun add_em(v, text) do
 627                 v.add "<em>"
 628                 v.add text
 629                 v.add "</em>"
 630         end
 631
 632         redef fun add_strong(v, text) do
 633                 v.add "<strong>"
 634                 v.add text
 635                 v.add "</strong>"
 636         end
 637
 638         redef fun add_super(v, text) do
 639                 v.add "<sup>"
 640                 v.add text
 641                 v.add "</sup>"
 642         end
 643
 644         redef fun add_image(v, link, name, comment) do
 645                 v.add "<img src=\""
 646                 append_value(v, link)
 647                 v.add "\" alt=\""
 648                 append_value(v, name)
 649                 v.add "\""
 650                 if comment != null and not comment.is_empty then
 651                         v.add " title=\""
 652                         append_value(v, comment)
 653                         v.add "\""
 654                 end
 655                 v.add "/>"
 656         end
 657
 658         redef fun add_link(v, link, name, comment) do
 659                 v.add "<a href=\""
 660                 append_value(v, link)
 661                 v.add "\""
 662                 if comment != null and not comment.is_empty then
 663                         v.add " title=\""
 664                         append_value(v, comment)
 665                         v.add "\""
 666                 end
 667                 v.add ">"
 668                 v.emit_text(name)
 669                 v.add "</a>"
 670         end
 671
 672         redef fun add_abbr(v, name, comment) do
 673                 v.add "<abbr title=\""
 674                 append_value(v, comment)
 675                 v.add "\">"
 676                 v.emit_text(name)
 677                 v.add "</abbr>"
 678         end
 679
 680         redef fun add_span_code(v, text, from, to) do
 681                 v.add "<code>"
 682                 append_code(v, text, from, to)
 683                 v.add "</code>"
 684         end
 685
 686         redef fun add_line_break(v) do
 687                 v.add "<br/>"
 688         end
 689
 690         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 691
 692         redef fun escape_char(v, c) do
 693                 if c == '&' then
 694                         v.add "&amp;"
 695                 else if c == '<' then
 696                         v.add "&lt;"
 697                 else if c == '>' then
 698                         v.add "&gt;"
 699                 else if c == '"' then
 700                         v.add "&quot;"
 701                 else if c == '\'' then
 702                         v.add "&apos;"
 703                 else
 704                         v.addc c
 705                 end
 706         end
 707
 708         redef fun append_code(v, buffer, from, to) do
 709                 for i in [from..to[ do
 710                         var c = buffer[i]
 711                         if c == '&' then
 712                                 v.add "&amp;"
 713                         else if c == '<' then
 714                                 v.add "&lt;"
 715                         else if c == '>' then
 716                                 v.add "&gt;"
 717                         else
 718                                 v.addc c
 719                         end
 720                 end
 721         end
 722
 723         redef fun strip_id(txt) do
 724                 # strip id
 725                 var b = new FlatBuffer
 726                 for c in txt do
 727                         if c == ' ' then
 728                                 b.add '_'
 729                         else
 730                                 if not c.is_letter and
 731                                    not c.is_digit and
 732                                    not allowed_id_chars.has(c) then continue
 733                                 b.add c
 734                         end
 735                 end
 736                 var res = b.to_s
 737                 var key = res
 738                 # check for multiple id definitions
 739                 if headlines.has_key(key) then
 740                         var i = 1
 741                         key = "{res}_{i}"
 742                         while headlines.has_key(key) do
 743                                 i += 1
 744                                 key = "{res}_{i}"
 745                         end
 746                 end
 747                 return key
 748         end
 749
 750         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 751 end
 752
 753 # A block of markdown lines.
 754 # A `MDBlock` can contains lines and/or sub-blocks.
 755 class MDBlock
 756         # Kind of block.
 757         # See `Block`.
 758         var kind: Block = new BlockNone(self) is writable
 759
 760         # First line if any.
 761         var first_line: nullable MDLine = null is writable
 762
 763         # Last line if any.
 764         var last_line: nullable MDLine = null is writable
 765
 766         # First sub-block if any.
 767         var first_block: nullable MDBlock = null is writable
 768
 769         # Last sub-block if any.
 770         var last_block: nullable MDBlock = null is writable
 771
 772         # Previous block if any.
 773         var prev: nullable MDBlock = null is writable
 774
 775         # Next block if any.
 776         var next: nullable MDBlock = null is writable
 777
 778         # Does this block contain subblocks?
 779         fun has_blocks: Bool do return first_block != null
 780
 781         # Count sub-blocks.
 782         fun count_blocks: Int do
 783                 var count = 0
 784                 var block = first_block
 785                 while block != null do
 786                         count += 1
 787                         block = block.next
 788                 end
 789                 return count
 790         end
 791
 792         # Does this block contain lines?
 793         fun has_lines: Bool do return first_line != null
 794
 795         # Count block lines.
 796         fun count_lines: Int do
 797                 var count = 0
 798                 var line = first_line
 799                 while line != null do
 800                         count += 1
 801                         line = line.next
 802                 end
 803                 return count
 804         end
 805
 806         # Split `self` creating a new sub-block having `line` has `last_line`.
 807         fun split(line: MDLine): MDBlock do
 808                 var block = new MDBlock
 809                 block.first_line = first_line
 810                 block.last_line = line
 811                 first_line = line.next
 812                 line.next = null
 813                 if first_line == null then
 814                         last_line = null
 815                 else
 816                         first_line.prev = null
 817                 end
 818                 if first_block == null then
 819                         first_block = block
 820                         last_block = block
 821                 else
 822                         last_block.next = block
 823                         last_block = block
 824                 end
 825                 return block
 826         end
 827
 828         # Add a `line` to this block.
 829         fun add_line(line: MDLine) do
 830                 if last_line == null then
 831                         first_line = line
 832                         last_line = line
 833                 else
 834                         last_line.next_empty = line.is_empty
 835                         line.prev_empty = last_line.is_empty
 836                         line.prev = last_line
 837                         last_line.next = line
 838                         last_line = line
 839                 end
 840         end
 841
 842         # Remove `line` from this block.
 843         fun remove_line(line: MDLine) do
 844                 if line.prev == null then
 845                         first_line = line.next
 846                 else
 847                         line.prev.next = line.next
 848                 end
 849                 if line.next == null then
 850                         last_line = line.prev
 851                 else
 852                         line.next.prev = line.prev
 853                 end
 854                 line.prev = null
 855                 line.next = null
 856         end
 857
 858         # Remove leading empty lines.
 859         fun remove_leading_empty_lines: Bool do
 860                 var was_empty = false
 861                 var line = first_line
 862                 while line != null and line.is_empty do
 863                         remove_line line
 864                         line = first_line
 865                         was_empty = true
 866                 end
 867                 return was_empty
 868         end
 869
 870         # Remove trailing empty lines.
 871         fun remove_trailing_empty_lines: Bool do
 872                 var was_empty = false
 873                 var line = last_line
 874                 while line != null and line.is_empty do
 875                         remove_line line
 876                         line = last_line
 877                         was_empty = true
 878                 end
 879                 return was_empty
 880         end
 881
 882         # Remove leading and trailing empty lines.
 883         fun remove_surrounding_empty_lines: Bool do
 884                 var was_empty = false
 885                 if remove_leading_empty_lines then was_empty = true
 886                 if remove_trailing_empty_lines then was_empty = true
 887                 return was_empty
 888         end
 889
 890         # Remove list markers and up to 4 leading spaces.
 891         # Used to clean nested lists.
 892         fun remove_list_indent(v: MarkdownProcessor) do
 893                 var line = first_line
 894                 while line != null do
 895                         if not line.is_empty then
 896                                 var kind = v.line_kind(line)
 897                                 if kind isa LineList then
 898                                         line.value = kind.extract_value(line)
 899                                 else
 900                                         line.value = line.value.substring_from(line.leading.min(4))
 901                                 end
 902                                 line.leading = line.process_leading
 903                         end
 904                         line = line.next
 905                 end
 906         end
 907
 908         # Collect block line text.
 909         fun text: String do
 910                 var text = new FlatBuffer
 911                 var line = first_line
 912                 while line != null do
 913                         if not line.is_empty then
 914                                 text.append line.text
 915                         end
 916                         text.append "\n"
 917                         line = line.next
 918                 end
 919                 return text.write_to_string
 920         end
 921 end
 922
 923 # Representation of a markdown block in the AST.
 924 # Each `Block` is linked to a `MDBlock` that contains mardown code.
 925 abstract class Block
 926
 927         # The markdown block `self` is related to.
 928         var block: MDBlock
 929
 930         # Output `self` using `v.decorator`.
 931         fun emit(v: MarkdownEmitter) do v.emit_in(self)
 932
 933         # Emit the containts of `self`, lines or blocks.
 934         fun emit_in(v: MarkdownEmitter) do
 935                 block.remove_surrounding_empty_lines
 936                 if block.has_lines then
 937                         emit_lines(v)
 938                 else
 939                         emit_blocks(v)
 940                 end
 941         end
 942
 943         # Emit lines contained in `block`.
 944         fun emit_lines(v: MarkdownEmitter) do
 945                 var tpl = v.push_buffer
 946                 var line = block.first_line
 947                 while line != null do
 948                         if not line.is_empty then
 949                                 v.add line.value.substring(line.leading, line.value.length - line.trailing)
 950                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
 951                         end
 952                         if line.next != null then
 953                                 v.addn
 954                         end
 955                         line = line.next
 956                 end
 957                 v.pop_buffer
 958                 v.emit_text(tpl)
 959         end
 960
 961         # Emit sub-blocks contained in `block`.
 962         fun emit_blocks(v: MarkdownEmitter) do
 963                 var block = self.block.first_block
 964                 while block != null do
 965                         block.kind.emit(v)
 966                         block = block.next
 967                 end
 968         end
 969 end
 970
 971 # A block without any markdown specificities.
 972 #
 973 # Actually use the same implementation than `BlockCode`,
 974 # this class is only used for typing purposes.
 975 class BlockNone
 976         super Block
 977 end
 978
 979 # A markdown blockquote.
 980 class BlockQuote
 981         super Block
 982
 983         redef fun emit(v) do v.decorator.add_blockquote(v, self)
 984
 985         # Remove blockquote markers.
 986         private fun remove_block_quote_prefix(block: MDBlock) do
 987                 var line = block.first_line
 988                 while line != null do
 989                         if not line.is_empty then
 990                                 if line.value[line.leading] == '>' then
 991                                         var rem = line.leading + 1
 992                                         if line.leading + 1 < line.value.length and
 993                                            line.value[line.leading + 1] == ' ' then
 994                                                 rem += 1
 995                                         end
 996                                         line.value = line.value.substring_from(rem)
 997                                         line.leading = line.process_leading
 998                                 end
 999                         end
1000                         line = line.next
1001                 end
1002         end
1003 end
1004
# A markdown code block.
class BlockCode
        super Block

        # Index of the first character emitted for each line.
        #
        # Block code lines start at 4 spaces.
        protected var line_start = 4

        # Emission is delegated to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_code(v, self)
        end

        # Emit each line from `line_start` through `append_code`.
        #
        # Empty lines only produce a line feed.
        redef fun emit_lines(v) do
                var line = block.first_line
                while line != null do
                        var code = line.value
                        if not line.is_empty then
                                v.decorator.append_code(v, code, line_start, code.length)
                        end
                        v.addn
                        line = line.next
                end
        end
end
1027
# A markdown code-fence block.
#
# The implementation is inherited from `BlockCode`, only `line_start` differs.
# This class is mainly used for typing purposes.
class BlockFence
        super BlockCode

        # Fence code lines are emitted from their first character.
        redef var line_start = 0
end
1038
# A markdown headline.
class BlockHeadline
        super Block

        redef fun emit(v) do v.decorator.add_headline(v, self)

        # Depth of the headline used to determine the headline level.
        #
        # `0` means that the depth is not known yet.
        var depth = 0

        # Remove headline marks from the first line of `block` and set `depth`.
        #
        # Nothing is done if `depth` is already set, or if `block` has no
        # first line or an empty one.
        # The opening `#` are counted to compute `depth` (at most 6), then the
        # opening marks, the closing marks and the surrounding spaces are removed.
        # A line made only of marks and spaces is flagged as empty.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var line = block.first_line
                if line == null or line.is_empty then return
                var value = line.value
                var length = value.length
                # Count and skip the opening marks.
                var level = 0
                var start = line.leading
                while start < length and value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < length and value[start] == ' ' do start += 1
                # Skip the closing marks, never going before `start`.
                var nend = length - line.trailing - 1
                while nend >= start and value[nend] == '#' do nend -= 1
                while nend >= start and value[nend] == ' ' do nend -= 1
                if nend < start then
                        # Nothing is left once the marks are removed.
                        line.is_empty = true
                else
                        line.value = value.substring(start, nend - start + 1)
                        line.leading = 0
                        line.trailing = 0
                end
                depth = level.min(6)
        end
end
1075
# An item of a markdown list.
class BlockListItem
        super Block

        # Emission is delegated to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1082
# A markdown list block.
# Common services of ordered and unordered lists.
abstract class BlockList
        super Block

        # Split the list block into one `BlockListItem` sub-block per item.
        #
        # Starting from the second line, a new item begins at each list line
        # and at each non-empty, non-indented line that follows an empty line.
        # The remaining lines form the last item.
        private fun init_block(v: MarkdownProcessor) do
                var line = block.first_line
                line = line.next
                while line != null do
                        var kind = v.line_kind(line)
                        var starts_item = kind isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                # Everything before `line` belongs to the previous item.
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last = block.split(block.last_line.as(not null))
                last.kind = new BlockListItem(last)
        end

        # Expand list items as paragraphs if needed.
        #
        # If a list item of `block` contains a `BlockParagraph`, the `BlockNone`
        # sub-blocks of all the list items are turned into `BlockParagraph`.
        private fun expand_paragraphs(block: MDBlock) do
                # First pass: look for a paragraph in any list item.
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var sub = item.first_block
                                while sub != null and not found do
                                        found = sub.kind isa BlockParagraph
                                        sub = sub.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # Second pass: promote the plain blocks of every list item.
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null do
                                        if child.kind isa BlockNone then
                                                child.kind = new BlockParagraph(child)
                                        end
                                        child = child.next
                                end
                        end
                        item = item.next
                end
        end
end
1140
# An ordered markdown list.
class BlockOrderedList
        super BlockList

        # Emission is delegated to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1147
# An unordered markdown list.
class BlockUnorderedList
        super BlockList

        # Emission is delegated to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1154
# A paragraph of markdown text.
class BlockParagraph
        super Block

        # Emission is delegated to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1161
# A markdown horizontal ruler.
class BlockRuler
        super Block

        # Emission is delegated to the decorator of `v`.
        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1168
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the lines unchanged, each one followed by a line feed.
        #
        # Empty lines only produce the line feed.
        redef fun emit_lines(v) do
                var current = block.first_line
                while current != null do
                        if not current.is_empty then
                                v.add current.value
                        end
                        v.addn
                        current = current.next
                end
        end
end
1182
# A line of markdown text, as stored in a `MDBlock`.
#
# Lines of a same block are doubly linked through `prev` and `next`.
class MDLine

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        var next_empty: Bool = false is writable

        # Initialize a new MDLine from its string value.
        #
        # `leading`, `trailing` and `is_empty` are computed from `value`.
        init do
                self.leading = process_leading
                if leading != value.length then
                        self.is_empty = false
                        self.trailing = process_trailing
                end
        end

        # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
        #
        # The `next_empty` and `prev_empty` flags of the neighbours are updated too.
        fun clear do
                value = ""
                leading = 0
                trailing = 0
                is_empty = true
                var before = prev
                if before != null then before.next_empty = true
                var after = next
                if after != null then after.prev_empty = true
        end

        # Number of leading spaces on this line.
        var leading: Int = 0 is writable

        # Compute `leading` depending on `value`.
        #
        # If `value` contains only spaces (or nothing), the line is cleared
        # (see `clear`) and `0` is returned.
        fun process_leading: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[count] == ' ' do count += 1
                # Test the fresh count: the `leading` attribute may be outdated
                # when `value` was just modified.
                if count == value.length then
                        clear
                        return 0
                end
                return count
        end

        # Number of trailing spaces on this line.
        var trailing: Int = 0 is writable

        # Compute `trailing` depending on `value`.
        #
        # The scan stops at the beginning of `value`, so a `value` made only
        # of spaces gives its length and an empty `value` gives `0`.
        fun process_trailing: Int do
                var count = 0
                var value = self.value
                var last = value.length - 1
                while count <= last and value[last - count] == ' ' do
                        count += 1
                end
                return count
        end

        # Count the amount of `ch` in this line.
        # Return a value > 0 if this line only consists of `ch` and spaces.
        fun count_chars(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                                continue
                        end
                        # Any other character disqualifies the whole line.
                        count = 0
                        break
                end
                return count
        end

        # Count the amount of `ch` at the start of this line ignoring spaces.
        fun count_chars_start(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                        else
                                break
                        end
                end
                return count
        end

        # Last XML line if any.
        #
        # Set by `check_html` and `read_xml_comment` on the line that opens the markup.
        private var xml_end_line: nullable MDLine = null

        # Does `value` contain valid XML markup?
        #
        # The markup is read from `leading` and may continue on the following lines.
        # On success, `xml_end_line` is set to the line where the markup ends.
        private fun check_html: Bool do
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                pos = value.read_xml(tmp, pos, false)
                if pos <= -1 then return false
                var tag = tmp.xml_tag
                if not tag.is_html_block then
                        return false
                end
                if tag == "hr" then
                        xml_end_line = self
                        return true
                end
                # `tags` is the stack of the block tags currently open.
                tags.add tag
                var line: nullable MDLine = self
                while line != null do
                        # Go to the next `<` of the current line.
                        while pos < line.value.length and line.value[pos] != '<' do
                                pos += 1
                        end
                        if pos >= line.value.length then
                                # A `/` just before the last character closes the current tag.
                                if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                        tags.pop
                                        if tags.is_empty then
                                                xml_end_line = line
                                                break
                                        end
                                end
                                line = line.next
                                pos = 0
                        else
                                tmp = new FlatBuffer
                                var new_pos = line.value.read_xml(tmp, pos, false)
                                if new_pos > 0 then
                                        tag = tmp.xml_tag
                                        if tag.is_html_block and not tag == "hr" then
                                                if tmp[1] == '/' then
                                                        # A closing tag must match the last open one.
                                                        if tags.last != tag then
                                                                return false
                                                        end
                                                        tags.pop
                                                else
                                                        tags.add tag
                                                end
                                        end
                                        if tags.is_empty then
                                                xml_end_line = line
                                                break
                                        end
                                        pos = new_pos
                                else
                                        pos += 1
                                end
                        end
                end
                return tags.is_empty
        end

        # Read a XML comment.
        # Used by `check_html`.
        #
        # `start` is the position of the `<` that opens the comment in `first_line`.
        # Return the position that follows the closing `-->` and set
        # `first_line.xml_end_line` to the line that contains it,
        # or return `-1` if no complete comment is found.
        private fun read_xml_comment(first_line: MDLine, start: Int): Int do
                var line: nullable MDLine = first_line
                if start + 3 < line.value.length then
                        # The two `-` of `<!--` are located relatively to `start`.
                        if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
                                var pos = start + 4
                                while line != null do
                                        while pos < line.value.length and line.value[pos] != '-' do
                                                pos += 1
                                        end
                                        if pos == line.value.length then
                                                line = line.next
                                                pos = 0
                                        else
                                                if pos + 2 < line.value.length then
                                                        if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
                                                                first_line.xml_end_line = line
                                                                return pos + 3
                                                        end
                                                end
                                                pos += 1
                                        end
                                end
                        end
                end
                return -1
        end

        # Extract the text of `self` without leading and trailing spaces.
        fun text: String do
                # `substring` expects a character count, not an end index.
                var count = value.length - leading - trailing
                if count < 0 then count = 0
                return value.substring(leading, count)
        end
end
1383
# A kind of markdown line.
#
# Each kind of line implements its own processing in `process`.
interface Line

        # Process the line(s) at the current position of `v`.
        # See `MarkdownProcessor::recurse`.
        fun process(v: MarkdownProcessor) is abstract
end
1391
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the line: only move `v.current_line` to the next line.
        redef fun process(v) do
                var line = v.current_line
                v.current_line = line.next
        end
end
1400
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines from `v.current_line` into a new sub-block.
        #
        # The sub-block stops at the first empty line, at the end of the current
        # block, or before a line that starts another construct.
        # It is a `BlockNone` inside a list when it neither follows nor stops on
        # an empty line, else a `BlockParagraph`.
        redef fun process(v) do
                var line = v.current_line
                var was_empty = line.prev_empty
                # Look for the end of the block.
                while line != null and not line.is_empty do
                        var t = v.line_kind(line)
                        if v.in_list and t isa LineList then break
                        if t isa LineCode or t isa LineFence then break
                        if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
                           t isa LineHR or t isa LineBlockquote or t isa LineXML then break
                        line = line.next
                end
                # Split the current block where the scan stopped.
                var current = v.current_block
                var block: MDBlock
                var plain: Bool
                if line == null then
                        # End of the current block: take all the remaining lines.
                        block = current.split(current.last_line.as(not null))
                        plain = v.in_list and not was_empty
                else if line.is_empty then
                        # Stopped on an empty line: it is part of the new block.
                        block = current.split(line)
                        plain = false
                else
                        # Stopped on another construct: split just before it.
                        block = current.split(line.prev.as(not null))
                        plain = v.in_list and not was_empty
                end
                if plain then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                current.remove_leading_empty_lines
                v.current_line = current.first_line
        end
end
1450
# A line of markdown code.
class LineCode
        super Line

        # Gather the following code lines and empty lines into a `BlockCode`.
        #
        # Surrounding empty lines are then removed from the new block.
        redef fun process(v) do
                # Look for the first line that is neither empty nor code.
                var line = v.current_line
                while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
                        line = line.next
                end
                # Split just before that line, or at the end of the current block.
                var current = v.current_block
                var last: nullable MDLine
                if line != null then
                        last = line.prev
                else
                        last = current.last_line
                end
                var block = current.split(last.as(not null))
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = current.first_line
        end
end
1473
# A line of raw XML.
class LineXML
        super Line

        # Gather the lines up to `xml_end_line` into a `BlockXML`.
        #
        # Lines located before the current line, if any, are first split
        # into their own sub-block.
        redef fun process(v) do
                var line = v.current_line
                var current = v.current_block
                var before = line.prev
                if before != null then
                        current.split(before)
                end
                var block = current.split(line.xml_end_line.as(not null))
                block.kind = new BlockXML(block)
                current.remove_leading_empty_lines
                v.current_line = current.first_line
        end
end
1488
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the lines of the quote into a `BlockQuote` and process its content.
        #
        # The quote ends before the first non-empty, non-indented line that
        # follows an empty line and that is not itself a blockquote line.
        redef fun process(v) do
                # Look for the end of the quote.
                var line = v.current_line
                while line != null do
                        var candidate = not line.is_empty and line.prev_empty and line.leading == 0
                        if candidate and not (v.line_kind(line) isa LineBlockquote) then break
                        line = line.next
                end
                # Build the sub-block.
                var current = v.current_block
                var last: nullable MDLine
                if line != null then
                        last = line.prev
                else
                        last = current.last_line
                end
                var block = current.split(last.as(not null))
                var kind = new BlockQuote(block)
                block.kind = kind
                block.remove_surrounding_empty_lines
                kind.remove_block_quote_prefix(block)
                # Process the content of the quote, then resume after it.
                v.current_line = line
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1518
1519 # A markdown ruler line.
class LineHR
        super Line

        # Put the current line of `v` alone in a `BlockRuler`.
        redef fun process(v) do
                var ruler_line = v.current_line
                # Close the block made of the lines preceding the ruler, if any.
                if ruler_line.prev != null then
                        v.current_block.split(ruler_line.prev.as(not null))
                end
                var ruler = v.current_block.split(ruler_line.as(not null))
                ruler.kind = new BlockRuler(ruler)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1532
1533 # A markdown fence code line.
class LineFence
        super Line

        # Build a `BlockFence` from the opening fence at the current line of `v`
        # up to the next fence line (or the end of the block when there is none).
        redef fun process(v) do
                # Search the closing fence, starting after the opening one.
                var cursor = v.current_line.next
                while cursor != null and not (v.line_kind(cursor) isa LineFence) do
                        cursor = cursor.next
                end
                # Step over the closing fence so that it belongs to the block.
                if cursor != null then cursor = cursor.next
                # Split the fenced lines out of the current block.
                var fence: MDBlock
                if cursor == null then
                        fence = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        fence = v.current_block.split(cursor.prev.as(not null))
                end
                fence.kind = new BlockFence(fence)
                # Blank the fence markers themselves.
                fence.first_line.clear
                var closing = fence.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = cursor
        end
end
1564
1565 # A markdown headline.
class LineHeadline
        super Line

        # Put the current line of `v` alone in a `BlockHeadline`.
        redef fun process(v) do
                var title = v.current_line
                # Lines located before the headline go in their own block.
                var before = title.prev
                if before != null then
                        v.current_block.split(before)
                end
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                block.kind = headline
                headline.transform_headline(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1581
1582 # A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Build a `BlockHeadline` of depth 1 from the current line of `v`.
        # The line that follows the title is cleared.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                if before != null then
                        v.current_block.split(before)
                end
                title.next.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 1
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1600
1601 # A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Build a `BlockHeadline` of depth 2 from the current line of `v`.
        # The line that follows the title is cleared.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                if before != null then
                        v.current_block.split(before)
                end
                title.next.clear
                var block = v.current_block.split(title.as(not null))
                var headline = new BlockHeadline(block)
                headline.depth = 2
                headline.transform_headline(block)
                block.kind = headline
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1619
1620 # A markdown list line.
1621 # Mainly used to factorize code between ordered and unordered lists.
class LineList
        super Line

        # Build a list block from the current line of `v`, then process each of
        # its sub-blocks as a list item.
        redef fun process(v) do
                # Look for the first line that ends the list: a non-empty, unindented
                # line that follows an empty one and is not a list line.
                var cursor = v.current_line
                loop
                        if cursor == null then break
                        var line_type = v.line_kind(cursor)
                        if cursor.is_empty or not cursor.prev_empty or cursor.leading != 0 or
                           line_type isa LineList then
                                cursor = cursor.next
                                continue
                        end
                        break
                end
                # Split the list lines out of the current block.
                var list: MDBlock
                if cursor == null then
                        list = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        list = v.current_block.split(cursor.prev.as(not null))
                end
                var kind = block_kind(list)
                list.kind = kind
                # The flags are reset before and after the trim since trimming
                # may change which lines are first and last.
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                list.remove_surrounding_empty_lines
                list.first_line.prev_empty = false
                list.last_line.next_empty = false
                kind.init_block(v)
                # Each sub-block is unindented then processed as a list item.
                var item = list.first_block
                loop
                        if item == null then break
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(list)
                v.current_line = cursor
        end

        # Create a new block kind based on this line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract string value from `MDLine`.
        protected fun extract_value(line: MDLine): String is abstract
end
1665
1666 # An ordered list line.
class LineOList
        super LineList

        # Ordered lists are represented by a `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # The value starts two characters after the first `.` of the line.
        redef fun extract_value(line) do
                var text = line.value
                var from = text.index_of('.') + 2
                return text.substring_from(from)
        end
end
1676
1677 # An unordered list line.
class LineUList
        super LineList

        # Unordered lists are represented by a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # The value starts two characters after the leading of the line.
        redef fun extract_value(line) do
                var from = line.leading + 2
                return line.value.substring_from(from)
        end
end
1687
1688 # A token represent a character in the markdown input.
1689 # Some tokens have a specific markup behaviour that is handled here.
abstract class Token

        # Index of this token in the markdown input.
        var pos: Int

        # Character of the markdown input located at `pos`.
        var char: Char

        # Output this token with `v`.
        # By default the raw `char` is appended.
        fun emit(v: MarkdownEmitter) do
                v.addc(char)
        end
end
1701
1702 # A token without a specific meaning.
class TokenNone
        super Token

        # Nothing is redefined: the `emit` inherited from `Token` is used as is.
end
1706
1707 # An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        # When no match is found, only the token character is output.
        redef fun emit(v) do
                # The emphasized content is rendered in a separate buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var close = v.emit_text_until(text, pos + 1, self)
                v.pop_buffer
                if close <= 0 then
                        v.addc(char)
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = close
        end
end
1723
1724 # An emphasis star token.
class TokenEmStar
        super TokenEm

        # All the behaviour is inherited from `TokenEm`.
end
1728
1729 # An emphasis underscore token.
class TokenEmUnderscore
        super TokenEm

        # All the behaviour is inherited from `TokenEm`.
end
1733
1734 # A strong token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        # When no match is found, only the token character is output.
        redef fun emit(v) do
                # The content starts two characters after `pos`.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var close = v.emit_text_until(text, pos + 2, self)
                v.pop_buffer
                if close <= 0 then
                        v.addc(char)
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = close + 1
        end
end
1750
1751 # A strong star token.
class TokenStrongStar
        super TokenStrong

        # All the behaviour is inherited from `TokenStrong`.
end
1755
1756 # A strong underscore token.
class TokenStrongUnderscore
        super TokenStrong

        # All the behaviour is inherited from `TokenStrong`.
end
1760
1761 # A code token.
1762 # This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span of code.
        # Spaces around the code are dropped; a span made only of spaces emits nothing.
        # When no match is found, only the token character is output.
        redef fun emit(v) do
                var from = pos + next_pos + 1
                var to = v.processor.find_token(v.current_text.as(not null), from, self)
                if to <= 0 then
                        v.addc(char)
                        return
                end
                v.current_pos = to + next_pos
                # Skip the leading spaces.
                while from < to and v.current_text[from] == ' ' do
                        from += 1
                end
                if from >= to then return
                # Skip the trailing spaces.
                while v.current_text[to - 1] == ' ' do
                        to -= 1
                end
                v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
        end

        # Offset added to positions by `emit`: 0 in `TokenCodeSingle`, 1 in `TokenCodeDouble`.
        private fun next_pos: Int is abstract
end
1783
1784 # A span code token.
class TokenCodeSingle
        super TokenCode

        # No extra offset for a single delimiter.
        redef fun next_pos do
                return 0
        end
end
1790
1791 # A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One extra character to skip for a doubled delimiter.
        redef fun next_pos do
                return 1
        end
end
1797
1798 # A link or image token.
1799 # This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the construct with `emit_hyper` when `check_link` accepts it,
        # otherwise output the token character as plain text.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the link is a valid link.
        #
        # Parse the construct found at `start` in the current text of `v` and fill
        # `name`, `link`, `comment` and `is_abbrev` accordingly.
        # Three forms are recognized after the `[name]` part: an inline `(address "title")`,
        # a `[id]` reference, or nothing (the name itself is used as reference id).
        #
        # Return the position of the last character of the construct,
        # or -1 if the text at `start` is not a valid link.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text
                var pos
                # Images have one more leading character than links.
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var tmp = new FlatBuffer
                pos = md.read_md_link_id(tmp, pos)
                if pos < start then return -1
                name = tmp
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # Nothing follows the name: it must be a known reference.
                        var tid = name.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                is_abbrev = lr.is_abbrev
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                else if md[pos] == '(' then
                        # Inline form: `(address)` or `(address "title")`.
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        tmp = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(tmp, pos + 1, '>')
                        else
                                pos = md.read_md_link(tmp, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # The closing `>` may be the last character of the text:
                        # there is no room left for the closing `)`.
                        if pos >= md.length then return -1
                        link = tmp.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # Only spaces remain after the address: the `)` is missing.
                                if pos < start then return -1
                                if pos > start and md[pos] == '"' then
                                        pos += 1
                                        tmp = new FlatBuffer
                                        pos = md.read_until(tmp, pos, '"')
                                        if pos < start then return -1
                                        comment = tmp.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference form: `[name][id]`, an empty id means the name is the id.
                        pos += 1
                        tmp = new FlatBuffer
                        pos = md.read_raw_until(tmp, pos, ']')
                        if pos < start then return -1
                        var id
                        if tmp.length > 0 then
                                id = tmp
                        else
                                id = name
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Implicit form: the name, with line breaks replaced by spaces, is the id.
                        var tid = name.write_to_string.replace("\n", " ").to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                                pos = old_pos
                        else
                                return -1
                        end
                end
                if link == null then return -1
                return pos
        end
end
1916
1917 # A markdown link token.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation when the link is flagged as such and has a title,
        # a regular link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                        return
                end
                v.decorator.add_link(v, link.as(not null), name.as(not null), title)
        end
end
1929
1930 # A markdown image token.
class TokenImage
        super TokenLinkOrImage

        # Emit the image with its address, its text and its optional title.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, address, text, comment)
        end
end
1938
1939 # A HTML/XML token.
class TokenHTML
        super Token

        # Emit the HTML found at the current position of `v`.
        # When `check_html` rejects it, the token character is escaped instead.
        redef fun emit(v) do
                var html = new FlatBuffer
                var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link and mailto shortcuts.
        #
        # Auto links are emitted directly through the decorator of `v`;
        # inline HTML is appended to `out`.
        # Return the position of the closing `>`, or -1 if nothing valid was found.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # Auto link: a known prefix followed by `:` then anything up to `>`.
                var url = new FlatBuffer
                var stop = md.read_until(url, start + 1, ':', ' ', '>', '\n')
                var has_prefix = stop != -1 and md[stop] == ':' and url.is_link_prefix
                if has_prefix then
                        stop = md.read_until(url, stop, '>')
                        if stop != -1 then
                                var href = url.write_to_string
                                v.decorator.add_link(v, href, href, null)
                                return stop
                        end
                end
                # TODO check for mailto
                # Inline HTML needs at least two characters after `start`.
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
1976
1977 # An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity found at `pos` unchanged when `check_entity` accepts it,
        # otherwise escape the token character.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` and append the entity to `out`
        # (closing `;` included).
        # Accepted forms are `#` followed by decimal digits, `#x` or `#X` followed
        # by hexadecimal digits, and names made of letters and digits.
        #
        # Return the position of the `;` in `md`, or -1 if the entity is not valid.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                # At least three characters are needed (the first one is not checked here).
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # At least one hexadecimal digit is required after the `x`.
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_dec = c >= '0' and c <= '9'
                                        var is_lower_hex = c >= 'a' and c <= 'f'
                                        var is_upper_hex = c >= 'A' and c <= 'F'
                                        if not is_dec and not is_lower_hex and not is_upper_hex then
                                                return -1
                                        end
                                end
                        else
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                        # TODO check that the name is a known entity
                end
                out.add ';'
                return pos
        end
end
2030
2031 # A markdown escape token.
class TokenEscape
        super Token

        # Move to the character that follows the token and output it as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc(v.current_text[escaped])
        end
end
2040
2041 # A markdown super token.
class TokenSuper
        super Token

        # Emit the text up to the matching token as a superscript.
        # When no match is found, only the token character is output.
        redef fun emit(v) do
                # The content is rendered in a separate buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var close = v.emit_text_until(text, pos + 1, self)
                v.pop_buffer
                if close <= 0 then
                        v.addc(char)
                        return
                end
                v.decorator.add_super(v, content)
                v.current_pos = close
        end
end
2057
2058 redef class Text
2059
2060         # Get the position of the next non-space character.
2061         private fun skip_spaces(start: Int): Int do
2062                 var pos = start
2063                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2064                         pos += 1
2065                 end
2066                 if pos < length then return pos
2067                 return -1
2068         end
2069
2070         # Read `self` until `nend` and append it to the `out` buffer.
2071         # Escape markdown special chars.
2072         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2073                 var pos = start
2074                 while pos < length do
2075                         var c = self[pos]
2076                         if c == '\\' and pos + 1 < length then
2077                                 pos = escape(out, self[pos + 1], pos)
2078                         else
2079                                 var end_reached = false
2080                                 for n in nend do
2081                                         if c == n then
2082                                                 end_reached = true
2083                                                 break
2084                                         end
2085                                 end
2086                                 if end_reached then break
2087                                 out.add c
2088                         end
2089                         pos += 1
2090                 end
2091                 if pos == length then return -1
2092                 return pos
2093         end
2094
2095         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2096         # No escape is made.
2097         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2098                 var pos = start
2099                 while pos < length do
2100                         var c = self[pos]
2101                         var end_reached = false
2102                         for n in nend do
2103                                 if c == n then
2104                                         end_reached = true
2105                                         break
2106                                 end
2107                         end
2108                         if end_reached then break
2109                         out.add c
2110                         pos += 1
2111                 end
2112                 if pos == length then return -1
2113                 return pos
2114         end
2115
2116         # Read `self` as XML until `to` and append it to the `out` buffer.
2117         # Escape HTML special chars.
2118         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2119                 var pos = from
2120                 var in_str = false
2121                 var str_char: nullable Char = null
2122                 while pos < length do
2123                         var c = self[pos]
2124                         if in_str then
2125                                 if c == '\\' then
2126                                         out.add c
2127                                         pos += 1
2128                                         if pos < length then
2129                                                 out.add c
2130                                                 pos += 1
2131                                         end
2132                                         continue
2133                                 end
2134                                 if c == str_char then
2135                                         in_str = false
2136                                         out.add c
2137                                         pos += 1
2138                                         continue
2139                                 end
2140                         end
2141                         if c == '"' or c == '\'' then
2142                                 in_str = true
2143                                 str_char = c
2144                         end
2145                         if not in_str then
2146                                 var end_reached = false
2147                                 for n in [0..to.length[ do
2148                                         if c == to[n] then
2149                                                 end_reached = true
2150                                                 break
2151                                         end
2152                                 end
2153                                 if end_reached then break
2154                         end
2155                         out.add c
2156                         pos += 1
2157                 end
2158                 if pos == length then return -1
2159                 return pos
2160         end
2161
2162         # Read `self` as XML and append it to the `out` buffer.
2163         # Safe mode can be activated to limit reading to valid xml.
2164         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2165                 var pos = 0
2166                 var is_close_tag = false
2167                 if start + 1 >= length then return -1
2168                 if self[start + 1] == '/' then
2169                         is_close_tag = true
2170                         pos = start + 2
2171                 else if self[start + 1] == '!' then
2172                         out.append "<!"
2173                         return start + 1
2174                 else
2175                         is_close_tag = false
2176                         pos = start + 1
2177                 end
2178                 if safe_mode then
2179                         var tmp = new FlatBuffer
2180                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2181                         if pos == -1 then return -1
2182                         var tag = tmp.write_to_string.trim.to_lower
2183                         if tag.is_html_unsafe then
2184                                 out.append "&lt;"
2185                                 if is_close_tag then out.add '/'
2186                                 out.append tmp
2187                         else
2188                                 out.append "<"
2189                                 if is_close_tag then out.add '/'
2190                                 out.append tmp
2191                         end
2192                 else
2193                         out.add '<'
2194                         if is_close_tag then out.add '/'
2195                         pos = read_xml_until(out, pos, ' ', '/', '>')
2196                 end
2197                 if pos == -1 then return -1
2198                 pos = read_xml_until(out, pos, '/', '>')
2199                 if pos == -1 then return -1
2200                 if self[pos] == '/' then
2201                         out.append " /"
2202                         pos = self.read_xml_until(out, pos + 1, '>')
2203                         if pos == -1 then return -1
2204                 end
2205                 if self[pos] == '>' then
2206                         out.add '>'
2207                         return pos
2208                 end
2209                 return -1
2210         end
2211
2212         # Read a markdown link address and append it to the `out` buffer.
2213         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2214                 var pos = start
2215                 var counter = 1
2216                 while pos < length do
2217                         var c = self[pos]
2218                         if c == '\\' and pos + 1 < length then
2219                                 pos = escape(out, self[pos + 1], pos)
2220                         else
2221                                 var end_reached = false
2222                                 if c == '(' then
2223                                         counter += 1
2224                                 else if c == ' ' then
2225                                         if counter == 1 then end_reached = true
2226                                 else if c == ')' then
2227                                         counter -= 1
2228                                         if counter == 0 then end_reached = true
2229                                 end
2230                                 if end_reached then break
2231                                 out.add c
2232                         end
2233                         pos += 1
2234                 end
2235                 if pos == length then return -1
2236                 return pos
2237         end
2238
2239         # Read a markdown link text and append it to the `out` buffer.
2240         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2241                 var pos = start
2242                 var counter = 1
2243                 while pos < length do
2244                         var c = self[pos]
2245                         var end_reached = false
2246                         if c == '[' then
2247                                 counter += 1
2248                                 out.add c
2249                         else if c == ']' then
2250                                 counter -= 1
2251                                 if counter == 0 then
2252                                         end_reached = true
2253                                 else
2254                                         out.add c
2255                                 end
2256                         else
2257                                 out.add c
2258                         end
2259                         if end_reached then break
2260                         pos += 1
2261                 end
2262                 if pos == length then return -1
2263                 return pos
2264         end
2265
2266         # Extract the XML tag name from a XML tag.
2267         private fun xml_tag: String do
2268                 var tpl = new FlatBuffer
2269                 var pos = 1
2270                 if pos < length and self[1] == '/' then pos += 1
2271                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2272                         tpl.add self[pos]
2273                         pos += 1
2274                 end
2275                 return tpl.write_to_string.to_lower
2276         end
2277
2278         # Read and escape the markdown contained in `self`.
2279         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2280                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2281                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2282                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2283                    c == '`' or c == '~' or c == '^' then
2284                         out.add c
2285                         return pos + 1
2286                 end
2287                 out.add '\\'
2288                 return pos
2289         end
2290
2291         # Is `self` an unsafe HTML element?
2292         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2293
2294         # Is `self` a HRML block element?
2295         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2296
2297         # Is `self` a link prefix?
2298         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2299
2300         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2301
2302         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2303
2304         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2305 end
2306
redef class String

        # Process `self` as markdown with a fresh `MarkdownProcessor` and
        # return the resulting HTML.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Streamable do
                return (new MarkdownProcessor).process(self)
        end
end