lib/markdown/markdown.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Markdown parsing.
  16 module markdown
  17
  18 import template
  19
  20 # Parse a markdown string and split it in blocks.
  21 #
  22 # Blocks are then output by a `MarkdownEmitter`.
  23 #
  24 # Usage:
  25 #
  26 #    var proc = new MarkdownProcessor
  27 #    var html = proc.process("**Hello World!**")
  28 #    assert html == "<p><strong>Hello World!</strong></p>\n"
  29 #
  30 # SEE: `String::md_to_html` for a shortcut.
  31 class MarkdownProcessor
  32
  33         # `MarkdownEmitter` used for output.
             #
             # Declared `noinit`: the value is assigned by `init` below.
  34         var emitter: MarkdownEmitter is noinit
  35
             # Create the default `MarkdownEmitter`, bound to this processor.
  36         init do self.emitter = new MarkdownEmitter(self)
  37
  38         # Convert the markdown `input` string and return what the emitter produced.
  39         fun process(input: String): Streamable do
  40                 # forget the state left by a previous run
  41                 current_block = null
  42                 current_line = null
  43                 last_link_ref = null
  44                 link_refs.clear
  45                 # cut the input into lines, then into blocks
  46                 var root = read_lines(input)
  47                 root.remove_surrounding_empty_lines
  48                 recurse(root, false)
  49                 # emit the kind of the root block
  50                 return emitter.emit(root.kind)
  51         end
  52
  53         # Cut `input` into `MDLine`s and gather them in a new parent `MDBlock`.
  54         #
  55         # Tabs are replaced by spaces up to the next multiple of 4 columns.
  56         # Lines accepted by `check_link_ref` are not added to the block.
  57         private fun read_lines(input: String): MDBlock do
  58                 var res = new MDBlock
  59                 var buf = new FlatBuffer
  60                 var i = 0
  61                 while i < input.length do
  62                         buf.clear
  63                         var col = 0
  64                         var at_eol = false
  65                         while not at_eol and i < input.length do
  66                                 var c = input[i]
  67                                 # every branch consumes exactly one input char
  68                                 i += 1
  69                                 if c == '\n' then
  70                                         at_eol = true
  71                                 else if c == '\t' then
  72                                         var stop = col + (4 - (col.bin_and(3)))
  73                                         while col < stop do
  74                                                 buf.add ' '
  75                                                 col += 1
  76                                         end
  77                                 else
  78                                         buf.add c
  79                                         col += 1
  80                                 end
  81                         end
  82                         var line = new MDLine(buf.write_to_string)
  83                         # link reference definitions are kept out of the block
  84                         if not check_link_ref(line) then res.add_line line
  85                 end
  86                 return res
  87         end
  88
  89         # Check whether `line` is a link reference definition, or the title line
  90         # that completes the previous definition.
             #
             # A definition looks like `[id]: link` or `[id]: <link>`, on a line whose
             # `leading` is below 4, optionally followed by a title written as `"..."`,
             # `'...'` or `(...)`. It is saved with `add_link_ref`. When no title is
             # given, the ref is kept in `last_link_ref` so that a following line that
             # starts with a title can complete it.
             #
             # Return `true` if `line` was consumed as a definition or as such a title.
  91         private fun check_link_ref(line: MDLine): Bool do
  92                 var md = line.value
  93                 var is_link_ref = false
  94                 var id = new FlatBuffer
  95                 var link = new FlatBuffer
  96                 var comment = new FlatBuffer
  97                 var pos = -1
  98                 if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
  99                         pos = line.leading + 1
 100                         pos = md.read_until(id, pos, ']')
                             # presumably a negative `pos` reports a failed scan (see the
                             # `pos > 0` checks below): do not index from it
 101                         if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
 102                                 if line.value[pos + 1] == ':' then
 103                                         pos += 2
 104                                         pos = md.skip_spaces(pos)
                                             # only blanks may follow the ':'; never index outside of the line
                                             var has_target = pos >= 0 and pos < line.value.length
 105                                         if has_target and line.value[pos] == '<' then
 106                                                 pos += 1
 107                                                 pos = md.read_until(link, pos, '>')
 108                                                 pos += 1
 109                                         else if has_target then
 110                                                 pos = md.read_until(link, pos, ' ', '\n')
 111                                         end
 112                                         if not link.is_empty then
 113                                                 pos = md.skip_spaces(pos)
 114                                                 if pos > 0 and pos < line.value.length then
 115                                                         var c = line.value[pos]
 116                                                         if c == '\"' or c == '\'' or c == '(' then
 117                                                                 pos += 1
 118                                                                 if c == '(' then
 119                                                                         pos = md.read_until(comment, pos, ')')
 120                                                                 else
 121                                                                         pos = md.read_until(comment, pos, c)
 122                                                                 end
 123                                                                 if pos > 0 then is_link_ref = true
 124                                                         end
 125                                                 else
                                                             # nothing after the link: a definition without title
 126                                                         is_link_ref = true
 127                                                 end
 128                                         end
 129                                 end
 130                         end
 131                 end
 132                 if is_link_ref and not id.is_empty and not link.is_empty then
 133                         var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
 134                         add_link_ref(id.write_to_string, lr)
                             # remember a title-less ref: the next line may carry its title
 135                         if comment.is_empty then last_link_ref = lr
 136                         return true
 137                 else
                             # not a definition: try to read the line as the title of `last_link_ref`
 138                         comment = new FlatBuffer
 139                         if not line.is_empty and last_link_ref != null then
 140                                 pos = line.leading
 141                                 var c = line.value[pos]
 142                                 if c == '\"' or c == '\'' or c ==  '(' then
 143                                         pos += 1
 144                                         if c == '(' then
 145                                                 pos = md.read_until(comment, pos, ')')
 146                                         else
 147                                                 pos = md.read_until(comment, pos, c)
 148                                         end
 149                                 end
 150                                 if not comment.is_empty then last_link_ref.title = comment.write_to_string
 151                         end
 152                         if comment.is_empty then return false
 153                         return true
 154                 end
 155         end
 156
 157         # Known link refs.
 158         # This list will be needed during output to expand links.
             #
             # `add_link_ref` stores each ref under its lower-cased key.
 159         var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
 160
 161         # Last encountered link ref (for multiline definitions)
 162         #
 163         # Markdown allows link refs to be defined over two lines:
 164         #
 165         #       [id]: http://example.com/longish/path/to/resource/here
 166         #               "Optional Title Here"
 167         #
 168         private var last_link_ref: nullable LinkRef = null
 169
 170         # Add a link ref to the list, indexed by `key` converted to lower case.
 171         fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
 172
 173         # Recursively split the `root` block.
 174         #
 175         # The block is split according to the type of lines it contains.
 176         # Some blocks, like lists, can be split again recursively.
 177         # The `in_list` mode is used to recurse on lists and build
 178         # nested paragraphs or code blocks.
             #
             # The previous `in_list` mode is restored on every exit path, including
             # when `root` holds nothing but empty lines.
 179         fun recurse(root: MDBlock, in_list: Bool) do
 180                 var old_mode = self.in_list
 181                 var old_root = self.current_block
 182                 self.in_list = in_list
 183
 184                 var line = root.first_line
 185                 while line != null and line.is_empty do
 186                         line = line.next
 187                         if line == null then
                                     # only empty lines: undo the mode switch before leaving
                                     self.in_list = old_mode
                                     return
                             end
 188                 end
 189
 190                 current_line = line
 191                 current_block = root
                     # NOTE(review): each `process` call is expected to move `current_line`
                     # forward, otherwise this loop would not end. Confirm in `Line`.
 192                 while current_line != null do
 193                         line_kind(current_line.as(not null)).process(self)
 194                 end
 195                 self.in_list = old_mode
 196                 self.current_block = old_root
 197         end
 198
 199         # Currently processed line.
 200         # Used when visiting blocks with `recurse`, which loops until it is `null`.
 201         var current_line: nullable MDLine = null is writable
 202
 203         # Currently processed block.
 204         # Used when visiting blocks with `recurse`, which saves and restores it.
 205         var current_block: nullable MDBlock = null is writable
 206
 207         # Is the current recursion in list mode?
 208         # Used when visiting blocks with `recurse`, which saves and restores it.
 209         private var in_list = false
 210
 211         # The type of line.
 212         # see: `md_line_*`
             #
             # The checks below are tried in order and the first match wins, so their
             # order matters. `LineOther` is returned when nothing matches.
 213         fun line_kind(md: MDLine): Line do
 214                 var value = md.value
 215                 var leading = md.leading
 216                 var trailing = md.trailing
 217                 if md.is_empty then return new LineEmpty
 218                 if md.leading > 3 then return new LineCode
 219                 if value[leading] == '#' then return new LineHeadline
 220                 if value[leading] == '>' then return new LineBlockquote
 221
                     # fences: more than 2 chars between `leading` and `trailing`,
                     # starting with '`' or '~' and a `count_chars_start` of at least 3
 222                 if value.length - leading - trailing > 2 then
 223                         if value[leading] == '`' and md.count_chars_start('`') >= 3 then
 224                                 return new LineFence
 225                         end
 226                         if value[leading] == '~' and md.count_chars_start('~') >= 3 then
 227                                 return new LineFence
 228                         end
 229                 end
 230
                     # rulers: starts with '*', '-' or '_' and a `count_chars` of at least 3
 231                 if value.length - leading - trailing > 2 and
 232                    (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
 233                    if md.count_chars(value[leading]) >= 3 then
 234                                 return new LineHR
 235                    end
 236                 end
 237
                     # unordered list item: '*', '-' or '+' followed by a space
 238                 if value.length - leading >= 2 and value[leading + 1] == ' ' then
 239                         var c = value[leading]
 240                         if c == '*' or c == '-' or c == '+' then return new LineUList
 241                 end
 242
                     # ordered list item: digits followed by ". "
 243                 if value.length - leading >= 3 and value[leading].is_digit then
 244                         var i = leading + 1
 245                         while i < value.length and value[i].is_digit do i += 1
 246                         if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
 247                                 return new LineOList
 248                         end
 249                 end
 250
 251                 if value[leading] == '<' and md.check_html then return new LineXML
 252
                     # headlines underlined by the next line with '=' or '-'
 253                 var next = md.next
 254                 if next != null and not next.is_empty then
 255                         if next.count_chars('=') > 0 then
 256                                 return new LineHeadline1
 257                         end
 258                         if next.count_chars('-') > 0 then
 259                                 return new LineHeadline2
 260                         end
 261                 end
 262                 return new LineOther
 263         end
 264
 265         # Get the token kind at `pos`.
             #
             # The decision uses the char at `pos` (`c`), the char before it (`c0`) and
             # the two chars after it (`c1`, `c2`). A space stands in for any of these
             # neighbours that falls outside of `text`.
 266         fun token_at(text: Text, pos: Int): Token do
 267                 var c0: Char
 268                 var c1: Char
 269                 var c2: Char
 270
 271                 if pos > 0 then
 272                         c0 = text[pos - 1]
 273                 else
 274                         c0 = ' '
 275                 end
 276                 var c = text[pos]
 277
 278                 if pos + 1 < text.length then
 279                         c1 = text[pos + 1]
 280                 else
 281                         c1 = ' '
 282                 end
 283                 if pos + 2 < text.length then
 284                         c2 = text[pos + 2]
 285                 else
 286                         c2 = ' '
 287                 end
 288
 289                 if c == '*' then
                             # "**": strong unless both `c0` and `c2` are spaces
 290                         if c1 == '*' then
 291                                 if c0 != ' ' or c2 != ' ' then
 292                                         return new TokenStrongStar(pos, c)
 293                                 else
 294                                         return new TokenEmStar(pos, c)
 295                                 end
 296                         end
                             # single "*": emphasis unless both `c0` and `c1` are spaces
 297                         if c0 != ' ' or c1 != ' ' then
 298                                 return new TokenEmStar(pos, c)
 299                         else
 300                                 return new TokenNone(pos, c)
 301                         end
 302                 else if c == '_' then
                             # same rules as for '*'
 303                         if c1 == '_' then
 304                                 if c0 != ' ' or c2 != ' 'then
 305                                         return new TokenStrongUnderscore(pos, c)
 306                                 else
 307                                         return new TokenEmUnderscore(pos, c)
 308                                 end
 309                         end
 310                         if c0 != ' ' or c1 != ' ' then
 311                                 return new TokenEmUnderscore(pos, c)
 312                         else
 313                                 return new TokenNone(pos, c)
 314                         end
 315                 else if c == '!' then
                             # "![" starts an image
 316                         if c1 == '[' then return new TokenImage(pos, c)
 317                         return new TokenNone(pos, c)
 318                 else if c == '[' then
 319                         return new TokenLink(pos, c)
 320                 else if c == ']' then
 321                         return new TokenNone(pos, c)
 322                 else if c == '`' then
 323                         if c1 == '`' then
 324                                 return new TokenCodeDouble(pos, c)
 325                         else
 326                                 return new TokenCodeSingle(pos, c)
 327                         end
 328                 else if c == '\\' then
                             # a backslash is an escape only before one of the chars listed here
 329                         if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or c1 == '^' then
 330                                 return new TokenEscape(pos, c)
 331                         else
 332                                 return new TokenNone(pos, c)
 333                         end
 334                 else if c == '<' then
 335                         return new TokenHTML(pos, c)
 336                 else if c == '&' then
 337                         return new TokenEntity(pos, c)
 338                 else
 339                         return new TokenNone(pos, c)
 340                 end
 341         end
 342
 343         # Return the index of the first token of the same type as `token` in `text`.
 344         #
 345         # The search starts at `start`; token kinds are computed with `token_at`.
 346         # Return -1 if no such token is found before the end of `text`.
 347         fun find_token(text: Text, start: Int, token: Token): Int do
 348                 for i in [start..text.length[ do
 349                         # the first match wins
 350                         if token_at(text, i).is_same_type(token) then return i
 351                 end
 352                 return -1
 353         end
 354 end
 355
 356 # Emit output corresponding to blocks content.
 357 #
 358 # Blocks are created by a previous pass in `MarkdownProcessor`.
 359 # The emitter uses a `Decorator` to select the output format.
 360 class MarkdownEmitter
 361
 362         # Processor containing link refs.
             # `emit_text_until` also uses it to compute tokens with `token_at`.
 363         var processor: MarkdownProcessor
 364
 365         # Decorator used for output.
 366         # Default is `HTMLDecorator`.
 367         var decorator: Decorator = new HTMLDecorator is writable
 368
 369         # Create a new `MarkdownEmitter` using a custom `decorator`.
             #
             # Runs the default initializer with `processor`, then replaces the default
             # decorator with `decorator`.
 370         init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
 371                 init processor
 372                 self.decorator = decorator
 373         end
 374
 375         # Emit `block` into a fresh buffer pushed on the stack, then pop that buffer and return it.
 376         fun emit(block: Block): Text do
 377                 var out = push_buffer
 378                 block.emit(self)
 379                 pop_buffer
 380                 return out
 381         end
 382
 383         # Emit only the content of `block`.
 384         fun emit_in(block: Block) do
 385                 block.emit_in(self)
 386         end
 387
 388         # Transform and emit the whole markdown `text`, from position 0 and without stop token.
 389         fun emit_text(text: Text) do emit_text_until(text, 0, null)
 390
 391         # Transform and emit markdown text starting at `start` and
 392         # until a token with the same type as `token` is found.
 393         # Go until the end of text if `token` is null or a `TokenNone`.
             #
             # Return the position of the stop token, or -1 if the end of `text` was
             # reached without meeting one.
             #
             # NOTE(review): `current_text` and `current_pos` are restored only on the
             # -1 path; when a stop token is found they keep the values of this call.
             # Callers may rely on that, confirm before changing it.
 394         fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
 395                 var old_text = current_text
 396                 var old_pos = current_pos
 397                 current_text = text
 398                 current_pos = start
 399                 while current_pos < text.length do
 400                         var mt = processor.token_at(text, current_pos)
                             # stop on a token of the wanted type; an emphasis stop token also
                             # stops on the strong token of the same family
 401                         if (token != null and not token isa TokenNone) and
 402                         (mt.is_same_type(token) or
 403                         (token isa TokenEmStar and mt isa TokenStrongStar) or
 404                         (token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
 405                                 return current_pos
 406                         end
 407                         mt.emit(self)
 408                         current_pos += 1
 409                 end
 410                 current_text = old_text
 411                 current_pos = old_pos
 412                 return -1
 413         end
 414
 415         # Currently processed position in `current_text`.
 416         # Used when visiting inline production with `emit_text_until`.
 417         private var current_pos: Int = -1
 418
 419         # Currently processed text.
 420         # Used when visiting inline production with `emit_text_until`.
 421         private var current_text: nullable Text = null
 422
 423         # Stacked buffers.
             # The last one is the current output buffer (see `current_buffer`).
 424         private var buffer_stack = new List[FlatBuffer]
 425
 426         # Push a new empty buffer on the stack and return it.
 427         private fun push_buffer: FlatBuffer do
 428                 var buffer = new FlatBuffer
 429                 buffer_stack.add buffer
 430                 return buffer
 431         end
 432
 433         # Pop the last buffer. The popped buffer is not returned.
 434         private fun pop_buffer do buffer_stack.pop
 435
 436         # Current output buffer: the last one of `buffer_stack`.
             #
             # The stack must not be empty (checked by an `assert`).
 437         private fun current_buffer: FlatBuffer do
 438                 assert not buffer_stack.is_empty
 439                 return buffer_stack.last
 440         end
 441
 442         # Append `e` to current buffer.
             #
             # A `Text` is appended as is; anything else goes through `write_to_string`.
 443         fun add(e: Streamable) do
 444                 if e isa Text then
 445                         current_buffer.append e
 446                 else
 447                         current_buffer.append e.write_to_string
 448                 end
 449         end
 450
 451         # Append `c` to current buffer.
 452         fun addc(c: Char) do current_buffer.add c
 453
 454         # Append a "\n" line break to current buffer.
 455         fun addn do current_buffer.add '\n'
 456 end
 457
 458 # A Link Reference.
 459 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
 460 #
 461 # Example:
 462 #
 463 #    [1]: http://example.com/ "Optional title"
 464 class LinkRef
 465
 466         # Link href
 467         var link: String
 468
 469         # Optional link title
 470         var title: nullable String = null
 471
 472         # Is the link an abbreviation?
 473         var is_abbrev = false
 474
 475         # Create a link with a title.
             # `title` may be `null`; `is_abbrev` keeps its default value.
 476         init with_title(link: String, title: nullable String) do
 477                 self.link = link
 478                 self.title = title
 479         end
 480 end
 481
 482 # A `Decorator` is used to emit markdown into a specific format.
 483 # Default decorator used is `HTMLDecorator`.
     #
     # Every service receives the `MarkdownEmitter` `v` to write its output to.
 484 interface Decorator
 485
 486         # Render a ruler block.
 487         fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract
 488
 489         # Render a headline block with corresponding level.
 490         fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract
 491
 492         # Render a paragraph block.
 493         fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract
 494
 495         # Render a code or fence block.
 496         fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract
 497
 498         # Render a blockquote.
 499         fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract
 500
 501         # Render an unordered list.
 502         fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract
 503
 504         # Render an ordered list.
 505         fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract
 506
 507         # Render a list item.
 508         fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract
 509
 510         # Render an emphasis text.
 511         fun add_em(v: MarkdownEmitter, text: Text) is abstract
 512
 513         # Render a strong text.
 514         fun add_strong(v: MarkdownEmitter, text: Text) is abstract
 515
 516         # Render a link to `link`, labelled by `name`, with `comment` as optional title.
 517         fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 518
 519         # Render an image located at `link`, described by `name`, with `comment` as optional title.
 520         fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract
 521
 522         # Render an abbreviation.
 523         fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract
 524
 525         # Render a code span reading from a buffer.
             # `HTMLDecorator` reads the positions `[from..to[` of `buffer`.
 526         fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 527
 528         # Render a text and escape it.
 529         fun append_value(v: MarkdownEmitter, value: Text) is abstract
 530
 531         # Render code text from buffer and escape it.
             # `HTMLDecorator` reads the positions `[from..to[` of `buffer`.
 532         fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract
 533
 534         # Render a character escape.
 535         fun escape_char(v: MarkdownEmitter, char: Char) is abstract
 536
 537         # Render a line break
 538         fun add_line_break(v: MarkdownEmitter) is abstract
 539
 540         # Generate a new html valid id from a `String`.
 541         fun strip_id(txt: String): String is abstract
 542
 543         # Found headlines during the processing labeled by their ids.
 544         fun headlines: ArrayMap[String, HeadLine] is abstract
 545 end
 546
 547 # Class representing a markdown headline.
 548 class HeadLine
 549         # Unique identifier of this headline.
 550         var id: String
 551
 552         # Text of the headline.
 553         var title: String
 554
 555         # Level of this headline.
 556         #
 557         # According to the markdown specification, level must be in `[1..6]`.
 558         var level: Int
 559 end
 560
 561 # `Decorator` that outputs HTML.
 562 class HTMLDecorator
 563         super Decorator
 564
             # Headlines recorded by `add_headline`, indexed by their id.
 565         redef var headlines = new ArrayMap[String, HeadLine]
 566
             # Emit `<hr/>` followed by a line break.
 567         redef fun add_ruler(v, block) do v.add "<hr/>\n"
 568
             # Record the headline in `headlines`, then emit it as `<hN id="...">`.
             #
             # The id is `strip_id` applied to the value of the first line of the
             # block, and `N` is `block.depth`.
 569         redef fun add_headline(v, block) do
 570                 # save headline
 571                 var txt = block.block.first_line.value
 572                 var id = strip_id(txt)
 573                 var lvl = block.depth
 574                 headlines[id] = new HeadLine(id, txt, lvl)
 575                 # output it
 576                 v.add "<h{lvl} id=\"{id}\">"
 577                 v.emit_in block
 578                 v.add "</h{lvl}>\n"
 579         end
 580
             # Emit the content of `block` between `<p>` and `</p>`.
 581         redef fun add_paragraph(v, block) do
 582                 v.add "<p>"
 583                 v.emit_in block
 584                 v.add "</p>\n"
 585         end
 586
             # Emit the content of `block` between `<pre><code>` and `</code></pre>`.
 587         redef fun add_code(v, block) do
 588                 v.add "<pre><code>"
 589                 v.emit_in block
 590                 v.add "</code></pre>\n"
 591         end
 592
             # Emit the content of `block` between `<blockquote>` and `</blockquote>`.
 593         redef fun add_blockquote(v, block) do
 594                 v.add "<blockquote>\n"
 595                 v.emit_in block
 596                 v.add "</blockquote>\n"
 597         end
 598
             # Emit the content of `block` between `<ul>` and `</ul>`.
 599         redef fun add_unorderedlist(v, block) do
 600                 v.add "<ul>\n"
 601                 v.emit_in block
 602                 v.add "</ul>\n"
 603         end
 604
             # Emit the content of `block` between `<ol>` and `</ol>`.
 605         redef fun add_orderedlist(v, block) do
 606                 v.add "<ol>\n"
 607                 v.emit_in block
 608                 v.add "</ol>\n"
 609         end
 610
             # Emit the content of `block` between `<li>` and `</li>`.
 611         redef fun add_listitem(v, block) do
 612                 v.add "<li>"
 613                 v.emit_in block
 614                 v.add "</li>\n"
 615         end
 616
             # Emit `text` as is between `<em>` and `</em>`.
 617         redef fun add_em(v, text) do
 618                 v.add "<em>"
 619                 v.add text
 620                 v.add "</em>"
 621         end
 622
             # Emit `text` as is between `<strong>` and `</strong>`.
 623         redef fun add_strong(v, text) do
 624                 v.add "<strong>"
 625                 v.add text
 626                 v.add "</strong>"
 627         end
 628
             # Emit an `<img/>` tag.
             #
             # `link`, `name` and `comment` are escaped with `append_value`.
             # The `title` attribute is omitted when `comment` is null or empty.
 629         redef fun add_image(v, link, name, comment) do
 630                 v.add "<img src=\""
 631                 append_value(v, link)
 632                 v.add "\" alt=\""
 633                 append_value(v, name)
 634                 v.add "\""
 635                 if comment != null and not comment.is_empty then
 636                         v.add " title=\""
 637                         append_value(v, comment)
 638                         v.add "\""
 639                 end
 640                 v.add "/>"
 641         end
 642
             # Emit an `<a>` tag.
             #
             # `link` and `comment` are escaped with `append_value`, while `name` is
             # processed as markdown text with `emit_text`.
             # The `title` attribute is omitted when `comment` is null or empty.
 643         redef fun add_link(v, link, name, comment) do
 644                 v.add "<a href=\""
 645                 append_value(v, link)
 646                 v.add "\""
 647                 if comment != null and not comment.is_empty then
 648                         v.add " title=\""
 649                         append_value(v, comment)
 650                         v.add "\""
 651                 end
 652                 v.add ">"
 653                 v.emit_text(name)
 654                 v.add "</a>"
 655         end
 656
             # Emit an `<abbr>` tag with the escaped `comment` as title and `name`
             # processed as markdown text.
 657         redef fun add_abbr(v, name, comment) do
 658                 v.add "<abbr title=\""
 659                 append_value(v, comment)
 660                 v.add "\">"
 661                 v.emit_text(name)
 662                 v.add "</abbr>"
 663         end
 664
             # Emit the part of `text` selected by `from` and `to` between `<code>` and
             # `</code>`, escaped by `append_code`.
 665         redef fun add_span_code(v, text, from, to) do
 666                 v.add "<code>"
 667                 append_code(v, text, from, to)
 668                 v.add "</code>"
 669         end
 670
             # Emit `<br/>`.
 671         redef fun add_line_break(v) do
 672                 v.add "<br/>"
 673         end
 674
             # Emit each char of `text` through `escape_char`.
 675         redef fun append_value(v, text) do for c in text do escape_char(v, c)
 676
             # Emit `c`, replacing `&`, `<`, `>`, `"` and `'` by an HTML entity.
 677         redef fun escape_char(v, c) do
 678                 if c == '&' then
 679                         v.add "&amp;"
 680                 else if c == '<' then
 681                         v.add "&lt;"
 682                 else if c == '>' then
 683                         v.add "&gt;"
 684                 else if c == '"' then
 685                         v.add "&quot;"
 686                 else if c == '\'' then
 687                         v.add "&apos;"
 688                 else
 689                         v.addc c
 690                 end
 691         end
 692
             # Emit the chars of `buffer` at positions `[from..to[`.
             #
             # Only `&`, `<` and `>` are replaced by an entity; unlike `escape_char`,
             # quotes are emitted as is.
 693         redef fun append_code(v, buffer, from, to) do
 694                 for i in [from..to[ do
 695                         var c = buffer[i]
 696                         if c == '&' then
 697                                 v.add "&amp;"
 698                         else if c == '<' then
 699                                 v.add "&lt;"
 700                         else if c == '>' then
 701                                 v.add "&gt;"
 702                         else
 703                                 v.addc c
 704                         end
 705                 end
 706         end
 707
 708         # Build an id from `txt`: spaces become `_`, and only letters, digits
 709         # and `allowed_id_chars` are kept.
 710         #
 711         # If the id is already a key of `headlines`, a `_1`, `_2`, ... suffix is
 712         # appended until an unused key is found.
 713         redef fun strip_id(txt) do
 714                 var buf = new FlatBuffer
 715                 for c in txt do
 716                         if c == ' ' then
 717                                 buf.add '_'
 718                         else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
 719                                 buf.add c
 720                         end
 721                 end
 722                 var base = buf.to_s
 723                 var id = base
 724                 var n = 0
 725                 # an id may already be recorded for a previous headline:
 726                 # try `base`, then `base_1`, `base_2`, ... until the key is free
 727                 while headlines.has_key(id) do
 728                         n += 1
 729                         id = "{base}_{n}"
 730                 end
 731                 return id
 732         end
 733
 734         # Non-alphanumeric characters kept as is by `strip_id`.
 735         private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
 736 end
 737
 738 # A block of markdown lines.
 739 # An `MDBlock` can contain lines and/or sub-blocks.
 740 class MDBlock
 741         # Kind of block.
 742         # See `Block`.
             # Starts as a `BlockNone` bound to this block.
 743         var kind: Block = new BlockNone(self) is writable
 744
 745         # First line if any.
 746         var first_line: nullable MDLine = null is writable
 747
 748         # Last line if any.
 749         var last_line: nullable MDLine = null is writable
 750
 751         # First sub-block if any.
 752         var first_block: nullable MDBlock = null is writable
 753
 754         # Last sub-block if any.
 755         var last_block: nullable MDBlock = null is writable
 756
 757         # Previous block if any.
 758         var prev: nullable MDBlock = null is writable
 759
 760         # Next block if any.
 761         var next: nullable MDBlock = null is writable
 762
 763         # Does this block contain subblocks? True when `first_block` is set.
 764         fun has_blocks: Bool do return first_block != null
 765
 766         # Count sub-blocks.
 767         fun count_blocks: Int do
 768                 var count = 0
 769                 var block = first_block
 770                 while block != null do
 771                         count += 1
 772                         block = block.next
 773                 end
 774                 return count
 775         end
 776
 777         # Does this block contain lines?
 778         fun has_lines: Bool do return first_line != null
 779
 780         # Count block lines.
 781         fun count_lines: Int do
 782                 var count = 0
 783                 var line = first_line
 784                 while line != null do
 785                         count += 1
 786                         line = line.next
 787                 end
 788                 return count
 789         end
 790
 791         # Split `self` creating a new sub-block having `line` has `last_line`.
 792         fun split(line: MDLine): MDBlock do
 793                 var block = new MDBlock
 794                 block.first_line = first_line
 795                 block.last_line = line
 796                 first_line = line.next
 797                 line.next = null
 798                 if first_line == null then
 799                         last_line = null
 800                 else
 801                         first_line.prev = null
 802                 end
 803                 if first_block == null then
 804                         first_block = block
 805                         last_block = block
 806                 else
 807                         last_block.next = block
 808                         last_block = block
 809                 end
 810                 return block
 811         end
 812
 813         # Add a `line` to this block.
 814         fun add_line(line: MDLine) do
 815                 if last_line == null then
 816                         first_line = line
 817                         last_line = line
 818                 else
 819                         last_line.next_empty = line.is_empty
 820                         line.prev_empty = last_line.is_empty
 821                         line.prev = last_line
 822                         last_line.next = line
 823                         last_line = line
 824                 end
 825         end
 826
 827         # Remove `line` from this block.
 828         fun remove_line(line: MDLine) do
 829                 if line.prev == null then
 830                         first_line = line.next
 831                 else
 832                         line.prev.next = line.next
 833                 end
 834                 if line.next == null then
 835                         last_line = line.prev
 836                 else
 837                         line.next.prev = line.prev
 838                 end
 839                 line.prev = null
 840                 line.next = null
 841         end
 842
 843         # Remove leading empty lines.
 844         fun remove_leading_empty_lines: Bool do
 845                 var was_empty = false
 846                 var line = first_line
 847                 while line != null and line.is_empty do
 848                         remove_line line
 849                         line = first_line
 850                         was_empty = true
 851                 end
 852                 return was_empty
 853         end
 854
 855         # Remove trailing empty lines.
 856         fun remove_trailing_empty_lines: Bool do
 857                 var was_empty = false
 858                 var line = last_line
 859                 while line != null and line.is_empty do
 860                         remove_line line
 861                         line = last_line
 862                         was_empty = true
 863                 end
 864                 return was_empty
 865         end
 866
 867         # Remove leading and trailing empty lines.
 868         fun remove_surrounding_empty_lines: Bool do
 869                 var was_empty = false
 870                 if remove_leading_empty_lines then was_empty = true
 871                 if remove_trailing_empty_lines then was_empty = true
 872                 return was_empty
 873         end
 874
 875         # Remove list markers and up to 4 leading spaces.
 876         # Used to clean nested lists.
 877         fun remove_list_indent(v: MarkdownProcessor) do
 878                 var line = first_line
 879                 while line != null do
 880                         if not line.is_empty then
 881                                 var kind = v.line_kind(line)
 882                                 if kind isa LineList then
 883                                         line.value = kind.extract_value(line)
 884                                 else
 885                                         line.value = line.value.substring_from(line.leading.min(4))
 886                                 end
 887                                 line.leading = line.process_leading
 888                         end
 889                         line = line.next
 890                 end
 891         end
 892
 893         # Collect block line text.
 894         fun text: String do
 895                 var text = new FlatBuffer
 896                 var line = first_line
 897                 while line != null do
 898                         if not line.is_empty then
 899                                 text.append line.text
 900                         end
 901                         text.append "\n"
 902                         line = line.next
 903                 end
 904                 return text.write_to_string
 905         end
 906 end
 907
 908 # Representation of a markdown block in the AST.
 909 # Each `Block` is linked to a `MDBlock` that contains mardown code.
 910 abstract class Block
 911
 912         # The markdown block `self` is related to.
 913         var block: MDBlock
 914
 915         # Output `self` using `v.decorator`.
 916         fun emit(v: MarkdownEmitter) do v.emit_in(self)
 917
 918         # Emit the containts of `self`, lines or blocks.
 919         fun emit_in(v: MarkdownEmitter) do
 920                 block.remove_surrounding_empty_lines
 921                 if block.has_lines then
 922                         emit_lines(v)
 923                 else
 924                         emit_blocks(v)
 925                 end
 926         end
 927
 928         # Emit lines contained in `block`.
 929         fun emit_lines(v: MarkdownEmitter) do
 930                 var tpl = v.push_buffer
 931                 var line = block.first_line
 932                 while line != null do
 933                         if not line.is_empty then
 934                                 v.add line.value.substring(line.leading, line.value.length - line.trailing)
 935                                 if line.trailing >= 2 then v.decorator.add_line_break(v)
 936                         end
 937                         if line.next != null then
 938                                 v.addn
 939                         end
 940                         line = line.next
 941                 end
 942                 v.pop_buffer
 943                 v.emit_text(tpl)
 944         end
 945
 946         # Emit sub-blocks contained in `block`.
 947         fun emit_blocks(v: MarkdownEmitter) do
 948                 var block = self.block.first_block
 949                 while block != null do
 950                         block.kind.emit(v)
 951                         block = block.next
 952                 end
 953         end
 954 end
 955
# A block without any markdown specificities.
#
# Nothing is redefined here: the block is emitted with the default
# behavior inherited from `Block`.
# This class is only used for typing purposes.
class BlockNone
        super Block
end
 963
 964 # A markdown blockquote.
 965 class BlockQuote
 966         super Block
 967
 968         redef fun emit(v) do v.decorator.add_blockquote(v, self)
 969
 970         # Remove blockquote markers.
 971         private fun remove_block_quote_prefix(block: MDBlock) do
 972                 var line = block.first_line
 973                 while line != null do
 974                         if not line.is_empty then
 975                                 if line.value[line.leading] == '>' then
 976                                         var rem = line.leading + 1
 977                                         if line.leading + 1 < line.value.length and
 978                                            line.value[line.leading + 1] == ' ' then
 979                                                 rem += 1
 980                                         end
 981                                         line.value = line.value.substring_from(rem)
 982                                         line.leading = line.process_leading
 983                                 end
 984                         end
 985                         line = line.next
 986                 end
 987         end
 988 end
 989
 990 # A markdown code block.
 991 class BlockCode
 992         super Block
 993
 994         # Number of char to skip at the beginning of the line.
 995         #
 996         # Block code lines start at 4 spaces.
 997         protected var line_start = 4
 998
 999         redef fun emit(v) do v.decorator.add_code(v, self)
1000
1001         redef fun emit_lines(v) do
1002                 var line = block.first_line
1003                 while line != null do
1004                         if not line.is_empty then
1005                                 v.decorator.append_code(v, line.value, line_start, line.value.length)
1006                         end
1007                         v.addn
1008                         line = line.next
1009                 end
1010         end
1011 end
1012
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`, except that `line_start`
# is redefined so that lines are emitted from their first character.
class BlockFence
        super BlockCode

        # Fence code lines start at 0 spaces.
        redef var line_start = 0
end
1023
# A markdown headline.
class BlockHeadline
        super Block

        redef fun emit(v) do v.decorator.add_headline(v, self)

        # Depth of the headline used to determine the headline level.
        var depth = 0

        # Remove headline marks from the first line of `block`.
        #
        # Nothing is done if `depth` is already set, or if `block` has no
        # first line or an empty one.
        # Else the opening `#` marks are counted to set `depth` (6 at most),
        # and the line is reduced to the text found between the opening marks
        # and the optional closing marks.
        # The line is flagged as empty when no text remains.
        private fun transform_headline(block: MDBlock) do
                if depth > 0 then return
                var level = 0
                var line = block.first_line
                if line == null or line.is_empty then return
                # skip the opening marks, then the spaces that follow them
                var start = line.leading
                while start < line.value.length and line.value[start] == '#' do
                        level += 1
                        start += 1
                end
                while start < line.value.length and line.value[start] == ' ' do
                        start += 1
                end
                if start >= line.value.length then
                        line.is_empty = true
                else
                        # skip the closing marks, then the spaces that precede them.
                        # Both scans stop at `start`: without this bound a line
                        # like `# #` ends with `nend < start` and a negative
                        # length is given to `substring`.
                        var nend = line.value.length - line.trailing - 1
                        while nend >= start and line.value[nend] == '#' do nend -= 1
                        while nend >= start and line.value[nend] == ' ' do nend -= 1
                        if nend < start then
                                # only marks and spaces: the headline has no text
                                line.is_empty = true
                        else
                                line.value = line.value.substring(start, nend - start + 1)
                                line.leading = 0
                                line.trailing = 0
                        end
                end
                depth = level.min(6)
        end
end
1060
# An item of a markdown list.
class BlockListItem
        super Block

        redef fun emit(v) do
                v.decorator.add_listitem(v, self)
        end
end
1067
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
        super Block

        # Split list block into list items sub-blocks.
        #
        # Starting from the second line, a new item begins on each list line
        # and on each non-empty, non-indented line that follows an empty one.
        # The remaining lines form the last item.
        private fun init_block(v: MarkdownProcessor) do
                # the first line always belongs to the first item
                var line = block.first_line.next
                while line != null do
                        var t = v.line_kind(line)
                        var starts_item = t isa LineList
                        if not starts_item then
                                starts_item = not line.is_empty and line.prev_empty and line.leading == 0
                        end
                        if starts_item then
                                var item = block.split(line.prev.as(not null))
                                item.kind = new BlockListItem(item)
                        end
                        line = line.next
                end
                var last_item = block.split(block.last_line.as(not null))
                last_item.kind = new BlockListItem(last_item)
        end

        # Expand list items as paragraphs if needed.
        #
        # If one list item of `block` contains a `BlockParagraph`, all the
        # `BlockNone` found in the list items of `block` become paragraphs.
        private fun expand_paragraphs(block: MDBlock) do
                # first pass: look for a paragraph in the list items
                var found = false
                var item = block.first_block
                while item != null and not found do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null and not found do
                                        found = child.kind isa BlockParagraph
                                        child = child.next
                                end
                        end
                        item = item.next
                end
                if not found then return
                # second pass: turn plain blocks into paragraphs
                item = block.first_block
                while item != null do
                        if item.kind isa BlockListItem then
                                var child = item.first_block
                                while child != null do
                                        if child.kind isa BlockNone then
                                                child.kind = new BlockParagraph(child)
                                        end
                                        child = child.next
                                end
                        end
                        item = item.next
                end
        end
end
1125
# A markdown ordered list.
class BlockOrderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_orderedlist(v, self)
        end
end
1132
# A markdown unordered list.
class BlockUnorderedList
        super BlockList

        redef fun emit(v) do
                v.decorator.add_unorderedlist(v, self)
        end
end
1139
# A markdown paragraph block.
class BlockParagraph
        super Block

        redef fun emit(v) do
                v.decorator.add_paragraph(v, self)
        end
end
1146
# A markdown ruler.
class BlockRuler
        super Block

        redef fun emit(v) do
                v.decorator.add_ruler(v, self)
        end
end
1153
# XML blocks that can be found in markdown markup.
class BlockXML
        super Block

        # Emit the raw `value` of each non-empty line.
        #
        # Every line, empty or not, is followed by a new line.
        redef fun emit_lines(v) do
                var current = block.first_line
                while current != null do
                        if not current.is_empty then
                                v.add current.value
                        end
                        v.addn
                        current = current.next
                end
        end
end
1167
# A markdown line.
#
# Lines are chained through `prev` and `next` inside a `MDBlock`.
class MDLine

        # Text contained in this line.
        var value: String is writable

        # Is this line empty?
        # Lines containing only spaces are considered empty.
        var is_empty: Bool = true is writable

        # Previous line in `MDBlock` or null if first line.
        var prev: nullable MDLine = null is writable

        # Next line in `MDBlock` or null if last line.
        var next: nullable MDLine = null is writable

        # Is the previous line empty?
        var prev_empty: Bool = false is writable

        # Is the next line empty?
        var next_empty: Bool = false is writable

        # Initialize a new MDLine from its string value
        #
        # `leading` is computed first. If the line is not made of spaces only,
        # it is flagged as non-empty and `trailing` is computed too.
        init do
                self.leading = process_leading
                if leading != value.length then
                        self.is_empty = false
                        self.trailing = process_trailing
                end
        end

        # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
        #
        # The `next_empty` and `prev_empty` flags of the neighbours are updated too.
        fun clear do
                value = ""
                leading = 0
                trailing = 0
                is_empty = true
                if prev != null then prev.next_empty = true
                if next != null then next.prev_empty = true
        end

        # Number of leading spaces on this line.
        var leading: Int = 0 is writable

        # Compute `leading` depending on `value`.
        #
        # Return the number of spaces found at the beginning of `value`.
        # If `value` contains nothing but spaces, the line is cleared
        # (see `clear`) and 0 is returned.
        fun process_leading: Int do
                var count = 0
                var value = self.value
                while count < value.length and value[count] == ' ' do count += 1
                # The test must use the fresh `count`: the `leading` attribute
                # still holds the indentation of the previous `value` here, and
                # comparing it could wipe a line that is not blank.
                if count == value.length then
                        clear
                        # `value` is now empty, so there is no leading space left
                        return 0
                end
                return count
        end

        # Number of trailing spaces on this line.
        var trailing: Int = 0 is writable

        # Compute `trailing` depending on `value`.
        #
        # Return the number of spaces found at the end of `value`.
        fun process_trailing: Int do
                var count = 0
                var value = self.value
                # bounded by the length so a blank `value` cannot be indexed at -1
                while count < value.length and value[value.length - count - 1] == ' ' do
                        count += 1
                end
                return count
        end

        # Count the amount of `ch` in this line.
        # Return a value > 0 if this line only consists of `ch` and spaces.
        # Return 0 as soon as another character is found.
        fun count_chars(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                                continue
                        end
                        count = 0
                        break
                end
                return count
        end

        # Count the amount of `ch` at the start of this line ignoring spaces.
        #
        # Counting stops at the first character that is neither `ch` nor a space.
        fun count_chars_start(ch: Char): Int do
                var count = 0
                for c in value do
                        if c == ' ' then
                                continue
                        end
                        if c == ch then
                                count += 1
                        else
                                break
                        end
                end
                return count
        end

        # Last XML line if any.
        #
        # Set by `check_html` and `read_xml_comment` on the line where the
        # markup starts.
        private var xml_end_line: nullable MDLine = null

        # Does `value` contains valid XML markup?
        #
        # The markup may span over the following lines.
        # On success, `xml_end_line` is set to the line where the markup ends.
        private fun check_html: Bool do
                # stack of the block tags that are still open
                var tags = new Array[String]
                var tmp = new FlatBuffer
                var pos = leading
                if pos + 1 < value.length and value[pos + 1] == '!' then
                        if read_xml_comment(self, pos) > 0 then return true
                end
                # NOTE(review): `read_xml` is defined elsewhere; from its use here
                # it presumably stores the tag read at `pos` in `tmp` and returns
                # the position that follows it, or -1 on failure. To be confirmed.
                pos = value.read_xml(tmp, pos, false)
                var tag: String
                if pos > -1 then
                        tag = tmp.xml_tag
                        if not tag.is_html_block then
                                return false
                        end
                        if tag == "hr" then
                                # a ruler ends on the line it starts on
                                xml_end_line = self
                                return true
                        end
                        tags.add tag
                        var line: nullable MDLine = self
                        while line != null do
                                # go to the next `<` of the current line
                                while pos < line.value.length and line.value[pos] != '<' do
                                        pos += 1
                                end
                                if pos >= line.value.length then
                                        # end of line: a `/` before the last character
                                        # closes the innermost tag
                                        if pos - 2 >= 0 and line.value[pos - 2] == '/' then
                                                tags.pop
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                        end
                                        line = line.next
                                        pos = 0
                                else
                                        tmp = new FlatBuffer
                                        var new_pos = line.value.read_xml(tmp, pos, false)
                                        if new_pos > 0 then
                                                tag = tmp.xml_tag
                                                if tag.is_html_block and not tag == "hr" then
                                                        if tmp[1] == '/' then
                                                                # closing tag: it must match the innermost one
                                                                if tags.last != tag then
                                                                        return false
                                                                end
                                                                tags.pop
                                                        else
                                                                tags.add tag
                                                        end
                                                end
                                                if tags.is_empty then
                                                        xml_end_line = line
                                                        break
                                                end
                                                pos = new_pos
                                        else
                                                pos += 1
                                        end
                                end
                        end
                        return tags.is_empty
                end
                return false
        end

        # Read a XML comment.
        # Used by `check_html`.
        #
        # `start` is the index in `first_line` where `<!--` is expected.
        # The end of the comment is searched on `first_line` and the lines that
        # follow it.
        # If found, `first_line.xml_end_line` is set to the line holding `-->`
        # and the index that follows it on that line is returned.
        # Else -1 is returned.
        private fun read_xml_comment(first_line: MDLine, start: Int): Int do
                var line: nullable MDLine = first_line
                if start + 3 < line.value.length then
                        # the two dashes are looked for relatively to `start`, so
                        # that a comment preceded by leading spaces is recognized
                        if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
                                var pos = start + 4
                                while line != null do
                                        while pos < line.value.length and line.value[pos] != '-' do
                                                pos += 1
                                        end
                                        if pos == line.value.length then
                                                line = line.next
                                                pos = 0
                                        else
                                                if pos + 2 < line.value.length then
                                                        if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
                                                                first_line.xml_end_line = line
                                                                return pos + 3
                                                        end
                                                end
                                                pos += 1
                                        end
                                end
                        end
                end
                return -1
        end

        # Extract the text of `self` without leading and trailing.
        fun text: String do
                # `substring` expects a count: the leading spaces must be
                # subtracted as well as the trailing ones
                var count = value.length - leading - trailing
                if count < 0 then count = 0
                return value.substring(leading, count)
        end
end
1368
# A kind of markdown line.
#
# Each kind of line implements `process` to handle the lines of that kind.
interface Line

        # Parse the line.
        # See `MarkdownProcessor::recurse`.
        #
        # The implementations visible in this module work on `v.current_line`
        # and `v.current_block`, and update `v.current_line` before returning.
        fun process(v: MarkdownProcessor) is abstract
end
1376
# An empty markdown line.
class LineEmpty
        super Line

        # Skip the line: move `v.current_line` to the next one.
        redef fun process(v) do
                var skipped = v.current_line
                v.current_line = skipped.next
        end
end
1385
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
        super Line

        # Gather the lines following `v.current_line` in a new sub-block.
        #
        # The sub-block ends before the first line starting another kind of
        # construct, or on the first empty line, or with the last line.
        # It is a `BlockNone` inside a list when it neither follows nor ends on
        # an empty line, a `BlockParagraph` otherwise.
        redef fun process(v) do
                var line = v.current_line
                var was_empty = line.prev_empty
                # go to block end
                while line != null and not line.is_empty do
                        if ends_block(v, v.line_kind(line)) then break
                        line = line.next
                end
                # find the last line of the block
                var stopped_on_content = line != null and not line.is_empty
                var last: MDLine
                if stopped_on_content then
                        # another construct starts on `line`
                        last = line.prev.as(not null)
                else if line != null then
                        # the empty line is taken with the block
                        last = line
                else
                        last = v.current_block.last_line.as(not null)
                end
                # build block
                var block = v.current_block.split(last)
                var plain = v.in_list and not was_empty and (stopped_on_content or line == null)
                if plain then
                        block.kind = new BlockNone(block)
                else
                        block.kind = new BlockParagraph(block)
                end
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end

        # Does a line of kind `t` end the block being gathered?
        private fun ends_block(v: MarkdownProcessor, t: Line): Bool do
                if v.in_list and t isa LineList then return true
                if t isa LineCode or t isa LineFence then return true
                return t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
                       t isa LineHR or t isa LineBlockquote or t isa LineXML
        end
end
1435
# A line of markdown code.
class LineCode
        super Line

        # Gather the code lines and the empty lines found from
        # `v.current_line` in a new `BlockCode` sub-block.
        redef fun process(v) do
                # lookup block end
                var line = v.current_line
                while line != null do
                        if not line.is_empty and not v.line_kind(line) isa LineCode then break
                        line = line.next
                end
                # split at block end line
                var last: MDLine
                if line == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var block = v.current_block.split(last)
                block.kind = new BlockCode(block)
                block.remove_surrounding_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1458
# A line of raw XML.
class LineXML
        super Line

        # Put the lines from `v.current_line` to its `xml_end_line` in a new
        # `BlockXML` sub-block.
        #
        # Lines found before `v.current_line`, if any, are split apart first.
        redef fun process(v) do
                var line = v.current_line
                var before = line.prev
                if before != null then
                        v.current_block.split(before)
                end
                var last = line.xml_end_line.as(not null)
                var block = v.current_block.split(last)
                block.kind = new BlockXML(block)
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1473
# A markdown blockquote line.
class LineBlockquote
        super Line

        # Gather the lines of the blockquote in a new `BlockQuote` sub-block,
        # strip the quote markers and process its content with `v.recurse`.
        #
        # The blockquote ends before the first non-empty, non-indented line
        # that follows an empty line and is not a blockquote line itself.
        redef fun process(v) do
                # go to bquote end
                var line = v.current_line
                while line != null do
                        var after_gap = not line.is_empty and line.prev_empty and line.leading == 0
                        if after_gap and not v.line_kind(line) isa LineBlockquote then break
                        line = line.next
                end
                # build sub block
                var last: MDLine
                if line == null then
                        last = v.current_block.last_line.as(not null)
                else
                        last = line.prev.as(not null)
                end
                var block = v.current_block.split(last)
                var quote = new BlockQuote(block)
                block.kind = quote
                block.remove_surrounding_empty_lines
                quote.remove_block_quote_prefix(block)
                # parse the content of the quote
                v.current_line = line
                v.recurse(block, false)
                v.current_line = v.current_block.first_line
        end
end
1503
# A markdown horizontal ruler line.
class LineHR
        super Line

        # Put the current line alone in a `BlockRuler`.
        redef fun process(v) do
                var ruler = v.current_line
                var before = ruler.prev
                # Split the current block on the line preceding the ruler, if any.
                if before != null then
                        v.current_block.split(before)
                end
                var ruler_block = v.current_block.split(ruler.as(not null))
                ruler_block.kind = new BlockRuler(ruler_block)
                # Resume on the first line left in the current block.
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1517
# A line that opens or closes a markdown fenced code section.
class LineFence
        super Line

        # Gather the fenced lines in a `BlockFence` and clear the fence markers.
        redef fun process(v) do
                # Look for the next fence line, starting after the opening one.
                var cursor = v.current_line.next
                while cursor != null and not (v.line_kind(cursor) isa LineFence) do
                        cursor = cursor.next
                end
                # Step past the closing fence so that it is part of the block.
                if cursor != null then cursor = cursor.next
                # Build the fence block (up to the end of the block if unclosed).
                var fence: MDBlock
                if cursor == null then
                        fence = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        fence = v.current_block.split(cursor.prev.as(not null))
                end
                fence.kind = new BlockFence(fence)
                # The opening marker is not part of the code.
                fence.first_line.clear
                # Neither is the closing marker, when there is one.
                var closing = fence.last_line
                if closing != null and v.line_kind(closing) isa LineFence then
                        closing.clear
                end
                fence.remove_surrounding_empty_lines
                v.current_line = cursor
        end
end
1549
# A markdown headline line.
class LineHeadline
        super Line

        # Put the current line alone in a `BlockHeadline` and transform it.
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                # Split the current block on the line preceding the headline, if any.
                if before != null then
                        v.current_block.split(before)
                end
                var headline = v.current_block.split(title.as(not null))
                var kind = new BlockHeadline(headline)
                headline.kind = kind
                kind.transform_headline(headline)
                # Resume on the first line left in the current block.
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1566
# A markdown headline of level 1.
class LineHeadline1
        super LineHeadline

        # Build a `BlockHeadline` of depth 1 from the current line.
        #
        # The line that follows the title is cleared
        # (presumably the marker line of the headline, to be confirmed).
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                # Split the current block on the line preceding the headline, if any.
                if before != null then
                        v.current_block.split(before)
                end
                title.next.clear
                var headline = v.current_block.split(title.as(not null))
                var kind = new BlockHeadline(headline)
                kind.depth = 1
                kind.transform_headline(headline)
                headline.kind = kind
                # Resume on the first line left in the current block.
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1585
# A markdown headline of level 2.
class LineHeadline2
        super LineHeadline

        # Build a `BlockHeadline` of depth 2 from the current line.
        #
        # The line that follows the title is cleared
        # (presumably the marker line of the headline, to be confirmed).
        redef fun process(v) do
                var title = v.current_line
                var before = title.prev
                # Split the current block on the line preceding the headline, if any.
                if before != null then
                        v.current_block.split(before)
                end
                title.next.clear
                var headline = v.current_block.split(title.as(not null))
                var kind = new BlockHeadline(headline)
                kind.depth = 2
                kind.transform_headline(headline)
                headline.kind = kind
                # Resume on the first line left in the current block.
                v.current_block.remove_leading_empty_lines
                v.current_line = v.current_block.first_line
        end
end
1604
# A markdown list line.
# Factorizes the code shared by ordered and unordered lists.
class LineList
        super Line

        # Gather the lines of the list in a list block and parse each item.
        redef fun process(v) do
                # Walk forward until a line that ends the list: a non-empty line
                # without leading space, preceded by an empty line, that is not
                # itself a list line.
                var cursor = v.current_line
                while cursor != null do
                        var line_type = v.line_kind(cursor)
                        if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 then
                                if not (line_type isa LineList) then break
                        end
                        cursor = cursor.next
                end
                # Split the list out of the current block.
                var items: MDBlock
                if cursor == null then
                        items = v.current_block.split(v.current_block.last_line.as(not null))
                else
                        items = v.current_block.split(cursor.prev.as(not null))
                end
                var kind = block_kind(items)
                items.kind = kind
                # The flags are reset before and after the trimming since the
                # first and last lines may change when empty lines are removed.
                items.first_line.prev_empty = false
                items.last_line.next_empty = false
                items.remove_surrounding_empty_lines
                items.first_line.prev_empty = false
                items.last_line.next_empty = false
                kind.init_block(v)
                # Parse every sub-block of the list.
                var item = items.first_block
                while item != null do
                        item.remove_list_indent(v)
                        v.recurse(item, true)
                        item = item.next
                end
                kind.expand_paragraphs(items)
                v.current_line = cursor
        end

        # Create the list block kind that matches this kind of line.
        protected fun block_kind(block: MDBlock): BlockList is abstract

        # Extract the string value of `line`.
        protected fun extract_value(line: MDLine): String is abstract
end
1650
# A line of an ordered list.
class LineOList
        super LineList

        # Ordered list lines produce a `BlockOrderedList`.
        redef fun block_kind(block) do
                return new BlockOrderedList(block)
        end

        # Return what starts two characters after the first `.` of the line value.
        redef fun extract_value(line) do
                var text = line.value
                var dot = text.index_of('.')
                return text.substring_from(dot + 2)
        end
end
1661
# A line of an unordered list.
class LineUList
        super LineList

        # Unordered list lines produce a `BlockUnorderedList`.
        redef fun block_kind(block) do
                return new BlockUnorderedList(block)
        end

        # Return what starts two characters after the leading space of the line value.
        redef fun extract_value(line) do
                var text = line.value
                return text.substring_from(line.leading + 2)
        end
end
1672
# A token represents a character of the markdown input.
# Tokens with a specific markup behaviour refine `emit` to implement it.
abstract class Token

        # Position of `self` in the markdown input.
        var pos: Int

        # Character found at `pos` in the markdown input.
        var char: Char

        # Output `self` through the emitter `v`.
        #
        # By default, `char` is added as is.
        fun emit(v: MarkdownEmitter) do
                v.addc(char)
        end
end
1686
# A token that carries no specific markup meaning.
#
# It inherits the default `Token::emit`.
class TokenNone
        super Token
end
1691
# An emphasis token.
abstract class TokenEm
        super Token

        # Emit the text up to the matching token as an emphasis.
        #
        # If `emit_text_until` does not return a positive position,
        # `char` is output as is.
        redef fun emit(v) do
                # Render the emphasized text in its own buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                var stop = v.emit_text_until(text, pos + 1, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_em(v, content)
                v.current_pos = stop
        end
end
1708
# An emphasis token written with a star.
class TokenEmStar
        super TokenEm
end
1713
# An emphasis token written with an underscore.
class TokenEmUnderscore
        super TokenEm
end
1718
# A strong emphasis token.
abstract class TokenStrong
        super Token

        # Emit the text up to the matching token as a strong emphasis.
        #
        # If `emit_text_until` does not return a positive position,
        # `char` is output as is.
        redef fun emit(v) do
                # Render the strong text in its own buffer.
                var content = v.push_buffer
                var text = v.current_text.as(not null)
                # The content starts two characters after `pos`.
                var stop = v.emit_text_until(text, pos + 2, self)
                v.pop_buffer
                if stop <= 0 then
                        v.addc char
                        return
                end
                v.decorator.add_strong(v, content)
                v.current_pos = stop + 1
        end
end
1735
# A strong emphasis token written with stars.
class TokenStrongStar
        super TokenStrong
end
1740
# A strong emphasis token written with underscores.
class TokenStrongUnderscore
        super TokenStrong
end
1745
# A span code token.
# Factorizes the work shared by single and doubled span codes.
abstract class TokenCode
        super Token

        # Emit the text up to the matching token as a span code.
        #
        # Spaces surrounding the code are trimmed.
        # If no matching token is found, `char` is output as is.
        redef fun emit(v) do
                var first = pos + next_pos + 1
                var text = v.current_text.as(not null)
                var last = v.processor.find_token(text, first, self)
                if last <= 0 then
                        v.addc char
                        return
                end
                v.current_pos = last + next_pos
                # Skip the leading spaces.
                while first < last and text[first] == ' ' do first += 1
                # Nothing is emitted for a span that only contains spaces.
                if first >= last then return
                # Skip the trailing spaces.
                while text[last - 1] == ' ' do last -= 1
                v.decorator.add_span_code(v, text, first, last)
        end

        # Extra offset applied to positions for this kind of marker
        # (`0` in `TokenCodeSingle`, `1` in `TokenCodeDouble`).
        private fun next_pos: Int is abstract
end
1768
# A simple span code token.
class TokenCodeSingle
        super TokenCode

        # No extra offset is needed.
        redef fun next_pos do
                return 0
        end
end
1775
# A doubled span code token.
class TokenCodeDouble
        super TokenCode

        # One extra position is skipped.
        redef fun next_pos do
                return 1
        end
end
1782
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
        super Token

        # Link address
        var link: nullable Text = null

        # Link text
        var name: nullable Text = null

        # Link title
        var comment: nullable Text = null

        # Is the link construct an abbreviation?
        var is_abbrev = false

        # Emit the link or image if the construct starting at `pos` is valid.
        #
        # Otherwise `char` is output as is.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_link(v, tmp, pos, self)
                if b > 0 then
                        emit_hyper(v)
                        v.current_pos = b
                else
                        v.addc char
                end
        end

        # Emit the hyperlink as link or image.
        private fun emit_hyper(v: MarkdownEmitter) is abstract

        # Check if the construct starting at `start` is a valid link.
        #
        # On success, `name`, `link` and `comment` are set and the position of
        # the last character of the construct is returned.
        # Return `-1` if the construct is not a valid link.
        #
        # Three forms are recognized after the `[text]` part:
        #
        # * `(address "title")`: an inline link;
        # * `[id]`: a reference to a link definition;
        # * nothing: `text` itself is used as the reference id.
        #
        # `out` is not used by the current implementation.
        private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
                var md = v.current_text.as(not null)
                # Links skip one character before the text, other tokens skip two.
                var pos: Int
                if token isa TokenLink then
                        pos = start + 1
                else
                        pos = start + 2
                end
                var label = new FlatBuffer
                pos = md.read_md_link_id(label, pos)
                if pos < start then return -1
                name = label
                var old_pos = pos
                pos += 1
                pos = md.skip_spaces(pos)
                if pos < start then
                        # Nothing follows the text: use it as a reference id.
                        var tid = label.write_to_string.to_lower
                        if not v.processor.link_refs.has_key(tid) then return -1
                        var lr = v.processor.link_refs[tid]
                        is_abbrev = lr.is_abbrev
                        link = lr.link
                        comment = lr.title
                        pos = old_pos
                else if md[pos] == '(' then
                        # Inline link: `(address "title")`
                        pos += 1
                        pos = md.skip_spaces(pos)
                        if pos < start then return -1
                        var address = new FlatBuffer
                        var use_lt = md[pos] == '<'
                        if use_lt then
                                pos = md.read_until(address, pos + 1, '>')
                        else
                                pos = md.read_md_link(address, pos)
                        end
                        if pos < start then return -1
                        if use_lt then pos += 1
                        # The text may end right after the closing `>`:
                        # do not read past the end.
                        if pos >= md.length then return -1
                        link = address.write_to_string
                        if md[pos] == ' ' then
                                pos = md.skip_spaces(pos)
                                # Only spaces remain: the closing `)` is missing.
                                if pos < 0 then return -1
                                if md[pos] == '"' then
                                        pos += 1
                                        var title = new FlatBuffer
                                        pos = md.read_until(title, pos, '"')
                                        if pos < start then return -1
                                        comment = title.write_to_string
                                        pos += 1
                                        pos = md.skip_spaces(pos)
                                        if pos == -1 then return -1
                                end
                        end
                        if md[pos] != ')' then return -1
                else if md[pos] == '[' then
                        # Reference link: `[id]`, the text is used if `id` is empty.
                        pos += 1
                        var ref_id = new FlatBuffer
                        pos = md.read_raw_until(ref_id, pos, ']')
                        if pos < start then return -1
                        var id: Text
                        if ref_id.length > 0 then
                                id = ref_id
                        else
                                id = label
                        end
                        var tid = id.write_to_string.to_lower
                        if v.processor.link_refs.has_key(tid) then
                                var lr = v.processor.link_refs[tid]
                                link = lr.link
                                comment = lr.title
                        end
                else
                        # Something else follows: use the text as a reference id.
                        var tid = label.write_to_string.replace("\n", " ").to_lower
                        if not v.processor.link_refs.has_key(tid) then return -1
                        var lr = v.processor.link_refs[tid]
                        link = lr.link
                        comment = lr.title
                        pos = old_pos
                end
                if link == null then return -1
                return pos
        end
end
1901
# A token for a markdown link.
class TokenLink
        super TokenLinkOrImage

        # Emit an abbreviation if the link is one and has a title,
        # a regular link otherwise.
        redef fun emit_hyper(v) do
                var title = comment
                if is_abbrev and title != null then
                        v.decorator.add_abbr(v, name.as(not null), title)
                else
                        v.decorator.add_link(v, link.as(not null), name.as(not null), title)
                end
        end
end
1914
# A token for a markdown image.
class TokenImage
        super TokenLinkOrImage

        # Emit the image through the decorator.
        redef fun emit_hyper(v) do
                var address = link.as(not null)
                var text = name.as(not null)
                v.decorator.add_image(v, address, text, comment)
        end
end
1923
# A token that opens a HTML/XML construct.
class TokenHTML
        super Token

        # Emit the HTML found at the current position.
        #
        # If `check_html` does not return a positive position,
        # `char` is escaped by the decorator instead.
        redef fun emit(v) do
                var html = new FlatBuffer
                var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
                if stop <= 0 then
                        v.decorator.escape_char(v, char)
                        return
                end
                v.add html
                v.current_pos = stop
        end

        # Is the HTML valid?
        # Also take care of link shortcuts (like `<http://example.com>`).
        #
        # Link shortcuts are emitted directly through the decorator.
        # Inline HTML is appended to `out`.
        # Return the position of the end of the construct or `-1`.
        private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
                # check for auto links
                var address = new FlatBuffer
                var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
                if pos != -1 and md[pos] == ':' and address.is_link_prefix then
                        pos = md.read_until(address, pos, '>')
                        if pos != -1 then
                                var link = address.write_to_string
                                v.decorator.add_link(v, link, link, null)
                                return pos
                        end
                end
                # TODO check for mailto
                # check for inline html
                if start + 2 >= md.length then return -1
                return md.read_xml(out, start, true)
        end
end
1961
# An HTML entity token.
class TokenEntity
        super Token

        # Emit the entity found at `pos` as is if it is well formed.
        #
        # Otherwise `char` is escaped by the decorator.
        redef fun emit(v) do
                var tmp = new FlatBuffer
                var b = check_entity(tmp, v.current_text.as(not null), pos)
                if b > 0 then
                        v.add tmp
                        v.current_pos = b
                else
                        v.decorator.escape_char(v, char)
                end
        end

        # Is the entity valid?
        #
        # Read `md` from `start` up to the next `;` and check the shape of
        # what was read:
        #
        # * `&#x` followed by hexadecimal digits;
        # * `&#` followed by decimal digits;
        # * `&` followed by letters and digits.
        #
        # On success, the entity (including the final `;`) is in `out` and the
        # position of the `;` is returned. Return `-1` otherwise.
        private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
                var pos = md.read_until(out, start, ';')
                # At least 3 characters are needed: `&`, and two more.
                if pos < 0 or out.length < 3 then return -1
                if out[1] == '#' then
                        if out[2] == 'x' or out[2] == 'X' then
                                # Hexadecimal character reference
                                if out.length < 4 then return -1
                                for i in [3..out.length[ do
                                        var c = out[i]
                                        var is_hex = (c >= '0' and c <= '9') or
                                                (c >= 'a' and c <= 'f') or
                                                (c >= 'A' and c <= 'F')
                                        if not is_hex then return -1
                                end
                        else
                                # Decimal character reference
                                for i in [2..out.length[ do
                                        var c = out[i]
                                        if c < '0' or c > '9' then return -1
                                end
                        end
                else
                        # Named entity
                        # TODO check that the name is a known entity
                        for i in [1..out.length[ do
                                var c = out[i]
                                if not c.is_digit and not c.is_letter then return -1
                        end
                end
                out.add ';'
                return pos
        end
end
2015
# A token for a markdown escape sequence.
class TokenEscape
        super Token

        # Move to the character that follows the current position and output it as is.
        redef fun emit(v) do
                var escaped = v.current_pos + 1
                v.current_pos = escaped
                v.addc v.current_text[escaped]
        end
end
2025
2026 redef class Text
2027
2028         # Get the position of the next non-space character.
2029         private fun skip_spaces(start: Int): Int do
2030                 var pos = start
2031                 while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
2032                         pos += 1
2033                 end
2034                 if pos < length then return pos
2035                 return -1
2036         end
2037
2038         # Read `self` until `nend` and append it to the `out` buffer.
2039         # Escape markdown special chars.
2040         private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2041                 var pos = start
2042                 while pos < length do
2043                         var c = self[pos]
2044                         if c == '\\' and pos + 1 < length then
2045                                 pos = escape(out, self[pos + 1], pos)
2046                         else
2047                                 var end_reached = false
2048                                 for n in nend do
2049                                         if c == n then
2050                                                 end_reached = true
2051                                                 break
2052                                         end
2053                                 end
2054                                 if end_reached then break
2055                                 out.add c
2056                         end
2057                         pos += 1
2058                 end
2059                 if pos == length then return -1
2060                 return pos
2061         end
2062
2063         # Read `self` as raw text until `nend` and append it to the `out` buffer.
2064         # No escape is made.
2065         private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
2066                 var pos = start
2067                 while pos < length do
2068                         var c = self[pos]
2069                         var end_reached = false
2070                         for n in nend do
2071                                 if c == n then
2072                                         end_reached = true
2073                                         break
2074                                 end
2075                         end
2076                         if end_reached then break
2077                         out.add c
2078                         pos += 1
2079                 end
2080                 if pos == length then return -1
2081                 return pos
2082         end
2083
2084         # Read `self` as XML until `to` and append it to the `out` buffer.
2085         # Escape HTML special chars.
2086         private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
2087                 var pos = from
2088                 var in_str = false
2089                 var str_char: nullable Char = null
2090                 while pos < length do
2091                         var c = self[pos]
2092                         if in_str then
2093                                 if c == '\\' then
2094                                         out.add c
2095                                         pos += 1
2096                                         if pos < length then
2097                                                 out.add c
2098                                                 pos += 1
2099                                         end
2100                                         continue
2101                                 end
2102                                 if c == str_char then
2103                                         in_str = false
2104                                         out.add c
2105                                         pos += 1
2106                                         continue
2107                                 end
2108                         end
2109                         if c == '"' or c == '\'' then
2110                                 in_str = true
2111                                 str_char = c
2112                         end
2113                         if not in_str then
2114                                 var end_reached = false
2115                                 for n in [0..to.length[ do
2116                                         if c == to[n] then
2117                                                 end_reached = true
2118                                                 break
2119                                         end
2120                                 end
2121                                 if end_reached then break
2122                         end
2123                         out.add c
2124                         pos += 1
2125                 end
2126                 if pos == length then return -1
2127                 return pos
2128         end
2129
2130         # Read `self` as XML and append it to the `out` buffer.
2131         # Safe mode can be activated to limit reading to valid xml.
2132         private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2133                 var pos = 0
2134                 var is_close_tag = false
2135                 if start + 1 >= length then return -1
2136                 if self[start + 1] == '/' then
2137                         is_close_tag = true
2138                         pos = start + 2
2139                 else if self[start + 1] == '!' then
2140                         out.append "<!"
2141                         return start + 1
2142                 else
2143                         is_close_tag = false
2144                         pos = start + 1
2145                 end
2146                 if safe_mode then
2147                         var tmp = new FlatBuffer
2148                         pos = read_xml_until(tmp, pos, ' ', '/', '>')
2149                         if pos == -1 then return -1
2150                         var tag = tmp.write_to_string.trim.to_lower
2151                         if tag.is_html_unsafe then
2152                                 out.append "&lt;"
2153                                 if is_close_tag then out.add '/'
2154                                 out.append tmp
2155                         else
2156                                 out.append "<"
2157                                 if is_close_tag then out.add '/'
2158                                 out.append tmp
2159                         end
2160                 else
2161                         out.add '<'
2162                         if is_close_tag then out.add '/'
2163                         pos = read_xml_until(out, pos, ' ', '/', '>')
2164                 end
2165                 if pos == -1 then return -1
2166                 pos = read_xml_until(out, pos, '/', '>')
2167                 if pos == -1 then return -1
2168                 if self[pos] == '/' then
2169                         out.append " /"
2170                         pos = self.read_xml_until(out, pos + 1, '>')
2171                         if pos == -1 then return -1
2172                 end
2173                 if self[pos] == '>' then
2174                         out.add '>'
2175                         return pos
2176                 end
2177                 return -1
2178         end
2179
2180         # Read a markdown link address and append it to the `out` buffer.
2181         private fun read_md_link(out: FlatBuffer, start: Int): Int do
2182                 var pos = start
2183                 var counter = 1
2184                 while pos < length do
2185                         var c = self[pos]
2186                         if c == '\\' and pos + 1 < length then
2187                                 pos = escape(out, self[pos + 1], pos)
2188                         else
2189                                 var end_reached = false
2190                                 if c == '(' then
2191                                         counter += 1
2192                                 else if c == ' ' then
2193                                         if counter == 1 then end_reached = true
2194                                 else if c == ')' then
2195                                         counter -= 1
2196                                         if counter == 0 then end_reached = true
2197                                 end
2198                                 if end_reached then break
2199                                 out.add c
2200                         end
2201                         pos += 1
2202                 end
2203                 if pos == length then return -1
2204                 return pos
2205         end
2206
2207         # Read a markdown link text and append it to the `out` buffer.
2208         private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2209                 var pos = start
2210                 var counter = 1
2211                 while pos < length do
2212                         var c = self[pos]
2213                         var end_reached = false
2214                         if c == '[' then
2215                                 counter += 1
2216                                 out.add c
2217                         else if c == ']' then
2218                                 counter -= 1
2219                                 if counter == 0 then
2220                                         end_reached = true
2221                                 else
2222                                         out.add c
2223                                 end
2224                         else
2225                                 out.add c
2226                         end
2227                         if end_reached then break
2228                         pos += 1
2229                 end
2230                 if pos == length then return -1
2231                 return pos
2232         end
2233
2234         # Extract the XML tag name from a XML tag.
2235         private fun xml_tag: String do
2236                 var tpl = new FlatBuffer
2237                 var pos = 1
2238                 if pos < length and self[1] == '/' then pos += 1
2239                 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2240                         tpl.add self[pos]
2241                         pos += 1
2242                 end
2243                 return tpl.write_to_string.to_lower
2244         end
2245
2246         # Read and escape the markdown contained in `self`.
2247         private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2248                 if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2249                    c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2250                    c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
2251                    c == '`' or c == '~' or c == '^' then
2252                         out.add c
2253                         return pos + 1
2254                 end
2255                 out.add '\\'
2256                 return pos
2257         end
2258
2259         # Is `self` an unsafe HTML element?
2260         private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)
2261
2262         # Is `self` a HRML block element?
2263         private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)
2264
2265         # Is `self` a link prefix?
2266         private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)
2267
2268         private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
2269
2270         private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
2271
2272         private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
2273 end
2274
redef class String

        # Parse `self` as markdown and return its HTML representation.
        #
        #    var md = "**Hello World!**"
        #    var html = md.md_to_html
        #    assert html == "<p><strong>Hello World!</strong></p>\n"
        fun md_to_html: Streamable do
                return (new MarkdownProcessor).process(self)
        end
end