lib/markdown: use virtual type to make subclassing easier
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # `MarkdownEmitter` used for output.
34 var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # This is a paragraph
45 # * and this is not a list
46 #
47 # Will produce:
48 #
49 # <p>This is a paragraph
50 # * and this is not a list</p>
51 #
52 # When using extended mode this changes to:
53 #
54 # <p>This is a paragraph</p>
55 # <ul>
56 # <li>and this is not a list</li>
57 # </ul>
58 #
59 # * Fenced code blocks
60 #
61 # If you don't want to indent all your code with 4 spaces,
62 # you can wrap your code in ``` ``` ``` or `~~~`.
63 #
64 # Here's an example:
65 #
66 # ```
67 # fun test do
68 # print "Hello World!"
69 # end
70 # ```
71 #
72 # * Code blocks meta
73 #
74 # If you want to use syntax highlighting tools, most of them need to know what kind
75 # of language they are highlighting.
76 # You can add an optional language identifier after the fence declaration to output
77 # it in the HTML render.
78 #
79 # ```nit
80 # import markdown
81 #
82 # print "# Hello World!".md_to_html
83 # ```
84 #
85 # Becomes
86 #
87 # <pre class="nit"><code>import markdown
88 #
89 # print "Hello World!".md_to_html
90 # </code></pre>
91 #
92 # * Underscores (Emphasis)
93 #
94 # Underscores in the middle of a word like:
95 #
96 # Con_cat_this
97 #
98 # normally produces this:
99 #
100 # <p>Con<em>cat</em>this</p>
101 #
102 # With extended mode they don't result in emphasis.
103 #
104 # <p>Con_cat_this</p>
105 #
106 # * Strikethrough
107 #
108 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
109 # a strikethrough span is marked with `~~`.
110 #
111 # ~~Mistaken text.~~
112 #
113 # becomes
114 #
115 # <del>Mistaken text.</del>
116 var ext_mode = true
117
# Attach a default `MarkdownEmitter` bound to this processor.
init do
	self.emitter = new MarkdownEmitter(self)
end
119
# Parse the markdown `input` and return the rendered output.
#
# The per-run parsing state (link references, current line and block) is
# reset first, so a processor can be reused for several documents.
fun process(input: String): Writable do
	# Forget the state left by a previous run.
	link_refs.clear
	last_link_ref = null
	current_line = null
	current_block = null
	# Build the block tree from the input lines.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)
	# Render the tree through the emitter.
	return emitter.emit(root.kind)
end
134
# Cut `input` into `MDLine`s and gather them in a fresh root `MDBlock`.
#
# Tabs are replaced by spaces up to the next column that is a multiple of 4.
# Lines accepted by `check_link_ref` are recorded as link references and
# left out of the returned block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock
	var buf = new FlatBuffer
	var len = input.length
	var i = 0
	while i < len do
		buf.clear
		# Column in the expanded line, used to align the tab stops.
		var col = 0
		while i < len do
			var c = input[i]
			i += 1
			# The line feed is consumed but not stored.
			if c == '\n' then break
			if c == '\t' then
				var stop = col + (4 - (col.bin_and(3)))
				while col < stop do
					buf.add ' '
					col += 1
				end
			else
				buf.add c
				col += 1
			end
		end

		var line = new MDLine(buf.write_to_string)
		# Link reference definitions are consumed here, not emitted.
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
170
# Check whether `line` is (part of) a link reference definition.
#
# Two shapes are recognized:
#
# * `[id]: link "optional title"` (the link may be wrapped in `<>`, the title
#   in `""`, `''` or `()`): the reference is saved through `add_link_ref`;
# * a title alone on the first non-empty line that follows a definition
#   written without title: it becomes the title of that definition.
#
# Return `true` if `line` was consumed as a link reference and must not be
# added to the document.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = line.leading + 1
		pos = md.read_until(id, pos, ']')
		if not id.is_empty and pos + 2 < md.length then
			if md[pos + 1] == ':' then
				pos += 2
				pos = md.skip_spaces(pos)
				# NOTE(review): `skip_spaces` is defined elsewhere; the `pos > 0`
				# guard further down suggests it may return a negative position.
				# Confirm `md[pos]` cannot be reached with one.
				if md[pos] == '<' then
					pos += 1
					pos = md.read_until(link, pos, '>')
					pos += 1
				else
					pos = md.read_until(link, pos, ' ', '\n')
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < md.length then
						var c = md[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
	end
	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without title can be completed by the next line.
		# A titled one also cancels any older pending definition.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a definition: maybe the title of the pending definition.
	comment = new FlatBuffer
	var pending = last_link_ref
	if not line.is_empty and pending != null then
		pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if not comment.is_empty then pending.title = comment.write_to_string
		# The title is only looked for on the first non-empty line after the
		# definition. Without this reset any later line starting with a quote
		# or a parenthesis would be swallowed as a title.
		last_link_ref = null
	end
	return not comment.is_empty
end
238
239 # Known link refs
240 # This list will be needed during output to expand links.
241 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
242
243 # Last encountered link ref (for multiline definitions)
244 #
245 # Markdown allows link refs to be defined over two lines:
246 #
247 # [id]: http://example.com/longish/path/to/resource/here
248 # "Optional Title Here"
249 #
250 private var last_link_ref: nullable LinkRef = null
251
# Register the link reference `ref` under `key`.
#
# The key is stored lowercased in `link_refs`.
fun add_link_ref(key: String, ref: LinkRef) do
	link_refs[key.to_lower] = ref
end
254
# Recursively split a `block`.
#
# The block is split according to the kind of lines it contains
# (see `line_kind`): each line kind processes the current line and moves
# `current_line` forward.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# `in_list` and `current_block` are restored before returning;
# `current_line` is not.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip the leading empty lines.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: nothing to process, but the mode of the
			# caller must still be restored.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
280
281 # Currently processed line.
282 # Used when visiting blocks with `recurse`.
283 var current_line: nullable MDLine = null is writable
284
285 # Currently processed block.
286 # Used when visiting blocks with `recurse`.
287 var current_block: nullable MDBlock = null is writable
288
289 # Is the current recursion in list mode?
290 # Used when visiting blocks with `recurse`
291 private var in_list = false
292
# Determine the kind of the line `md`.
#
# Tests are tried in this order: empty line, indented code, `#` headline,
# blockquote, fence (extended mode only), ruler, unordered list,
# ordered list, XML, then headline underlined by the next line.
# `LineOther` is returned when nothing matches.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty
	var text = md.value
	var lead = md.leading
	if lead > 3 then return new LineCode
	# First char after the leading spaces.
	var first = text[lead]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Length of the line once leading and trailing spaces are dropped.
	var content = text.length - lead - md.trailing

	if ext_mode and content > 2 then
		if first == '`' and md.count_chars_start('`') >= 3 then return new LineFence
		if first == '~' and md.count_chars_start('~') >= 3 then return new LineFence
	end

	if content > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	# Length of the line from its first non-space char.
	var rest = text.length - lead

	if rest >= 2 and text[lead + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	if rest >= 3 and first.is_digit then
		# Digits followed by ". " start an ordered list item.
		var i = lead + 1
		while i < text.length and text[i].is_digit do i += 1
		if i + 1 < text.length and text[i] == '.' and text[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# A non-empty next line made of `=` or `-` underlines a headline.
	var follower = md.next
	if follower != null and not follower.is_empty then
		if follower.count_chars('=') > 0 then return new LineHeadline1
		if follower.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
348
# Get the kind of token found at `pos` in `text`.
#
# The decision uses the char at `pos`, the char before it and the two
# chars after it. Neighbours that fall outside of `text` are seen as spaces.
# `TokenNone` is returned for chars without special meaning.
fun token_at(text: Text, pos: Int): Token do
	var len = text.length
	var c0 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	var c1 = ' '
	if pos + 1 < len then c1 = text[pos + 1]
	var c2 = ' '
	if pos + 2 < len then c2 = text[pos + 2]

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
			return new TokenEmStar(pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
			return new TokenEmUnderscore(pos, c)
		end
		if ext_mode then
			# Extended mode: an underscore between two letters or digits
			# is plain text.
			var in_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
				(c1.is_letter or c1.is_digit)
			if in_word then return new TokenNone(pos, c)
			return new TokenEmUnderscore(pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '[' then return new TokenLink(pos, c)
	if c == ']' then return new TokenNone(pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(pos, c)
		return new TokenCodeSingle(pos, c)
	end

	if c == '\\' then
		# A backslash is an escape only before one of these chars.
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '<' then return new TokenHTML(pos, c)
	if c == '&' then return new TokenEntity(pos, c)

	# `~~` marks a strikethrough span in extended mode only.
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(pos, c)
	return new TokenNone(pos, c)
end
439
# Find the first position in `text`, from `start`, where the token has
# the same type as `token`.
#
# Return -1 if there is no such position.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
451 end
452
# Render the blocks built by a `MarkdownProcessor`.
#
# The output format is not decided here: each construct is handed to
# the `decorator`.
class MarkdownEmitter

	# Kind of processor used for parsing.
	type PROCESSOR: MarkdownProcessor

	# Processor that produced the blocks; it is queried for inline tokens.
	var processor: PROCESSOR

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	#
	# An `HTMLDecorator` is created on first access if none was set.
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownEmitter` that renders with a custom `decorator`.
	init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
		init processor
		self.decorator = decorator
	end

	# Render `block` in a fresh buffer and return that buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Render the content of `block` in the current buffer.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit the markdown `text` from position `start`.
	#
	# Stop at the first token of the same type as `token` and return its
	# position. A strong star (resp. underscore) token also stops an
	# emphasis star (resp. underscore) `token`.
	# If `token` is null or a `TokenNone`, go until the end of `text`.
	#
	# Return -1 when the end of `text` is reached.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var found = processor.token_at(text, current_pos)
			var stop = false
			if token != null and not token isa TokenNone then
				stop = found.is_same_type(token) or
					(token isa TokenEmStar and found isa TokenStrongStar) or
					(token isa TokenEmUnderscore and found isa TokenStrongUnderscore)
			end
			# NOTE(review): `current_text` and `current_pos` are left as is
			# on this path; only the end-of-text path restores them.
			if stop then return current_pos
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Position currently processed in `current_text`.
	# Used when visiting inline productions with `emit_text_until`.
	private var current_pos: Int = -1

	# Text currently processed.
	# Used when visiting inline productions with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stack of output buffers, the last one being the current one.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new empty buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	#
	# At least one buffer must have been pushed.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to the current buffer.
	fun add(e: Writable) do
		if e isa Text then
			current_buffer.append e
			return
		end
		current_buffer.append e.write_to_string
	end

	# Append the char `c` to the current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break to the current buffer.
	fun addn do
		current_buffer.add '\n'
	end
end
560
# A link reference.
#
# Link references are defined once in the markdown document
# and reused as shortcuts:
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Target (href) of the link.
	var link: String

	# Optional title of the link.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link reference with both its `link` and its `title`.
	init with_title(link: String, title: nullable String) do
		self.link = link
		self.title = title
	end
end
584
# A `Decorator` renders markdown constructs into a specific output format.
#
# Every service receives the emitter `v` to write into.
# The default decorator of a `MarkdownEmitter` is `HTMLDecorator`.
interface Decorator

	# Kind of emitter used for decoration.
	type EMITTER: MarkdownEmitter

	# Render the ruler `block`.
	fun add_ruler(v: EMITTER, block: BlockRuler) is abstract

	# Render the headline `block` with the corresponding level.
	fun add_headline(v: EMITTER, block: BlockHeadline) is abstract

	# Render the paragraph `block`.
	fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract

	# Render the code or fence `block`.
	fun add_code(v: EMITTER, block: BlockCode) is abstract

	# Render the blockquote `block`.
	fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract

	# Render the unordered list `block`.
	fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract

	# Render the ordered list `block`.
	fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract

	# Render the list item `block`.
	fun add_listitem(v: EMITTER, block: BlockListItem) is abstract

	# Render `text` as an emphasis.
	fun add_em(v: EMITTER, text: Text) is abstract

	# Render `text` as a strong emphasis.
	fun add_strong(v: EMITTER, text: Text) is abstract

	# Render `text` as a strikethrough.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: EMITTER, text: Text) is abstract

	# Render a link to `link` labelled `name`, with an optional title `comment`.
	fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image at `link` described by `name`, with an optional title `comment`.
	fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract

	# Render a code span read from `buffer` between `from` and `to`.
	fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render `value` and escape it.
	fun append_value(v: EMITTER, value: Text) is abstract

	# Render code text read from `buffer` between `from` and `to` and escape it.
	fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render the escaped form of `char`.
	fun escape_char(v: EMITTER, char: Char) is abstract

	# Render a line break.
	fun add_line_break(v: EMITTER) is abstract

	# Generate a new valid HTML id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
657
# A headline found in a markdown document.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of this headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, the level must be in `[1..6]`.
	var level: Int
end
671
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do v.add "<hr/>\n"

	# Render the headline and record it in `headlines` under a unique id.
	redef fun add_headline(v, block) do
		# save headline
		var txt = block.block.first_line.value
		var id = strip_id(txt)
		var lvl = block.depth
		headlines[id] = new HeadLine(id, txt, lvl)
		# output it
		v.add "<h{lvl} id=\"{id}\">"
		v.emit_in block
		v.add "</h{lvl}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# Render the code block in `<pre><code>`.
	#
	# The meta of a fence, if any, is output as the `class` of the `<pre>`.
	redef fun add_code(v, block) do
		if block isa BlockFence and block.meta != null then
			# The meta comes straight from the document: escape it so it
			# cannot break out of the attribute.
			v.add "<pre class=\""
			append_value(v, block.meta.to_s)
			v.add "\"><code>"
		else
			v.add "<pre><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# Render an `<img>`; `link`, `name` and `comment` are escaped.
	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add "/>"
	end

	# Render an `<a>`; `link` and `comment` are escaped, `name` is
	# emitted as markdown text.
	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	# Render an `<abbr>`; `comment` is escaped, `name` is emitted as
	# markdown text.
	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	# Output each char of `text` through `escape_char`.
	redef fun append_value(v, text) do for c in text do escape_char(v, c)

	# Output `c`, replacing `&`, `<`, `>`, `"` and `'` by HTML entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Output the chars of `buffer` in `[from..to[`, replacing `&`, `<`
	# and `>` by HTML entities. Quotes are left untouched.
	redef fun append_code(v, buffer, from, to) do
		for i in [from..to[ do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
		end
	end

	# Build an id from `txt`: spaces become `_`, letters, digits and
	# `allowed_id_chars` are kept, anything else is dropped.
	#
	# If the id is already a key of `headlines`, the first free `_1`,
	# `_2`... suffix is appended.
	redef fun strip_id(txt) do
		# strip id
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else
				if not c.is_letter and
					not c.is_digit and
					not allowed_id_chars.has(c) then continue
				b.add c
			end
		end
		var res = b.to_s
		var key = res
		# check for multiple id definitions
		if headlines.has_key(key) then
			var i = 1
			key = "{res}_{i}"
			while headlines.has_key(key) do
				i += 1
				key = "{res}_{i}"
			end
		end
		return key
	end

	# Non-alphanumeric chars kept by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
858
# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks, both kept as
# doubly linked lists.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self`, creating a new sub-block that has `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new block, which
	# is appended to the sub-blocks of `self` and returned.
	# `self` keeps the lines found after `line`.
	fun split(line: MDLine): MDBlock do
		var block = new MDBlock
		# Move the head of the line list to the new block.
		block.first_line = first_line
		block.last_line = line
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
		end
		# Append the new block to the sub-blocks.
		if first_block == null then
			first_block = block
			last_block = block
		else
			# Link in both directions, so `prev` is usable like `next`.
			block.prev = last_block
			last_block.next = block
			last_block = block
		end
		return block
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
			last_line = line
		else
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
			last_line = line
		end
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbours.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var was_empty = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			line = first_line
			was_empty = true
		end
		return was_empty
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var was_empty = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			line = last_line
			was_empty = true
		end
		return was_empty
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var was_empty = false
		if remove_leading_empty_lines then was_empty = true
		if remove_trailing_empty_lines then was_empty = true
		return was_empty
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# The value changed: recompute the leading spaces.
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line is followed by a "\n"; empty lines only give the "\n".
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1028
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is emitted (see `emit_in`).
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`: its lines if it has some,
	# its sub-blocks otherwise.
	#
	# Surrounding empty lines are removed from `block` first.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are stripped of their leading and trailing spaces and joined
	# with "\n" in a temporary buffer, then emitted as markdown text.
	# A line break is added after a line ending with two spaces or more.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a count, not an end index: the leading
				# spaces must be subtracted too or trailing ones are kept.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			block.kind.emit(v)
			block = block.next
		end
	end
end
1076
# A block without any markdown specificities.
#
# Nothing is redefined: the default behavior of `Block` is used as is.
# This class is the default kind of a `MDBlock`.
class BlockNone
	super Block
end
1084
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove the blockquote marker of each line of `block`.
	#
	# The `>` found after the leading spaces is dropped with what precedes
	# it, as well as one space directly following it.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				# Number of chars to drop: up to the marker...
				var cut = line.leading + 1
				# ... plus the space after it, if any.
				if cut < line.value.length and line.value[cut] == ' ' then cut += 1
				line.value = line.value.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1110
# A markdown code block.
class BlockCode
	super Block

	# Number of chars to skip at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line from `line_start`, escaped as code and followed by "\n".
	#
	# Empty lines only give the "\n".
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end
1133
# A markdown code-fence block.
#
# Lines are emitted like in `BlockCode`, but from their first char,
# and a `meta` can be attached to the block.
class BlockFence
	super BlockCode

	# Any string found after the fence token.
	var meta: nullable Text

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1147
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block` and set `depth`.
	#
	# The opening `#`s give the level (capped at 6); they are removed along
	# with the spaces after them and the closing `#`s and spaces.
	# A line with nothing left is marked as empty.
	# Nothing is done if `depth` is already set.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		# Count and skip the opening marks.
		var start = line.leading
		while start < value.length and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < value.length and value[start] == ' ' do
			start += 1
		end
		if start >= value.length then
			line.is_empty = true
		else
			# Skip the closing marks, never going before `start`: in a line
			# like `# #` the scan would otherwise run into the opening marks
			# and give a negative length.
			var nend = value.length - line.trailing - 1
			while nend >= start and value[nend] == '#' do nend -= 1
			while nend >= start and value[nend] == ' ' do nend -= 1
			if nend < start then
				# Only closing marks were left.
				line.is_empty = true
			else
				line.value = value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		depth = level.min(6)
	end
end
1184
# An item of a markdown list.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1191
# A markdown list block.
#
# Either ordered or unordered: this class factorizes what both share.
abstract class BlockList
	super Block

	# Split the list block into one `BlockListItem` sub-block per item.
	private fun init_block(v: MarkdownProcessor) do
		# The first line opens the first item: boundaries are searched
		# from the second line.
		var cur = block.first_line.next
		while cur != null do
			var kind = v.line_kind(cur)
			# A new item starts on a list line, or on an unindented
			# non-empty line that follows an empty one.
			var starts_item = kind isa LineList
			if not starts_item then
				starts_item = not cur.is_empty and cur.prev_empty and cur.leading == 0
			end
			if starts_item then
				# Close the item that ends on the previous line.
				var item = block.split(cur.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			cur = cur.next
		end
		# The remaining lines make the last item.
		var last = block.split(block.last_line.as(not null))
		last.kind = new BlockListItem(last)
	end

	# Expand list items as paragraphs if needed.
	#
	# If one item of `block` contains a paragraph, the `BlockNone`
	# sub-blocks of every item are turned into paragraphs.
	private fun expand_paragraphs(block: MDBlock) do
		# Look for a paragraph in any item.
		var has_paragraph = false
		var item = block.first_block
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not has_paragraph do
					if child.kind isa BlockParagraph then has_paragraph = true
					child = child.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return

		# Promote the plain sub-blocks of each item.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1249
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	# Hand this ordered list over to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1256
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	# Hand this unordered list over to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1263
# A markdown paragraph block.
class BlockParagraph
	super Block

	# Hand this paragraph over to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1270
# A markdown ruler.
class BlockRuler
	super Block

	# Hand this ruler over to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1277
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Output the lines of the block as they are.
	#
	# The value of each non-empty line is written unchanged; every line,
	# empty or not, is followed by a new line.
	redef fun emit_lines(v) do
		var cur = block.first_line
		while cur != null do
			if cur.is_empty then
				v.addn
			else
				v.add cur.value
				v.addn
			end
			cur = cur.next
		end
	end
end
1291
# A markdown line.
class MDLine

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# Computes `leading`, and for lines that are not made only of spaces,
	# clears `is_empty` and computes `trailing`.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbour lines, if any, are told that this line is now empty.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.next_empty = true
		if next != null then next.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# Returns the number of spaces found at the start of `value`.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# NOTE(review): this tests the current `leading` attribute, not the
		# freshly computed `count`; `count` looks like what was intended.
		# Confirm before changing: `init` compares the returned count with
		# `value.length` and would misbehave if `value` were cleared here.
		if leading == value.length then clear
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns the number of spaces found at the end of `value`.
	# For an empty or blank `value`, the whole length is returned.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# The bound keeps the index valid on empty or blank values.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			# Any other character disqualifies the whole line.
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment` when a block is recognized.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# Reads the tag found at `leading` and, for HTML block tags, follows the
	# next lines while keeping a stack of the HTML block tags still open.
	# On success `xml_end_line` is set to the line where the stack gets empty.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		# Comments are handled apart from regular tags.
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			# `hr` has no closing tag: the block is this single line.
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				# Look for the next tag opening on the current line.
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# End of line reached; a `/` two characters before the
					# end is taken as the end of a self-closing tag.
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# A closing tag must match the last opened one.
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the position of the `<` opening the comment in `first_line`.
	# Returns the position following the closing `-->` (on the line stored in
	# `first_line.xml_end_line`) or `-1` if no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.value.length then
			# The two dashes are looked up relative to `start` so that
			# comments preceded by leading spaces are recognized too.
			if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` takes a count, not an end index: both margins are removed.
		var count = value.length - leading - trailing
		if count < 0 then count = 0
		return value.substring(leading, count)
	end
end
1492
# The kind of a markdown line.
#
# Each kind knows how to build blocks starting from the current line of a
# `MarkdownProcessor`.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1500
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the following one.
	redef fun process(v) do
		var following = v.current_line.next
		v.current_line = following
	end
end
1509
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the consecutive text lines into a paragraph (or an untyped
	# block when inside a list) and split it from the current block.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# Walk to the first line that ends the run of text.
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			# Lists end the text inside lists and in extended mode.
			var stops = (v.in_list or v.ext_mode) and t isa LineList
			# Code and fences end the text in extended mode only.
			if not stops then
				stops = v.ext_mode and (t isa LineCode or t isa LineFence)
			end
			# These constructs always end the text.
			if not stops then
				stops = t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
					t isa LineHR or t isa LineBlockquote or t isa LineXML
			end
			if stops then break
			line = line.next
		end
		# Split the gathered lines and choose the kind of the new block.
		var block: MDBlock
		var untyped: Bool
		if line != null and not line.is_empty then
			# Stopped on another construct: it stays in the current block.
			block = v.current_block.split(line.prev.as(not null))
			untyped = v.in_list and not was_empty
		else
			if line != null then
				block = v.current_block.split(line)
			else
				block = v.current_block.split(v.current_block.last_line.as(not null))
			end
			untyped = v.in_list and (line == null or not line.is_empty) and not was_empty
		end
		if untyped then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1559
# A line of markdown code.
class LineCode
	super Line

	# Gather the consecutive code lines (and the empty lines between them)
	# into a `BlockCode` split from the current block.
	redef fun process(v) do
		# Find the first line that is neither empty nor code.
		var after = v.current_line
		while after != null do
			if not after.is_empty and not (v.line_kind(after) isa LineCode) then break
			after = after.next
		end
		# The block ends right before that line, or with the last line.
		var split_at: nullable MDLine
		if after != null then
			split_at = after.prev
		else
			split_at = v.current_block.last_line
		end
		var block = v.current_block.split(split_at.as(not null))
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1582
# A line of raw XML.
class LineXML
	super Line

	# Split the lines from the current one up to its `xml_end_line` into a
	# `BlockXML`.
	redef fun process(v) do
		var first = v.current_line
		# Whatever precedes the XML goes into its own block.
		var before = first.prev
		if before != null then v.current_block.split(before)
		var last = first.xml_end_line.as(not null)
		var block = v.current_block.split(last)
		block.kind = new BlockXML(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1597
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote into a `BlockQuote`, remove the quote
	# prefix and process the quoted content recursively.
	redef fun process(v) do
		var line = v.current_line
		# The quote ends on a non-empty, non-indented line that follows an
		# empty line and is not itself a blockquote line.
		while line != null do
			var ends_quote = false
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				ends_quote = not (v.line_kind(line) isa LineBlockquote)
			end
			if ends_quote then break
			line = line.next
		end
		# Split right before the ending line, or take everything left.
		var split_at: nullable MDLine
		if line != null then
			split_at = line.prev
		else
			split_at = v.current_block.last_line
		end
		var block = v.current_block.split(split_at.as(not null))
		var quote = new BlockQuote(block)
		block.kind = quote
		block.remove_surrounding_empty_lines
		quote.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1627
# A markdown ruler line.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	redef fun process(v) do
		var ruler_line = v.current_line
		# Whatever precedes the ruler goes into its own block.
		var before = ruler_line.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(ruler_line.as(not null))
		block.kind = new BlockRuler(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1641
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines from the opening fence to the closing one (or to the
	# end of the current block if the fence is never closed) into a
	# `BlockFence`.
	redef fun process(v) do
		# Look for the closing fence, starting after the opening one.
		var line = v.current_line.next
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# Step over the closing fence: `line` is now the first line after the block.
		if line != null then line = line.next
		# Build the fence block.
		var split_at: nullable MDLine
		if line != null then
			split_at = line.prev
		else
			split_at = v.current_block.last_line
		end
		var block = v.current_block.split(split_at.as(not null))
		# The meta is read on the opening fence before the fence is erased.
		var meta = block.first_line.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.clear
		# Erase the closing fence too, if there is one.
		var last = block.last_line
		if last != null and v.line_kind(last) isa LineFence then
			last.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1674
# A markdown headline.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline` and strip its marks.
	redef fun process(v) do
		var title_line = v.current_line
		# Whatever precedes the headline goes into its own block.
		var before = title_line.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(title_line.as(not null))
		var headline = new BlockHeadline(block)
		block.kind = headline
		headline.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1691
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var title_line = v.current_line
		# Whatever precedes the headline goes into its own block.
		var before = title_line.prev
		if before != null then v.current_block.split(before)
		title_line.next.clear
		var block = v.current_block.split(title_line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 1
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1710
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var title_line = v.current_line
		# Whatever precedes the headline goes into its own block.
		var before = title_line.prev
		if before != null then v.current_block.split(before)
		title_line.next.clear
		var block = v.current_block.split(title_line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 2
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1729
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list into a list block, split it into items
	# and process each item recursively.
	redef fun process(v) do
		var line = v.current_line
		# The list ends on a non-empty, non-indented line that follows an
		# empty line and is not itself a list line.
		while line != null do
			var t = v.line_kind(line)
			var ends_list = false
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				ends_list = not (t isa LineList)
			end
			if ends_list then break
			line = line.next
		end
		# Split right before the ending line, or take everything left.
		var split_at: nullable MDLine
		if line != null then
			split_at = line.prev
		else
			split_at = v.current_block.last_line
		end
		var list = v.current_block.split(split_at.as(not null))
		var kind = block_kind(list)
		list.kind = kind
		# The flags are reset once more after the trimming.
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# Process each item of the list.
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1775
# An ordered list line.
class LineOList
	super LineList

	# Ordered list lines produce `BlockOrderedList` blocks.
	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1786
# An unordered list line.
class LineUList
	super LineList

	# Unordered list lines produce `BlockUnorderedList` blocks.
	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two characters after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1797
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Position of `self` in markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default the character is written unchanged.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1811
# A token without a specific meaning.
#
# It keeps the default `emit` of `Token`.
class TokenNone
	super Token
end
1816
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as emphasis.
	#
	# If `emit_text_until` does not yield a positive position, the character
	# is written as plain text.
	redef fun emit(v) do
		var content = v.push_buffer
		var close_pos = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if close_pos <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = close_pos
	end
end
1833
# An emphasis star token.
#
# Behaves as defined by `TokenEm`.
class TokenEmStar
	super TokenEm
end
1838
# An emphasis underscore token.
#
# Behaves as defined by `TokenEm`.
class TokenEmUnderscore
	super TokenEm
end
1843
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as strong.
	#
	# The content starts two characters after `pos`. If `emit_text_until`
	# does not yield a positive position, the character is written as plain
	# text.
	redef fun emit(v) do
		var content = v.push_buffer
		var close_pos = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if close_pos <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = close_pos + 1
	end
end
1860
# A strong star token.
#
# Behaves as defined by `TokenStrong`.
class TokenStrongStar
	super TokenStrong
end
1865
# A strong underscore token.
#
# Behaves as defined by `TokenStrong`.
class TokenStrongUnderscore
	super TokenStrong
end
1870
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as span code.
	#
	# Spaces surrounding the code are left out; nothing is added when only
	# spaces are found between the tokens. If `find_token` does not yield a
	# positive position, the character is written as plain text.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var from = pos + next_pos + 1
		var to = v.processor.find_token(text, from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		# Trim the spaces on both sides of the code.
		while from < to and text[from] == ' ' do from += 1
		if from >= to then return
		while text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, text, from, to)
	end

	# Number of extra characters used by the token after the first one.
	private fun next_pos: Int is abstract
end
1893
# A span code token.
class TokenCodeSingle
	super TokenCode

	# No extra character follows the first one.
	redef fun next_pos do
		return 0
	end
end
1900
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# One extra character follows the first one.
	redef fun next_pos do
		return 1
	end
end
1907
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if the construct is valid, or the character as
	# plain text otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parses the construct found at `start` in the current text of `v` and
	# fills `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Three forms are handled: inline `[name](address "title")`, reference
	# `[name][id]` and the bare `[name]` looked up in the link references
	# of the processor.
	#
	# Returns the position of the last character of the construct or `-1`
	# if the construct is not a valid link. `out` is not used.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		var pos
		# The name starts after the opening mark of the token.
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing but spaces after the name: only a reference can match.
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form: read the address then the optional title.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# Stepping over the `>` may reach the end of the text.
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# Only spaces up to the end of the text: the construct is
				# not closed and `pos` is not a valid index anymore.
				if pos == -1 then return -1
				if pos > start and md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: an empty id means that the name is the id.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Bare form: the name itself is looked up as a reference.
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2026
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is one and has a title, or a
	# regular link otherwise.
	redef fun emit_hyper(v) do
		var text = name.as(not null)
		if not is_abbrev or comment == null then
			v.decorator.add_link(v, link.as(not null), text, comment)
			return
		end
		v.decorator.add_abbr(v, text, comment.as(not null))
	end
end
2039
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image using `link` as source, `name` as text and the
	# optional `comment`.
	redef fun emit_hyper(v) do
		var source = link.as(not null)
		var text = name.as(not null)
		v.decorator.add_image(v, source, text, comment)
	end
end
2048
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup read by `check_html`, or the escaped character when
	# no markup is recognized.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var end_pos = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if end_pos <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = end_pos
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links (`<prefix:...>` with a known link prefix) are emitted
	# directly through the decorator. Other markup is read into `out` with
	# `read_xml` in safe mode.
	# Returns the position reached in `md` or `-1` if nothing is recognized.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var target = new FlatBuffer
		var pos = md.read_until(target, start + 1, ':', ' ', '>', '\n')
		var is_auto_link = pos != -1 and md[pos] == ':' and target.is_link_prefix
		if is_auto_link then
			# Read the remaining of the address up to the closing `>`.
			pos = md.read_until(target, pos, '>')
			if pos != -1 then
				var address = target.write_to_string
				v.decorator.add_link(v, address, address, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2086
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity unchanged if it is well formed, or the escaped
	# character otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out` and checks that
	# the content is a decimal reference (`&#123`), a hexadecimal reference
	# (`&#x1F`) or a name made of letters and digits.
	#
	# Returns the position of the `;` (which is then appended to `out`) or
	# `-1` if the entity is not well formed.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# Hexadecimal reference: at least one digit is needed.
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# Reject what is outside `0-9`, `a-f` and `A-F`.
					if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
						return -1
					end
				end
			else
				# Decimal reference.
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# Named entity.
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
		end
		out.add ';'
		return pos
	end
end
2140
# A markdown escape token.
class TokenEscape
	super Token

	# Move to the character following the token and write it unchanged.
	redef fun emit(v) do
		var escaped_pos = v.current_pos + 1
		v.current_pos = escaped_pos
		v.addc v.current_text[escaped_pos]
	end
end
2150
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as strike.
	#
	# The content starts two characters after `pos`. If `emit_text_until`
	# does not yield a positive position, the character is written as plain
	# text.
	redef fun emit(v) do
		var content = v.push_buffer
		var close_pos = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if close_pos <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = close_pos + 1
	end
end
2169
redef class Text

	# Get the position of the next non-space character.
	#
	# Spaces and new lines are skipped from `start`.
	# Returns `-1` if the end of `self` is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
			pos += 1
		end
		if pos < length then return pos
		return -1
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Returns the position of the first character of `nend` found or `-1`
	# if the end of `self` is reached.
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				var end_reached = false
				for n in nend do
					if c == n then
						end_reached = true
						break
					end
				end
				if end_reached then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Returns the position of the first character of `nend` found or `-1`
	# if the end of `self` is reached.
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			var end_reached = false
			for n in nend do
				if c == n then
					end_reached = true
					break
				end
			end
			if end_reached then break
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Characters of `to` found inside single or double quoted strings do not
	# stop the reading. Inside strings, a backslash and the character
	# following it are copied as they are.
	#
	# Returns the position of the first character of `to` found or `-1`
	# if the end of `self` is reached.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					out.add c
					pos += 1
					if pos < length then
						# Copy the escaped character, not the backslash again.
						out.add self[pos]
						pos += 1
					end
					continue
				end
				if c == str_char then
					in_str = false
					out.add c
					pos += 1
					continue
				end
			end
			if c == '"' or c == '\'' then
				in_str = true
				str_char = c
			end
			if not in_str then
				var end_reached = false
				for n in [0..to.length[ do
					if c == to[n] then
						end_reached = true
						break
					end
				end
				if end_reached then break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the `<` opening the tag.
	# In safe mode, invalid tag names are rejected and unsafe tags
	# (see `html_unsafe_tags`) are written with escaped `<` and `>`.
	#
	# Returns the position of the `>` closing the tag, `start + 1` for
	# constructs starting with `<!`, or `-1` if no tag can be read.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		var pos = 0
		var is_valid = true
		var is_close_tag = false
		if start + 1 >= length then return -1
		if self[start + 1] == '/' then
			is_close_tag = true
			pos = start + 2
		else if self[start + 1] == '!' then
			out.append "<!"
			return start + 1
		else
			is_close_tag = false
			pos = start + 1
		end
		if safe_mode then
			# Read the tag name apart to check it before writing anything.
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if not tag.is_valid_html_tag then
				out.append "&lt;"
				pos = -1
			else if tag.is_html_unsafe then
				is_valid = false
				out.append "&lt;"
				if is_close_tag then out.add '/'
				out.append tmp
			else
				out.append "<"
				if is_close_tag then out.add '/'
				out.append tmp
			end
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# Read the attributes, if any.
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = self.read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			if is_valid then
				out.add '>'
			else
				out.append "&gt;"
			end
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# The address ends on the first space found outside nested parentheses
	# or on the `)` closing the link.
	# Returns the position of that character or `-1` if the end of `self`
	# is reached.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				var end_reached = false
				if c == '(' then
					counter += 1
				else if c == ' ' then
					if counter == 1 then end_reached = true
				else if c == ')' then
					counter -= 1
					if counter == 0 then end_reached = true
				end
				if end_reached then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Nested brackets are kept in the text.
	# Returns the position of the `]` closing the text or `-1` if the end
	# of `self` is reached.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		var counter = 1
		while pos < length do
			var c = self[pos]
			var end_reached = false
			if c == '[' then
				counter += 1
				out.add c
			else if c == ']' then
				counter -= 1
				if counter == 0 then
					end_reached = true
				else
					out.add c
				end
			else
				out.add c
			end
			if end_reached then break
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Extract the XML tag name from a XML tag.
	#
	# The name is made of the letters and digits found after the opening
	# `<` (and the `/` of closing tags). It is returned in lower case.
	private fun xml_tag: String do
		var tpl = new FlatBuffer
		var pos = 1
		if pos < length and self[1] == '/' then pos += 1
		while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
			tpl.add self[pos]
			pos += 1
		end
		return tpl.write_to_string.to_lower
	end

	# Is `self` a non-empty string where every character satisfies `is_alpha`?
	private fun is_valid_html_tag: Bool do
		if is_empty then return false
		for c in self do
			if not c.is_alpha then return false
		end
		return true
	end

	# Read and escape the markdown contained in `self`.
	#
	# `c` is the character following the backslash found at `pos`.
	# If `c` can be escaped, it is appended to `out` and the position of `c`
	# is returned. Otherwise the backslash is appended and `pos` is returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
			c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
			c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
			c == '`' or c == '~' or c == '^' then
			out.add c
			return pos + 1
		end
		out.add '\\'
		return pos
	end

	# Extract string found at end of fence opening.
	#
	# Returns the trimmed text starting at the first character that is not
	# a space, a backquote or a tilde, or `null` if there is none.
	private fun meta_from_fence: nullable Text do
		for i in [0..chars.length[ do
			var c = chars[i]
			if c != ' ' and c != '`' and c != '~' then
				return substring_from(i).trim
			end
		end
		return null
	end

	# Is `self` an unsafe HTML element?
	private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

	# Is `self` a HTML block element?
	private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

	# Is `self` a link prefix?
	private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

	# Names of the HTML elements considered unsafe.
	private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

	# Names of the HTML block elements.
	private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

	# Prefixes recognized in auto links.
	private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2446
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		return (new MarkdownProcessor).process(self)
	end
end