4c2428fbfee495cd5a14e79bf4669ca3c2c2dcae
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
# `MarkdownEmitter` used for output.
#
# Declared `noinit`: it is not a constructor parameter.
# The `init` of this class assigns a default `MarkdownEmitter` bound to `self`.
var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # ~~~md
45 # This is a paragraph
46 # * and this is not a list
47 # ~~~
48 #
49 # Will produce:
50 #
51 # ~~~html
52 # <p>This is a paragraph
53 # * and this is not a list</p>
54 # ~~~
55 #
56 # When using extended mode this changes to:
57 #
58 # ~~~html
59 # <p>This is a paragraph</p>
60 # <ul>
61 # <li>and this is not a list</li>
62 # </ul>
63 # ~~~
64 #
65 # * Fences code blocks
66 #
67 # If you don't want to indent all your code with 4 spaces,
68 # you can wrap your code in ``` ``` ``` or `~~~`.
69 #
70 # Here's an example:
71 #
72 # ~~~md
73 # fun test do
74 # print "Hello World!"
75 # end
76 # ~~~
77 #
78 # * Code blocks meta
79 #
80 # If you want to use syntax highlighting tools, most of them need to know what kind
81 # of language they are highlighting.
82 # You can add an optional language identifier after the fence declaration to output
83 # it in the HTML render.
84 #
85 # ```nit
86 # import markdown
87 #
88 # print "# Hello World!".md_to_html
89 # ```
90 #
91 # Becomes
92 #
93 # ~~~html
94 # <pre class="nit"><code>import markdown
95 #
96 # print "Hello World!".md_to_html
97 # </code></pre>
98 # ~~~
99 #
100 # * Underscores (Emphasis)
101 #
102 # Underscores in the middle of a word like:
103 #
104 # ~~~md
105 # Con_cat_this
106 # ~~~
107 #
108 # normally produces this:
109 #
110 # ~~~html
111 # <p>Con<em>cat</em>this</p>
112 # ~~~
113 #
114 # With extended mode they don't result in emphasis.
115 #
116 # ~~~html
117 # <p>Con_cat_this</p>
118 # ~~~
119 #
120 # * Strikethrough
121 #
122 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
123 # a strikethrough span is marked with `~~`.
124 #
125 # ~~~md
126 # ~~Mistaken text.~~
127 # ~~~
128 #
129 # becomes
130 #
131 # ~~~html
132 # <del>Mistaken text.</del>
133 # ~~~
var ext_mode = true

# Bind a default `MarkdownEmitter` to this processor.
init do
	self.emitter = new MarkdownEmitter(self)
end
137
# Parse the markdown `input` string and return the rendered output.
#
# The link references and the cursors left by a previous call are
# discarded first.
fun process(input: String): Writable do
	# reset the state of the processor
	link_refs.clear
	last_link_ref = null
	current_line = null
	current_block = null

	# build the tree of blocks
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)

	# render the tree
	var output = emitter.emit(root.kind)
	return output
end
152
# Cut `input` into `MDLine`s and gather them in a new root `MDBlock`.
#
# While reading a line:
#
# * `\n` ends the line and is not kept,
# * `\r` is dropped,
# * `\t` is expanded with spaces up to the next multiple of 4 columns.
#
# Lines accepted by `check_link_ref` are not added to the block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var buf = new FlatBuffer
	var len = input.length
	var idx = 0
	var line_no = 0

	while idx < len do
		buf.clear
		line_no += 1
		# width of the expanded line so far
		var width = 0
		# raw chars read for this line, end of line included
		var consumed = 0
		loop
			if idx >= len then break
			var c = input[idx]
			idx += 1
			consumed += 1
			if c == '\n' then break
			if c == '\r' then continue
			if c == '\t' then
				var stop = width + (4 - (width & 3))
				while width < stop do
					buf.add ' '
					width += 1
				end
			else
				buf.add c
				width += 1
			end
		end

		var loc = new MDLocation(line_no, 1, line_no, consumed)
		var line = new MDLine(loc, buf.write_to_string)
		# Skip link refs
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
195
# Check if `line` is a block link definition.
#
# Return `true` when the caller must skip `line`, that is when:
#
# * `line` holds a valid link ref (`[id]: link "title"`), which is then
#   saved into `link_refs`;
# * or `line` holds the title of the title-less link ref defined just
#   before (multiline definition), which is then stored in that link ref.
#
# Only the first non-empty line following a title-less definition can
# provide its title: the pending `last_link_ref` is dropped as soon as
# such a line has been examined.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = line.leading + 1
		pos = md.read_until(id, pos, ']')
		if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
			pos += 2
			pos = md.skip_spaces(pos)
			# NOTE(review): `skip_spaces` is assumed to be able to return a
			# position outside of the line when only spaces follow the colon
			# (the title lookup below already guards against it).
			# Never index `md` with such a position.
			if pos >= 0 and pos < md.length then
				if md[pos] == '<' then
					pos += 1
					pos = md.read_until(link, pos, '>')
					pos += 1
				else
					pos = md.read_until(link, pos, ' ', '\n')
				end
			end
			if not link.is_empty then
				pos = md.skip_spaces(pos)
				if pos > 0 and pos < md.length then
					var c = md[pos]
					if c == '\"' or c == '\'' or c == '(' then
						pos += 1
						if c == '(' then
							pos = md.read_until(comment, pos, ')')
						else
							pos = md.read_until(comment, pos, c)
						end
						# the title must be closed to be accepted
						if pos > 0 then is_link_ref = true
					end
				else
					# nothing after the link: definition without title
					is_link_ref = true
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without title can be completed by the next line.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a definition: `line` may be the title of the pending link ref.
	var pending = last_link_ref
	if line.is_empty or pending == null then return false
	# Whatever this line is, the pending link ref stops waiting for a title.
	last_link_ref = null

	var title = new FlatBuffer
	pos = line.leading
	var c = md[pos]
	if c == '\"' or c == '\'' or c == '(' then
		pos += 1
		if c == '(' then
			pos = md.read_until(title, pos, ')')
		else
			pos = md.read_until(title, pos, c)
		end
		# Same rule as for single-line definitions: the closing delimiter
		# must have been found.
		if pos > 0 and not title.is_empty then
			pending.title = title.write_to_string
			return true
		end
	end
	return false
end
263
# Link refs collected so far, indexed by lower-cased id.
#
# This map will be needed during output to expand links.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

# Last link ref defined without a title (for multiline definitions).
#
# Markdown allows link refs to be defined over two lines:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
# "Optional Title Here"
# ~~~
#
private var last_link_ref: nullable LinkRef = null

# Register `ref` under `key`.
#
# The case of `key` is ignored: the lower-cased key is used as index.
fun add_link_ref(key: String, ref: LinkRef) do
	var id = key.to_lower
	link_refs[id] = ref
end
281
# Recursively split a `block`.
#
# The block is split according to the type of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# `self.in_list` and `current_block` are restored to their previous values
# before returning, including when `root` contains only empty lines.
# `current_line` is not restored.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# skip leading empty lines
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: nothing to process.
			# `current_block` is still untouched here, only the mode
			# has to be put back.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# Each line kind consumes one or more lines and moves `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
307
# Currently processed line.
#
# Used when visiting blocks with `recurse`, which sets it to the first
# non-empty line of the visited block and loops until it becomes `null`.
var current_line: nullable MDLine = null is writable

# Currently processed block.
#
# Used when visiting blocks with `recurse`, which sets it to the visited
# block and restores the previous value once the visit is over.
var current_block: nullable MDBlock = null is writable

# Is the current recursion in list mode?
#
# Used when visiting blocks with `recurse`, which sets it from its
# `in_list` parameter.
private var in_list = false
319
# The kind of the line `md`.
#
# The checks are done in this order: empty line, indented code, `#` headline,
# blockquote, fence (extended mode only), horizontal rule, unordered list,
# ordered list, XML, underlined headline (decided by looking at the next line).
# Anything else is a `LineOther`.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty

	var value = md.value
	var leading = md.leading
	if leading > 3 then return new LineCode

	# first char after the leading spaces
	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# number of chars between the leading and the trailing spaces
	var span = value.length - leading - md.trailing

	if ext_mode and span > 2 then
		if first == '`' and md.count_chars_start('`') >= 3 then return new LineFence
		if first == '~' and md.count_chars_start('~') >= 3 then return new LineFence
	end

	if span > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	# number of chars from the first non-space char to the end
	var rest = value.length - leading

	if rest >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	if rest >= 3 and first.is_digit then
		# skip the digits, then expect ". "
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	var next = md.next
	if next != null and not next.is_empty then
		if next.count_chars('=') > 0 then return new LineHeadline1
		if next.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
375
# Get the kind of the token starting at `pos` in `text`.
#
# The decision is based on the char at `pos`, the char before and the
# two chars after. Positions outside of `text` are seen as spaces.
# The location of the token is computed from `current_loc`.
fun token_at(text: Text, pos: Int): Token do
	var len = text.length
	var c0 = ' '
	var c1 = ' '
	var c2 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	if pos + 1 < len then c1 = text[pos + 1]
	if pos + 2 < len then c2 = text[pos + 2]

	# the token covers a single position
	var base = current_loc
	var line = base.line_start
	var col = base.column_start + pos
	var loc = new MDLocation(line, col, line, col)

	if c == '*' then
		if c1 == '*' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmStar(loc, pos, c)
			return new TokenStrongStar(loc, pos, c)
		end
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmStar(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmUnderscore(loc, pos, c)
			return new TokenStrongUnderscore(loc, pos, c)
		end
		if ext_mode then
			# an underscore between two alphanumeric chars is plain text
			var in_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
				(c1.is_letter or c1.is_digit)
			if in_word then return new TokenNone(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmUnderscore(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)

	# `]` and any other char
	return new TokenNone(loc, pos, c)
end
472
# Find the first position in `text`, from `start`, where the token
# has the same type as `token`.
#
# Return `-1` if there is no such position.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
484
# Location used for the next parsed token.
#
# It is the current location of the `emitter`, which can change it
# to adjust with `\n` found in the input.
private fun current_loc: MDLocation do
	var loc = emitter.current_loc
	return loc
end
490 end
491
# Emit the output corresponding to the content of blocks.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Kind of processor used for parsing.
	type PROCESSOR: MarkdownProcessor

	# Processor containing link refs.
	var processor: PROCESSOR

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
		init processor
		self.decorator = decorator
	end

	# Output `block` using `decorator` in a fresh buffer and return this buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit markdown text.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of `text` if `token` is null.
	#
	# Return the position of the stop token, or `-1` if the end of `text`
	# was reached. `current_text` and `current_pos` are restored only in
	# the latter case.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			# keep the current location in sync with the line breaks
			if text[current_pos] == '\n' then
				current_loc.line_start += 1
				current_loc.column_start = -current_pos
			end
			var mt = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				# a strong token also closes the emphasis of the same family
				var found = mt.is_same_type(token)
				if not found then found = token isa TokenEmStar and mt isa TokenStrongStar
				if not found then found = token isa TokenEmUnderscore and mt isa TokenStrongUnderscore
				if found then return current_pos
			end
			mt.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Stacked locations.
	private var loc_stack = new List[MDLocation]

	# Push a new MDLocation on the stack.
	private fun push_loc(location: MDLocation) do
		loc_stack.add location
	end

	# Pop the last location.
	private fun pop_loc: MDLocation do
		return loc_stack.pop
	end

	# Current location.
	private fun current_loc: MDLocation do
		assert not loc_stack.is_empty
		return loc_stack.last
	end

	# Append `e` to current buffer.
	fun add(e: Writable) do
		var out = current_buffer
		if e isa Text then
			out.append e
		else
			out.append e.write_to_string
		end
	end

	# Append `c` to current buffer.
	fun addc(c: Char) do
		add c.to_s
	end

	# Append a "\n" line break.
	fun addn do
		add "\n"
	end
end
618
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href
	var link: String

	# Optional link title
	#
	# `null` by default.
	var title: nullable String = null

	# Is the link an abbreviation?
	#
	# `false` by default.
	var is_abbrev = false

	# Create a link with a title.
	#
	# `title` replaces the default `null` title, `is_abbrev` is left untouched.
	init with_title(link: String, title: nullable String) do
		init(link)
		self.title = title
	end
end
642
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Except `add_char`, all the services are abstract.
# Each service receives the emitter `v` in which the output must be appended.
interface Decorator

	# Kind of emitter used for decoration.
	type EMITTER: MarkdownEmitter

	# Render a single plain char.
	#
	# Redefine this method to add special escaping for plain text.
	# By default `c` is appended as is.
	fun add_char(v: EMITTER, c: Char) do v.addc c

	# Render a ruler block.
	fun add_ruler(v: EMITTER, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: EMITTER, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: EMITTER, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: EMITTER, block: BlockListItem) is abstract

	# Render an emphasis text.
	fun add_em(v: EMITTER, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: EMITTER, text: Text) is abstract

	# Render a strike text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: EMITTER, text: Text) is abstract

	# Render a link to `link` labeled `name`, with the optional title `comment`.
	fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image located at `link`, with `name` as alternative text
	# and the optional title `comment`.
	fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract

	# Render a code span reading `buffer` between the positions `from` and `to`.
	fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: EMITTER, value: Text) is abstract

	# Render code text from `buffer`, between the positions `from` and `to`,
	# and escape it.
	fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: EMITTER, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: EMITTER) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Found headlines during the processing labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
720
# Class representing a markdown headline.
#
# Instances are recorded by the decorator in `Decorator::headlines`.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
734
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	redef fun add_headline(v, block) do
		# register the headline under a fresh id
		var level = block.depth
		var title = block.block.first_line.value
		var id = strip_id(title)
		headlines[id] = new HeadLine(id, title, level)
		# then render it
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		# the meta string of the block, if any, becomes the class of the `pre`
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do v.add "<em>{text}</em>"

	redef fun add_strong(v, text) do v.add "<strong>{text}</strong>"

	redef fun add_strike(v, text) do v.add "<del>{text}</del>"

	# Append the escaped `title` attribute if `comment` is neither null nor empty.
	private fun add_title(v: EMITTER, comment: nullable Text) do
		if comment == null or comment.is_empty then return
		v.add " title=\""
		append_value(v, comment)
		v.add "\""
	end

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		add_title(v, comment)
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		add_title(v, comment)
		v.add ">"
		# the label can contain markdown
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	redef fun escape_char(v, c) do
		# entity replacing `c`, if any
		var entity: nullable String = null
		if c == '&' then
			entity = "&amp;"
		else if c == '<' then
			entity = "&lt;"
		else if c == '>' then
			entity = "&gt;"
		else if c == '"' then
			entity = "&quot;"
		else if c == '\'' then
			entity = "&apos;"
		end
		if entity == null then
			v.addc c
		else
			v.add entity
		end
	end

	redef fun append_code(v, buffer, from, to) do
		# only `&`, `<` and `>` are escaped in code, quotes are kept
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	redef fun strip_id(txt) do
		# spaces become `_`, chars that are not allowed are dropped
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var base = b.to_s
		# add a numeric suffix until the id is not already used by a headline
		var key = base
		var n = 0
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Chars, other than letters and digits, that are kept by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
924
# Location in a Markdown input.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Formatted as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var head = "{line_start},{column_start}"
		var tail = "{line_end},{column_end}"
		return "{head}--{tail}"
	end

	# Return a new `MDLocation` with the same four values as `self`.
	fun copy: MDLocation do
		var dup = new MDLocation(line_start, column_start, line_end, column_end)
		return dup
	end
end
947
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are both stored as linked lists:
# `first_line`/`last_line` and `first_block`/`last_block` are the ends of
# the lists, the elements are chained with their `prev` and `next` links.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved into the new sub-block,
	# which is appended to the sub-blocks of `self` and returned.
	# The remaining lines, if any, stay in `self`, whose location start is
	# updated accordingly.
	#
	# `self` must contain at least one line.
	fun split(line: MDLine): MDBlock do
		# location for new block
		var new_loc = new MDLocation(
			first_line.location.line_start,
			first_line.location.column_start,
			line.location.line_end,
			line.location.column_end)
		# create block
		var block = new MDBlock(new_loc)
		block.first_line = first_line
		block.last_line = line
		# detach the moved lines from the remaining ones
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
			# update current block loc
			location.line_start = first_line.location.line_start
			location.column_start = first_line.location.column_start
		end
		# append the new block to the sub-blocks
		if first_block == null then
			first_block = block
			last_block = block
		else
			# keep the list of sub-blocks doubly linked
			block.prev = last_block
			last_block.next = block
			last_block = block
		end
		return block
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
			last_line = line
		else
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
			last_line = line
		end
	end

	# Remove `line` from this block.
	#
	# The `prev` and `next` links of `line` are cleared.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var was_empty = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			line = first_line
			was_empty = true
		end
		return was_empty
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var was_empty = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			line = last_line
			was_empty = true
		end
		return was_empty
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var was_empty = false
		if remove_leading_empty_lines then was_empty = true
		if remove_trailing_empty_lines then was_empty = true
		return was_empty
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# the value changed, so did the count of leading spaces
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line, empty or not, contributes a `\n`.
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1131
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default, only the content is emitted.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are emitted only if `block` has no line.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines, without their leading and trailing spaces, are first
	# gathered in a temporary buffer, then emitted as markdown text.
	# A line ending with 2 spaces or more produces a line break.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` expects a length, not an end position
				var len = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, len)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is the current location of `v`
	# during its emission.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			v.push_loc(block.location)
			block.kind.emit(v)
			v.pop_loc
			block = block.next
		end
	end

	# The raw content of the block as a multi-line string.
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				if not infence and str.has_prefix(" ") then
					# NOTE(review): the second argument of `substring` is a
					# length; confirm that keeping the trailing spaces of
					# indented lines is intended here.
					text.append str.substring(4, str.length - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1201
# A block without any markdown specificities.
#
# It redefines nothing and relies on the default implementation of `Block`,
# this class is only used for typing purposes.
# It is the default `MDBlock::kind`.
class BlockNone
	super Block
end
1209
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove the blockquote markers from the lines of `block`.
	#
	# For each non-empty line starting with `>`, the marker and the
	# space following it, if any, are removed with what precedes them.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			var text = line.value
			var mark = line.leading
			if not line.is_empty and text[mark] == '>' then
				# first position to keep
				var cut = mark + 1
				if cut < text.length and text[cut] == ' ' then cut += 1
				line.value = text.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1235
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Number of chars to skip at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line through `decorator.append_code`, skipping the first
	# `line_start` chars, and end every line (empty or not) with a newline.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end
1261
# A markdown code-fence block.
#
# Behaves like `BlockCode` except that lines are emitted from their first
# char; the class is also used for typing purposes
# (see `Block::raw_content`).
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1272
# A markdown headline.
class BlockHeadline
	super Block

	# Emit through `decorator.add_headline` with a location whose
	# `column_start` is shifted by `start`.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the first char of the headline text in the original line.
	#
	# Set by `transform_headline`, 0 otherwise.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block` and set `depth`.
	#
	# Does nothing when `depth` is already set or when the first line is
	# missing or empty.
	# Otherwise the opening `#`s (their count, capped to 6, becomes `depth`),
	# the spaces around the text and the closing `#`s are removed.
	# A line left without any text is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		var start = line.leading
		while start < value.length and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < value.length and value[start] == ' ' do
			start += 1
		end
		# Walk back over the closing marks, never crossing `start`:
		# a headline such as `# #` has no text left at all.
		var nend = value.length - line.trailing - 1
		while nend >= start and value[nend] == '#' do nend -= 1
		while nend >= start and value[nend] == ' ' do nend -= 1
		if nend < start then
			line.is_empty = true
		else
			line.value = value.substring(start, nend - start + 1)
			line.leading = 0
			line.trailing = 0
		end
		self.start = start
		depth = level.min(6)
	end
end
1318
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1325
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# A new item starts at every list line and at every non-empty,
	# unindented line that follows an empty one.
	private fun init_block(v: MarkdownProcessor) do
		# The first line cannot close an item: start from the second one.
		var line = block.first_line.next
		while line != null do
			var starts_item = v.line_kind(line) isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# Whatever remains is the last item.
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# As soon as one list item of `block` directly contains a
	# `BlockParagraph`, every `BlockNone` found directly in a list item
	# becomes a `BlockParagraph` too.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph.
		var has_paragraph = false
		var item = block.first_block
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockParagraph then
						has_paragraph = true
						break
					end
					child = child.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return
		# Second pass: promote plain blocks.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1383
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1390
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1397
# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1404
# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1411
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the raw `value` of each non-empty line, every line (empty or
	# not) being followed by a newline.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				v.add line.value
			end
			v.addn
			line = line.next
		end
	end
end
1425
# A markdown line.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# Blank lines are cleared by `process_leading` and stay flagged as empty.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbours, if any, are told that they are now next to an empty line.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		var before = prev
		if before != null then before.next_empty = true
		var after = next
		if after != null then after.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# If `value` only contains spaces (or nothing), the line is cleared
	# (see `clear`) and 0 is returned.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# Compare the freshly computed count, not the stale `leading` attribute.
		if count == value.length then
			clear
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# For a `value` made only of spaces, its length is returned.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then return 0
			count += 1
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then break
			count += 1
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment` on the line where the markup starts.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contains valid XML markup?
	#
	# The markup can span over the following lines.
	# On success, `xml_end_line` is set to the line where the markup ends.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		if pos <= -1 then return false
		var tag: String = tmp.xml_tag
		if not tag.is_html_block then return false
		if tag == "hr" then
			# `hr` has no content: the block ends on this very line.
			xml_end_line = self
			return true
		end
		# Stack of the block tags still opened.
		tags.add tag
		var line: nullable MDLine = self
		while line != null do
			# Look for the next tag on the current line.
			while pos < line.value.length and line.value[pos] != '<' do
				pos += 1
			end
			if pos >= line.value.length then
				# End of line: `/` just before the last char closes the last tag.
				if pos - 2 >= 0 and line.value[pos - 2] == '/' then
					tags.pop
					if tags.is_empty then
						xml_end_line = line
						break
					end
				end
				line = line.next
				pos = 0
			else
				tmp = new FlatBuffer
				var new_pos = line.value.read_xml(tmp, pos, false)
				if new_pos > 0 then
					tag = tmp.xml_tag
					if tag.is_html_block and tag != "hr" then
						if tmp[1] == '/' then
							# A closing tag must match the last opened one.
							if tags.last != tag then return false
							tags.pop
						else
							tags.add tag
						end
					end
					if tags.is_empty then
						xml_end_line = line
						break
					end
					pos = new_pos
				else
					pos += 1
				end
			end
		end
		return tags.is_empty
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the index of the opening `<` in `first_line`.
	# Return the position following the closing `-->`, or -1 if no complete
	# comment is found. On success, `first_line.xml_end_line` is set to the
	# line holding the `-->`.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 >= line.value.length then return -1
		# The `--` of `<!--` is relative to `start`, not to the line beginning.
		if line.value[start + 2] != '-' or line.value[start + 3] != '-' then return -1
		var pos = start + 4
		while line != null do
			while pos < line.value.length and line.value[pos] != '-' do
				pos += 1
			end
			if pos == line.value.length then
				line = line.next
				pos = 0
			else
				if pos + 2 < line.value.length then
					if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
						first_line.xml_end_line = line
						return pos + 3
					end
				end
				pos += 1
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` takes a length, hence the subtraction of `leading`.
		var count = (value.length - trailing - leading).max(0)
		return value.substring(leading, count)
	end
end
1629
# A kind of markdown line.
#
# Each kind knows how to build blocks from the current line of a processor.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1637
# An empty markdown line.
class LineEmpty
	super Line

	# Nothing to build: just move to the next line.
	redef fun process(v) do
		var skipped = v.current_line
		v.current_line = skipped.next
	end
end
1646
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather lines up to the next empty line, or up to the first line
	# starting another construct, into a `BlockParagraph` (or a `BlockNone`
	# inside lists).
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 then break
			if t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# Here `line` is either null (end of the current block), an empty
		# line, or the first line of another construct.
		# Inside a list, text not preceded by an empty line stays a plain
		# block unless it is closed by an empty line.
		var plain = v.in_list and not was_empty and (line == null or not line.is_empty)
		# build block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else if line.is_empty then
			last = line
		else
			last = line.prev.as(not null)
		end
		var block = v.current_block.split(last)
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1696
# A line of markdown code.
class LineCode
	super Line

	# Gather the code lines, and the empty lines between them, into a `BlockCode`.
	redef fun process(v) do
		var line = v.current_line
		# lookup block end: first non-empty line that is not code
		while line != null do
			if not line.is_empty and not (v.line_kind(line) isa LineCode) then break
			line = line.next
		end
		# split at block end line
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var block = v.current_block.split(last)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1719
# A line of raw XML.
class LineXML
	super Line

	# Build a `BlockXML` from the current line up to its `xml_end_line`.
	redef fun process(v) do
		var first = v.current_line
		# Close what comes before the XML.
		var before = first.prev
		if before != null then v.current_block.split(before)
		var last = first.xml_end_line.as(not null)
		var block = v.current_block.split(last)
		block.kind = new BlockXML(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1734
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Build a `BlockQuote`, remove the quote markers and parse its content recursively.
	redef fun process(v) do
		var line = v.current_line
		# go to bquote end: a non-empty, unindented line following an empty
		# one that is not itself a blockquote line
		while line != null do
			var after_blank = not line.is_empty and line.prev_empty and line.leading == 0
			if after_blank and not (v.line_kind(line) isa LineBlockquote) then break
			line = line.next
		end
		# build sub block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var block = v.current_block.split(last)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1764
# A markdown ruler line.
class LineHR
	super Line

	# Build a `BlockRuler` holding only the current line.
	redef fun process(v) do
		var ruler = v.current_line
		# Close what comes before the ruler.
		var before = ruler.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(ruler.as(not null))
		block.kind = new BlockRuler(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1778
# A markdown fence code line.
class LineFence
	super Line

	# Build a `BlockFence` from the current line up to the next fence line
	# (included), or up to the end of the current block if there is none.
	#
	# The fence lines themselves are cleared; what follows the opening
	# fence token becomes the `meta` of the block.
	redef fun process(v) do
		# go to fence end
		var line = v.current_line.next
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# the closing fence belongs to the block: cut after it
		if line != null then line = line.next
		# build fence block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var block = v.current_block.split(last)
		block.remove_surrounding_empty_lines
		var meta = block.first_line.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.clear
		var closing = block.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			closing.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1812
# A markdown headline.
class LineHeadline
	super Line

	# Build a `BlockHeadline` holding only the current line, the headline
	# depth being computed from the line itself.
	redef fun process(v) do
		var title = v.current_line
		# Close what comes before the headline.
		var before = title.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(title.as(not null))
		var kind = new BlockHeadline(block)
		block.kind = kind
		kind.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1829
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Build a `BlockHeadline` of depth 1 from the current line.
	#
	# The line that follows the title is cleared.
	redef fun process(v) do
		var title = v.current_line
		# Close what comes before the headline.
		var before = title.prev
		if before != null then v.current_block.split(before)
		title.next.clear
		var block = v.current_block.split(title.as(not null))
		var kind = new BlockHeadline(block)
		kind.depth = 1
		kind.transform_headline(block)
		block.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1848
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Build a `BlockHeadline` of depth 2 from the current line.
	#
	# The line that follows the title is cleared.
	redef fun process(v) do
		var title = v.current_line
		# Close what comes before the headline.
		var before = title.prev
		if before != null then v.current_block.split(before)
		title.next.clear
		var block = v.current_block.split(title.as(not null))
		var kind = new BlockHeadline(block)
		kind.depth = 2
		kind.transform_headline(block)
		block.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1867
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Build the list block, split it into items and parse each item recursively.
	redef fun process(v) do
		var line = v.current_line
		# go to list end: a non-empty, unindented line following an empty
		# one that is not a list line
		while line != null do
			var t = v.line_kind(line)
			var after_blank = not line.is_empty and line.prev_empty and line.leading == 0
			if after_blank and not (t isa LineList) then break
			line = line.next
		end
		# build list block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var list = v.current_block.split(last)
		var kind = block_kind(list)
		list.kind = kind
		# The flags are reset both before and after the trimming since the
		# first and last lines may change.
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1913
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts 2 chars after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1924
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts 2 chars after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1935
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	fun emit(v: MarkdownEmitter) do
		v.decorator.add_char(v, char)
	end
end
1952
# A token without a specific meaning.
#
# `Token::emit` is inherited as is.
class TokenNone
	super Token
end
1957
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text following the token in a temporary buffer, up to a
	# token matching `self`.
	#
	# On a positive result from `emit_text_until`, the buffer is emitted as
	# emphasis and the emitter moves to that position.
	# Otherwise only `char` is written.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, tmp)
		v.current_pos = b
	end
end
1974
# An emphasis star token.
#
# Only used for typing purposes, the behaviour comes from `TokenEm`.
class TokenEmStar
	super TokenEm
end
1979
# An emphasis underscore token.
#
# Only used for typing purposes, the behaviour comes from `TokenEm`.
class TokenEmUnderscore
	super TokenEm
end
1984
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text following the two chars of the token in a temporary
	# buffer, up to a token matching `self`.
	#
	# On a positive result from `emit_text_until`, the buffer is emitted as
	# strong text and the emitter moves one char past that position.
	# Otherwise only `char` is written.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, tmp)
		v.current_pos = b + 1
	end
end
2001
# A strong star token.
#
# Only used for typing purposes, the behaviour comes from `TokenStrong`.
class TokenStrongStar
	super TokenStrong
end
2006
# A strong underscore token.
#
# Only used for typing purposes, the behaviour comes from `TokenStrong`.
class TokenStrongUnderscore
	super TokenStrong
end
2011
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Look for the token closing the span and emit what lies in between,
	# spaces at both ends excluded, through `decorator.add_span_code`.
	#
	# Nothing is emitted for a span made only of spaces.
	# When no closing token is found, only `char` is written.
	redef fun emit(v) do
		var a = pos + next_pos + 1
		var b = v.processor.find_token(v.current_text.as(not null), a, self)
		if b <= 0 then
			v.addc char
			return
		end
		v.current_pos = b + next_pos
		# trim the span
		while a < b and v.current_text[a] == ' ' do a += 1
		if a >= b then return
		while v.current_text[b - 1] == ' ' do b -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), a, b)
	end

	# Number of extra chars in the token (0 for single, 1 for double).
	private fun next_pos: Int is abstract
end
2034
# A span code token.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end
2041
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end
2048
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image when `check_link` succeeds, `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b <= 0 then
			v.addc char
			return
		end
		emit_hyper(v)
		v.current_pos = b
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parse the construct starting at `start` in the current text of `v` and
	# fill `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Three forms are handled after the `[name]` part: an inline `(...)`
	# target, a `[id]` reference, or nothing at all, in which case `name` is
	# looked up in `v.processor.link_refs`.
	#
	# Return the position of the last char of the construct, or -1 if
	# this is not a valid link.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		# Links open with one char, other tokens (images) with two.
		var pos = start + 2
		if token isa TokenLink then pos = start + 1
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var name_end = pos
		pos = md.skip_spaces(pos + 1)
		if pos < start then
			# End of text right after the name: implicit reference.
			var tid = name.write_to_string.to_lower
			if not v.processor.link_refs.has_key(tid) then return -1
			var lr = v.processor.link_refs[tid]
			is_abbrev = lr.is_abbrev
			link = lr.link
			comment = lr.title
			pos = name_end
		else if md[pos] == '(' then
			# Inline form: `(address "title")` or `(<address> "title")`.
			pos = md.skip_spaces(pos + 1)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				if pos > start and md[pos] == '"' then
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos + 1, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos = md.skip_spaces(pos + 1)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[id]`, an empty id meaning `name`.
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos + 1, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Something else follows the name: implicit reference.
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if not v.processor.link_refs.has_key(tid) then return -1
			var lr = v.processor.link_refs[tid]
			link = lr.link
			comment = lr.title
			pos = name_end
		end
		if link == null then return -1
		return pos
	end
end
2168
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is flagged as such and has a
	# `comment`, a regular link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if as_abbr then
			v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
		else
			v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
		end
	end
end
2181
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
2190
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit what `check_html` accepted, or escape `char` when it refused.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_html(v, tmp, v.current_text.as(not null), v.current_pos)
		if b <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add tmp
		v.current_pos = b
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links are emitted directly through the decorator, `out` being
	# left untouched. Inline HTML is written in `out`.
	# Return the position of the closing `>`, or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var tmp = new FlatBuffer
		var pos = md.read_until(tmp, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and tmp.is_link_prefix then
			pos = md.read_until(tmp, pos, '>')
			if pos != -1 then
				var link = tmp.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2228
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity as is when `check_entity` accepts it, or escape `char`.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Read `md` from `start` up to the next `;` into `out`.
	# Three forms are accepted: hexadecimal (`&#x1F;`), decimal (`&#123;`)
	# and named (`&amp;`, only letters and digits being checked).
	# On success the `;` is appended to `out` and its position returned.
	# Return -1 otherwise.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# Reject anything outside of the three hexadecimal ranges.
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check that the named entity actually exists
		end
		out.add ';'
		return pos
	end
end
2282
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape char and write the char that follows it as is.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text[escaped]
	end
end
2292
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text following the two chars of the token in a temporary
	# buffer, up to a token matching `self`.
	#
	# On a positive result from `emit_text_until`, the buffer is emitted as
	# striked text and the emitter moves one char past that position.
	# Otherwise only `char` is written.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, tmp)
		v.current_pos = b + 1
	end
end
2311
2312 redef class Text
2313
# Get the position of the next non-space character.
#
# Spaces and newlines are skipped from `start`.
# Return -1 if the end of `self` is reached.
# A negative `start` is returned unchanged.
private fun skip_spaces(start: Int): Int do
	var pos = start
	loop
		if pos <= -1 or pos >= length then break
		var c = self[pos]
		if c != ' ' and c != '\n' then break
		pos += 1
	end
	if pos >= length then return -1
	return pos
end
2323
# Read `self` until `nend` and append it to the `out` buffer.
# Escape markdown special chars.
#
# Reading starts at `start` and stops on the first unescaped char found
# in `nend`, which is not appended.
# Return the position of that char, or -1 if the end of `self` is reached.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			# `escape` is in charge of the backslash and the char after it.
			pos = escape(out, self[pos + 1], pos)
		else
			if nend.has(c) then break
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2348
# Read `self` as raw text until `nend` and append it to the `out` buffer.
# No escape is made.
#
# Reading starts at `start` and stops on the first char found in `nend`,
# which is not appended.
# Return the position of that char, or -1 if the end of `self` is reached.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2369
# Read `self` as XML until `to` and append it to the `out` buffer.
#
# Reading starts at `from` and stops on the first char found in `to` that
# is not inside a quoted string (`"..."` or `'...'`); this char is not
# appended.
# Inside a string, a backslash and the char following it are copied as is
# and the string is only closed by the quote char that opened it.
#
# Return the position of the stop char, or -1 if the end of `self` is reached.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# Copy the escaped char itself, not the backslash again.
					out.add self[pos]
					pos += 1
				end
				continue
			end
			if c == str_char then in_str = false
			out.add c
			pos += 1
			continue
		end
		if c == '"' or c == '\'' then
			# Open a string: a quote of the other kind met before the
			# closing one is plain content (handled above).
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2415
# Read `self` as XML and append it to the `out` buffer.
# Safe mode can be activated to limit reading to valid xml.
#
# `start` is the position of the opening `<`.
# In safe mode, the tag name must be a valid HTML tag, and tags flagged
# as unsafe get their `<` and `>` written as `&lt;` and `&gt;`.
#
# Return the position of the closing `>` (or of the `!` for `<!`
# constructs, where only `<!` is appended), or -1 on failure.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	var is_valid = true
	if safe_mode then
		# Read the tag name apart so it can be checked before being written.
		var tmp = new FlatBuffer
		pos = read_xml_until(tmp, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = tmp.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append tmp
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# Attributes, if any.
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = self.read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2474
# Read a markdown link address from `start` and append it to the `out` buffer.
#
# Parentheses are counted, starting with one already opened.
# The reading stops on the `)` that closes the first level or on a space
# met at the first level.
# A backslash followed by another char is handled by `escape`.
#
# Returns the position of the char that stopped the reading
# or `-1` if the end of `self` was reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var pos = start
	var depth = 1
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			pos = escape(out, self[pos + 1], pos) + 1
			continue
		end
		if c == '(' then
			depth += 1
		else if c == ')' then
			depth -= 1
		end
		var closes_link = c == ')' and depth == 0
		var splits_link = c == ' ' and depth == 1
		if closes_link or splits_link then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2501
# Read a markdown link text from `start` and append it to the `out` buffer.
#
# Square brackets are counted, starting with one already opened.
# Nested brackets are copied to `out`; the `]` that closes the first
# level stops the reading and is not copied.
#
# Returns the position of that closing `]`
# or `-1` if the end of `self` was reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var pos = start
	var depth = 1
	while pos < length do
		var c = self[pos]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
		end
		# The depth can only drop to zero on the final `]`.
		if depth == 0 then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2528
# Extract the lowercased tag name from a XML tag.
#
# The first char of `self` is skipped, as well as a `/` found right after it.
# The name is made of the letters and digits that follow;
# the last char of `self` is never part of it.
private fun xml_tag: String do
	var name = new FlatBuffer
	var last = length - 1
	var i = 1
	if length > 1 and self[1] == '/' then i = 2
	while i < last do
		var c = self[i]
		if not (c.is_digit or c.is_letter) then break
		name.add c
		i += 1
	end
	return name.write_to_string.to_lower
end
2540
# Is `self` a well-formed HTML tag name?
#
# A tag name is not empty, starts with a letter and only contains
# letters and digits, so that names such as `h1` are accepted.
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	if not self[0].is_alpha then return false
	for c in self do
		if not (c.is_alpha or c.is_digit) then return false
	end
	return true
end
2548
# Handle a backslash found at `pos` and followed by the char `c`.
#
# If `c` is one of the escapable markdown chars, `c` alone is appended
# to `out` and `pos + 1` is returned.
# Otherwise a backslash is appended and `pos` is returned unchanged.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.',
		'<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
	if not escapable.has(c) then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2561
# Extract the string found at the end of a fence opening.
#
# Leading spaces, backticks and tildes are skipped;
# what remains is returned trimmed.
# Returns `null` if `self` contains nothing else.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var c = chars[i]
		var is_fence_char = c == ' ' or c == '`' or c == '~'
		if not is_fence_char then return substring_from(i).trim
		i += 1
	end
	return null
end
2572
# Is `self` listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var name = write_to_string
	return html_unsafe_tags.has(name)
end
2575
# Is `self` a HTML block element, as listed in `html_block_tags`?
private fun is_html_block: Bool do
	var name = write_to_string
	return html_block_tags.has(name)
end
2578
# Is `self` a link prefix, as listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefix = write_to_string
	return html_link_prefixes.has(prefix)
end
2581
# Names of the HTML elements considered unsafe (see `is_html_unsafe`).
private fun html_unsafe_tags: Array[String] do
	var tags = once [
		"applet", "head", "body", "frame",
		"frameset", "iframe", "script", "object"]
	return tags
end
2583
# Names of the HTML block elements (see `is_html_block`).
private fun html_block_tags: Array[String] do
	var tags = once [
		"address", "article", "aside", "audio", "blockquote", "canvas",
		"dd", "div", "dl", "fieldset", "figcaption", "figure", "footer",
		"form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup",
		"hr", "noscript", "ol", "output", "p", "pre", "section", "table",
		"tfoot", "ul", "video"]
	return tags
end
2585
# Prefixes recognized as links (see `is_link_prefix`).
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2587 end
2588
redef class String

	# Process `self` as markdown with a new `MarkdownProcessor`
	# and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end