lib/markdown: merge processor and emitter
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output through the processor's `decorator`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # Work in extended mode (default).
34 #
35 # Behavior changes when using extended mode:
36 #
37 # * Lists and code blocks end a paragraph
38 #
39 # In normal markdown the following:
40 #
41 # ~~~md
42 # This is a paragraph
43 # * and this is not a list
44 # ~~~
45 #
46 # Will produce:
47 #
48 # ~~~html
49 # <p>This is a paragraph
50 # * and this is not a list</p>
51 # ~~~
52 #
53 # When using extended mode this changes to:
54 #
55 # ~~~html
56 # <p>This is a paragraph</p>
57 # <ul>
58 # <li>and this is not a list</li>
59 # </ul>
60 # ~~~
61 #
62 # * Fenced code blocks
63 #
64 # If you don't want to indent all your code with 4 spaces,
65 # you can wrap your code in ``` ``` ``` or `~~~`.
66 #
67 # Here's an example:
68 #
69 # ~~~md
70 # fun test do
71 # print "Hello World!"
72 # end
73 # ~~~
74 #
75 # * Code blocks meta
76 #
77 # If you want to use syntax highlighting tools, most of them need to know what kind
78 # of language they are highlighting.
79 # You can add an optional language identifier after the fence declaration to output
80 # it in the HTML render.
81 #
82 # ```nit
83 # import markdown
84 #
85 # print "# Hello World!".md_to_html
86 # ```
87 #
88 # Becomes
89 #
90 # ~~~html
91 # <pre class="nit"><code>import markdown
92 #
93 # print "# Hello World!".md_to_html
94 # </code></pre>
95 # ~~~
96 #
97 # * Underscores (Emphasis)
98 #
99 # Underscores in the middle of a word like:
100 #
101 # ~~~md
102 # Con_cat_this
103 # ~~~
104 #
105 # normally produces this:
106 #
107 # ~~~html
108 # <p>Con<em>cat</em>this</p>
109 # ~~~
110 #
111 # With extended mode they don't result in emphasis.
112 #
113 # ~~~html
114 # <p>Con_cat_this</p>
115 # ~~~
116 #
117 # * Strikethrough
118 #
119 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
120 # a strikethrough span is marked with `~~`.
121 #
122 # ~~~md
123 # ~~Mistaken text.~~
124 # ~~~
125 #
126 # becomes
127 #
128 # ~~~html
129 # <del>Mistaken text.</del>
130 # ~~~
131 var ext_mode = true
132
133 # Disable attaching MDLocation to Tokens
134 #
135 # Locations are useful for some tools but they may
136 # cause an important time and space overhead.
137 #
138 # Default = `false`
139 var no_location = false is writable
140
# Process the markdown `input` string and return the processed output.
#
# State left by a previous call (link references, current line and current
# block) is reset before `input` is parsed.
fun process(input: String): Writable do
	# Reset the parsing state.
	link_refs.clear
	last_link_ref = null
	current_line = null
	current_block = null
	# Read the lines, then split them into typed blocks.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)
	# Render the resulting block tree.
	return emit(root.kind)
end
155
# Split `input` into `MDLine`s and return the parent `MDBlock` holding them.
#
# While reading, tabs are expanded with spaces up to the next multiple of
# 4 columns and carriage returns are dropped.
# Lines accepted by `check_link_ref` are not added to the returned block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var buf = new FlatBuffer
	var line_no = 0
	var col_no = 0
	var len = input.length
	var i = 0
	while i < len do
		buf.clear
		# Width of the expanded line so far, used to align tab stops.
		var width = 0
		loop
			if i >= len then break
			col_no += 1
			var c = input[i]
			i += 1
			if c == '\n' then break
			if c == '\r' then continue
			if c == '\t' then
				var stop = width + (4 - (width & 3))
				while width < stop do
					buf.add ' '
					width += 1
				end
			else
				width += 1
				buf.add c
			end
		end
		line_no += 1

		var loc = new MDLocation(line_no, 1, line_no, col_no)
		var line = new MDLine(loc, buf.write_to_string)
		# Link reference lines are recorded by `check_link_ref`, not kept.
		if not check_link_ref(line) then root.add_line line
		col_no = 0
	end
	return root
end
198
# Check if `line` is a block link definition.
#
# Return `true` if `line` must be skipped from the document content, that is:
#
# * `line` contains a valid link reference (`[id]: link "title"`), which is
# then saved into `link_refs`;
# * or `line` is the first non-empty line following a definition read without
# a title and it holds that title between `"`, `'` or parentheses.
#
# Only the first non-empty line after a title-less definition can provide
# the title: the pending reference is forgotten once that line was examined,
# so that quoted or parenthesized lines found later in the document are kept
# as regular content.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = md.read_until(id, line.leading + 1, ']')
		if not id.is_empty and pos >= 0 and pos + 2 < md.length and md[pos + 1] == ':' then
			pos = md.skip_spaces(pos + 2)
			if pos >= 0 and md[pos] == '<' then
				# Link written as `<link>`.
				pos = md.read_until(link, pos + 1, '>')
				pos += 1
			else if pos >= 0 then
				pos = md.read_until(link, pos, ' ', '\n')
			end
			if not link.is_empty then
				pos = md.skip_spaces(pos)
				if pos > 0 and pos < md.length then
					# Something follows the link: it must be a closed title.
					var c = md[pos]
					if c == '\"' or c == '\'' or c == '(' then
						var close = c
						if c == '(' then close = ')'
						pos = md.read_until(comment, pos + 1, close)
						if pos > 0 then is_link_ref = true
					end
				else
					is_link_ref = true
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Without a title, the next line may still provide one.
		if comment.is_empty then last_link_ref = lr
		return true
	end

	# Not a definition: `line` may be the title of a pending definition.
	if line.is_empty then return false
	var pending = last_link_ref
	if pending == null then return false
	# Whatever this line contains, the definition is not pending anymore.
	last_link_ref = null

	var title = new FlatBuffer
	pos = line.leading
	var c = md[pos]
	if c == '\"' or c == '\'' or c == '(' then
		var close = c
		if c == '(' then close = ')'
		pos = md.read_until(title, pos + 1, close)
	end
	if title.is_empty then return false
	pending.title = title.write_to_string
	return true
end
269
# Known link refs, indexed by their lowercased id (see `add_link_ref`).
# This list will be needed during output to expand links.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

# Last link ref read without a title (for multiline definitions)
#
# Markdown allows a link ref to be defined over two lines, the title being
# given on the second one:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
# "Optional Title Here"
# ~~~
#
private var last_link_ref: nullable LinkRef = null

# Register the link `ref` under `key`.
#
# The key is lowercased before being stored.
fun add_link_ref(key: String, ref: LinkRef) do
	link_refs[key.to_lower] = ref
end
287
# Recursively split a `block`.
#
# The block is split according to the type of lines it contains.
# Some blocks can be split again recursively like lists.
# The `in_list` mode is used to recurse on list and build
# nested paragraphs or code blocks.
#
# The list mode and the current block of the processor are saved on entry
# and restored on exit. When `root` only contains empty lines the method
# returns without touching any state of the processor.
fun recurse(root: MDBlock, in_list: Bool) do
	# Skip leading empty lines before changing any state, so that the early
	# return cannot leave `self.in_list` set to the mode of this call.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then return
	end

	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list
	current_line = line
	current_block = root
	# Each line kind consumes lines and moves `current_line` forward.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
313
314 # Currently processed line.
315 # Used when visiting blocks with `recurse`.
316 var current_line: nullable MDLine = null is writable
317
318 # Currently processed block.
319 # Used when visiting blocks with `recurse`.
320 var current_block: nullable MDBlock = null is writable
321
322 # Is the current recursion in list mode?
323 # Used when visiting blocks with `recurse`
324 private var in_list = false
325
# The type of line.
#
# Classify `md` from its first significant character, its length and, for
# underlined headlines, the content of the line that follows it.
# Fences are only recognized in `ext_mode`.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty
	var value = md.value
	var leading = md.leading
	if leading > 3 then return new LineCode

	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Number of characters left once surrounding spaces are removed.
	var width = value.length - leading - md.trailing

	if ext_mode and width > 2 then
		if (first == '`' or first == '~') and md.count_chars_start(first) >= 3 then
			return new LineFence
		end
	end

	if width > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	# Unordered list: a marker followed by a space.
	if value.length - leading >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	# Ordered list: digits followed by a dot and a space.
	if value.length - leading >= 3 and first.is_digit then
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# Headlines underlined with `=` or `-` on the next line.
	var next = md.next
	if next != null and not next.is_empty then
		if next.count_chars('=') > 0 then return new LineHeadline1
		if next.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
381
# Get the token kind at `pos`.
#
# The decision is taken from the character at `pos`, the one before it and
# the two after it; positions outside of `text` are read as a space.
# Unless `no_location` is set, the token receives a one-character location
# computed from `current_loc` and `pos`.
fun token_at(text: Text, pos: Int): Token do
	var c0 = ' '
	var c1 = ' '
	var c2 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	if pos + 1 < text.length then c1 = text[pos + 1]
	if pos + 2 < text.length then c2 = text[pos + 2]

	var loc: nullable MDLocation = null
	if not no_location then
		var base = current_loc
		var column = base.column_start + pos
		loc = new MDLocation(base.line_start, column, base.line_start, column)
	end

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(loc, pos, c)
			return new TokenEmStar(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode, an underscore inside a word is plain text.
			if (c0.is_letter or c0.is_digit) and c0 != '_' and
			(c1.is_letter or c1.is_digit) then
				return new TokenNone(loc, pos, c)
			end
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end
	if c == '[' then return new TokenLink(loc, pos, c)
	if c == ']' then return new TokenNone(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# A backslash is an escape only before one of these characters.
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)
	return new TokenNone(loc, pos, c)
end
483
# Find the position of a `token` in `text`.
#
# The search starts at `start` and stops at the first position where
# `token_at` gives a token of the same type as `token`.
# Return `-1` when no such position exists.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
495
496 # Kind of decorator used for decoration.
497 type DECORATOR: Decorator
498
499 # Decorator used for output.
500 # Default is `HTMLDecorator`
501 var decorator: DECORATOR is writable, lazy do
502 return new HTMLDecorator
503 end
504
505 # Create a new `MarkdownProcessor` using a custom `decorator`.
506 init with_decorator(decorator: DECORATOR) do
507 self.decorator = decorator
508 end
509
# Output `block` using `decorator` and return the produced text.
#
# A new buffer is pushed while `block` is emitted, then popped and returned.
fun emit(block: Block): Text do
	var out = push_buffer
	block.emit(self)
	pop_buffer
	return out
end

# Output the content of `block`.
fun emit_in(block: Block) do
	block.emit_in(self)
end

# Transform and emit the whole markdown `text`.
fun emit_text(text: Text) do
	emit_text_until(text, 0, null)
end
523
# Transform and emit markdown text starting at `start` and
# until a token with the same type as `token` is found.
# Go until the end of `text` if `token` is null.
#
# Return the position of the stop token, or `-1` if the end of `text` was
# reached. A strong token (`**` or `__`) also stops a search for the matching
# emphasis token.
#
# `current_text` and `current_pos` are restored only when the end of `text`
# is reached; when the stop token is found they are left as they are.
fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
	var saved_text = current_text
	var saved_pos = current_pos
	current_text = text
	current_pos = start
	while current_pos < text.length do
		if text[current_pos] == '\n' then
			# Move the current location to the next line.
			var loc = current_loc
			loc.line_start += 1
			loc.column_start = -current_pos
		end
		var found = token_at(text, current_pos)
		if (token != null and not token isa TokenNone) and
		(found.is_same_type(token) or
		(token isa TokenEmStar and found isa TokenStrongStar) or
		(token isa TokenEmUnderscore and found isa TokenStrongUnderscore)) then
			return current_pos
		end
		found.emit(self)
		current_pos += 1
	end
	current_text = saved_text
	current_pos = saved_pos
	return -1
end
551
552 # Currently processed position in `current_text`.
553 # Used when visiting inline production with `emit_text_until`.
554 private var current_pos: Int = -1
555
556 # Currently processed text.
557 # Used when visiting inline production with `emit_text_until`.
558 private var current_text: nullable Text = null
559
# Stack of output buffers, the last one being the current one.
private var buffer_stack = new List[FlatBuffer]

# Create a new buffer, push it on the stack and return it.
private fun push_buffer: FlatBuffer do
	var fresh = new FlatBuffer
	buffer_stack.add fresh
	return fresh
end

# Remove the last buffer from the stack.
private fun pop_buffer do
	buffer_stack.pop
end

# Current output buffer (the last pushed one).
#
# The stack must not be empty.
private fun current_buffer: FlatBuffer do
	assert not buffer_stack.is_empty
	return buffer_stack.last
end
578
# Stack of locations, the last one being the current one.
private var loc_stack = new List[MDLocation]

# Push `location` on the stack.
private fun push_loc(location: MDLocation) do
	loc_stack.add location
end

# Remove the last location from the stack and return it.
private fun pop_loc: MDLocation do
	return loc_stack.pop
end

# Current location (the last pushed one).
#
# The stack must not be empty.
private fun current_loc: MDLocation do
	assert not loc_stack.is_empty
	return loc_stack.last
end
593
# Append `e` to the current buffer.
#
# A `Text` is appended as is, anything else through `write_to_string`.
fun add(e: Writable) do
	var out = current_buffer
	if e isa Text then
		out.append e
	else
		out.append e.write_to_string
	end
end

# Append the char `c` to the current buffer.
fun addc(c: Char) do
	current_buffer.add c
end

# Append a "\n" line break.
fun addn do
	addc '\n'
end
610 end
611
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Target of the link (href).
	var link: String

	# Title of the link, if any.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a reference to `link` with an optional `title`.
	init with_title(link: String, title: nullable String) do
		init(link)
		self.title = title
	end
end
635
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Every service receives the processor `v` the output must be written to.
interface Decorator

	# Kind of processor used
	type PROCESSOR: MarkdownProcessor

	# Render a single plain char.
	#
	# The default implementation appends `c` unchanged.
	# Redefine this method to add special escaping for plain text.
	fun add_char(v: PROCESSOR, c: Char) do
		v.addc c
	end

	# Render a ruler block.
	fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: PROCESSOR, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract

	# Render an emphasis text.
	fun add_em(v: PROCESSOR, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: PROCESSOR, text: Text) is abstract

	# Render a strike text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: PROCESSOR, text: Text) is abstract

	# Render a link to `link` labeled `name`, with an optional title `comment`.
	fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image located at `link`, with an optional title `comment`.
	fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an abbreviation.
	fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract

	# Render a code span reading from a buffer.
	fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: PROCESSOR, value: Text) is abstract

	# Render code text from buffer and escape it.
	fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: PROCESSOR, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: PROCESSOR) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
713
# A headline found in a markdown document.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
727
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	# Headlines are recorded in `headlines` under the id given by `strip_id`.
	# Nothing is recorded nor rendered for a headline block without line.
	redef fun add_headline(v, block) do
		var line = block.block.first_line
		if line == null then return
		# Record the headline.
		var title = line.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		# Render it.
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# The meta of the block, if any, is escaped and used as class of `<pre>`.
	redef fun add_code(v, block) do
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# `link`, `name` and `comment` are escaped; the title attribute is only
	# written for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		var has_title = comment != null and not comment.is_empty
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# `link` and `comment` are escaped while `name` is processed as markdown;
	# the title attribute is only written for a non-empty `comment`.
	redef fun add_link(v, link, name, comment) do
		var has_title = comment != null and not comment.is_empty
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	# `comment` is escaped while `name` is processed as markdown.
	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# `&`, `<`, `>`, `"` and `'` are replaced by entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Only `&`, `<` and `>` are replaced by entities, quotes are kept.
	# Chars are read from `from` (included) to `to` (excluded).
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Spaces become `_`; other chars are kept only if they are letters,
	# digits or one of `allowed_id_chars`.
	# If the id is already used in `headlines`, `_1`, `_2`... is appended
	# until an unused id is found.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var base = b.to_s
		if not headlines.has_key(base) then return base
		# Look for the first free numbered variant.
		var i = 1
		var key = "{base}_{i}"
		while headlines.has_key(key) do
			i += 1
			key = "{base}_{i}"
		end
		return key
	end

	# Chars kept in ids in addition to letters and digits.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
919
# Location in a Markdown input.
#
# Displayed as `line_start,column_start--line_end,column_end`.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	redef fun to_s do
		var from = "{line_start},{column_start}"
		var to = "{line_end},{column_end}"
		return "{from}--{to}"
	end

	# Return a new location with the same four bounds as `self`.
	fun copy: MDLocation do
		var dup = new MDLocation(line_start, column_start, line_end, column_end)
		return dup
	end
end
942
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines are chained through `MDLine::prev` and `MDLine::next`,
# sub-blocks through `prev` and `next`.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var sub = first_block
		while sub != null do
			n += 1
			sub = sub.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var n = 0
		var line = first_line
		while line != null do
			n += 1
			line = line.next
		end
		return n
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new block, which
	# is appended to the sub-blocks of `self` and returned.
	# The start of `location` is moved to the first remaining line, if any.
	# `self` must contain at least one line.
	fun split(line: MDLine): MDBlock do
		var head = first_line.as(not null)
		# location for new block
		var new_loc = new MDLocation(
			head.location.line_start,
			head.location.column_start,
			line.location.line_end,
			line.location.column_end)
		# create block
		var block = new MDBlock(new_loc)
		block.first_line = head
		block.last_line = line
		# detach the moved lines from the remaining ones
		var rest = line.next
		first_line = rest
		line.next = null
		if rest == null then
			last_line = null
		else
			rest.prev = null
			# update current block loc
			location.line_start = rest.location.line_start
			location.column_start = rest.location.column_start
		end
		# append the new block, keeping both `next` and `prev` links up to date
		if first_block == null then
			first_block = block
		else
			var tail = last_block.as(not null)
			tail.next = block
			block.prev = tail
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbors, `first_line` and `last_line`
	# are updated if needed.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		loop
			var line = first_line
			if line == null or not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		loop
			var line = last_line
			if line == null or not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var leading = remove_leading_empty_lines
		var trailing = remove_trailing_empty_lines
		return leading or trailing
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# the value changed, so does the leading space count
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line gives one `"\n"` terminated line, empty lines included.
	fun text: String do
		var res = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then res.append line.text
			res.append "\n"
			line = line.next
		end
		return res.write_to_string
	end
end
1126
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	fun emit(v: MarkdownProcessor) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are only emitted when no line remains.
	fun emit_in(v: MarkdownProcessor) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first gathered, without their leading and trailing spaces,
	# in a temporary buffer that is then processed as inline markdown.
	# A line ending with two spaces or more produces a line break.
	fun emit_lines(v: MarkdownProcessor) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` expects a length, not an end index: the leading
				# spaces must be subtracted too or trailing spaces are kept.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then v.addn
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is pushed while it is emitted.
	fun emit_blocks(v: MarkdownProcessor) do
		var sub = self.block.first_block
		while sub != null do
			v.push_loc(sub.location)
			sub.kind.emit(v)
			v.pop_loc
			sub = sub.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Outside of fences, lines starting with 4 spaces lose this indentation
	# and their trailing spaces. Other lines are kept unchanged.
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				# 4 chars are dropped, so all of them must be spaces.
				if not infence and str.has_prefix("    ") then
					# `substring` expects a length, not an end index.
					text.append str.substring(4, str.length - 4 - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1196
# A block without any markdown specificities.
#
# Nothing is redefined here: the behavior is the one inherited from `Block`,
# this class is only used for typing purposes.
class BlockNone
	super Block
end
1204
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do v.decorator.add_blockquote(v, self)

	# Remove blockquote markers from the lines of `block`.
	#
	# For each non-empty line starting with `>` (after its leading spaces),
	# everything up to the marker is removed, as well as one space following
	# the marker if any.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			var value = line.value
			var marker = line.leading
			if not line.is_empty and value[marker] == '>' then
				var rem = marker + 1
				if rem < value.length and value[rem] == ' ' then rem += 1
				line.value = value.substring_from(rem)
				# the value changed, so does the leading space count
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1230
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Index of the first character of each line given to the decorator.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each non-empty line through `append_code`, from `line_start` to
	# the end of the line; every line, empty or not, is followed by a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				var code = current.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			current = current.next
		end
	end
end
1256
# A markdown code-fence block.
#
# Behaves like `BlockCode` except for `line_start`;
# the class is also used to recognize fences by their type.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1267
# A markdown headline.
class BlockHeadline
	super Block

	# Emit the headline with a location whose `column_start` is shifted by `start`.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index, in the original first line, where the headline text starts.
	#
	# Set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# Leading `#` and spaces, then trailing spaces and `#`, are stripped from
	# the line; `depth` is set to the number of leading `#`, capped at 6.
	# Nothing is done when `depth` is already set, or when `block` has no
	# line or starts with an empty line.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var line = block.first_line
		if line == null then return
		if line.is_empty then return
		var value = line.value
		var level = 0
		var start = line.leading
		while start < value.length and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < value.length and value[start] == ' ' do start += 1
		if start >= value.length then
			# Only marks on this line: no text at all.
			line.is_empty = true
		else
			var nend = value.length - line.trailing - 1
			# The backward scans stop at `start`: for a headline such as `# #`
			# the closing marks are all that follows the opening ones, and
			# scanning further back would produce a negative length.
			while nend >= start and value[nend] == '#' do nend -= 1
			while nend >= start and value[nend] == ' ' do nend -= 1
			if nend < start then
				# Nothing left between the opening and closing marks.
				line.is_empty = true
			else
				line.value = value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		self.start = start
		depth = level.min(6)
	end
end
1314
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1321
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split the lines of `block` into `BlockListItem` sub-blocks.
	#
	# Starting from the second line, a cut is made before each list line and
	# before each unindented, non-empty line that follows an empty one.
	# The remaining lines form the last item.
	private fun init_block(v: MarkdownProcessor) do
		var first = block.first_line
		if first == null then return
		var line = first.next
		while line != null do
			var on_list_line = v.line_kind(line) isa LineList
			var after_gap = not line.is_empty and line.prev_empty and line.leading == 0
			if on_list_line or after_gap then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# As soon as one list item of `block` holds a `BlockParagraph`, every
	# `BlockNone` found in the list items of `block` becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		if not has_item_paragraph(block) then return
		var item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then child.kind = new BlockParagraph(child)
					child = child.next
				end
			end
			item = item.next
		end
	end

	# Does a list item directly under `block` directly hold a `BlockParagraph`?
	private fun has_item_paragraph(block: MDBlock): Bool do
		var item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockParagraph then return true
					child = child.next
				end
			end
			item = item.next
		end
		return false
	end
end
1380
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1387
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1394
# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1401
# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1408
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the value of each non-empty line unchanged;
	# every line, empty or not, is followed by a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1422
# A markdown line.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# `leading`, `trailing` and `is_empty` are computed from `value`.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The `next_empty` flag of `prev` and the `prev_empty` flag of `next`
	# are raised accordingly.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		var before = prev
		if before != null then before.next_empty = true
		var after = next
		if after != null then after.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# When `value` is made of spaces only (or is empty), the line is cleared
	# (see `clear`) and 0 is returned.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# Compare the count just computed, not the previous `leading` attribute
		# that may describe an older `value`.
		if count == value.length then
			clear
			# `value` is now empty: it has no leading space.
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns `value.length` when `value` is made of spaces only.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# The bound keeps the scan inside `value` for empty or blank values.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			# Any other character disqualifies the whole line.
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment`.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# On success, `xml_end_line` is the line where the markup ends.
	# The markup can span the following lines (see `next`).
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		if pos <= -1 then return false
		var tag = tmp.xml_tag
		if not tag.is_html_block then return false
		if tag == "hr" then
			xml_end_line = self
			return true
		end
		# Stack of the block tags opened so far and not closed yet.
		tags.add tag
		var line: nullable MDLine = self
		while line != null do
			# Go to the next `<` of the current line.
			while pos < line.value.length and line.value[pos] != '<' do
				pos += 1
			end
			if pos >= line.value.length then
				# End of line reached: a `/` two characters before the end
				# closes the innermost tag.
				if pos - 2 >= 0 and line.value[pos - 2] == '/' then
					tags.pop
					if tags.is_empty then
						xml_end_line = line
						break
					end
				end
				line = line.next
				pos = 0
			else
				tmp = new FlatBuffer
				var new_pos = line.value.read_xml(tmp, pos, false)
				if new_pos > 0 then
					tag = tmp.xml_tag
					if tag.is_html_block and tag != "hr" then
						if tmp[1] == '/' then
							# A closing tag must match the innermost open tag.
							if tags.last != tag then return false
							tags.pop
						else
							tags.add tag
						end
					end
					if tags.is_empty then
						xml_end_line = line
						break
					end
					pos = new_pos
				else
					pos += 1
				end
			end
		end
		return tags.is_empty
	end

	# Read an XML comment starting at index `start` of `first_line`.
	# Used by `check_html`.
	#
	# Returns the index following the closing `-->` (in the line where it was
	# found) and sets `first_line.xml_end_line` to that line.
	# Returns -1 when no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var opening = first_line.value
		if start + 3 >= opening.length then return -1
		# The `--` of `<!--` is looked for relative to `start`, so that
		# comments preceded by leading spaces are recognized too.
		if opening[start + 2] != '-' or opening[start + 3] != '-' then return -1
		var line: nullable MDLine = first_line
		var pos = start + 4
		while line != null do
			while pos < line.value.length and line.value[pos] != '-' do
				pos += 1
			end
			if pos == line.value.length then
				line = line.next
				pos = 0
			else
				if pos + 2 < line.value.length then
					if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
						first_line.xml_end_line = line
						return pos + 3
					end
				end
				pos += 1
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` expects a number of characters, not an end index.
		var count = value.length - leading - trailing
		if count <= 0 then return ""
		return value.substring(leading, count)
	end
end
1626
# A kind of markdown line.
#
# Each kind knows how to process the lines it stands for.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1634
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the next one.
	redef fun process(v) do
		var skipped = v.current_line.as(not null)
		v.current_line = skipped.next
	end
end
1643
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the current line and the following ones into a paragraph-like block.
	#
	# The block is a `BlockNone` when it ends on a non-empty line or at the
	# end of the input, inside a list, and was not preceded by an empty line.
	# It is a `BlockParagraph` otherwise.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.as(not null).prev_empty
		# Walk down to the first line that does not belong to the block.
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
			   t isa LineHR or t isa LineBlockquote or t isa LineXML then
				break
			end
			line = line.next
		end
		var current_block = v.current_block.as(not null)
		var block: MDBlock
		var plain: Bool
		if line == null then
			# No more lines: everything left belongs to the block.
			block = current_block.split(current_block.last_line.as(not null))
			plain = v.in_list and not was_empty
		else if line.is_empty then
			# The empty line is taken with the block, which is a real paragraph.
			block = current_block.split(line)
			plain = false
		else
			# Stopped on another construct: the cut is made just before it.
			block = current_block.split(line.prev.as(not null))
			plain = v.in_list and not was_empty
		end
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1694
# A line of markdown code.
class LineCode
	super Line

	# Gather the consecutive code lines (empty lines included) into a `BlockCode`.
	redef fun process(v) do
		var cursor = v.current_line
		# Find the first line that is neither empty nor code.
		while cursor != null and (cursor.is_empty or v.line_kind(cursor) isa LineCode) do
			cursor = cursor.next
		end
		var current_block = v.current_block.as(not null)
		# Cut just before that line, or at the very end when there is none.
		var boundary: MDLine
		if cursor == null then
			boundary = current_block.last_line.as(not null)
		else
			boundary = cursor.prev.as(not null)
		end
		var code = current_block.split(boundary)
		code.kind = new BlockCode(code)
		code.remove_surrounding_empty_lines
		v.current_line = current_block.first_line
	end
end
1718
# A line of raw XML.
class LineXML
	super Line

	# Gather the lines from the current one to its `xml_end_line` into a `BlockXML`.
	#
	# Lines located before the current one are split apart first.
	redef fun process(v) do
		var opening = v.current_line
		if opening == null then return
		var current_block = v.current_block.as(not null)
		var before = opening.prev
		if before != null then current_block.split(before)
		var xml = current_block.split(opening.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1735
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote into a `BlockQuote` and process its content.
	#
	# The quote stops before the first unindented, non-empty line that follows
	# an empty line and is not itself a blockquote line.
	redef fun process(v) do
		var cursor = v.current_line
		var current_block = v.current_block.as(not null)
		while cursor != null do
			var breaks_quote = not cursor.is_empty and cursor.prev_empty and
				cursor.leading == 0 and
				not (v.line_kind(cursor) isa LineBlockquote)
			if breaks_quote then break
			cursor = cursor.next
		end
		# Cut before the stopping line, or at the very end when there is none.
		var boundary: MDLine
		if cursor == null then
			boundary = current_block.last_line.as(not null)
		else
			boundary = cursor.prev.as(not null)
		end
		var quote = current_block.split(boundary)
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		v.current_line = cursor
		# The unquoted content is markdown on its own.
		v.recurse(quote, false)
		v.current_line = current_block.first_line
	end
end
1766
# A markdown ruler line.
class LineHR
	super Line

	# Put the current line alone in a `BlockRuler`.
	#
	# Lines located before the current one are split apart first.
	redef fun process(v) do
		var ruler_line = v.current_line
		if ruler_line == null then return
		var current_block = v.current_block.as(not null)
		if ruler_line.prev != null then
			current_block.split(ruler_line.prev.as(not null))
		end
		var ruler = current_block.split(ruler_line)
		ruler.kind = new BlockRuler(ruler)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1782
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the closing fence into a `BlockFence`.
	#
	# The opening fence line is cleared after its meta has been read;
	# the last line is cleared too when it is a fence line.
	# Without closing fence, the block extends to the last line.
	redef fun process(v) do
		var after = v.current_line.as(not null).next
		var current_block = v.current_block.as(not null)
		# Look for the closing fence...
		while after != null and not (v.line_kind(after) isa LineFence) do
			after = after.next
		end
		# ... and step past it.
		if after != null then after = after.next
		var fence: MDBlock
		if after == null then
			fence = current_block.split(current_block.last_line.as(not null))
		else
			fence = current_block.split(after.prev.as(not null))
		end
		fence.remove_surrounding_empty_lines
		var opening = fence.first_line.as(not null)
		fence.kind = new BlockFence(fence, opening.value.meta_from_fence)
		opening.clear
		var closing = fence.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			closing.clear
		end
		# The cleared fence lines are now empty lines around the code.
		fence.remove_surrounding_empty_lines
		v.current_line = after
	end
end
1817
# A markdown headline.
class LineHeadline
	super Line

	# Put the current line alone in a `BlockHeadline` and strip its marks.
	#
	# Lines located before the current one are split apart first.
	redef fun process(v) do
		var title = v.current_line
		if title == null then return
		var current_block = v.current_block.as(not null)
		var before = title.prev
		if before != null then current_block.split(before)
		var headline = current_block.split(title)
		var kind = new BlockHeadline(headline)
		headline.kind = kind
		kind.transform_headline(headline)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1836
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Put the current line alone in a `BlockHeadline` of depth 1.
	#
	# The line following the title is cleared (presumably the underline
	# that marks the headline).
	redef fun process(v) do
		var title = v.current_line
		if title == null then return
		var current_block = v.current_block.as(not null)
		var before = title.prev
		if before != null then current_block.split(before)
		title.next.as(not null).clear
		var headline = current_block.split(title)
		var kind = new BlockHeadline(headline)
		kind.depth = 1
		kind.transform_headline(headline)
		headline.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1857
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Put the current line alone in a `BlockHeadline` of depth 2.
	#
	# The line following the title is cleared (presumably the underline
	# that marks the headline).
	redef fun process(v) do
		var title = v.current_line
		if title == null then return
		var current_block = v.current_block.as(not null)
		var before = title.prev
		if before != null then current_block.split(before)
		title.next.as(not null).clear
		var headline = current_block.split(title)
		var kind = new BlockHeadline(headline)
		kind.depth = 2
		kind.transform_headline(headline)
		headline.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1878
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Gather the lines of the list into a list block, split it into items
	# and process each item.
	#
	# The list stops before the first unindented, non-empty line that follows
	# an empty line and is not a list line.
	redef fun process(v) do
		var cursor = v.current_line
		while cursor != null do
			var t = v.line_kind(cursor)
			var breaks_list = not cursor.is_empty and cursor.prev_empty and
				cursor.leading == 0 and not (t isa LineList)
			if breaks_list then break
			cursor = cursor.next
		end
		var current_block = v.current_block.as(not null)
		# Cut before the stopping line, or at the very end when there is none.
		var boundary: MDLine
		if cursor == null then
			boundary = current_block.last_line.as(not null)
		else
			boundary = cursor.prev.as(not null)
		end
		var list = current_block.split(boundary)
		var kind = block_kind(list)
		list.kind = kind
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		list.remove_surrounding_empty_lines
		# Same reset again, on the first and last lines as they are after the removal.
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		kind.init_block(v)
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = cursor
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1925
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts 2 characters after the first `.` of the line.
	redef fun extract_value(line) do
		var text = line.value
		var dot = text.index_of('.')
		return text.substring_from(dot + 2)
	end
end
1936
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts 2 characters after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1947
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: nullable MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownProcessor::decorator`.
	#
	# By default, `char` is given to `add_char`.
	fun emit(v: MarkdownProcessor) do
		v.decorator.add_char(v, char)
	end
end
1964
# A token without a specific meaning.
#
# Emitted as a plain character (see `Token::emit`).
class TokenNone
	super Token
end
1969
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# When `emit_text_until` reports no match (result <= 0),
	# only `char` is output.
	redef fun emit(v) do
		var content = v.push_buffer
		var text = v.current_text.as(not null)
		var closing = v.emit_text_until(text, pos + 1, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = closing
	end
end
1986
# An emphasis token written with a star.
class TokenEmStar
	super TokenEm
end
1991
# An emphasis token written with an underscore.
class TokenEmUnderscore
	super TokenEm
end
1996
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# The content starts 2 characters after `pos`.
	# When `emit_text_until` reports no match (result <= 0),
	# only `char` is output.
	redef fun emit(v) do
		var content = v.push_buffer
		var text = v.current_text.as(not null)
		var closing = v.emit_text_until(text, pos + 2, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = closing + 1
	end
end
2013
# A strong token written with stars.
class TokenStrongStar
	super TokenStrong
end
2018
# A strong token written with underscores.
class TokenStrongUnderscore
	super TokenStrong
end
2023
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces at both ends of the span are dropped; a span made of spaces
	# only outputs nothing. When `find_token` reports no match (result <= 0),
	# only `char` is output.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var from = pos + next_pos + 1
		var to = v.find_token(text, from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		while from < to and text[from] == ' ' do from += 1
		# Nothing but spaces between the two tokens.
		if from >= to then return
		while text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, text, from, to)
	end

	# Number of characters to skip in addition to the token character
	# (see the subclasses).
	private fun next_pos: Int is abstract
end
2047
# A span code token.
class TokenCodeSingle
	super TokenCode

	# No extra character to skip.
	redef fun next_pos do
		return 0
	end
end
2054
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# One extra character to skip.
	redef fun next_pos do
		return 1
	end
end
2061
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image when the construct is valid, `char` alone otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownProcessor) is abstract

	# Check if the link is a valid link.
	#
	# On success, `name`, `link` and optionally `comment` and `is_abbrev`
	# are set, and the position of the last character of the construct in
	# `v.current_text` is returned. Returns -1 otherwise.
	#
	# `start` is the position of `token` in `v.current_text`.
	# `out` is not used.
	private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		if md == null then return -1
		# Position following the opening marker: 1 character for links, 2 otherwise.
		var pos: Int
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing follows the name: it must be a known reference.
			var tid = name.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form: `(address "title")`.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# The `>` can be the last character of the text: there is
			# no closing `)` to look for in that case.
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				if pos > start and md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[id]`, an empty id standing for the name.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Something else follows the name: it must be a known reference.
			var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2182
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is one and has a title,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
			return
		end
		v.decorator.add_link(v, link.as(not null), name.as(not null), title)
	end
end
2195
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
2204
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup starting at the current position when it is accepted by
	# `check_html`; `char` is escaped otherwise.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var text = v.current_text.as(not null)
		var closing = check_html(v, markup, text, v.current_pos)
		if closing <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = closing
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links (`<prefix:...>`) are emitted directly as links.
	# Inline HTML is read into `out`.
	# Returns the position of the closing `>`, or -1.
	private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var address = new FlatBuffer
		var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
		var is_auto_link = pos != -1 and md[pos] == ':' and address.is_link_prefix
		if is_auto_link then
			# Read the rest of the address, from the `:` on.
			pos = md.read_until(address, pos, '>')
			if pos != -1 then
				var link = address.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2242
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity starting at `pos` unchanged when it is well formed;
	# `char` is escaped otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`.
	# Accepted forms are `&#x` followed by hexadecimal digits, `&#` followed
	# by decimal digits, and `&` followed by letters and digits.
	# On success `;` is appended to `out` and the position of the `;` in `md`
	# is returned. Returns -1 otherwise.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# A character is rejected when it lies outside all of the
					# three hexadecimal ranges.
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
		end
		out.add ';'
		return pos
	end
end
2296
# A markdown escape token.
class TokenEscape
	super Token

	# Output the character following the current position as is.
	redef fun emit(v) do
		v.current_pos += 1
		var text = v.current_text.as(not null)
		v.addc text[v.current_pos]
	end
end
2306
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as a strike.
	#
	# The content starts 2 characters after `pos`.
	# When `emit_text_until` reports no match (result <= 0),
	# only `char` is output.
	redef fun emit(v) do
		var content = v.push_buffer
		var text = v.current_text.as(not null)
		var closing = v.emit_text_until(text, pos + 2, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = closing + 1
	end
end
2325
2326 redef class Text
2327
# Get the position of the next character, from `start` on, that is
# neither a space nor a newline.
#
# Returns -1 when the end of `self` is reached first.
# A negative `start` is returned unchanged.
private fun skip_spaces(start: Int): Int do
	var index = start
	loop
		if index < 0 or index >= length then break
		var c = self[index]
		if c != ' ' and c != '\n' then break
		index += 1
	end
	if index >= length then return -1
	return index
end
2337
# Read `self` from `start` until one of the `nend` characters is found,
# and append what was read to the `out` buffer.
# Escape markdown special chars.
#
# Returns the position of the character found,
# or -1 when the end of `self` is reached first.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			# A backslash followed by a character is handled by `escape`.
			pos = escape(out, self[pos + 1], pos)
		else
			if nend.has(c) then break
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2355
# Read `self` as raw text from `start` until one of the `nend` characters
# is found, and append what was read to the `out` buffer.
# No escape is made.
#
# Returns the position of the character found,
# or -1 when the end of `self` is reached first.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length and not nend.has(self[pos]) do
		out.add self[pos]
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2376
# Read `self` as XML from `from` until one of the `to` characters is found
# outside of a quoted string, and append what was read to the `out` buffer.
#
# Strings are delimited by `"` or `'`. Inside a string, a backslash and the
# character following it are copied as is, and the other quote character is
# a regular character.
#
# Returns the position of the character found,
# or -1 when the end of `self` is reached first.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# Copy the escaped character itself, not the backslash again.
					out.add self[pos]
					pos += 1
				end
				continue
			end
			# Only the quote that opened the string can close it.
			if c == str_char then in_str = false
		else if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2422
# Read the XML tag starting at `start` and append it to the `out` buffer.
# Safe mode can be activated to limit reading to valid xml.
#
# In safe mode the tag name is checked: invalid names are refused and
# unsafe ones are output with `&lt;` and `&gt;` in place of `<` and `>`.
# For a `<!` opening, only these two characters are appended and
# `start + 1` is returned.
#
# Returns the position of the closing `>`, or -1 when no tag can be read.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = marker == '/'
	# The tag name follows `<`, or `</` for a closing tag.
	var pos = start + 1
	if is_close_tag then pos = start + 2
	var is_valid = true
	if safe_mode then
		var tag_name = new FlatBuffer
		pos = read_xml_until(tag_name, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = tag_name.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append tag_name
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# What follows the tag name, up to `/` or `>`.
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2481
# Read a markdown link address from `start` and append it to the `out` buffer.
#
# Parentheses are counted with one `(` considered already open before
# `start`. Reading stops, without copying the stopping char, either on a
# space met at that initial depth or on the `)` that closes it.
# A backslash followed by another char is delegated to `escape`.
#
# Returns the position of the stopping char, or `-1` when the end of `self`
# is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var cursor = start
	var depth = 1
	while cursor < length do
		var current = self[cursor]
		if current == '\\' and cursor + 1 < length then
			cursor = escape(out, self[cursor + 1], cursor)
			cursor += 1
			continue
		end
		if current == '(' then
			depth += 1
		else if current == ')' then
			depth -= 1
			if depth == 0 then return cursor
		else if current == ' ' and depth == 1 then
			return cursor
		end
		out.add current
		cursor += 1
	end
	if cursor == length then return -1
	return cursor
end
2508
# Read a markdown link text from `start` and append it to the `out` buffer.
#
# Square brackets are counted with one `[` considered already open before
# `start`; nested `[` and `]` pairs are copied as part of the text.
# Reading stops on the `]` that closes the initial bracket, which is not
# copied.
#
# Returns the position of that `]`, or `-1` when the end of `self` is
# reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var cursor = start
	var depth = 1
	while cursor < length do
		var current = self[cursor]
		if current == '[' then
			depth += 1
		else if current == ']' then
			depth -= 1
			if depth == 0 then return cursor
		end
		out.add current
		cursor += 1
	end
	if cursor == length then return -1
	return cursor
end
2535
# Extract the lowercased tag name from `self`, read as an XML tag.
#
# The first char is skipped, as is a `/` found right after it.
# The name is the following run of letters and digits; the last char of
# `self` is never part of it.
private fun xml_tag: String do
	var name = new FlatBuffer
	var idx = 1
	if length > 1 and self[1] == '/' then idx = 2
	var last = length - 1
	while idx < last do
		var ch = self[idx]
		if not (ch.is_digit or ch.is_letter) then break
		name.add ch
		idx += 1
	end
	return name.write_to_string.to_lower
end
2547
# Is `self` usable as an HTML tag name?
#
# A valid name is not empty, starts with a letter and contains only
# letters and digits, so that names like `h1` are accepted while `1h`,
# `a-b` or the empty string are not.
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	# A tag name cannot start with a digit.
	if not self[0].is_alpha then return false
	for c in self do
		if not (c.is_alpha or c.is_digit) then return false
	end
	return true
end
2555
# Handle `c`, the char that follows a backslash located at `pos`.
#
# When `c` is one of the escapable markdown chars, only `c` is appended to
# `out` and `pos + 1` is returned so that the caller skips it.
# Otherwise the backslash is appended as is and `pos` is returned.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or
		c == '{' or c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or
		c == '<' or c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or
		c == '!' or c == '`' or c == '~' or c == '^'
	if not escapable then
		# Not an escape sequence: keep the backslash itself.
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2568
# Extract the text that follows the fence marker in `self`.
#
# Leading spaces, backticks and tildes are skipped; what remains is
# returned trimmed. Returns `null` when `self` holds nothing else.
private fun meta_from_fence: nullable Text do
	var idx = 0
	var total = chars.length
	while idx < total do
		var ch = chars[idx]
		var is_fence_char = ch == ' ' or ch == '`' or ch == '~'
		if not is_fence_char then return substring_from(idx).trim
		idx += 1
	end
	return null
end
2579
# Is `self` the name of an unsafe HTML element?
#
# The name is looked up, as is, in `html_unsafe_tags`.
private fun is_html_unsafe: Bool do
	var name = self.write_to_string
	return html_unsafe_tags.has(name)
end
2582
# Is `self` the name of an HTML block element?
#
# The name is looked up, as is, in `html_block_tags`.
private fun is_html_block: Bool do
	var name = self.write_to_string
	return html_block_tags.has(name)
end
2585
# Is `self` a link prefix?
#
# The text is looked up, as is, in `html_link_prefixes`.
private fun is_link_prefix: Bool do
	var prefix = self.write_to_string
	return html_link_prefixes.has(prefix)
end
2588
# Names of the HTML elements considered unsafe (see `is_html_unsafe`).
#
# The array is built `once` and shared between calls.
private fun html_unsafe_tags: Array[String] do
	return once [
		"applet", "head", "body", "frame",
		"frameset", "iframe", "script", "object"]
end
2590
# Names of the HTML block elements (see `is_html_block`).
#
# The array is built `once` and shared between calls.
private fun html_block_tags: Array[String] do
	return once [
		"address", "article", "aside", "audio", "blockquote", "canvas",
		"dd", "div", "dl", "fieldset", "figcaption", "figure", "footer",
		"form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup",
		"hr", "noscript", "ol", "output", "p", "pre", "section", "table",
		"tfoot", "ul", "video"]
end
2592
# Prefixes recognized as links (see `is_link_prefix`).
#
# The array is built `once` and shared between calls.
private fun html_link_prefixes: Array[String] do
	return once [
		"http", "https",
		"ftp", "ftps"]
end
2594 end
2595
redef class String

	# Render `self`, read as markdown, to HTML.
	#
	# Shortcut that runs `self` through a new `MarkdownProcessor`.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		var proc = new MarkdownProcessor
		var html = proc.process(self)
		return html
	end
end