Merge: markdown: merge MDProcessor and MDEmitter
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output through the `decorator` of the processor.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # Work in extended mode (default).
34 #
35 # Behavior changes when using extended mode:
36 #
37 # * Lists and code blocks end a paragraph
38 #
39 # In normal markdown the following:
40 #
41 # ~~~md
42 # This is a paragraph
43 # * and this is not a list
44 # ~~~
45 #
46 # Will produce:
47 #
48 # ~~~html
49 # <p>This is a paragraph
50 # * and this is not a list</p>
51 # ~~~
52 #
53 # When using extended mode this changes to:
54 #
55 # ~~~html
56 # <p>This is a paragraph</p>
57 # <ul>
58 # <li>and this is not a list</li>
59 # </ul>
60 # ~~~
61 #
62 # * Fences code blocks
63 #
64 # If you don't want to indent all your code with 4 spaces,
65 # you can wrap your code in ``` ``` ``` or `~~~`.
66 #
67 # Here's an example:
68 #
69 # ~~~md
70 # fun test do
71 # print "Hello World!"
72 # end
73 # ~~~
74 #
75 # * Code blocks meta
76 #
77 # If you want to use syntax highlighting tools, most of them need to know what kind
78 # of language they are highlighting.
79 # You can add an optional language identifier after the fence declaration to output
80 # it in the HTML render.
81 #
82 # ```nit
83 # import markdown
84 #
85 # print "# Hello World!".md_to_html
86 # ```
87 #
88 # Becomes
89 #
90 # ~~~html
91 # <pre class="nit"><code>import markdown
92 #
93 # print "Hello World!".md_to_html
94 # </code></pre>
95 # ~~~
96 #
97 # * Underscores (Emphasis)
98 #
99 # Underscores in the middle of a word like:
100 #
101 # ~~~md
102 # Con_cat_this
103 # ~~~
104 #
105 # normally produces this:
106 #
107 # ~~~html
108 # <p>Con<em>cat</em>this</p>
109 # ~~~
110 #
111 # With extended mode they don't result in emphasis.
112 #
113 # ~~~html
114 # <p>Con_cat_this</p>
115 # ~~~
116 #
117 # * Strikethrough
118 #
119 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
120 # a strikethrough span is marked with `~~`.
121 #
122 # ~~~md
123 # ~~Mistaken text.~~
124 # ~~~
125 #
126 # becomes
127 #
128 # ~~~html
129 # <del>Mistaken text.</del>
130 # ~~~
131 var ext_mode = true
132
# Do not attach an `MDLocation` to the tokens built by `token_at`.
#
# Locations are useful for some tools but they may
# cause a significant time and space overhead.
#
# Default = `false`
var no_location = false is writable
140
# Process the markdown `input` string and return the processed output.
fun process(input: String): Writable do
	# Forget everything left over from a previous run.
	current_block = null
	current_line = null
	last_link_ref = null
	link_refs.clear
	# Parse the input into a tree of blocks.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)
	# Render the tree.
	return emit(root.kind)
end
155
# Cut `input` into `MDLine`s gathered in a freshly created parent `MDBlock`.
#
# Tabs are expanded with spaces up to the next multiple of 4 columns and
# `\r` chars are dropped.
# Lines accepted by `check_link_ref` are not added to the block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var text = new FlatBuffer
	var total = input.length
	var idx = 0
	var line_no = 0
	while idx < total do
		text.clear
		# Input chars consumed by this line (the end of line included).
		var consumed = 0
		# Width of the line once the tabs are expanded.
		var width = 0
		loop
			if idx >= total then break
			var c = input[idx]
			idx += 1
			consumed += 1
			if c == '\n' then break
			if c == '\r' then continue
			if c == '\t' then
				var tab_stop = width + (4 - (width & 3))
				while width < tab_stop do
					text.add ' '
					width += 1
				end
			else
				width += 1
				text.add c
			end
		end
		line_no += 1

		var loc = new MDLocation(line_no, 1, line_no, consumed)
		var line = new MDLine(loc, text.write_to_string)
		# Link refs are recorded by `check_link_ref`, not kept as lines.
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
198
# Check if `line` is a block link definition.
#
# Return `true` when `line` must not be kept as content because it either:
#
# * contains a valid link ref, which is then saved into `link_refs`;
# * or holds the title of the untitled link ref found just before
#   (see `last_link_ref`).
#
# Only the first non-empty line following an untitled link ref can provide
# its title: `last_link_ref` is cleared as soon as such a line was examined.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = line.leading + 1
		pos = md.read_until(id, pos, ']')
		if not id.is_empty and pos >= 0 and pos + 2 < md.length then
			if md[pos + 1] == ':' then
				pos += 2
				pos = md.skip_spaces(pos)
				if pos >= 0 and md[pos] == '<' then
					pos += 1
					pos = md.read_until(link, pos, '>')
					pos += 1
				else if pos >= 0 then
					pos = md.read_until(link, pos, ' ', '\n')
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < md.length then
						var c = md[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a ref without title can be completed by the next line.
		# A titled ref also ends any older pending definition.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a link ref: `line` may still be the title of the previous ref.
	# Empty lines are ignored and keep the pending ref alive.
	if line.is_empty then return false
	var pending = last_link_ref
	if pending == null then return false
	# Whatever `line` contains, the multi-line definition stops here.
	# Otherwise any later line starting with a quote or a parenthesis
	# would be swallowed as the title of an unrelated ref.
	last_link_ref = null

	comment = new FlatBuffer
	pos = line.leading
	var c = md[pos]
	if c == '\"' or c == '\'' or c == '(' then
		pos += 1
		if c == '(' then
			pos = md.read_until(comment, pos, ')')
		else
			pos = md.read_until(comment, pos, c)
		end
	end
	if comment.is_empty then return false
	pending.title = comment.write_to_string
	return true
end
269
# Known link refs.
#
# Filled by `add_link_ref`, which indexes each ref by its lowercased key.
# This map is needed during output to expand links.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

# Last encountered link ref without title (for multiline definitions)
#
# Markdown allows link refs to be defined over two lines:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
#	"Optional Title Here"
# ~~~
#
private var last_link_ref: nullable LinkRef = null

# Register `ref` in `link_refs` under the lowercased `key`.
fun add_link_ref(key: String, ref: LinkRef) do
	link_refs[key.to_lower] = ref
end
287
# Recursively split a `block`.
#
# The block is split according to the kind of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# The previous `in_list` mode and `current_block` are restored before
# returning, including when `root` only contains empty lines.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip the leading empty lines.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Nothing to process: do not leak the new mode to the caller.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# Each line kind consumes one or more lines and updates `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
313
# Line being processed.
#
# Set by `recurse` before the kind of each line is processed.
var current_line: nullable MDLine = null is writable

# Block being processed.
#
# Set by `recurse` to the block it is splitting.
var current_block: nullable MDBlock = null is writable

# Is the current recursion in list mode?
#
# Set by `recurse` from its `in_list` argument.
private var in_list = false
325
# Kind of the line `md`.
#
# The checks are ordered: the first matching kind wins and
# `LineOther` is returned when nothing else matches.
fun line_kind(md: MDLine): Line do
	var value = md.value
	var leading = md.leading
	var trailing = md.trailing
	if md.is_empty then return new LineEmpty
	if leading > 3 then return new LineCode

	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Length of the line without its leading and trailing spaces.
	var core_len = value.length - leading - trailing

	# Fences are only known in extended mode.
	if ext_mode and core_len > 2 then
		if (first == '`' or first == '~') and md.count_chars_start(first) >= 3 then
			return new LineFence
		end
	end

	if core_len > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	# Length of the line without its leading spaces.
	var rest_len = value.length - leading

	if rest_len >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	if rest_len >= 3 and first.is_digit then
		# Skip the digits, then expect ". ".
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# A headline can be marked by the content of the following line.
	var next = md.next
	if next != null and not next.is_empty then
		if next.count_chars('=') > 0 then return new LineHeadline1
		if next.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
381
# Get the kind of token found in `text` at `pos`.
#
# The token is chosen from the char at `pos`, the char before it and
# the two chars after it.
# Unless `no_location` is set, the token gets a location computed
# from `current_loc`.
fun token_at(text: Text, pos: Int): Token do
	# Neighbours of the char at `pos`, a space stands for "out of bounds".
	var c0 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	var c1 = ' '
	if pos + 1 < text.length then c1 = text[pos + 1]
	var c2 = ' '
	if pos + 2 < text.length then c2 = text[pos + 2]

	var loc: nullable MDLocation = null
	if not no_location then
		var base = current_loc
		var col = base.column_start + pos
		loc = new MDLocation(base.line_start, col, base.line_start, col)
	end

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(loc, pos, c)
			return new TokenEmStar(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode, an underscore inside a word is plain text.
			if (c0.is_letter or c0.is_digit) and c0 != '_' and
			   (c1.is_letter or c1.is_digit) then
				return new TokenNone(loc, pos, c)
			end
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)
	if c == ']' then return new TokenNone(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# Chars that can follow a backslash to form an escape.
		var escapable = ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'',
			'.', '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
		if escapable.has(c1) then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	# Strikes are only known in extended mode.
	if ext_mode and c == '~' and c1 == '~' then
		return new TokenStrike(loc, pos, c)
	end
	return new TokenNone(loc, pos, c)
end
483
# Find the position in `text` of the first token, at or after `start`,
# that has the same type as `token`.
#
# Return `-1` if there is no such token.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
495
# Kind of decorator used for decoration.
type DECORATOR: Decorator

# Decorator used for output.
#
# Lazily initialized with a new `HTMLDecorator` if no other
# decorator was set before the first read.
var decorator: DECORATOR is writable, lazy do
	return new HTMLDecorator
end

# Create a new `MarkdownProcessor` using a custom `decorator`.
init with_decorator(decorator: DECORATOR) do
	self.decorator = decorator
end
509
# Output `block` in a new buffer and return this buffer.
#
# The buffer is pushed on the buffer stack while `block` emits itself,
# then popped.
fun emit(block: Block): Text do
	var out = push_buffer
	block.emit(self)
	pop_buffer
	return out
end

# Output the content of `block`.
fun emit_in(block: Block) do
	block.emit_in(self)
end

# Transform and emit the whole markdown `text`.
fun emit_text(text: Text) do
	emit_text_until(text, 0, null)
end
523
# Transform and emit the markdown `text` starting at `start` and
# until a token with the same type as `token` is found.
#
# Go until the end of `text` if `token` is null (or is a `TokenNone`).
#
# Return the position of the stopping token, or `-1` if the end of
# `text` was reached.
#
# NOTE(review): `current_text` and `current_pos` are only restored when
# the end of `text` is reached, not when a stopping token is found.
# Callers may rely on this; confirm before changing it.
fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
	var saved_text = current_text
	var saved_pos = current_pos
	current_text = text
	current_pos = start
	while current_pos < text.length do
		if text[current_pos] == '\n' then
			# Next line: columns are now counted from this position.
			current_loc.line_start += 1
			current_loc.column_start = -current_pos
		end
		var mt = token_at(text, current_pos)
		if token != null and not token isa TokenNone then
			# A strong token also closes a pending emphasis of the same char.
			var stop = mt.is_same_type(token)
			if not stop and token isa TokenEmStar then stop = mt isa TokenStrongStar
			if not stop and token isa TokenEmUnderscore then stop = mt isa TokenStrongUnderscore
			if stop then return current_pos
		end
		mt.emit(self)
		current_pos += 1
	end
	current_text = saved_text
	current_pos = saved_pos
	return -1
end
551
# Position being processed in `current_text`.
#
# Maintained by `emit_text_until`.
private var current_pos: Int = -1

# Text being processed.
#
# Maintained by `emit_text_until`.
private var current_text: nullable Text = null

# Stack of output buffers, the last one is the current one.
private var buffer_stack = new List[FlatBuffer]

# Push a new empty buffer on the stack and return it.
private fun push_buffer: FlatBuffer do
	var fresh = new FlatBuffer
	buffer_stack.add fresh
	return fresh
end

# Pop the last buffer.
private fun pop_buffer do
	buffer_stack.pop
end

# Current output buffer.
#
# REQUIRE: at least one buffer was pushed.
private fun current_buffer: FlatBuffer do
	assert not buffer_stack.is_empty
	return buffer_stack.last
end
578
# Stack of locations, the last one is the current one.
private var loc_stack = new List[MDLocation]

# Push `location` on the stack.
private fun push_loc(location: MDLocation) do
	loc_stack.add location
end

# Pop the last location and return it.
private fun pop_loc: MDLocation do
	return loc_stack.pop
end

# Current location.
#
# REQUIRE: at least one location was pushed.
private fun current_loc: MDLocation do
	assert not loc_stack.is_empty
	return loc_stack.last
end
593
# Append `e` to the current buffer.
#
# A `Text` is appended as is, anything else is first
# converted with `write_to_string`.
fun add(e: Writable) do
	var out = current_buffer
	if e isa Text then
		out.append e
	else
		out.append e.write_to_string
	end
end

# Append the char `c` to the current buffer.
fun addc(c: Char) do current_buffer.add c

# Append a "\n" line break.
fun addn do
	addc '\n'
end
610 end
611
# A link reference.
#
# Links that are specified somewhere in the markdown document
# to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link to `link` labeled with `title`.
	init with_title(link: String, title: nullable String) do
		init(link)
		self.title = title
	end
end
635
# A `Decorator` is used to emit markdown into a specific format.
#
# The decorator used by default is `HTMLDecorator`.
# Every service receives the processor `v` in which the output is written.
interface Decorator

	# Kind of processor used
	type PROCESSOR: MarkdownProcessor

	# Render a single plain char.
	#
	# Redefine this method to add special escaping for plain text.
	fun add_char(v: PROCESSOR, c: Char) do
		v.addc c
	end

	# Render a ruler block.
	fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: PROCESSOR, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract

	# Render an emphasized text.
	fun add_em(v: PROCESSOR, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: PROCESSOR, text: Text) is abstract

	# Render a strike text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: PROCESSOR, text: Text) is abstract

	# Render a link.
	fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image.
	fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an abbreviation.
	fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract

	# Render a code span reading from a buffer.
	fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: PROCESSOR, value: Text) is abstract

	# Render code text from buffer and escape it.
	fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: PROCESSOR, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: PROCESSOR) is abstract

	# Generate a new valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, indexed by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
713
# A headline found in a markdown document.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
727
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	# Record the headline in `headlines` then output it with its id.
	#
	# Nothing is recorded nor output if the block has no line.
	redef fun add_headline(v, block) do
		var first = block.block.first_line
		if first == null then return
		# Save the headline under an id derived from its text.
		var title = first.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		# Output it.
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# The `meta` of the block, if any, is output as the class of the `pre`.
	redef fun add_code(v, block) do
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# The `title` attribute is only output for a non-empty `comment`.
	# The `name` is processed as markdown text.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	# Output each char of `text` through `escape_char`.
	redef fun append_value(v, text) do
		for c in text do
			escape_char(v, c)
		end
	end

	# Replace `&`, `<`, `>`, `"` and `'` by entities, output other chars as is.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
			return
		end
		if c == '<' then
			v.add "&lt;"
			return
		end
		if c == '>' then
			v.add "&gt;"
			return
		end
		if c == '"' then
			v.add "&quot;"
			return
		end
		if c == '\'' then
			v.add "&apos;"
			return
		end
		v.addc c
	end

	# Output the chars of `buffer` in `[from..to[`.
	#
	# Only `&`, `<` and `>` are replaced by entities.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Build an id from `txt` that is not yet a key of `headlines`.
	#
	# Spaces are replaced by `_`. Other chars are kept only if they are
	# letters, digits or one of `allowed_id_chars`.
	# If the id is already used, `_1`, `_2`, etc. is appended.
	redef fun strip_id(txt) do
		var buf = new FlatBuffer
		for c in txt do
			if c == ' ' then
				buf.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				buf.add c
			end
		end
		var base = buf.to_s
		# Look for the first free key.
		var key = base
		var n = 0
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Chars, other than letters and digits, kept by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
919
# Location in a Markdown input.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Formatted as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var from = "{line_start},{column_start}"
		var to = "{line_end},{column_end}"
		return "{from}--{to}"
	end

	# Return a new location with the same four values as `self`.
	fun copy: MDLocation do
		var dup = new MDLocation(line_start, column_start, line_end, column_end)
		return dup
	end
end
942
# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks.
# Lines and sub-blocks are both stored as doubly-linked lists.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var sub = first_block
		while sub != null do
			n += 1
			sub = sub.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var n = 0
		var cur = first_line
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Split `self`, creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new sub-block,
	# which is appended to the sub-blocks of `self` and returned.
	#
	# REQUIRE: `first_line != null`
	fun split(line: MDLine): MDBlock do
		var head = first_line.as(not null)
		# The new block spans from the first line to `line`.
		var new_loc = new MDLocation(
			head.location.line_start,
			head.location.column_start,
			line.location.line_end,
			line.location.column_end)
		var block = new MDBlock(new_loc)
		block.first_line = head
		block.last_line = line

		# Detach the moved lines from the remaining ones.
		var rest = line.next
		first_line = rest
		line.next = null
		if rest == null then
			last_line = null
		else
			rest.prev = null
			# `self` now starts at the first remaining line.
			location.line_start = rest.location.line_start
			location.column_start = rest.location.column_start
		end

		# Append the new block to the sub-blocks, linking both directions.
		var tail = last_block
		if first_block == null then
			first_block = block
		else
			block.prev = tail
			tail.as(not null).next = block
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		loop
			var head = first_line
			if head == null or not head.is_empty then break
			remove_line head
			removed = true
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		loop
			var tail = last_line
			if tail == null or not tail.is_empty then break
			remove_line tail
			removed = true
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var lead = remove_leading_empty_lines
		var trail = remove_trailing_empty_lines
		return lead or trail
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# The value changed: the leading spaces must be counted again.
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect the text of the lines then, recursively, of the sub-blocks.
	#
	# Each line and each sub-block is followed by a "\n".
	fun text: String do
		var out = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then out.append line.text
			out.append "\n"
			line = line.next
		end
		var sub = first_block
		while sub != null do
			out.append sub.text
			out.append "\n"
			sub = sub.next
		end
		return out.write_to_string
	end
end
1132
# Representation of a markdown block in the AST.
#
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default, only the content is output (see `emit_in`).
	fun emit(v: MarkdownProcessor) do v.emit_in(self)

	# Emit the contents of `self`: its lines if any, else its sub-blocks.
	#
	# Surrounding empty lines are removed first.
	fun emit_in(v: MarkdownProcessor) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first gathered in a temporary buffer, without their leading
	# and trailing spaces, then processed as markdown text.
	# A line break is rendered for lines ending with at least 2 spaces.
	fun emit_lines(v: MarkdownProcessor) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var value = line.value
				# `substring` expects a count of chars, not an end index.
				var count = value.length - line.leading - line.trailing
				v.add value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is made current while it is emitted.
	fun emit_blocks(v: MarkdownProcessor) do
		var block = self.block.first_block
		while block != null do
			v.push_loc(block.location)
			block.kind.emit(v)
			v.pop_loc
			block = block.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Outside of fences, the 4 spaces indentation of the lines and their
	# trailing spaces are removed.
	# Each line is followed by a "\n".
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				# Only drop 4 chars when they really are the indentation.
				if not infence and str.has_prefix("    ") then
					# `substring` expects a count of chars, not an end index.
					text.append str.substring(4, str.length - 4 - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1202
# A block without any markdown specificities.
#
# It inherits everything from `Block` without any redefinition
# and is the default `kind` of a `MDBlock`.
class BlockNone
	super Block
end
1210
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove the blockquote markers from the lines of `block`.
	#
	# For each non-empty line starting with `>` (after its leading spaces),
	# everything up to the marker, and the space that may follow it, is dropped.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var value = line.value
				var mark = line.leading
				if value[mark] == '>' then
					var cut = mark + 1
					if cut < value.length and value[cut] == ' ' then cut += 1
					line.value = value.substring_from(cut)
					# The value changed: count the leading spaces again.
					line.leading = line.process_leading
				end
			end
			line = line.next
		end
	end
end
1236
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Number of characters skipped at the beginning of each line.
	#
	# Lines of an indented code block start after 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line through `append_code`, starting at `line_start`.
	#
	# A new line is added after every line, empty ones included.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end
1262
# A markdown code-fence block.
#
# The implementation is inherited from `BlockCode`; only `line_start`
# differs. The class is also used to recognize fences by their type.
class BlockFence
	super BlockCode

	# Lines of a fenced block are not indented: nothing is skipped.
	redef var line_start = 0
end
1273
# A markdown headline.
class BlockHeadline
	super Block

	# Emit the headline with a location shifted by `start` columns,
	# so the location points at the title rather than at the `#` marks.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the first character of the title in the original line.
	#
	# Set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# Does nothing if `depth` is already set (> 0) or if the first line is
	# missing or empty.
	# Otherwise the leading `#` marks are counted to set `depth` (at most 6)
	# and the line is reduced to the title, without the opening marks, the
	# closing marks and the surrounding spaces.
	# A line made only of marks and spaces is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null then return
		if line.is_empty then return
		# Skip the opening marks, then the spaces following them.
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			# Skip the closing marks and the spaces before them,
			# never going past the beginning of the title.
			var nend = line.value.length - line.trailing - 1
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			if nend < start then
				# Nothing left between the opening and closing marks (e.g. `# #`).
				line.is_empty = true
			else
				line.value = line.value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		self.start = start
		depth = level.min(6)
	end
end
1320
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1327
# A markdown list block.
#
# Common base of ordered and unordered lists, used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# Starting from the second line, a new item is cut before every line that
	# is either a list line, or a non-empty, non-indented line preceded by an
	# empty line. The remaining lines form the last item.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		if line == null then return
		line = line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList or
				(not line.is_empty and line.prev_empty and line.leading == 0)
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If at least one list item of `block` contains a `BlockParagraph`,
	# every `BlockNone` found in the list items becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph, stop at the first one found.
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var probe = item.first_block
				while probe != null and not found do
					if probe.kind isa BlockParagraph then found = true
					probe = probe.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote plain blocks to paragraphs.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1386
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1393
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1400
# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1407
# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1414
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the lines as they are: the value of each non-empty line is added
	# verbatim, and a new line is added after every line.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				v.add line.value
			end
			v.addn
			line = line.next
		end
	end
end
1428
# A markdown line.
#
# Lines are linked to their neighbours through `prev` and `next`,
# and keep track of their leading and trailing spaces.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value
	#
	# `leading` is computed first; the line is flagged as non-empty (and
	# `trailing` computed) only if something follows the leading spaces.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbours, if any, are told that `self` is now empty.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		var before = prev
		if before != null then before.next_empty = true
		var after = next
		if after != null then after.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# Return the number of spaces at the beginning of `value`.
	# The `leading` attribute itself is not updated, callers assign the result.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# NOTE(review): this compares the *stored* `leading`, not `count`.
		# Kept as is: `init` relies on `value` being untouched for lines made
		# only of spaces. Confirm the intent before changing it.
		if leading == value.length then clear
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Return the number of spaces at the end of `value`
	# (the whole length if `value` only contains spaces).
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# Bounded by the length so blank or empty values cannot be indexed below 0.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			# Any other character disqualifies the whole line.
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment` when they succeed.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contains valid XML markup?
	#
	# Starting after the leading spaces, accept either a XML comment or
	# a HTML block tag whose matching closing tag is found on this line or
	# on one of the following lines. On success `xml_end_line` is set to
	# the line where the markup ends.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			# `hr` stands alone: no closing tag is looked for.
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			tags.add tag
			# Walk the following markup, stacking opened tags in `tags`
			# until the stack is empty or the lines are exhausted.
			var line: nullable MDLine = self
			while line != null do
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# End of line reached: a line ending with `/>` closes the last tag.
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# Closing tags must match the last opened tag.
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the position of the opening `<` in `first_line`.
	# Return the position following the closing `-->` (in the line where it
	# is found) and set `first_line.xml_end_line` to that line,
	# or return -1 if no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		if start + 3 >= first_line.value.length then return -1
		# The two dashes are looked up relatively to `start`,
		# so that indented comments are recognized too.
		if first_line.value[start + 2] != '-' or first_line.value[start + 3] != '-' then
			return -1
		end
		var line: nullable MDLine = first_line
		var pos = start + 4
		while line != null do
			while pos < line.value.length and line.value[pos] != '-' do
				pos += 1
			end
			if pos == line.value.length then
				line = line.next
				pos = 0
			else
				if pos + 2 < line.value.length then
					if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
						first_line.xml_end_line = line
						return pos + 3
					end
				end
				pos += 1
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` expects a number of characters, not an end index.
		var count = (value.length - leading - trailing).max(0)
		return value.substring(leading, count)
	end
end
1632
# The kind of a markdown line.
#
# Instances are what `MarkdownProcessor::line_kind` returns for a `MDLine`;
# they are told apart by their type (e.g. `isa LineList`).
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1640
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: move `v.current_line` to the next line.
	redef fun process(v) do
		var line = v.current_line.as(not null)
		v.current_line = line.next
	end
end
1649
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Cut a paragraph (or a plain block, inside lists) out of the current block.
	#
	# Lines are consumed up to the first empty line or up to the first line
	# that starts another construct.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.as(not null).prev_empty
		# Look for the end of the block.
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			# `LineHeadline1` and `LineHeadline2` are `LineHeadline` too.
			if t isa LineHeadline or t isa LineHR or
			   t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# Split the block and choose its kind.
		var current_block = v.current_block.as(not null)
		var block: MDBlock
		var plain: Bool
		if line != null and not line.is_empty then
			# Stopped on a line starting another construct: cut before it.
			block = current_block.split(line.prev.as(not null))
			plain = v.in_list and not was_empty
		else
			# Stopped on an empty line or at the end of the block.
			if line != null then
				block = current_block.split(line)
			else
				block = current_block.split(current_block.last_line.as(not null))
			end
			plain = v.in_list and (line == null or not line.is_empty) and not was_empty
		end
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1700
# A line of markdown code.
class LineCode
	super Line

	# Gather the consecutive code lines (and the empty lines between them)
	# into a `BlockCode`.
	redef fun process(v) do
		# Find the first line that is neither empty nor code.
		var line = v.current_line
		while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
			line = line.next
		end
		# Cut before that line, or take everything left.
		var current_block = v.current_block.as(not null)
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = current_block.last_line.as(not null)
		end
		var block = current_block.split(last)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = current_block.first_line
	end
end
1724
# A line of raw XML.
class LineXML
	super Line

	# Isolate the lines from the current one up to its `xml_end_line`
	# into a `BlockXML`.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# Whatever precedes the XML goes in its own block.
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var block = current_block.split(line.xml_end_line.as(not null))
		block.kind = new BlockXML(block)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1741
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote into a `BlockQuote`,
	# strip the quote markers and process the content recursively.
	redef fun process(v) do
		var line = v.current_line
		var current_block = v.current_block.as(not null)
		# The quote ends before the first non-empty, non-indented line that
		# follows an empty line and is not itself a blockquote line.
		while line != null do
			if not line.is_empty and line.prev_empty and line.leading == 0 and
			   not v.line_kind(line) isa LineBlockquote then break
			line = line.next
		end
		# Cut the quote out of the current block.
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = current_block.last_line.as(not null)
		end
		var block = current_block.split(last)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = current_block.first_line
	end
end
1772
# A markdown ruler line.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# Whatever precedes the ruler goes in its own block.
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var block = current_block.split(line)
		block.kind = new BlockRuler(block)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1788
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines from the opening fence to the closing one (or to the
	# end of the block if the fence is never closed) into a `BlockFence`.
	#
	# The fence lines themselves are cleared; what follows the opening fence
	# on its line is kept as the `meta` of the block.
	redef fun process(v) do
		# Look for the closing fence, starting after the opening one.
		var line = v.current_line.as(not null).next
		var current_block = v.current_block.as(not null)
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# The closing fence belongs to the block: step over it.
		if line != null then line = line.next
		# Cut the fence out of the current block.
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = current_block.last_line.as(not null)
		end
		var block = current_block.split(last)
		block.remove_surrounding_empty_lines
		var opening = block.first_line.as(not null)
		var meta = opening.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.as(not null).clear
		var closing = block.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			block.last_line.as(not null).clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1823
# A markdown headline.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline`
	# and remove its headline marks.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# Whatever precedes the headline goes in its own block.
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var block = current_block.split(line)
		var kind = new BlockHeadline(block)
		block.kind = kind
		kind.transform_headline(block)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1842
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# Whatever precedes the headline goes in its own block.
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var underline = line.next.as(not null)
		underline.clear
		var block = current_block.split(line)
		var kind = new BlockHeadline(block)
		kind.depth = 1
		kind.transform_headline(block)
		block.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1863
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		# Whatever precedes the headline goes in its own block.
		var before = line.prev
		if before != null then
			current_block.split(before)
		end
		var underline = line.next.as(not null)
		underline.clear
		var block = current_block.split(line)
		var kind = new BlockHeadline(block)
		kind.depth = 2
		kind.transform_headline(block)
		block.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1884
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Gather the lines of the list into a list block (see `block_kind`),
	# split it into items and process each item recursively.
	redef fun process(v) do
		var line = v.current_line
		# The list ends before the first non-empty, non-indented line that
		# follows an empty line and is not a list line.
		while line != null do
			var t = v.line_kind(line)
			if not line.is_empty and line.prev_empty and line.leading == 0 and
			   not t isa LineList then break
			line = line.next
		end
		# Cut the list out of the current block.
		var current_block = v.current_block.as(not null)
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = current_block.last_line.as(not null)
		end
		var list = current_block.split(last)
		var kind = block_kind(list)
		list.kind = kind
		# Reset the flags of both ends, before and after the trimming.
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		# Build the items then process their content.
		kind.init_block(v)
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1931
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# Return what follows the first `.` of the line and the character after it.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1942
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# Return what follows the leading spaces and the next two characters.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1953
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: nullable MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownProcessor::decorator`.
	#
	# By default, `char` is simply added through the decorator.
	fun emit(v: MarkdownProcessor) do
		v.decorator.add_char(v, char)
	end
end
1970
# A token without a specific meaning.
#
# Nothing is redefined: it is emitted as defined in `Token`.
class TokenNone
	super Token
end
1975
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# If no matching token is found, only `char` is emitted.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
1992
# An emphasis star token.
#
# Behaves as defined in `TokenEm`; the class identifies the star variant.
class TokenEmStar
	super TokenEm
end
1997
# An emphasis underscore token.
#
# Behaves as defined in `TokenEm`; the class identifies the underscore variant.
class TokenEmUnderscore
	super TokenEm
end
2002
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# The content starts 2 characters after `pos`.
	# If no matching token is found, only `char` is emitted.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end
2019
# A strong star token.
#
# Behaves as defined in `TokenStrong`; the class identifies the star variant.
class TokenStrongStar
	super TokenStrong
end
2024
# A strong underscore token.
#
# Behaves as defined in `TokenStrong`; the class identifies the underscore variant.
class TokenStrongUnderscore
	super TokenStrong
end
2029
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces surrounding the code are ignored, and nothing is emitted if
	# only spaces are found between the tokens.
	# If no matching token is found, only `char` is emitted.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var left = pos + next_pos + 1
		var right = v.find_token(text, left, self)
		if right <= 0 then
			v.addc char
			return
		end
		v.current_pos = right + next_pos
		# Trim the spaces on both sides of the code.
		while left < right and text[left] == ' ' do left += 1
		if left >= right then return
		while text[right - 1] == ' ' do right -= 1
		v.decorator.add_span_code(v, text, left, right)
	end

	# Number of extra characters used by the token after `pos`.
	private fun next_pos: Int is abstract
end
2053
# A span code token.
class TokenCodeSingle
	super TokenCode

	# The token uses no extra character.
	redef fun next_pos do
		return 0
	end
end
2060
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# The token uses one extra character.
	redef fun next_pos do
		return 1
	end
end
2067
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if `check_link` accepts it,
	# emit only `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownProcessor) is abstract

	# Check if the link is a valid link.
	#
	# Parse the construct starting at `start` in `v.current_text` and fill
	# `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Handled forms are `[name](link "comment")`, `[name](<link>)`,
	# `[name][id]` and `[name]` alone, the last two being resolved through
	# `v.link_refs`.
	#
	# Return the position of the last character of the construct,
	# or -1 if it is not a valid link. `out` is not used.
	private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		if md == null then return -1
		# Skip the opening mark: one character for links, two otherwise.
		var pos
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing follows the name: it must be a known reference.
			var tid = name.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form: `(link "comment")` or `(<link> "comment")`.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# The text may end right after the address (e.g. `[a](<b>`).
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				if pos > start and md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[name][id]`, the name is used if the id is empty.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# The name is followed by something else: it must be a known reference.
			var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2188
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation if the link is one and has a `comment`,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if not as_abbr then
			v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
			return
		end
		v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
	end
end
2201
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image through the decorator.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
2210
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup accepted by `check_html`,
	# or escape `char` if nothing is accepted.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links (`<prefix:...>` where the prefix is a link prefix) are added
	# directly through the decorator. Anything else is read as XML into `out`.
	# Return the position where the construct ends or -1.
	private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var address = new FlatBuffer
		var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
		var is_auto_link = pos != -1 and md[pos] == ':' and address.is_link_prefix
		if is_auto_link then
			# Read the rest of the address, up to the closing `>`.
			pos = md.read_until(address, pos, '>')
			if pos != -1 then
				var link = address.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2248
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity accepted by `check_entity`,
	# or escape `char` if nothing is accepted.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Read `md` from `start` up to the next `;` into `out`, then check what
	# was read: hexadecimal references (`&#x...`) must only contain
	# hexadecimal digits, decimal references (`&#...`) must only contain
	# digits and named entities must only contain letters and digits.
	#
	# On success the closing `;` is added to `out` and its position in `md`
	# is returned. Return -1 otherwise.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# At least one digit is needed after `&#x`.
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
			out.add ';'
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			out.add ';'
			# TODO check entity is valid (i.e. that the name is a known entity)
		end
		return pos
	end
end
2302
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape character and emit the character following it.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text.as(not null)[escaped]
	end
end
2312
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as a strike.
	#
	# The content starts 2 characters after `pos`.
	# If no matching token is found, only `char` is emitted.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = stop + 1
	end
end
2331
2332 redef class Text
2333
# Get the position of the next non-space character.
#
# Spaces and new lines are skipped starting at `start`.
# Return -1 if the end of `self` is reached.
private fun skip_spaces(start: Int): Int do
	var pos = start
	while pos > -1 and pos < length do
		var c = self[pos]
		if c != ' ' and c != '\n' then break
		pos += 1
	end
	if pos >= length then return -1
	return pos
end
2343
# Read `self` until one of the `nend` characters and append it to the `out` buffer.
# Escape markdown special chars.
#
# Return the position of the character that stopped the reading,
# or -1 if the end of `self` is reached first.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			# Let `escape` deal with the backslash and the character after it.
			pos = escape(out, self[pos + 1], pos)
		else
			if nend.has(c) then break
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2361
# Read `self` as raw text until one of the `nend` characters and append it to the `out` buffer.
# No escape is made.
#
# Return the position of the character that stopped the reading,
# or -1 if the end of `self` is reached first.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2382
# Read `self` as XML until one of the `to` characters and append it to the `out` buffer.
#
# Characters found inside single or double quoted strings never end the
# reading, and a backslash inside a string protects the character that
# follows it. Everything read is appended to `out` unchanged.
#
# Return the position of the character that stopped the reading,
# or -1 if the end of `self` is reached first.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				# Copy the backslash and the character it protects.
				out.add c
				pos += 1
				if pos < length then
					out.add self[pos]
					pos += 1
				end
				continue
			end
			if c == str_char then
				in_str = false
				out.add c
				pos += 1
				continue
			end
		else if c == '"' or c == '\'' then
			# Only the quote that opened the string can close it:
			# the other kind of quote is regular content inside a string.
			in_str = true
			str_char = c
		end
		if not in_str and to.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2428
# Read `self` as XML and append it to the `out` buffer.
# Safe mode can be activated to limit reading to valid xml.
#
# `start` is the position of the opening `<`.
# In safe mode, tags that are not valid HTML tags are rejected and the
# angle brackets of unsafe tags are written as `&lt;` and `&gt;`.
#
# Return the position of the closing `>` or -1 if no tag can be read.
# For constructs starting with `<!`, only `<!` is appended and
# `start + 1` is returned.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	# Skip the `<` and the `/` of closing tags.
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	# Read the tag name.
	var is_valid = true
	if safe_mode then
		var name = new FlatBuffer
		pos = read_xml_until(name, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = name.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append name
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
	end
	if pos == -1 then return -1
	# Read what follows the name, up to the end of the tag.
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = self.read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2487
# Read a markdown link address starting at `start` and append it to `out`.
#
# Parentheses are counted, starting from a depth of 1: reading stops at the
# `)` that brings the depth back to 0, or at a space met while the depth is 1.
# Backslash sequences are delegated to `escape`.
#
# Returns the index of the stopping char (which is not appended),
# or -1 if the end of `self` is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var cursor = start
	var depth = 1
	while cursor < length do
		var c = self[cursor]
		if c == '\\' and cursor + 1 < length then
			cursor = escape(out, self[cursor + 1], cursor) + 1
			continue
		end
		if c == '(' then
			depth += 1
		else if c == ')' then
			depth -= 1
			if depth == 0 then break
		else if c == ' ' and depth == 1 then
			break
		end
		out.add c
		cursor += 1
	end
	if cursor == length then return -1
	return cursor
end
2514
# Read a bracketed markdown link text starting at `start` and append it to `out`.
#
# Square brackets are counted, starting from a depth of 1: nested `[` and `]`
# are copied, and reading stops at the `]` that brings the depth back to 0.
#
# Returns the index of that closing `]` (which is not appended),
# or -1 if the end of `self` is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var cursor = start
	var depth = 1
	while cursor < length do
		var c = self[cursor]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add c
		cursor += 1
	end
	if cursor == length then return -1
	return cursor
end
2541
# Extract the lowercased tag name from `self`, read as an XML tag.
#
# The first char is skipped, as is a `/` in second position.
# Letters and digits are then collected; the last char of `self` is never read.
private fun xml_tag: String do
	var name = new FlatBuffer
	var i = 1
	if i < length and self[i] == '/' then i = 2
	var last = length - 1
	loop
		if i >= last then break
		var c = self[i]
		if not (c.is_digit or c.is_letter) then break
		name.add c
		i += 1
	end
	return name.write_to_string.to_lower
end
2553
# Is `self` a non-empty string whose chars all satisfy `is_alpha`?
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	for letter in chars do
		if not letter.is_alpha then return false
	end
	return true
end
2561
# Handle the backslash sequence located at `pos`, where `c` is the char after the backslash.
#
# If `c` is one of the escapable markdown chars, `c` alone is appended to `out`
# and `pos + 1` is returned. Otherwise a literal backslash is appended and
# `pos` is returned unchanged.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
		c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
		c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
		c == '`' or c == '~' or c == '^'
	if not escapable then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2574
# Extract the string found after the fence markers of a fence opening line.
#
# Leading spaces, backticks and tildes are skipped; the rest of `self` is
# returned trimmed. Returns `null` if `self` contains nothing else.
private fun meta_from_fence: nullable Text do
	var i = 0
	var count = chars.length
	while i < count do
		var c = chars[i]
		if c == ' ' or c == '`' or c == '~' then
			i += 1
			continue
		end
		return substring_from(i).trim
	end
	return null
end
2585
# Is `self` listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var name = write_to_string
	return html_unsafe_tags.has(name)
end
2588
# Is `self` listed in `html_block_tags` (HTML block elements)?
private fun is_html_block: Bool do
	var name = write_to_string
	return html_block_tags.has(name)
end
2591
# Is `self` listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefix = write_to_string
	return html_link_prefixes.has(prefix)
end
2594
# Names of the HTML elements treated as unsafe (the array is built once).
private fun html_unsafe_tags: Array[String] do
	var tags = once [
		"applet", "head", "body", "frame",
		"frameset", "iframe", "script", "object"]
	return tags
end
2596
# Names of the HTML elements treated as block elements (the array is built once).
private fun html_block_tags: Array[String] do
	var tags = once [
		"address", "article", "aside", "audio", "blockquote", "canvas",
		"dd", "div", "dl", "fieldset", "figcaption", "figure", "footer",
		"form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup",
		"hr", "noscript", "ol", "output", "p", "pre", "section", "table",
		"tfoot", "ul", "video"]
	return tags
end
2598
# Prefixes recognized as link prefixes (the array is built once).
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2600 end
2601
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		return (new MarkdownProcessor).process(self)
	end
end