Merge: doc: fixed some typos and other misc. corrections
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # Work in extended mode (default).
34 #
35 # Behavior changes when using extended mode:
36 #
37 # * Lists and code blocks end a paragraph
38 #
39 # In normal markdown the following:
40 #
41 # ~~~md
42 # This is a paragraph
43 # * and this is not a list
44 # ~~~
45 #
46 # Will produce:
47 #
48 # ~~~html
49 # <p>This is a paragraph
50 # * and this is not a list</p>
51 # ~~~
52 #
53 # When using extended mode this changes to:
54 #
55 # ~~~html
56 # <p>This is a paragraph</p>
57 # <ul>
58 # <li>and this is not a list</li>
59 # </ul>
60 # ~~~
61 #
62 # * Fenced code blocks
63 #
64 # If you don't want to indent all your code with 4 spaces,
65 # you can wrap your code in ``` ``` ``` or `~~~`.
66 #
67 # Here's an example:
68 #
69 # ~~~md
70 # fun test do
71 # print "Hello World!"
72 # end
73 # ~~~
74 #
75 # * Code blocks meta
76 #
77 # If you want to use syntax highlighting tools, most of them need to know what kind
78 # of language they are highlighting.
79 # You can add an optional language identifier after the fence declaration to output
80 # it in the HTML render.
81 #
82 # ```nit
83 # import markdown
84 #
85 # print "# Hello World!".md_to_html
86 # ```
87 #
88 # Becomes
89 #
90 # ~~~html
91 # <pre class="nit"><code>import markdown
92 #
93 # print "# Hello World!".md_to_html
94 # </code></pre>
95 # ~~~
96 #
97 # * Underscores (Emphasis)
98 #
99 # Underscores in the middle of a word like:
100 #
101 # ~~~md
102 # Con_cat_this
103 # ~~~
104 #
105 # normally produces this:
106 #
107 # ~~~html
108 # <p>Con<em>cat</em>this</p>
109 # ~~~
110 #
111 # With extended mode they don't result in emphasis.
112 #
113 # ~~~html
114 # <p>Con_cat_this</p>
115 # ~~~
116 #
117 # * Strikethrough
118 #
119 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
120 # a strikethrough span is marked with `~~`.
121 #
122 # ~~~md
123 # ~~Mistaken text.~~
124 # ~~~
125 #
126 # becomes
127 #
128 # ~~~html
129 # <del>Mistaken text.</del>
130 # ~~~
var ext_mode = true

# Disable attaching `MDLocation` to tokens.
#
# Locations are useful for some tools but they may
# cause a significant time and space overhead.
#
# Default = `false`
var no_location = false is writable
140
# Process the markdown `input` string and return the processed output.
#
# The parsing state kept by `self` (link refs, current line and block)
# is reset first, then `input` is split into blocks that are finally emitted.
fun process(input: String): Writable do
	# Start from a clean parsing state
	current_block = null
	current_line = null
	last_link_ref = null
	link_refs.clear
	# Build the tree of blocks
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)
	# Render it, forgetting the headlines already known by the decorator
	decorator.headlines.clear
	return emit(root.kind)
end
156
# Split `input` string into `MDLine`s and create a parent `MDBlock` with them.
#
# While reading, `\r` is dropped and each tab is expanded with spaces up to
# the next multiple of 4 columns.
# Lines accepted by `check_link_ref` are not added to the returned block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var buf = new FlatBuffer
	var line_no = 0
	var idx = 0

	while idx < input.length do
		buf.clear
		# Width of the expanded line so far
		var width = 0
		# Number of input chars read for this line (end of line included)
		var raw_cols = 0
		loop
			if idx >= input.length then break
			var c = input[idx]
			idx += 1
			raw_cols += 1
			if c == '\n' then break
			if c == '\r' then continue
			if c == '\t' then
				var stop = width + (4 - (width & 3))
				while width < stop do
					buf.add ' '
					width += 1
				end
			else
				width += 1
				buf.add c
			end
		end
		line_no += 1

		var loc = new MDLocation(line_no, 1, line_no, raw_cols)
		var line = new MDLine(loc, buf.write_to_string)
		# Link refs are recorded by `check_link_ref`, not kept as lines
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
199
# Check if `line` is a block link definition.
#
# Return `true` if `line` is consumed by a link definition, that is either:
#
# * a valid link ref (`[id]: link "title"`), which is saved into `link_refs`;
# * the title of a multi-line link ref, which is attached to `last_link_ref`.
#
# A pending multi-line definition is dropped as soon as another non-empty
# line has been examined (whether it was a title or not), or when a complete
# link ref with its own title is found.
# Otherwise, a quoted or parenthesized line found much later in the document
# would be swallowed as the title of an old link ref.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
		pos = line.leading + 1
		pos = md.read_until(id, pos, ']')
		if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
			if line.value[pos + 1] == ':' then
				pos += 2
				pos = md.skip_spaces(pos)
				if pos >= 0 and line.value[pos] == '<' then
					# Link written between angle brackets
					pos += 1
					pos = md.read_until(link, pos, '>')
					pos += 1
				else if pos >= 0 then
					pos = md.read_until(link, pos, ' ', '\n')
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < line.value.length then
						# Something follows the link: only a title is accepted
						var c = line.value[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
	end
	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without title can be completed by a following line
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	else
		comment = new FlatBuffer
		var last_link_ref = self.last_link_ref
		if not line.is_empty and last_link_ref != null then
			pos = line.leading
			var c = line.value[pos]
			if c == '\"' or c == '\'' or c == '(' then
				pos += 1
				if c == '(' then
					pos = md.read_until(comment, pos, ')')
				else
					pos = md.read_until(comment, pos, c)
				end
			end
			if not comment.is_empty then
				last_link_ref.title = comment.write_to_string
			end
			# Title or not, the definition is no longer pending
			self.last_link_ref = null
		end
		return not comment.is_empty
	end
end
270
# Known link refs
# This map will be needed during output to expand links.
#
# Entries added with `add_link_ref` are indexed by their lowercased id.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

# Last encountered link ref (for multiline definitions)
#
# Markdown allows link refs to be defined over two lines:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
# "Optional Title Here"
# ~~~
#
private var last_link_ref: nullable LinkRef = null

# Add a link ref to the map
#
# `key` is lowercased before being used as index in `link_refs`.
fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
288
# Recursively split a `block`.
#
# The block is split according to the type of lines it contains.
# Some blocks can be split again recursively like lists.
# The `in_list` mode is used to recurse on list and build
# nested paragraphs or code blocks.
#
# The previous `in_list` mode and `current_block` are restored before returning.
# A `root` that contains only empty lines is left untouched.
fun recurse(root: MDBlock, in_list: Bool) do
	# Skip leading empty lines.
	# This is done before touching the state of `self` so that the early
	# return cannot leave `in_list` set to the mode of this call.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then return
	end

	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	current_line = line
	current_block = root
	# Each kind of line is expected to move `current_line` forward
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
314
# Currently processed line.
# Used when visiting blocks with `recurse`.
#
# `recurse` loops until this attribute becomes `null`.
var current_line: nullable MDLine = null is writable

# Currently processed block.
# Used when visiting blocks with `recurse`.
var current_block: nullable MDBlock = null is writable

# Is the current recursion in list mode?
# Used when visiting blocks with `recurse`
private var in_list = false
326
# The kind of the line `md`.
#
# Rules are tried in order and the first one that matches wins;
# `LineOther` is returned when none applies.
fun line_kind(md: MDLine): Line do
	var text = md.value
	var lead = md.leading
	var trail = md.trailing
	if md.is_empty then return new LineEmpty
	if md.leading > 3 then return new LineCode

	# First char that is not part of the leading spaces
	var first = text[lead]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	var is_long = text.length - lead - trail > 2

	# Fences only exist in extended mode
	if ext_mode and is_long then
		if (first == '`' or first == '~') and md.count_chars_start(first) >= 3 then
			return new LineFence
		end
	end

	# Rulers
	if is_long and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	# Unordered lists: a marker followed by a space
	if text.length - lead >= 2 and text[lead + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	# Ordered lists: digits followed by a dot and a space
	if text.length - lead >= 3 and first.is_digit then
		var i = lead + 1
		while i < text.length and text[i].is_digit do i += 1
		if i + 1 < text.length and text[i] == '.' and text[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# Underlined headlines are decided by the content of the following line
	var following = md.next
	if following != null and not following.is_empty then
		if following.count_chars('=') > 0 then return new LineHeadline1
		if following.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
382
# Get the token kind at `pos`.
#
# The decision is based on the char at `pos`, the char before it and the
# two chars after it; a space is assumed for neighbours outside of `text`.
# Unless `no_location` is set, the token gets a location computed from `current_loc`.
fun token_at(text: Text, pos: Int): Token do
	var c0 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	var c1 = ' '
	if pos + 1 < text.length then c1 = text[pos + 1]
	var c2 = ' '
	if pos + 2 < text.length then c2 = text[pos + 2]

	var loc: nullable MDLocation = null
	if not no_location then
		var cur = current_loc
		var col = cur.column_start + pos
		loc = new MDLocation(cur.line_start, col, cur.line_start, col)
	end

	if c == '*' then
		if c1 == '*' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmStar(loc, pos, c)
			return new TokenStrongStar(loc, pos, c)
		end
		# A star surrounded by spaces is a plain char
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmStar(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmUnderscore(loc, pos, c)
			return new TokenStrongUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode, an underscore in the middle of a word is a plain char
			var in_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
				(c1.is_letter or c1.is_digit)
			if in_word then return new TokenNone(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmUnderscore(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)
	if c == ']' then return new TokenNone(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# Only these chars can be escaped with a backslash
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	# Strikes only exist in extended mode
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)
	return new TokenNone(loc, pos, c)
end
484
# Find the position of a `token` in `text`.
#
# The search starts at `start` and stops at the first position where
# `token_at` gives a token with the same type as `token`.
# Return `-1` if there is no such position.
fun find_token(text: Text, start: Int, token: Token): Int do
	for i in [start .. text.length[ do
		if token_at(text, i).is_same_type(token) then return i
	end
	return -1
end
496
# Kind of decorator used for decoration.
type DECORATOR: Decorator

# Decorator used for output.
# Default is `HTMLDecorator`
#
# The default decorator is created lazily, on first access.
var decorator: DECORATOR is writable, lazy do
	return new HTMLDecorator
end

# Create a new `MarkdownProcessor` using a custom `decorator`.
init with_decorator(decorator: DECORATOR) do
	self.decorator = decorator
end
510
# Output `block` using `decorator`.
#
# The output is collected in a new buffer pushed for the duration of
# the emission; this buffer is returned.
fun emit(block: Block): Text do
	var buffer = push_buffer
	block.emit(self)
	pop_buffer
	return buffer
end

# Output the content of `block`.
fun emit_in(block: Block) do block.emit_in(self)

# Transform and emit markdown text
#
# The whole `text` is processed, from position 0 and without stop token.
fun emit_text(text: Text) do emit_text_until(text, 0, null)
524
# Transform and emit markdown text starting at `start` and
# until a token with the same type as `token` is found.
# Go until the end of `text` if `token` is null.
#
# Return the position of the token that stopped the scan,
# or `-1` if the end of `text` was reached.
fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
	# State of the enclosing scan, restored only when the end of `text` is reached
	var old_text = current_text
	var old_pos = current_pos
	current_text = text
	current_pos = start
	while current_pos < text.length do
		if text[current_pos] == '\n' then
			# Update the location on top of the stack for the new line
			current_loc.line_start += 1
			current_loc.column_start = -current_pos
		end
		var mt = token_at(text, current_pos)
		# When looking for an emphasis token, a strong token of the same
		# family (star or underscore) also stops the scan.
		if (token != null and not token isa TokenNone) and
		(mt.is_same_type(token) or
		(token isa TokenEmStar and mt isa TokenStrongStar) or
		(token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)) then
			# NOTE: `current_text` and `current_pos` are not restored on this path
			return current_pos
		end
		mt.emit(self)
		current_pos += 1
	end
	current_text = old_text
	current_pos = old_pos
	return -1
end

# Currently processed position in `current_text`.
# Used when visiting inline production with `emit_text_until`.
private var current_pos: Int = -1

# Currently processed text.
# Used when visiting inline production with `emit_text_until`.
private var current_text: nullable Text = null
560
# Stacked buffers.
#
# The last buffer of the list is the one currently written to.
private var buffer_stack = new List[FlatBuffer]

# Push a new buffer on the stack.
#
# Return the new (empty) buffer.
private fun push_buffer: FlatBuffer do
	var buffer = new FlatBuffer
	buffer_stack.add buffer
	return buffer
end

# Pop the last buffer.
private fun pop_buffer do buffer_stack.pop

# Current output buffer.
#
# At least one buffer must have been pushed.
private fun current_buffer: FlatBuffer do
	assert not buffer_stack.is_empty
	return buffer_stack.last
end
579
# Stacked locations.
#
# The last location of the list is the current one.
private var loc_stack = new List[MDLocation]

# Push a new MDLocation on the stack.
private fun push_loc(location: MDLocation) do loc_stack.add location

# Pop the last location and return it.
private fun pop_loc: MDLocation do return loc_stack.pop

# Current location.
#
# At least one location must have been pushed.
private fun current_loc: MDLocation do
	assert not loc_stack.is_empty
	return loc_stack.last
end
594
# Append `e` to current buffer.
#
# A `Text` is appended as is, anything else through `write_to_string`.
fun add(e: Writable) do
	if e isa Text then
		current_buffer.append e
	else
		current_buffer.append e.write_to_string
	end
end

# Append `c` to current buffer.
fun addc(c: Char) do
	current_buffer.add c
end

# Append a "\n" line break.
fun addn do addc '\n'
611 end
612
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link with a title.
	init with_title(link: String, title: nullable String) do
		init(link)
		self.title = title
	end
end
636
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
interface Decorator

	# Kind of processor used
	type PROCESSOR: MarkdownProcessor

	# Render a single plain char.
	#
	# Redefine this method to add special escaping for plain text.
	# By default, `c` is appended as is to the output of `v`.
	fun add_char(v: PROCESSOR, c: Char) do v.addc c

	# Render a ruler block.
	fun add_ruler(v: PROCESSOR, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: PROCESSOR, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: PROCESSOR, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: PROCESSOR, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: PROCESSOR, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: PROCESSOR, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: PROCESSOR, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: PROCESSOR, block: BlockListItem) is abstract

	# Render an emphasis text.
	fun add_em(v: PROCESSOR, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: PROCESSOR, text: Text) is abstract

	# Render a strike text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: PROCESSOR, text: Text) is abstract

	# Render a link.
	fun add_link(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image.
	fun add_image(v: PROCESSOR, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an abbreviation.
	fun add_abbr(v: PROCESSOR, name: Text, comment: Text) is abstract

	# Render a code span reading from a buffer.
	fun add_span_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: PROCESSOR, value: Text) is abstract

	# Render code text from buffer and escape it.
	fun append_code(v: PROCESSOR, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: PROCESSOR, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: PROCESSOR) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Found headlines during the processing labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
714
# Class representing a markdown headline.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
728
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do v.add "<hr/>\n"

	# The headline is also registered in `headlines` under a fresh id.
	# Nothing is output for a headline without line.
	redef fun add_headline(v, block) do
		var first = block.block.first_line
		if first == null then return
		# Register the headline
		var title = first.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		# Output it
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# The `meta` of the block, if any, is output as the class of the `pre` element.
	redef fun add_code(v, block) do
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# `link`, `name` and `comment` are escaped; the title attribute is
	# only output for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# `link` and `comment` are escaped while `name` is processed as markdown text;
	# the title attribute is only output for a non-empty `comment`.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	# Each char of `text` goes through `escape_char`.
	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# `&`, `<`, `>`, `"` and `'` are replaced by entities, other chars are output as is.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
			return
		end
		if c == '<' then
			v.add "&lt;"
			return
		end
		if c == '>' then
			v.add "&gt;"
			return
		end
		if c == '"' then
			v.add "&quot;"
			return
		end
		if c == '\'' then
			v.add "&apos;"
			return
		end
		v.addc c
	end

	# Only `&`, `<` and `>` are escaped; quotes are output as is.
	# Chars are read from `from` (included) to `to` (excluded).
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Spaces become `_`; other chars are kept only if they are letters,
	# digits or one of `allowed_id_chars`.
	# If the id is already used in `headlines`, a numeric suffix (`_1`, `_2`...)
	# is added to make it unique.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var base = b.to_s
		var key = base
		# Look for the first free id
		var n = 0
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Chars accepted in ids in addition to letters and digits.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
920
# Location in a Markdown input.
#
# A location is a span going from a start position to a stop position,
# each one made of a line number and a column number.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Formatted as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var from = "{line_start},{column_start}"
		var to = "{line_end},{column_end}"
		return "{from}--{to}"
	end

	# Return a copy of `self`.
	#
	# The copy is a new object with the same four positions.
	fun copy: MDLocation do return new MDLocation(line_start, column_start, line_end, column_end)
end
943
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are both kept as linked lists.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` (included) are moved to the new
	# sub-block, which is added after the existing sub-blocks of `self` and returned.
	# `self` keeps the lines that follow `line`, and the start of its
	# `location` is moved to the first of them.
	#
	# `self` must have at least one line.
	fun split(line: MDLine): MDBlock do
		# location for new block
		var new_loc = new MDLocation(
			first_line.as(not null).location.line_start,
			first_line.as(not null).location.column_start,
			line.location.line_end,
			line.location.column_end)
		# create block
		var block = new MDBlock(new_loc)
		block.first_line = first_line
		block.last_line = line
		# detach the moved lines from the remaining ones
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.as(not null).prev = null
			# update current block loc
			location.line_start = first_line.as(not null).location.line_start
			location.column_start = first_line.as(not null).location.column_start
		end
		# append the new block to the sub-blocks
		if first_block == null then
			first_block = block
			last_block = block
		else
			# link both ways so that `prev` can be used to walk back
			block.prev = last_block
			last_block.as(not null).next = block
			last_block = block
		end
		return block
	end

	# Add a `line` to this block.
	#
	# `line` becomes the last line; the `next_empty` and `prev_empty` flags
	# of the two lines that become neighbours are updated.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
			last_line = line
		else
			last_line.as(not null).next_empty = line.is_empty
			line.prev_empty = last_line.as(not null).is_empty
			line.prev = last_line
			last_line.as(not null).next = line
			last_line = line
		end
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbours; `first_line` and `last_line`
	# are updated if needed.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.as(not null).next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.as(not null).prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var was_empty = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			line = first_line
			was_empty = true
		end
		return was_empty
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var was_empty = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			line = last_line
			was_empty = true
		end
		return was_empty
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var was_empty = false
		if remove_leading_empty_lines then was_empty = true
		if remove_trailing_empty_lines then was_empty = true
		return was_empty
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# the value changed, so the leading must be computed again
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# The text of the lines comes first, then the text of the sub-blocks;
	# each line and each sub-block is followed by a "\n".
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		var block = first_block
		while block != null do
			text.append block.text
			text.append "\n"
			block = block.next
		end
		return text.write_to_string
	end
end
1133
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default, only the content of `self` is output.
	fun emit(v: MarkdownProcessor) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are only considered when `block` has no lines.
	fun emit_in(v: MarkdownProcessor) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first joined, without their leading and trailing spaces,
	# in a temporary buffer that is then processed as markdown text.
	# A line break is added after each line that ends with at least 2 spaces.
	fun emit_lines(v: MarkdownProcessor) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a count, not an end index: the leading
				# must be subtracted too or trailing spaces are kept.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is pushed while it is emitted.
	fun emit_blocks(v: MarkdownProcessor) do
		var block = self.block.first_block
		while block != null do
			v.push_loc(block.location)
			block.kind.emit(v)
			v.pop_loc
			block = block.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Except in fences, the 4 spaces that indent a line are removed.
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				# Only strip 4 chars from lines that really start with 4 spaces,
				# so that no content is lost on less indented lines.
				if not infence and str.has_prefix("    ") then
					# NOTE(review): the count looks like an end index; trailing
					# spaces are only partially removed - confirm the intent.
					text.append str.substring(4, str.length - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1203
# A block without any markdown specificities.
#
# Nothing is redefined here: the default implementation of `Block` is used,
# this class is only used for typing purposes.
class BlockNone
	super Block
end
1211
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do v.decorator.add_blockquote(v, self)

	# Remove blockquote markers.
	#
	# For each non-empty line of `block` that starts (after its leading spaces)
	# with `>`, the marker and one optional following space are removed,
	# then the leading of the line is computed again.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				# Index of the first char to keep
				var cut = line.leading + 1
				if cut < line.value.length and line.value[cut] == ' ' then cut += 1
				line.value = line.value.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1237
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Number of char to skip at the beginning of the line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit every line of `block` as code.
	#
	# Non-empty lines are passed to the decorator from `line_start` up to
	# their end; every line, empty or not, is followed by a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				var code = current.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			current = current.next
		end
	end
end
1263
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`, except that
# lines are emitted from their first character (see `line_start`).
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1274
# A markdown headline.
class BlockHeadline
	super Block

	# Emit the headline with a location shifted by `start` columns.
	redef fun emit(v) do
		var shifted = block.location.copy
		shifted.column_start += start
		v.push_loc(shifted)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the headline text in its original line, set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# Does nothing when `depth` is already set, or when `block` has no first
	# line or an empty one. Otherwise the leading `#` marks are counted to
	# compute `depth` (6 at most), and the marks and spaces surrounding the
	# title are stripped from the line.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var line = block.first_line
		if line == null then return
		if line.is_empty then return
		var raw = line.value
		var pos = line.leading
		var level = 0
		# opening marks, then the spaces following them
		while pos < raw.length and raw[pos] == '#' do
			level += 1
			pos += 1
		end
		while pos < raw.length and raw[pos] == ' ' do pos += 1
		if pos >= raw.length then
			# nothing but marks and spaces
			line.is_empty = true
		else
			# closing marks, then the spaces preceding them
			var last = raw.length - line.trailing - 1
			while raw[last] == '#' do last -= 1
			while raw[last] == ' ' do last -= 1
			line.value = raw.substring(pos, last - pos + 1)
			line.leading = 0
			line.trailing = 0
		end
		self.start = pos
		depth = level.min(6)
	end
end
1321
# A markdown list item block.
class BlockListItem
	super Block

	# Rendering is delegated to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1328
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# Starting from the second line, a new item begins at every list line,
	# and at every non-empty, unindented line that follows an empty line.
	# The remaining lines form the last item.
	private fun init_block(v: MarkdownProcessor) do
		var first = block.first_line
		if first == null then return
		var line = first.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		var tail = block.split(block.last_line.as(not null))
		tail.kind = new BlockListItem(tail)
	end

	# Expand list items as paragraphs if needed.
	#
	# As soon as one list item of `block` directly contains a paragraph,
	# every `BlockNone` found directly in a list item becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in any list item.
		var has_paragraph = false
		var item = block.first_block
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not has_paragraph do
					if child.kind isa BlockParagraph then has_paragraph = true
					child = child.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return
		# Second pass: promote plain blocks to paragraphs.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then child.kind = new BlockParagraph(child)
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1387
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	# Rendering is delegated to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1394
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	# Rendering is delegated to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1401
# A markdown paragraph block.
class BlockParagraph
	super Block

	# Rendering is delegated to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1408
# A markdown ruler.
class BlockRuler
	super Block

	# Rendering is delegated to the decorator of `v`.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1415
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the lines of `block` verbatim, each one followed by a newline.
	#
	# Empty lines only produce the newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1429
# A markdown line.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The `next_empty` and `prev_empty` flags of the neighbour lines are updated too.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.as(not null).next_empty = true
		if next != null then next.as(not null).prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# When `value` only contains spaces (or nothing), the line is cleared
	# (see `clear`) and 0 is returned.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# Compare the freshly computed count, not the stale `leading`
		# attribute that may describe a previous `value`.
		if count == value.length then
			clear
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns `value.length` when `value` only contains spaces.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# The bound check keeps the scan inside `value` for blank lines.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			# any other char invalidates the whole line
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contains valid XML markup?
	#
	# The markup may span over the following lines. When it is accepted,
	# `xml_end_line` is set to the line where the markup ends.
	private fun check_html: Bool do
		# stack of the currently opened block tags
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			if tag == "hr" then
				# `hr` has no content: the markup ends on this very line
				xml_end_line = self
				return true
			end
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				# look for the next tag on the line
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# end of line: check if it ends with `/>`
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# a closing tag must match the last opened one
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the index of the opening `<` in `first_line`.
	# Returns the index following the closing `-->` (and sets
	# `first_line.xml_end_line` to the line holding it), or -1 if no
	# complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < first_line.value.length then
			# The `--` of `<!--` is relative to `start`, not to the
			# beginning of the line.
			if first_line.value[start + 2] == '-' and first_line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` takes a count: both ends must be discounted.
		return value.substring(leading, value.length - leading - trailing)
	end
end
1633
# The kind of a markdown line.
#
# Each kind knows how to turn the lines starting at `v.current_line`
# into blocks (see `process`).
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1641
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the next one.
	redef fun process(v) do
		var current = v.current_line.as(not null)
		v.current_line = current.next
	end
end
1650
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the lines up to the end of the text run into a new block.
	#
	# The block is a `BlockNone` inside a list when the run was not preceded
	# by an empty line and is not ended by one; it is a `BlockParagraph`
	# in every other case.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.as(not null).prev_empty
		# go to block end
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			var stops
			if t isa LineList then
				stops = v.in_list or v.ext_mode
			else if t isa LineCode or t isa LineFence then
				stops = v.ext_mode
			else
				# `LineHeadline1` and `LineHeadline2` are `LineHeadline` too
				stops = t isa LineHeadline or t isa LineHR or
					t isa LineBlockquote or t isa LineXML
			end
			if stops then break
			line = line.next
		end
		# find where to split, and if the block may stay a plain one
		var current_block = v.current_block.as(not null)
		var cut: MDLine
		var may_be_plain = true
		if line == null then
			cut = current_block.last_line.as(not null)
		else if line.is_empty then
			cut = line
			may_be_plain = false
		else
			cut = line.prev.as(not null)
		end
		# build block
		var block = current_block.split(cut)
		if v.in_list and may_be_plain and not was_empty then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1701
# A line of markdown code.
class LineCode
	super Line

	# Gather the following code lines and empty lines into a `BlockCode`.
	redef fun process(v) do
		# lookup block end
		var line = v.current_line
		while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
			line = line.next
		end
		# split at block end line
		var current_block = v.current_block.as(not null)
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var code = current_block.split(last)
		code.kind = new BlockCode(code)
		code.remove_surrounding_empty_lines
		v.current_line = current_block.first_line
	end
end
1725
# A line of raw XML.
class LineXML
	super Line

	# Build a `BlockXML` from the current line up to its `xml_end_line`.
	#
	# Lines found before the current one are split in their own block first.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		var before = line.prev
		if before != null then current_block.split(before)
		var xml = current_block.split(line.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1742
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the blockquote in a `BlockQuote` and parse its content.
	#
	# The quote ends before the first non-empty, unindented line that
	# follows an empty line and is not itself a blockquote line.
	redef fun process(v) do
		var current_block = v.current_block.as(not null)
		# go to bquote end
		var line = v.current_line
		while line != null do
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				if not v.line_kind(line) isa LineBlockquote then break
			end
			line = line.next
		end
		# build sub block
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var quote = current_block.split(last)
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		# parse the content of the quote
		v.current_line = line
		v.recurse(quote, false)
		v.current_line = current_block.first_line
	end
end
1773
# A markdown ruler line.
class LineHR
	super Line

	# Put the current line alone in a `BlockRuler`.
	#
	# Lines found before the current one are split in their own block first.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		var before = line.prev
		if before != null then current_block.split(before)
		var ruler = current_block.split(line)
		ruler.kind = new BlockRuler(ruler)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1789
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the closing fence in a `BlockFence`.
	#
	# The block runs to the end of the current block when there is no
	# closing fence. The fence lines themselves are cleared.
	redef fun process(v) do
		var current_block = v.current_block.as(not null)
		# go to fence end
		var line = v.current_line.as(not null).next
		while line != null and not v.line_kind(line) isa LineFence do
			line = line.next
		end
		# the block stops right after the closing fence
		if line != null then line = line.next
		# build fence block
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var fence = current_block.split(last)
		fence.remove_surrounding_empty_lines
		var opening = fence.first_line.as(not null)
		var meta = opening.value.meta_from_fence
		fence.kind = new BlockFence(fence, meta)
		# drop the fence markers
		opening.clear
		var closing = fence.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			closing.clear
		end
		fence.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1824
# A markdown headline.
class LineHeadline
	super Line

	# Put the current line alone in a `BlockHeadline` and strip its marks.
	#
	# Lines found before the current one are split in their own block first.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		var before = line.prev
		if before != null then current_block.split(before)
		var headline = current_block.split(line)
		var kind = new BlockHeadline(headline)
		headline.kind = kind
		kind.transform_headline(headline)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1843
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Put the current line alone in a `BlockHeadline` of depth 1.
	#
	# The line following the current one is cleared.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		var before = line.prev
		if before != null then current_block.split(before)
		var underline = line.next.as(not null)
		underline.clear
		var headline = current_block.split(line)
		var kind = new BlockHeadline(headline)
		kind.depth = 1
		kind.transform_headline(headline)
		headline.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1864
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Put the current line alone in a `BlockHeadline` of depth 2.
	#
	# The line following the current one is cleared.
	redef fun process(v) do
		var line = v.current_line
		if line == null then return
		var current_block = v.current_block.as(not null)
		var before = line.prev
		if before != null then current_block.split(before)
		var underline = line.next.as(not null)
		underline.clear
		var headline = current_block.split(line)
		var kind = new BlockHeadline(headline)
		kind.depth = 2
		kind.transform_headline(headline)
		headline.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1885
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Gather the lines of the list in a list block and parse its items.
	#
	# The list ends before the first non-empty, unindented line that
	# follows an empty line and is not itself a list line.
	redef fun process(v) do
		# go to list end
		var line = v.current_line
		while line != null do
			var t = v.line_kind(line)
			var ends_list = not line.is_empty and line.prev_empty and
				line.leading == 0 and not t isa LineList
			if ends_list then break
			line = line.next
		end
		# build list block
		var current_block = v.current_block.as(not null)
		var last: MDLine
		if line == null then
			last = current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var list = current_block.split(last)
		var kind = block_kind(list)
		list.kind = kind
		# reset the boundary flags before and after trimming the block
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		# split the list in items and parse each of them
		kind.init_block(v)
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1932
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts 2 chars after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1943
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts 2 chars after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1954
# A token represent a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: nullable MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownProcessor::decorator`.
	#
	# By default, `char` is simply added through the decorator.
	fun emit(v: MarkdownProcessor) do
		v.decorator.add_char(v, char)
	end
end
1971
# A token without a specific meaning.
#
# Adds nothing to `Token`, so the inherited `emit` is used as is.
class TokenNone
	super Token
end
1976
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the closing token as an emphasis.
	#
	# `char` is emitted alone when no closing token is found.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, tmp)
		v.current_pos = b
	end
end
1993
# An emphasis star token.
#
# Adds nothing to `TokenEm`: it only exists as a distinct type.
class TokenEmStar
	super TokenEm
end
1998
# An emphasis underscore token.
#
# Adds nothing to `TokenEm`: it only exists as a distinct type.
class TokenEmUnderscore
	super TokenEm
end
2003
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the closing token as a strong emphasis.
	#
	# The content starts 2 chars after `pos`.
	# `char` is emitted alone when no closing token is found.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, tmp)
		v.current_pos = b + 1
	end
end
2020
# A strong star token.
#
# Adds nothing to `TokenStrong`: it only exists as a distinct type.
class TokenStrongStar
	super TokenStrong
end
2025
# A strong underscore token.
#
# Adds nothing to `TokenStrong`: it only exists as a distinct type.
class TokenStrongUnderscore
	super TokenStrong
end
2030
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the closing token as a span of code.
	#
	# Spaces surrounding the code are not emitted, and nothing is emitted
	# when the span only contains spaces.
	# `char` is emitted alone when no closing token is found.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var skip = next_pos
		var from = pos + skip + 1
		var to = v.find_token(text, from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + skip
		while from < to and text[from] == ' ' do from += 1
		if from >= to then return
		# `text[from]` is not a space: this loop stops at `from` at the latest
		while text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, text, from, to)
	end

	# Number of extra chars used by the token, besides the one at `pos`.
	private fun next_pos: Int is abstract
end
2054
# A span code token.
class TokenCodeSingle
	super TokenCode

	# The token has no extra char.
	redef fun next_pos do
		return 0
	end
end
2061
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# The token has one extra char.
	redef fun next_pos do
		return 1
	end
end
2068
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image when `check_link` accepts it, `char` alone otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownProcessor) is abstract

	# Check if the link is a valid link.
	#
	# Parses the construct found at `start` in `v.current_text` and fills
	# `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Three forms are handled after the `[name]` part: an inline `(address "title")`,
	# a reference `[id]`, or nothing, in which case `name` itself is looked up
	# in `v.link_refs`.
	#
	# Returns the position of the last char of the construct,
	# or -1 if it is not a valid link.
	private fun check_link(v: MarkdownProcessor, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		if md == null then return -1
		# images have one more char before the name
		var pos
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# nothing follows the name: it must be a known reference
			var tid = name.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# inline link: `(address "title")`
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# the `>` may be the last char of the input
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				if pos > start and md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# reference link: `[id]`, the name is used when the id is empty
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.as(not null).write_to_string.to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# the name alone: it must be a known reference
			var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
			if v.link_refs.has_key(tid) then
				var lr = v.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2189
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is flagged as such and has a title,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
		else
			v.decorator.add_link(v, link.as(not null), name.as(not null), title)
		end
	end
end
2202
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image through the decorator of `v`.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
2211
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup accepted by `check_html`, or `char` escaped otherwise.
	redef fun emit(v) do
		var html = new FlatBuffer
		var text = v.current_text.as(not null)
		var b = check_html(v, html, text, v.current_pos)
		if b <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add html
		v.current_pos = b
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An auto link (`<scheme:...>`) is directly emitted as a link;
	# inline HTML is appended to `out`.
	# Returns the position of the closing `>`, or -1 if nothing is recognized.
	private fun check_html(v: MarkdownProcessor, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var url = new FlatBuffer
		var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and url.is_link_prefix then
			# the rest of the address is appended after the prefix
			pos = md.read_until(url, pos, '>')
			if pos != -1 then
				var link = url.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2249
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity accepted by `check_entity`, or `char` escaped otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`.
	# Numeric entities must only contain decimal digits after `#`, or
	# hexadecimal digits after `#x`/`#X`; other entities must only contain
	# letters and digits.
	#
	# Returns the position of the `;` (which is then added to `out`),
	# or -1 if the entity is not valid.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# reject anything outside of `0-9`, `a-f` and `A-F`
					if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
						return -1
					end
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
			out.add ';'
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			out.add ';'
			# TODO check entity is valid
			# if out.is_entity then
			return pos
			# else
			# return -1
			# end
		end
		return pos
	end
end
2303
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape char and emit the char that follows it as is.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text.as(not null)[escaped]
	end
end
2313
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the closing token as a strike.
	#
	# The content starts 2 chars after `pos`.
	# `char` is emitted alone when no closing token is found.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, tmp)
		v.current_pos = b + 1
	end
end
2332
2333 redef class Text
2334
# Get the position of the next non-space character.
#
# Spaces and newlines are skipped from `start`.
# Returns -1 when the end of `self` is reached.
private fun skip_spaces(start: Int): Int do
	var pos = start
	loop
		if pos <= -1 or pos >= length then break
		var c = self[pos]
		if c != ' ' and c != '\n' then break
		pos += 1
	end
	if pos >= length then return -1
	return pos
end
2344
# Read `self` until `nend` and append it to the `out` buffer.
# Escape markdown special chars.
#
# Returns the position of the first char of `nend` that is found,
# or -1 when the end of `self` is reached.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			# a backslash followed by something: let `escape` handle it
			pos = escape(out, self[pos + 1], pos)
		else
			if nend.has(c) then break
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2362
# Read `self` as raw text until `nend` and append it to the `out` buffer.
# No escape is made.
#
# Returns the position of the first char of `nend` that is found,
# or -1 when the end of `self` is reached.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2383
# Read `self` as XML until `to` and append it to the `out` buffer.
#
# Chars of `to` found inside a single or double quoted string do not end
# the reading. Inside a string, a backslash and the char following it are
# copied as is.
#
# Returns the position of the first char of `to` that is found outside of
# a string, or -1 when the end of `self` is reached.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# copy the escaped char, not the backslash again
					out.add self[pos]
					pos += 1
				end
				continue
			end
			# only the quote that opened the string can close it
			if c == str_char then in_str = false
		else if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2429
# Read `self` as XML and append it to the `out` buffer.
# Safe mode can be activated to limit reading to valid xml.
#
# `start` is the position of the opening `<`.
# Returns the position of the closing `>`, or -1 if no tag can be read.
# For `<!` constructs, only `<!` is appended and `start + 1` is returned.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	var is_valid = true
	if safe_mode then
		# read the tag name apart to check it before writing anything
		var tmp = new FlatBuffer
		pos = read_xml_until(tmp, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = tmp.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			# unsafe tags are written with escaped brackets
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append tmp
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# read the rest of the tag
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2488
# Copy a markdown link address from `self` into `out`, starting at `start`.
#
# Parentheses are counted, the opening one being considered already read:
# the scan stops on the `)` that brings the count back to zero,
# or on a space met while no inner parenthesis is open.
# Backslash sequences are delegated to `escape`.
#
# Return the index where the scan stopped, or -1 when that index equals `length`.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var i = start
	var depth = 1
	while i < length do
		var cur = self[i]
		if cur == '\\' and i + 1 < length then
			i = escape(out, self[i + 1], i) + 1
			continue
		end
		if cur == '(' then
			depth += 1
		else if cur == ')' then
			depth -= 1
			if depth == 0 then break
		else if cur == ' ' and depth == 1 then
			break
		end
		out.add cur
		i += 1
	end
	if i == length then return -1
	return i
end
2515
# Copy a markdown link text from `self` into `out`, starting at `start`.
#
# Square brackets are counted, the opening one being considered already read:
# nested `[` and `]` are copied, and the scan stops on the `]`
# that brings the count back to zero (this one is not copied).
#
# Return the index of that closing bracket, or -1 when the end of `self` is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var i = start
	var depth = 1
	while i < length do
		var cur = self[i]
		if cur == '[' then
			depth += 1
		else if cur == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add cur
		i += 1
	end
	if i == length then return -1
	return i
end
2542
# Lower-cased name of the XML tag held in `self`.
#
# The first char is skipped, as well as a `/` in second position.
# The name is made of the letters and digits that follow;
# the last char of `self` is never part of it.
private fun xml_tag: String do
	var name = new FlatBuffer
	var i = if length > 1 and self[1] == '/' then 2 else 1
	var last = length - 1
	while i < last do
		var cur = self[i]
		if not cur.is_digit and not cur.is_letter then break
		name.add cur
		i += 1
	end
	return name.write_to_string.to_lower
end
2554
# Is `self` non-empty and only made of chars accepted by `Char::is_alpha`?
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	for i in [0..length[ do
		if not self[i].is_alpha then return false
	end
	return true
end
2562
# Process a backslash sequence whose backslash is at `pos` and whose next char is `c`.
#
# When `c` is one of the escapable markdown chars, only `c` is written to `out`
# and `pos + 1` is returned. Otherwise a literal backslash is written
# and `pos` is returned unchanged.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.', '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
	if not escapable.has(c) then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2575
# Text found after the fence markers of a fence opening line.
#
# Leading spaces, backticks and tildes are skipped and the remainder is returned trimmed.
# Return `null` when `self` contains nothing but those chars.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var c = chars[i]
		if c == ' ' or c == '`' or c == '~' then
			i += 1
			continue
		end
		return substring_from(i).trim
	end
	return null
end
2586
# Is `self` the name of an unsafe HTML element (one listed in `html_unsafe_tags`)?
private fun is_html_unsafe: Bool do
	var name = write_to_string
	return html_unsafe_tags.has(name)
end
2589
# Is `self` the name of an HTML block element (one listed in `html_block_tags`)?
private fun is_html_block: Bool do
	var name = write_to_string
	return html_block_tags.has(name)
end
2592
# Is `self` a known link prefix (one listed in `html_link_prefixes`)?
private fun is_link_prefix: Bool do
	var prefix = write_to_string
	return html_link_prefixes.has(prefix)
end
2595
# Names of the HTML elements reported as unsafe by `is_html_unsafe`.
#
# The array is built once and shared by all calls.
private fun html_unsafe_tags: Array[String] do
	return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
end
2597
# Names of the HTML elements reported as block elements by `is_html_block`.
#
# The array is built once and shared by all calls.
private fun html_block_tags: Array[String] do
	return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
end
2599
# Prefixes recognized by `is_link_prefix`.
#
# The array is built once and shared by all calls.
private fun html_link_prefixes: Array[String] do
	return once ["http", "https", "ftp", "ftps"]
end
2601 end
2602
redef class String

	# Render `self`, interpreted as markdown, to its HTML representation.
	#
	# A new `MarkdownProcessor` is used for each call.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		return (new MarkdownProcessor).process(self)
	end
end