lib/markdown: fix `text` for nested markdown blocks
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # `MarkdownEmitter` used for output.
34 var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # ~~~md
45 # This is a paragraph
46 # * and this is not a list
47 # ~~~
48 #
49 # Will produce:
50 #
51 # ~~~html
52 # <p>This is a paragraph
53 # * and this is not a list</p>
54 # ~~~
55 #
56 # When using extended mode this changes to:
57 #
58 # ~~~html
59 # <p>This is a paragraph</p>
60 # <ul>
61 # <li>and this is not a list</li>
62 # </ul>
63 # ~~~
64 #
65 # * Fences code blocks
66 #
67 # If you don't want to indent all your code with 4 spaces,
68 # you can wrap your code in ``` ``` ``` or `~~~`.
69 #
70 # Here's an example:
71 #
72 # ~~~md
73 # fun test do
74 # print "Hello World!"
75 # end
76 # ~~~
77 #
78 # * Code blocks meta
79 #
80 # If you want to use syntax highlighting tools, most of them need to know what kind
81 # of language they are highlighting.
82 # You can add an optional language identifier after the fence declaration to output
83 # it in the HTML render.
84 #
85 # ```nit
86 # import markdown
87 #
88 # print "# Hello World!".md_to_html
89 # ```
90 #
91 # Becomes
92 #
93 # ~~~html
94 # <pre class="nit"><code>import markdown
95 #
96 # print "Hello World!".md_to_html
97 # </code></pre>
98 # ~~~
99 #
100 # * Underscores (Emphasis)
101 #
102 # Underscores in the middle of a word like:
103 #
104 # ~~~md
105 # Con_cat_this
106 # ~~~
107 #
108 # normally produces this:
109 #
110 # ~~~html
111 # <p>Con<em>cat</em>this</p>
112 # ~~~
113 #
114 # With extended mode they don't result in emphasis.
115 #
116 # ~~~html
117 # <p>Con_cat_this</p>
118 # ~~~
119 #
120 # * Strikethrough
121 #
122 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
123 # a strikethrough span is marked with `~~`.
124 #
125 # ~~~md
126 # ~~Mistaken text.~~
127 # ~~~
128 #
129 # becomes
130 #
131 # ~~~html
132 # <del>Mistaken text.</del>
133 # ~~~
134 var ext_mode = true
135
136 # Disable attaching MDLocation to Tokens
137 #
138 # Locations are useful for some tools but they may
139 # cause an important time and space overhead.
140 #
141 # Default = `false`
142 var no_location = false is writable
143
144 init do self.emitter = new MarkdownEmitter(self)
145
# Process the markdown `input` string and return the processed output.
#
# The state left by a previous call (link references, pending link reference,
# current line and current block) is reset before parsing.
fun process(input: String): Writable do
	# Forget what a previous call may have left behind.
	link_refs.clear
	last_link_ref = null
	current_block = null
	current_line = null

	# Build the block tree: read the lines, trim the blank borders, split.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)

	# Hand the tree over to the emitter.
	var output = emitter.emit(root.kind)
	return output
end
160
# Cut the `input` string into `MDLine`s gathered in a new root `MDBlock`.
#
# While reading a line, `\r` chars are dropped and each tab is replaced by
# spaces up to the next multiple of 4 columns.
# Lines accepted by `check_link_ref` are not added to the block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var buf = new FlatBuffer
	var len = input.length
	var idx = 0
	var line_no = 0
	var col = 0

	while idx < len do
		buf.clear
		# Width of the line built so far, used to find the next tab stop.
		var width = 0
		loop
			if idx >= len then break
			var c = input[idx]
			col += 1
			idx += 1
			if c == '\n' then break
			if c == '\r' then continue
			if c == '\t' then
				var stop = width + (4 - (width & 3))
				while width < stop do
					buf.add ' '
					width += 1
				end
			else
				buf.add c
				width += 1
			end
		end
		line_no += 1

		var loc = new MDLocation(line_no, 1, line_no, col)
		var line = new MDLine(loc, buf.write_to_string)
		# Link reference definitions are recorded, not kept as content.
		if not check_link_ref(line) then root.add_line line
		col = 0
	end
	return root
end
203
# Check if `line` is a block link definition.
#
# Two kinds of lines are consumed:
#
# * a definition `[id]: link`, optionally followed by a title delimited by
#   `"`, `'` or `()`; the link may be wrapped in `<>`.
#   The definition is saved into `link_refs`.
# * a title alone on the first non-empty line following a definition that
#   has no title; it is attached to that pending definition (`last_link_ref`).
#
# Return `true` if the line was consumed and must not be kept as content.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and line.value[line.leading] == '[' then
		pos = line.leading + 1
		pos = md.read_until(id, pos, ']')
		if not id.is_empty and pos >= 0 and pos + 2 < line.value.length then
			if line.value[pos + 1] == ':' then
				pos += 2
				pos = md.skip_spaces(pos)
				if pos >= 0 and line.value[pos] == '<' then
					pos += 1
					pos = md.read_until(link, pos, '>')
					pos += 1
				else if pos >= 0 then
					pos = md.read_until(link, pos, ' ', '\n')
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < line.value.length then
						var c = line.value[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without a title can be completed by a following line.
		# A definition that carries its own title also ends any older pending one.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a definition: the line may be the title of the pending definition.
	comment = new FlatBuffer
	var pending = last_link_ref
	if not line.is_empty and pending != null then
		pos = line.leading
		var c = line.value[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if not comment.is_empty then pending.title = comment.write_to_string
		# The title can only sit on the first non-empty line after the
		# definition. Whatever this line was, the definition is no longer
		# pending; otherwise any later quoted or parenthesized line of the
		# document would be swallowed as its title.
		last_link_ref = null
	end
	return not comment.is_empty
end
271
272 # Known link refs
273 # This list will be needed during output to expand links.
274 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
275
276 # Last encountered link ref (for multiline definitions)
277 #
278 # Markdown allows link refs to be defined over two lines:
279 #
280 # ~~~md
281 # [id]: http://example.com/longish/path/to/resource/here
282 # "Optional Title Here"
283 # ~~~
284 #
285 private var last_link_ref: nullable LinkRef = null
286
# Register `ref` in `link_refs`.
#
# The entry is stored under the lowercased `key`.
fun add_link_ref(key: String, ref: LinkRef) do
	var normalized = key.to_lower
	link_refs[normalized] = ref
end
289
# Recursively split a `block`.
#
# The block is split according to the kind of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# `in_list` and `current_block` are restored to their previous values before
# returning. `current_line` is not restored.
fun recurse(root: MDBlock, in_list: Bool) do
	# Skip the leading empty lines. A block made only of empty lines has
	# nothing to process: leave before touching any state, so that the
	# caller's `in_list` mode is not overwritten.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then return
	end

	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list
	current_line = line
	current_block = root
	# Each line kind consumes the lines it handles by updating `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
315
316 # Currently processed line.
317 # Used when visiting blocks with `recurse`.
318 var current_line: nullable MDLine = null is writable
319
320 # Currently processed block.
321 # Used when visiting blocks with `recurse`.
322 var current_block: nullable MDBlock = null is writable
323
324 # Is the current recursion in list mode?
325 # Used when visiting blocks with `recurse`
326 private var in_list = false
327
# The kind of the line `md`.
#
# Kinds are tried in this order: empty, code, `#` headline, blockquote,
# fence (extended mode only), horizontal rule, unordered list, ordered list,
# XML, then headline underlined by the next line with `=` or `-`.
# `LineOther` is returned when nothing else matches.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty
	var leading = md.leading
	if leading > 3 then return new LineCode

	var value = md.value
	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Length of the line without its `leading` and `trailing` parts.
	var inner_len = value.length - leading - md.trailing

	if ext_mode and inner_len > 2 then
		if first == '`' and md.count_chars_start('`') >= 3 then return new LineFence
		if first == '~' and md.count_chars_start('~') >= 3 then return new LineFence
	end

	if inner_len > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	var rest = value.length - leading
	if rest >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	if rest >= 3 and first.is_digit then
		# Skip the digits, then expect ". ".
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	var following = md.next
	if following != null and not following.is_empty then
		if following.count_chars('=') > 0 then return new LineHeadline1
		if following.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
383
# Get the token kind at `pos`.
#
# The kind is chosen from the char at `pos` and from its neighbours: the char
# before it and the two chars after it, each one replaced by a space when it
# falls outside of `text`.
#
# Unless `no_location` is set, the token gets a location whose start and end
# are both at column `current_loc.column_start + pos` of the current line.
fun token_at(text: Text, pos: Int): Token do
	var len = text.length
	var c0 = if pos > 0 then text[pos - 1] else ' '
	var c = text[pos]
	var c1 = if pos + 1 < len then text[pos + 1] else ' '
	var c2 = if pos + 2 < len then text[pos + 2] else ' '

	var loc: nullable MDLocation = null
	if not no_location then
		var cur = current_loc
		var col = cur.column_start + pos
		loc = new MDLocation(cur.line_start, col, cur.line_start, col)
	end

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(loc, pos, c)
			return new TokenEmStar(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode an underscore between two letters or digits
			# does not start an emphasis.
			var in_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
				(c1.is_letter or c1.is_digit)
			if in_word then return new TokenNone(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# Only these chars can be escaped with a backslash.
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	# `~~` is a strike marker in extended mode only.
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)

	# Any other char, `]` included, carries no markup.
	return new TokenNone(loc, pos, c)
end
485
# Find the position of a `token` in `text`.
#
# The search starts at `start` and stops at the first position where
# `token_at` gives a token of the same type as `token`.
# Return `-1` if there is no such position.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start .. text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
497
# Location used for the next parsed token.
#
# It is the current location of the `emitter`, which can change it to
# follow the `\n` found in the input.
private fun current_loc: MDLocation do
	var loc = emitter.current_loc
	return loc
end
503 end
504
# Produce the output of a tree of blocks.
#
# The blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter relies on a `Decorator` to select the output format.
class MarkdownEmitter

	# Kind of processor used for parsing.
	type PROCESSOR: MarkdownProcessor

	# Processor containing link refs.
	#
	# It is also asked for the tokens of the text emitted by `emit_text_until`.
	var processor: PROCESSOR

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	#
	# A `HTMLDecorator` is created on first access if none was set.
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
		init(processor)
		self.decorator = decorator
	end

	# Output `block` in a new buffer and return that buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	#
	# Go until the end of `text` if `token` is null or is a `TokenNone`.
	# A strong token (`**` or `__`) also stops the search for the matching
	# emphasis token (`*` or `_`).
	#
	# Return the position of the stop token, or `-1` if the end was reached.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			if text[current_pos] == '\n' then
				# Move the current location to the beginning of the next line.
				var loc = current_loc
				loc.line_start += 1
				loc.column_start = -current_pos
			end
			var mt = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				# NOTE(review): these early exits leave `current_text` and
				# `current_pos` at their in-progress values; only the
				# fall-through path below restores them. Callers may rely
				# on this, confirm before changing.
				if mt.is_same_type(token) then return current_pos
				if token isa TokenEmStar and mt isa TokenStrongStar then return current_pos
				if token isa TokenEmUnderscore and mt isa TokenStrongUnderscore then return current_pos
			end
			mt.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new empty buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do buffer_stack.pop

	# Current output buffer: the last one pushed.
	#
	# At least one buffer must have been pushed.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Stacked locations.
	private var loc_stack = new List[MDLocation]

	# Push a new `MDLocation` on the stack.
	private fun push_loc(location: MDLocation) do
		loc_stack.add location
	end

	# Pop the last location and return it.
	private fun pop_loc: MDLocation do
		return loc_stack.pop
	end

	# Current location: the last one pushed.
	#
	# At least one location must have been pushed.
	private fun current_loc: MDLocation do
		assert not loc_stack.is_empty
		return loc_stack.last
	end

	# Append `e` to the current buffer.
	#
	# A `Text` is appended as is, anything else through `write_to_string`.
	fun add(e: Writable) do
		var out = current_buffer
		if e isa Text then
			out.append e
		else
			out.append e.write_to_string
		end
	end

	# Append the char `c` to the current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		addc '\n'
	end
end
633
# A link reference.
#
# A link specified once somewhere in the markdown document, to be reused
# as a shortcut:
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link to `link` with the given `title`.
	init with_title(link: String, title: nullable String) do
		init link
		self.title = title
	end
end
657
# A `Decorator` renders markdown constructs in a specific output format.
#
# It is driven by a `MarkdownEmitter`, whose default decorator is
# `HTMLDecorator`.
interface Decorator

	# Kind of emitter used for decoration.
	type EMITTER: MarkdownEmitter

	# Render a single plain char.
	#
	# By default `c` is appended unchanged to the current buffer of `v`.
	# Redefine this method to add special escaping for plain text.
	fun add_char(v: EMITTER, c: Char) do
		v.addc c
	end

	# Render a ruler block.
	fun add_ruler(v: EMITTER, block: BlockRuler) is abstract

	# Render a headline block with its corresponding level.
	fun add_headline(v: EMITTER, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: EMITTER, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: EMITTER, block: BlockListItem) is abstract

	# Render an emphasized text.
	fun add_em(v: EMITTER, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: EMITTER, text: Text) is abstract

	# Render a struck-through text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: EMITTER, text: Text) is abstract

	# Render a link to `link` labeled `name`, with an optional `comment`.
	fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image at `link` described by `name`, with an optional `comment`.
	fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract

	# Render a code span reading from a buffer.
	fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: EMITTER, value: Text) is abstract

	# Render code text from buffer and escape it.
	fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: EMITTER, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: EMITTER) is abstract

	# Generate a new valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
735
736 # Class representing a markdown headline.
737 class HeadLine
738 # Unique identifier of this headline.
739 var id: String
740
741 # Text of the headline.
742 var title: String
743
744 # Level of this headline.
745 #
746 # According to the markdown specification, level must be in `[1..6]`.
747 var level: Int
748 end
749
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	# The headline is also recorded in `headlines` under the id generated
	# by `strip_id` from its first line.
	redef fun add_headline(v, block) do
		var title = block.block.first_line.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		emit_wrapped(v, block, "<h{level} id=\"{id}\">", "</h{level}>\n")
	end

	redef fun add_paragraph(v, block) do emit_wrapped(v, block, "<p>", "</p>\n")

	# The `meta` of the block, if any, is used as the class of the `pre` tag.
	redef fun add_code(v, block) do
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do emit_wrapped(v, block, "<blockquote>\n", "</blockquote>\n")

	redef fun add_unorderedlist(v, block) do emit_wrapped(v, block, "<ul>\n", "</ul>\n")

	redef fun add_orderedlist(v, block) do emit_wrapped(v, block, "<ol>\n", "</ol>\n")

	redef fun add_listitem(v, block) do emit_wrapped(v, block, "<li>", "</li>\n")

	redef fun add_em(v, text) do add_wrapped(v, text, "<em>", "</em>")

	redef fun add_strong(v, text) do add_wrapped(v, text, "<strong>", "</strong>")

	redef fun add_strike(v, text) do add_wrapped(v, text, "<del>", "</del>")

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		add_title_attr(v, comment)
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		add_title_attr(v, comment)
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# `&`, `<`, `>`, `"` and `'` are replaced by their entity,
	# any other char is added as is.
	redef fun escape_char(v, c) do
		var entity: nullable String = null
		if c == '&' then
			entity = "&amp;"
		else if c == '<' then
			entity = "&lt;"
		else if c == '>' then
			entity = "&gt;"
		else if c == '"' then
			entity = "&quot;"
		else if c == '\'' then
			entity = "&apos;"
		end
		if entity == null then
			v.addc c
		else
			v.add entity
		end
	end

	# Only `&`, `<` and `>` are escaped in code; quotes are kept as is.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Spaces become `_`; letters, digits and `allowed_id_chars` are kept;
	# any other char is dropped.
	# If the id is already a key of `headlines`, `_1`, `_2`, ... is appended
	# until an unused key is found.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var base = b.to_s
		if not headlines.has_key(base) then return base
		# The id is already taken: look for the first free suffix.
		var n = 1
		var key = "{base}_{n}"
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Chars, other than letters and digits, that `strip_id` keeps in ids.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']

	# Emit the content of `block` between the `before` and `after` strings.
	private fun emit_wrapped(v: EMITTER, block: Block, before, after: String) do
		v.add before
		v.emit_in block
		v.add after
	end

	# Add `text`, as is, between the `before` and `after` strings.
	private fun add_wrapped(v: EMITTER, text: Text, before, after: String) do
		v.add before
		v.add text
		v.add after
	end

	# Add an escaped `title` attribute unless `comment` is null or empty.
	private fun add_title_attr(v: EMITTER, comment: nullable Text) do
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
	end
end
939
# Location in a Markdown input.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Format is `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var head = "{line_start},{column_start}"
		var tail = "{line_end},{column_end}"
		return "{head}--{tail}"
	end

	# Return a new location with the same four values as `self`.
	fun copy: MDLocation do
		var twin = new MDLocation(line_start, column_start, line_end, column_end)
		return twin
	end
end
962
# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks.
# Lines and sub-blocks are both kept as linked lists.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self`, creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new block, which
	# is appended to the sub-blocks of `self` and returned.
	# `self` keeps the lines after `line`, and its location is updated to
	# start at the first of them.
	#
	# `self` must have lines, and `line` is expected to be one of them.
	fun split(line: MDLine): MDBlock do
		# location for new block
		var new_loc = new MDLocation(
			first_line.location.line_start,
			first_line.location.column_start,
			line.location.line_end,
			line.location.column_end)
		# create block
		var block = new MDBlock(new_loc)
		block.first_line = first_line
		block.last_line = line
		# detach the moved lines from the remaining ones
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
			# update current block loc
			location.line_start = first_line.location.line_start
			location.column_start = first_line.location.column_start
		end
		# append the new block to the sub-blocks
		if first_block == null then
			first_block = block
			last_block = block
		else
			# Link both ways so that `prev` can be used to walk the siblings.
			block.prev = last_block
			last_block.next = block
			last_block = block
		end
		return block
	end

	# Add a `line` at the end of this block.
	#
	# The `next_empty` and `prev_empty` flags of the two lines that become
	# neighbours are updated.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
			last_line = line
		else
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
			last_line = line
		end
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbours; `first_line` and `last_line`
	# are updated if needed.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var was_empty = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			line = first_line
			was_empty = true
		end
		return was_empty
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var was_empty = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			line = last_line
			was_empty = true
		end
		return was_empty
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var leading_removed = remove_leading_empty_lines
		var trailing_removed = remove_trailing_empty_lines
		return leading_removed or trailing_removed
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# The value changed, so `leading` must be computed again.
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# The text of each line of `self` comes first, one per output line (empty
	# lines give an empty output line), then the text of each sub-block,
	# each followed by a `\n`.
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		var block = first_block
		while block != null do
			text.append block.text
			text.append "\n"
			block = block.next
		end
		return text.write_to_string
	end
end
1152
# Representation of a markdown block in the AST.
#
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is emitted; subclasses redefine this method
	# to call the service of the decorator that matches their kind.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are emitted only if no line remains.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines, without their leading and trailing parts, are first joined
	# with `\n` in a temporary buffer, then emitted as markdown text.
	# A line break is added after each line with a `trailing` of 2 or more.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var value = line.value
				# `substring` takes a length, not an end index: the leading
				# part must be subtracted too, or trailing chars leak into the
				# output of every line whose `leading` is not 0.
				var count = (value.length - line.leading - line.trailing).max(0)
				v.add value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is the current location of `v` while
	# the sub-block is emitted.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			v.push_loc(block.location)
			block.kind.emit(v)
			v.pop_loc
			block = block.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Each line is followed by a `\n`.
	# Outside of a `BlockFence`, lines indented by 4 spaces lose that
	# indentation and their trailing part; other lines are kept as is.
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				# The tested prefix must match the 4 chars removed below.
				if not infence and str.has_prefix("    ") then
					# `substring` takes a length, not an end index.
					var count = (str.length - 4 - line.trailing).max(0)
					text.append str.substring(4, count)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1222
# A block without any markdown specificities.
#
# It redefines nothing and so uses the default implementation of `Block`;
# this class is only used for typing purposes.
class BlockNone
	super Block
end
1230
# A markdown blockquote.
class BlockQuote
	super Block

	# Blockquotes are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Strip the `>` marker from every line of `block` that has one.
	#
	# The marker is looked for right after the leading spaces; one space
	# following the marker is removed with it. `leading` is then recomputed
	# for the shortened line.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var current = block.first_line
		while current != null do
			var text = current.value
			var marker = current.leading
			if not current.is_empty and text[marker] == '>' then
				var skip = marker + 1
				if skip < text.length and text[skip] == ' ' then skip += 1
				current.value = text.substring_from(skip)
				current.leading = current.process_leading
			end
			current = current.next
		end
	end
end
1256
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Number of char to skip at the beginning of the line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	# Code blocks are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit every line as code, starting at `line_start`.
	#
	# Empty lines only produce a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				var code = current.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			current = current.next
		end
	end
end
1282
# A markdown code-fence block.
#
# It uses the same implementation as `BlockCode`, except for `line_start`.
# The class is also used for typing purposes, see `Block::raw_content`.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1293
# A markdown headline.
class BlockHeadline
	super Block

	# Emit the headline through the decorator.
	#
	# The location pushed during the emission is a copy of the block location
	# shifted by `start`.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Offset of the headline text in its line, set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# Does nothing if `depth` is already set or if there is no content line.
	# Otherwise the leading `#` marks are counted to compute `depth` (6 at
	# most) and the marks and spaces surrounding the text are removed.
	# A line made only of marks and spaces is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null or line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			var nend = line.value.length - line.trailing - 1
			# Never walk back before `start`: a text made only of closing
			# marks (like `# #`) would otherwise give a negative length.
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			if nend < start then
				# Nothing is left once the closing marks are removed.
				line.is_empty = true
			else
				line.value = line.value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		self.start = start
		depth = level.min(6)
	end
end
1339
# A markdown list item block.
class BlockListItem
	super Block

	# List items are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1346
# A markdown list block.
#
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# Starting from the second line, a new item begins at each list line and
	# at each non-empty, non-indented line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		line = line.next
		while line != null do
			var starts_item = v.line_kind(line) isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# What remains of the block is the last item.
		var tail = block.split(block.last_line.as(not null))
		tail.kind = new BlockListItem(tail)
	end

	# Expand list items as paragraphs if needed.
	#
	# If one item of `block` contains a paragraph, every `BlockNone` found
	# in the items of `block` becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in the list items.
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not found do
					if child.kind isa BlockParagraph then found = true
					child = child.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote the plain blocks of each item.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1404
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	# Ordered lists are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1411
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	# Unordered lists are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1418
# A markdown paragraph block.
class BlockParagraph
	super Block

	# Paragraphs are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1425
# A markdown ruler.
class BlockRuler
	super Block

	# Rulers are rendered by the decorator.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1432
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Copy the lines as they are, one per output line.
	#
	# Empty lines only produce a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1446
# A markdown line.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value
	#
	# `leading` is computed first; `trailing` is only computed for lines
	# that contain something else than spaces.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbour lines, if any, are told that `self` is now empty.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.next_empty = true
		if next != null then next.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# If `value` holds nothing but spaces, the line is cleared (see `clear`)
	# and 0 is returned so the result stays consistent with the new `value`.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# Compare the fresh count: the `leading` attribute still holds the
		# value computed for a previous content of the line.
		if count == value.length then
			clear
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# The scan stops at the beginning of the line, so an empty `value` or
	# one made only of spaces is handled.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			# Any other character disqualifies the whole line.
			if c != ch then return 0
			count += 1
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then break
			count += 1
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment` when they succeed.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# The markup is read from `leading` and may span over the next lines.
	# On success `xml_end_line` is the line where the markup ends.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			# `hr` is accepted on its own, without looking for a closing tag.
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			# `tags` is the stack of the block tags still open.
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# End of line reached: a `/` before the last character
					# closes the tag on the top of the stack.
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# A closing tag must match the last opened one.
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the position of the `<` in `first_line`.
	# Return the position following the closing `-->` and set
	# `first_line.xml_end_line`, or return -1 if no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.value.length then
			# The `--` of `<!--` is looked for relatively to `start`, so that
			# comments preceded by leading spaces are recognized too.
			if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` expects a length, not an end index.
		var len = (value.length - leading - trailing).max(0)
		return value.substring(leading, len)
	end
end
1650
# The kind of a markdown line.
#
# Instances are returned by `MarkdownProcessor::line_kind` and know how to
# parse the lines of their kind.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1658
# An empty markdown line.
class LineEmpty
	super Line

	# Empty lines are skipped: just move to the next line.
	redef fun process(v) do
		var skipped = v.current_line
		v.current_line = skipped.next
	end
end
1667
# A non-specific markdown construction.
#
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Build a `BlockNone` or a `BlockParagraph` from the current line.
	#
	# The block runs until an empty line, the end of the current block, or
	# a line whose kind interrupts it.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# Look for the line that ends the block.
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			var in_list_context = v.in_list or v.ext_mode
			if in_list_context and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 then break
			if t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# Split the block and tell whether it may stay a plain block.
		var block: MDBlock
		var plain: Bool
		if line == null then
			# No stop line: take everything that is left.
			block = v.current_block.split(v.current_block.last_line.as(not null))
			plain = v.in_list and not was_empty
		else if line.is_empty then
			# Stopped on an empty line, which goes with the block.
			block = v.current_block.split(line)
			plain = false
		else
			# Stopped on another construct, which is left out of the block.
			block = v.current_block.split(line.prev.as(not null))
			plain = v.in_list and not was_empty
		end
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1717
# A line of markdown code.
class LineCode
	super Line

	# Build a `BlockCode` from the current line.
	#
	# The block extends over the empty lines and code lines that follow.
	redef fun process(v) do
		# Find the first line that is neither empty nor code.
		var cursor = v.current_line
		while cursor != null do
			if not cursor.is_empty and not (v.line_kind(cursor) isa LineCode) then break
			cursor = cursor.next
		end
		# Split just before that line, or at the end of the current block.
		var last = v.current_block.last_line
		if cursor != null then last = cursor.prev
		var block = v.current_block.split(last.as(not null))
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1740
# A line of raw XML.
class LineXML
	super Line

	# Build a `BlockXML` running from the current line to its `xml_end_line`.
	redef fun process(v) do
		var first = v.current_line
		# Cut what precedes the markup.
		var before = first.prev
		if before != null then v.current_block.split(before)
		var xml = v.current_block.split(first.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1755
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Build a `BlockQuote` from the current line and parse its content.
	#
	# The quote ends before the first non-empty, non-indented line that
	# follows an empty line and is not itself a blockquote line.
	redef fun process(v) do
		var cursor = v.current_line
		while cursor != null do
			var may_end = not cursor.is_empty and cursor.prev_empty and cursor.leading == 0
			if may_end and not (v.line_kind(cursor) isa LineBlockquote) then break
			cursor = cursor.next
		end
		# Split before the stop line, or at the end of the current block.
		var last = v.current_block.last_line
		if cursor != null then last = cursor.prev
		var quote = v.current_block.split(last.as(not null))
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		v.current_line = cursor
		# Parse the content of the quote.
		v.recurse(quote, false)
		v.current_line = v.current_block.first_line
	end
end
1785
# A markdown ruler line.
class LineHR
	super Line

	# Build a `BlockRuler` holding only the current line.
	redef fun process(v) do
		var ruler_line = v.current_line
		# Cut what precedes the ruler.
		if ruler_line.prev != null then
			v.current_block.split(ruler_line.prev.as(not null))
		end
		var ruler = v.current_block.split(ruler_line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1799
# A markdown fence code line.
class LineFence
	super Line

	# Build a `BlockFence` from the current line.
	#
	# The block runs up to the next fence line, included, or to the end of
	# the current block if the fence is never closed.
	redef fun process(v) do
		# Look for the closing fence after the opening one.
		var cursor = v.current_line.next
		while cursor != null and not (v.line_kind(cursor) isa LineFence) do
			cursor = cursor.next
		end
		# Step over the closing fence so it is part of the block.
		if cursor != null then cursor = cursor.next
		var block: MDBlock
		if cursor == null then
			block = v.current_block.split(v.current_block.last_line.as(not null))
		else
			block = v.current_block.split(cursor.prev.as(not null))
		end
		block.remove_surrounding_empty_lines
		# The meta is read on the opening fence before the line is cleared.
		var meta = block.first_line.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.clear
		var last = block.last_line
		if last != null and v.line_kind(last) isa LineFence then
			last.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = cursor
	end
end
1833
# A markdown headline.
class LineHeadline
	super Line

	# Build a `BlockHeadline` holding only the current line.
	#
	# The depth is computed by `BlockHeadline::transform_headline`.
	redef fun process(v) do
		var head = v.current_line
		# Cut what precedes the headline.
		var before = head.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(block)
		block.kind = kind
		kind.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1850
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Build a `BlockHeadline` of depth 1 from the current line.
	#
	# The line that follows the current one is cleared.
	redef fun process(v) do
		var head = v.current_line
		# Cut what precedes the headline.
		var before = head.prev
		if before != null then v.current_block.split(before)
		head.next.clear
		var block = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(block)
		kind.depth = 1
		kind.transform_headline(block)
		block.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1869
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Build a `BlockHeadline` of depth 2 from the current line.
	#
	# The line that follows the current one is cleared.
	redef fun process(v) do
		var head = v.current_line
		# Cut what precedes the headline.
		var before = head.prev
		if before != null then v.current_block.split(before)
		head.next.clear
		var block = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(block)
		kind.depth = 2
		kind.transform_headline(block)
		block.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1888
# A markdown list line.
#
# Mainly used to factorize code between ordered and unordered lists.
abstract class LineList
	super Line

	# Build a list block from the current line and parse its items.
	#
	# The list ends before the first non-empty, non-indented line that
	# follows an empty line and is not itself a list line.
	redef fun process(v) do
		var cursor = v.current_line
		while cursor != null do
			var t = v.line_kind(cursor)
			var may_end = not cursor.is_empty and cursor.prev_empty and cursor.leading == 0
			if may_end and not (t isa LineList) then break
			cursor = cursor.next
		end
		# Split before the stop line, or at the end of the current block.
		var last = v.current_block.last_line
		if cursor != null then last = cursor.prev
		var list = v.current_block.split(last.as(not null))
		var kind = block_kind(list)
		list.kind = kind
		# The flags are reset before and after the trimming, since the
		# first and last lines may change.
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# Parse the content of each item.
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = cursor
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1934
# An ordered list line.
class LineOList
	super LineList

	# Ordered list lines produce `BlockOrderedList` blocks.
	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var text = line.value
		var dot = text.index_of('.')
		return text.substring_from(dot + 2)
	end
end
1945
# An unordered list line.
class LineUList
	super LineList

	# Unordered list lines produce `BlockUnorderedList` blocks.
	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two characters after the leading spaces.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1956
# A token represents a character in the markdown input.
#
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: nullable MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default `char` is emitted as a character.
	fun emit(v: MarkdownEmitter) do
		v.decorator.add_char(v, char)
	end
end
1973
# A token without a specific meaning.
#
# It redefines nothing and is emitted by `Token::emit`.
class TokenNone
	super Token
end
1978
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# If `emit_text_until` finds no matching token, `char` is emitted alone.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
1995
# An emphasis star token.
#
# Emitted by `TokenEm::emit`.
class TokenEmStar
	super TokenEm
end
2000
# An emphasis underscore token.
#
# Emitted by `TokenEm::emit`.
class TokenEmUnderscore
	super TokenEm
end
2005
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# The token is two characters long: the text starts at `pos + 2` and the
	# current position ends on the second character of the closing token.
	# If no matching token is found, `char` is emitted alone.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end
2022
# A strong star token.
#
# Emitted by `TokenStrong::emit`.
class TokenStrongStar
	super TokenStrong
end
2027
# A strong underscore token.
#
# Emitted by `TokenStrong::emit`.
class TokenStrongUnderscore
	super TokenStrong
end
2032
# A code token.
#
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces surrounding the code are left out and nothing is emitted for a
	# span holding only spaces. If no matching token is found, `char` is
	# emitted alone.
	redef fun emit(v) do
		var from = pos + next_pos + 1
		var to = v.processor.find_token(v.current_text.as(not null), from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		while from < to and v.current_text[from] == ' ' do from += 1
		if from >= to then return
		# A non-space character exists at `from`, so this loop stops.
		while v.current_text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
	end

	# Number of characters of the token after the first one.
	private fun next_pos: Int is abstract
end
2055
# A span code token.
class TokenCodeSingle
	super TokenCode

	# Nothing follows the first character of the token.
	redef fun next_pos do
		return 0
	end
end
2062
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# One more character follows the first one of the token.
	redef fun next_pos do
		return 1
	end
end
2069
# A link or image token.
#
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if `check_link` accepts it, `char` alone otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Read the construct starting at `start` in `v.current_text` and fill
	# `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Three forms are read after the name: an inline `(address "title")`,
	# a reference `[id]`, or nothing, the name being then used as reference.
	#
	# Return the position of the last character of the construct or -1 if
	# the link is not valid.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		# Skip the opening mark: one character for links, two otherwise.
		var pos
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# End of text reached: the name must be a known reference.
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				if pos > start and md[pos] == '"' then
					# Optional title between double quotes.
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form; an empty id means that the name is the id.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Name followed by other text: the name must be a known reference.
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				# As at the end of the text, the reference may be an
				# abbreviation.
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2189
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation if `is_abbrev` is set and a title exists,
	# a link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if not as_abbr then
			v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
			return
		end
		v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
	end
end
2202
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Images are rendered by the decorator.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
2211
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup read by `check_html`, or `char` escaped if the
	# markup is not accepted.
	redef fun emit(v) do
		var html = new FlatBuffer
		var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add html
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An automatic link is emitted directly through the decorator.
	# Otherwise the markup is read in safe mode into `out`.
	# Return the position where the construct ends, or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var url = new FlatBuffer
		var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and url.is_link_prefix then
			pos = md.read_until(url, pos, '>')
			if pos != -1 then
				var link = url.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2249
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity read by `check_entity`, or `char` escaped if the
	# entity is not accepted.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Read `md` from `start` up to the next `;` into `out`. Three forms are
	# accepted after the first character: `#x` or `#X` followed by
	# hexadecimal digits, `#` followed by decimal digits, or letters and
	# digits only.
	#
	# Return the position of the `;`, or -1 if the entity is not valid.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# At least one digit is needed after the `x`.
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hexa = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hexa then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
		end
		# `read_until` stops before the `;`, add it back.
		out.add ';'
		return pos
	end
end
2303
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape character and emit the character that follows it.
	redef fun emit(v) do
		var escaped_at = v.current_pos + 1
		v.current_pos = escaped_at
		v.addc v.current_text[escaped_at]
	end
end
2313
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as a strike.
	#
	# The token is two characters long: the text starts at `pos + 2` and the
	# current position ends on the second character of the closing token.
	# If no matching token is found, `char` is emitted alone.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = stop + 1
	end
end
2332
2333 redef class Text
2334
# Get the position of the next character that is neither a space nor a newline.
#
# The search begins at `start`. Return -1 if the end of `self` is reached.
private fun skip_spaces(start: Int): Int do
	var idx = start
	while idx > -1 and idx < length do
		var c = self[idx]
		if c != ' ' and c != '\n' then break
		idx += 1
	end
	if idx >= length then return -1
	return idx
end
2344
# Read `self` until one of the `nend` characters and append it to the `out` buffer.
#
# A backslash followed by a character is handled by `escape`.
# Return the position of the end character found, or -1 if the end of
# `self` is reached first.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			pos = escape(out, self[pos + 1], pos)
		else
			if nend.has(c) then break
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2362
# Read `self` as raw text until one of the `nend` characters and append it to the `out` buffer.
#
# No escape is made.
# Return the position of the end character found, or -1 if the end of
# `self` is reached first.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2383
# Read `self` as XML until one of the `to` characters and append it to the `out` buffer.
#
# Characters of `to` found inside a single or double quoted string do not
# end the reading. Inside a string, a backslash and the character that
# follows it are copied as they are.
#
# Return the position of the end character found, or -1 if the end of
# `self` is reached first.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# Copy the escaped character itself, not the backslash
					# a second time.
					out.add self[pos]
					pos += 1
				end
				continue
			end
			if c == str_char then
				# End of the string.
				in_str = false
				out.add c
				pos += 1
				continue
			end
		end
		if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		end
		if not in_str and to.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2429
# Read `self` as XML and append it to the `out` buffer.
#
# `start` is the position of the character that opens the tag.
# In safe mode the tag name is checked: `&lt;` is written and -1 is
# returned for an invalid name, and the brackets are written as `&lt;`
# and `&gt;` for an unsafe one.
#
# Return the position of the closing `>`, `start + 1` for a tag beginning
# with `!` (only `<!` is then written), or -1 if the tag cannot be read.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	# The name starts after the `/` of a closing tag.
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	var is_valid = true
	if safe_mode then
		var name = new FlatBuffer
		pos = read_xml_until(name, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = name.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append name
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# Read what follows the name.
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = self.read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2488
# Read a markdown link address from `start` and append it to the `out` buffer.
#
# Backslash sequences are delegated to `escape`.
# Parentheses are counted, starting at one open parenthesis: the reading
# stops on the `)` that brings the count back to zero, or on a space met
# while the count is still one. The stopping char is not copied.
#
# Returns the position of the stopping char, or -1 if the end of `self`
# is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var i = start
	var depth = 1
	while i < length do
		var current = self[i]
		if current == '\\' and i + 1 < length then
			i = escape(out, self[i + 1], i)
		else
			if current == '(' then
				depth += 1
			else if current == ')' then
				depth -= 1
				if depth == 0 then
					return i
				end
			else if current == ' ' and depth == 1 then
				return i
			end
			out.add current
		end
		i += 1
	end
	if i == length then return -1
	return i
end
2515
# Read a markdown link text from `start` and append it to the `out` buffer.
#
# Square brackets are counted, starting at one open bracket: nested
# `[` and `]` are copied, and the reading stops on the `]` that brings
# the count back to zero. This last `]` is not copied.
#
# Returns the position of the closing `]`, or -1 if the end of `self`
# is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var i = start
	var depth = 1
	while i < length do
		var current = self[i]
		if current == '[' then
			depth += 1
		else if current == ']' then
			depth -= 1
			if depth == 0 then
				return i
			end
		end
		out.add current
		i += 1
	end
	if i == length then return -1
	return i
end
2542
# Extract the lowercased XML tag name from a XML tag.
#
# The first char is skipped, as well as a `/` found at index 1.
# The name is made of the following letters and digits; the last char
# of `self` is never part of it.
private fun xml_tag: String do
	var first = 1
	if length > 1 and self[1] == '/' then first = 2
	var name = new FlatBuffer
	for i in [first .. length - 1[ do
		var ch = self[i]
		if not (ch.is_digit or ch.is_letter) then break
		name.add ch
	end
	return name.write_to_string.to_lower
end
2554
# Is `self` usable as a HTML tag name?
#
# True when `self` is not empty and each of its chars is `is_alpha`.
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	var i = 0
	while i < length do
		if not self[i].is_alpha then return false
		i += 1
	end
	return true
end
2562
# Handle the markdown backslash escape of the char `c`.
#
# If `c` is one of the escapable markdown chars, `c` alone is appended to
# `out` and `pos + 1` is returned. Otherwise a backslash is appended
# and `pos` is returned unchanged.
#
# In `read_md_link`, `pos` is the position of the backslash and `c` the
# char that follows it.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.', '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
	if escapable.has(c) then
		out.add c
		return pos + 1
	end
	out.add '\\'
	return pos
end
2575
# Extract string found at end of fence opening.
#
# Leading spaces, backticks and tildes are skipped; what follows is
# returned trimmed. Returns `null` if `self` contains nothing else.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var c = chars[i]
		if not (c == ' ' or c == '`' or c == '~') then return substring_from(i).trim
		i += 1
	end
	return null
end
2586
# Is `self` the name of an unsafe HTML element (see `html_unsafe_tags`)?
private fun is_html_unsafe: Bool do
	var name = write_to_string
	return html_unsafe_tags.has(name)
end
2589
# Is `self` the name of a HTML block element (see `html_block_tags`)?
private fun is_html_block: Bool do
	var name = write_to_string
	return html_block_tags.has(name)
end
2592
# Is `self` a known link prefix (see `html_link_prefixes`)?
private fun is_link_prefix: Bool do
	var prefix = write_to_string
	return html_link_prefixes.has(prefix)
end
2595
# Names of the HTML elements considered unsafe.
#
# The array is built once and shared between calls.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
2597
# Names of the HTML block elements.
#
# The array is built once and shared between calls.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
2599
# Link prefixes recognized by `is_link_prefix`.
#
# The array is built once and shared between calls.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2601 end
2602
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	# A new `MarkdownProcessor` is created for each call.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		var md_proc = new MarkdownProcessor
		var html = md_proc.process(self)
		return html
	end
end