e842298499aeabf53b50140d14b682c5fe273488
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # `MarkdownEmitter` used for output.
34 var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # ~~~md
45 # This is a paragraph
46 # * and this is not a list
47 # ~~~
48 #
49 # Will produce:
50 #
51 # ~~~html
52 # <p>This is a paragraph
53 # * and this is not a list</p>
54 # ~~~
55 #
56 # When using extended mode this changes to:
57 #
58 # ~~~html
59 # <p>This is a paragraph</p>
60 # <ul>
61 # <li>and this is not a list</li>
62 # </ul>
63 # ~~~
64 #
65 # * Fenced code blocks
66 #
67 # If you don't want to indent all your code with 4 spaces,
68 # you can wrap your code in ``` ``` ``` or `~~~`.
69 #
70 # Here's an example:
71 #
72 # ~~~md
73 # fun test do
74 # print "Hello World!"
75 # end
76 # ~~~
77 #
78 # * Code blocks meta
79 #
80 # If you want to use syntax highlighting tools, most of them need to know what kind
81 # of language they are highlighting.
82 # You can add an optional language identifier after the fence declaration to output
83 # it in the HTML render.
84 #
85 # ```nit
86 # import markdown
87 #
88 # print "# Hello World!".md_to_html
89 # ```
90 #
91 # Becomes
92 #
93 # ~~~html
94 # <pre class="nit"><code>import markdown
95 #
96 # print "Hello World!".md_to_html
97 # </code></pre>
98 # ~~~
99 #
100 # * Underscores (Emphasis)
101 #
102 # Underscores in the middle of a word like:
103 #
104 # ~~~md
105 # Con_cat_this
106 # ~~~
107 #
108 # normally produces this:
109 #
110 # ~~~html
111 # <p>Con<em>cat</em>this</p>
112 # ~~~
113 #
114 # With extended mode they don't result in emphasis.
115 #
116 # ~~~html
117 # <p>Con_cat_this</p>
118 # ~~~
119 #
120 # * Strikethrough
121 #
122 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
123 # a strikethrough span is marked with `~~`.
124 #
125 # ~~~md
126 # ~~Mistaken text.~~
127 # ~~~
128 #
129 # becomes
130 #
131 # ~~~html
132 # <del>Mistaken text.</del>
133 # ~~~
134 var ext_mode = true
135
# Disable attaching a `MDLocation` to tokens.
#
# Locations are useful for some tools, but computing them may
# cause a significant time and space overhead.
# When `true`, `token_at` creates its tokens with a `null` location.
#
# Default = `false`
var no_location = false is writable

# Attach a default `MarkdownEmitter` working for this processor.
init do
	self.emitter = new MarkdownEmitter(self)
end
145
# Process the markdown `input` string and return the processed output.
#
# The link refs and the current line/block left by a previous call
# are reset before parsing.
fun process(input: String): Writable do
	# reset the parsing state
	current_block = null
	current_line = null
	last_link_ref = null
	link_refs.clear

	# split the input into lines, then the lines into blocks
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)

	# render the blocks
	var output = emitter.emit(root.kind)
	return output
end
160
# Split the `input` string into `MDLine`s gathered in a new parent `MDBlock`.
#
# While reading a line, each tab is replaced by spaces up to the next
# multiple of 4 chars and `\r` chars are dropped.
# Lines accepted by `check_link_ref` are not added to the block.
private fun read_lines(input: String): MDBlock do
	var block = new MDBlock(new MDLocation(1, 1, 1, 1))
	var buf = new FlatBuffer
	var len = input.length
	var i = 0

	var line_no = 0
	var col = 0

	while i < len do
		buf.clear
		# number of chars stored in `buf`, used to align tabs
		var width = 0
		var eol = false
		while not eol and i < len do
			col += 1
			var c = input[i]
			if c == '\n' then
				eol = true
			else if c == '\t' then
				# pad with spaces up to the next tab stop
				var pad = 4 - (width & 3)
				for k in [0..pad[ do buf.add ' '
				width += pad
			else if c != '\r' then
				width += 1
				buf.add c
			end
			i += 1
		end
		line_no += 1

		var loc = new MDLocation(line_no, 1, line_no, col)
		var line = new MDLine(loc, buf.write_to_string)
		# link refs are recorded by `check_link_ref` and skipped here
		if not check_link_ref(line) then block.add_line line
		col = 0
	end
	return block
end
203
# Check if `line` is a block link definition.
#
# Return `true` if `line` holds a valid link ref, which is then saved into
# `link_refs`, or if `line` is the quoted title of the title-less link ref
# defined on the line right before it.
# `read_lines` drops the lines for which `true` is returned.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		# read the `[id]` part
		pos = md.read_until(id, line.leading + 1, ']')
		if not id.is_empty and pos >= 0 and pos + 2 < md.length and md[pos + 1] == ':' then
			# read the link, either `<link>` or a bare `link`
			pos = md.skip_spaces(pos + 2)
			if pos >= 0 and md[pos] == '<' then
				pos = md.read_until(link, pos + 1, '>')
				pos += 1
			else if pos >= 0 then
				pos = md.read_until(link, pos, ' ', '\n')
			end
			if not link.is_empty then
				pos = md.skip_spaces(pos)
				if pos > 0 and pos < md.length then
					# something follows the link: it must be a title
					var c = md[pos]
					if c == '\"' or c == '\'' or c == '(' then
						pos += 1
						if c == '(' then
							pos = md.read_until(comment, pos, ')')
						else
							pos = md.read_until(comment, pos, c)
						end
						if pos > 0 then is_link_ref = true
					end
				else
					is_link_ref = true
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without title can be completed by the next line.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a definition: the line may be the title of the previous link ref.
	comment = new FlatBuffer
	var previous = self.last_link_ref
	if not line.is_empty and previous != null then
		pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if not comment.is_empty then previous.title = comment.write_to_string
	end
	# A title must directly follow its definition: whatever this line is,
	# the following ones cannot complete the pending link ref anymore.
	# Without this reset, any later line starting with a quote or a
	# parenthesis would be swallowed as the title of an old link ref.
	last_link_ref = null
	return not comment.is_empty
end
274
# Known link refs.
#
# This map is needed during output to expand links.
# `add_link_ref` stores the refs under their lowercased key.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

# Last encountered link ref (for multiline definitions).
#
# Markdown allows link refs to be defined over two lines:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
#     "Optional Title Here"
# ~~~
#
private var last_link_ref: nullable LinkRef = null

# Register the link `ref` under the lowercased `key`.
fun add_link_ref(key: String, ref: LinkRef) do
	var id = key.to_lower
	link_refs[id] = ref
end
292
# Recursively split a `root` block.
#
# The block is split according to the kind of lines it contains
# (see `line_kind`).
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# The previous `in_list` mode and `current_block` are restored before
# returning; `current_line` is not.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip the leading empty lines
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: there is nothing to split, but the mode
			# of the caller must still be restored (`current_block` was
			# not changed yet).
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# NOTE(review): the loop relies on each `Line::process` moving
	# `current_line` forward (not visible here) to terminate.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end

# Currently processed line.
# Used when visiting blocks with `recurse`.
var current_line: nullable MDLine = null is writable

# Currently processed block.
# Used when visiting blocks with `recurse`.
var current_block: nullable MDBlock = null is writable

# Is the current recursion in list mode?
# Used when visiting blocks with `recurse`.
private var in_list = false
330
# The kind of the line `md`.
#
# The kind is chosen from the first char found after the leading spaces
# and, for `=`/`-` underlined headlines, from the content of the next line.
# Fences are only recognized in `ext_mode`.
fun line_kind(md: MDLine): Line do
	var value = md.value
	var leading = md.leading
	var trailing = md.trailing
	if md.is_empty then return new LineEmpty
	if leading > 3 then return new LineCode

	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Length left once `leading` and `trailing` are removed
	var content_len = value.length - leading - trailing

	if ext_mode and content_len > 2 and (first == '`' or first == '~') then
		if md.count_chars_start(first) >= 3 then return new LineFence
	end

	if content_len > 2 and (first == '*' or first == '-' or first == '_') then
		if md.count_chars(first) >= 3 then return new LineHR
	end

	# Length left once only `leading` is removed
	var rest_len = value.length - leading

	if rest_len >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	if rest_len >= 3 and first.is_digit then
		# skip the digits, then expect `. `
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# A following line of `=` or `-` turns this line into a headline
	var next = md.next
	if next != null and not next.is_empty then
		if next.count_chars('=') > 0 then return new LineHeadline1
		if next.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
386
# Get the token kind at `pos` in `text`.
#
# The kind depends on the char at `pos`, on the char before it and on the
# two chars after it; the chars out of `text` are considered as spaces.
# Unless `no_location` is set, the token gets a one-char location
# computed from `current_loc`.
fun token_at(text: Text, pos: Int): Token do
	var len = text.length

	# char before, char at `pos` and the two chars after
	var c0 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	var c1 = ' '
	if pos + 1 < len then c1 = text[pos + 1]
	var c2 = ' '
	if pos + 2 < len then c2 = text[pos + 2]

	var loc: nullable MDLocation = null
	if not no_location then
		var base = current_loc
		var col = base.column_start + pos
		loc = new MDLocation(base.line_start, col, base.line_start, col)
	end

	if c == '*' then
		if c1 == '*' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmStar(loc, pos, c)
			return new TokenStrongStar(loc, pos, c)
		end
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmStar(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmUnderscore(loc, pos, c)
			return new TokenStrongUnderscore(loc, pos, c)
		end
		if ext_mode then
			# in extended mode, an underscore inside a word is a plain char
			var alnum_before = c0.is_letter or c0.is_digit
			var alnum_after = c1.is_letter or c1.is_digit
			if alnum_before and c0 != '_' and alnum_after then
				return new TokenNone(loc, pos, c)
			end
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmUnderscore(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)
	if c == ']' then return new TokenNone(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# a backslash is an escape only before one of these chars
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	if ext_mode and c == '~' and c1 == '~' then
		return new TokenStrike(loc, pos, c)
	end
	return new TokenNone(loc, pos, c)
end
488
# Find the position of a token with the same type as `token` in `text`.
#
# The search starts at `start`.
# Return `-1` if no such token is found.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
500
# Location used for the next parsed token.
#
# This location is the one of the emitter, which adjusts it
# when `\n` chars are found in the input.
private fun current_loc: MDLocation do
	return emitter.current_loc
end
506 end
507
# Emit output corresponding to blocks content.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Kind of processor used for parsing.
	type PROCESSOR: MarkdownProcessor

	# Processor containing link refs.
	var processor: PROCESSOR

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
		init(processor)
		self.decorator = decorator
	end

	# Output `block` using `decorator` and return the produced text.
	#
	# The output is written in a fresh buffer, pushed for the time of the call.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit markdown text.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of `text` if `token` is null.
	#
	# Return the position of the matching token, or `-1` if the end of
	# `text` was reached.
	# The previous `current_text` and `current_pos` are restored only
	# in the latter case.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			if text[current_pos] == '\n' then
				# new input line: update the location used for tokens
				current_loc.line_start += 1
				current_loc.column_start = -current_pos
			end
			var mt = processor.token_at(text, current_pos)
			var found = false
			if token != null and not token isa TokenNone then
				# a strong token also closes the matching emphasis
				found = mt.is_same_type(token) or
					(token isa TokenEmStar and mt isa TokenStrongStar) or
					(token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)
			end
			if found then return current_pos
			mt.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	#
	# At least one buffer must have been pushed.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Stacked locations.
	private var loc_stack = new List[MDLocation]

	# Push a new `MDLocation` on the stack.
	private fun push_loc(location: MDLocation) do
		loc_stack.add location
	end

	# Pop the last location.
	private fun pop_loc: MDLocation do
		return loc_stack.pop
	end

	# Current location.
	#
	# At least one location must have been pushed.
	private fun current_loc: MDLocation do
		assert not loc_stack.is_empty
		return loc_stack.last
	end

	# Append `e` to the current buffer.
	fun add(e: Writable) do
		if e isa Text then
			current_buffer.append e
			return
		end
		current_buffer.append e.write_to_string
	end

	# Append `c` to the current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		addc '\n'
	end
end
636
# A link reference.
#
# Links that are specified somewhere in the markdown document
# to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link to `link` with the given `title`.
	init with_title(link: String, title: nullable String) do
		init(link)
		self.title = title
	end
end
660
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
interface Decorator

	# Kind of emitter used for decoration.
	type EMITTER: MarkdownEmitter

	# Render a single plain char.
	#
	# By default, `c` is appended as is to `v`.
	# Redefine this method to add special escaping for plain text.
	fun add_char(v: EMITTER, c: Char) do
		v.addc c
	end

	# Render a ruler block.
	fun add_ruler(v: EMITTER, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: EMITTER, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: EMITTER, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: EMITTER, block: BlockListItem) is abstract

	# Render an emphasized text.
	fun add_em(v: EMITTER, text: Text) is abstract

	# Render a strong text.
	fun add_strong(v: EMITTER, text: Text) is abstract

	# Render a strike text.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: EMITTER, text: Text) is abstract

	# Render a link to `link`, labelled `name`, with an optional `comment`.
	fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image at `link`, named `name`, with an optional `comment`.
	fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract

	# Render a code span reading from a buffer.
	fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: EMITTER, value: Text) is abstract

	# Render code text from buffer and escape it.
	fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: EMITTER, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: EMITTER) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Found headlines during the processing labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
738
# A headline found in a markdown document.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
752
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	# The headline is also recorded in `headlines` under its generated id.
	# Nothing is done if `block` contains no line.
	redef fun add_headline(v, block) do
		var first = block.block.first_line
		if first == null then return
		# save headline
		var title = first.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		# output it
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# The meta of the block, if any, is used as the class of the `pre` tag.
	redef fun add_code(v, block) do
		var meta = block.meta
		if meta == null then
			v.add "<pre><code>"
		else
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	# Each char of `text` is output through `escape_char`.
	redef fun append_value(v, text) do
		for c in text do
			escape_char(v, c)
		end
	end

	# Replace `&`, `<`, `>`, `"` and `'` by HTML entities,
	# the other chars are output as is.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
			return
		end
		if c == '<' then
			v.add "&lt;"
			return
		end
		if c == '>' then
			v.add "&gt;"
			return
		end
		if c == '"' then
			v.add "&quot;"
			return
		end
		if c == '\'' then
			v.add "&apos;"
			return
		end
		v.addc c
	end

	# Output the chars of `buffer` from `from` (included) to `to` (excluded).
	#
	# Only `&`, `<` and `>` are replaced by HTML entities.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Spaces are replaced by `_`; then only letters, digits and
	# `allowed_id_chars` are kept.
	# A numeric suffix (`_1`, `_2`...) is added if the id is already
	# a key of `headlines`.
	redef fun strip_id(txt) do
		# strip id
		var buf = new FlatBuffer
		for c in txt do
			if c == ' ' then
				buf.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				buf.add c
			end
		end
		var base = buf.to_s
		var key = base
		# check for multiple id definitions
		var n = 0
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Chars kept by `strip_id` in addition to letters and digits.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
944
# Location in a Markdown input.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Format is `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var start = "{line_start},{column_start}"
		var stop = "{line_end},{column_end}"
		return "{start}--{stop}"
	end

	# Return a copy of `self`.
	fun copy: MDLocation do
		var res = new MDLocation(line_start, column_start, line_end, column_end)
		return res
	end
end
967
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are kept in doubly linked lists.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var sub = first_block
		while sub != null do
			n += 1
			sub = sub.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var n = 0
		var line = first_line
		while line != null do
			n += 1
			line = line.next
		end
		return n
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved into the new block,
	# which is appended to the sub-blocks of `self` and returned.
	# `self` must contain at least one line.
	fun split(line: MDLine): MDBlock do
		var head = first_line.as(not null)
		# location for new block
		var new_loc = new MDLocation(
			head.location.line_start,
			head.location.column_start,
			line.location.line_end,
			line.location.column_end)
		# create block
		var block = new MDBlock(new_loc)
		block.first_line = head
		block.last_line = line
		# detach the moved lines from the remaining ones
		var rest = line.next
		first_line = rest
		line.next = null
		if rest == null then
			last_line = null
		else
			rest.prev = null
			# update current block loc
			location.line_start = rest.location.line_start
			location.column_start = rest.location.column_start
		end
		# append to the sub-blocks
		if first_block == null then
			first_block = block
		else
			var tail = last_block.as(not null)
			tail.next = block
			# keep the list doubly linked
			block.prev = tail
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		loop
			var line = first_line
			if line == null then break
			if not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		loop
			var line = last_line
			if line == null then break
			if not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var lead = remove_leading_empty_lines
		var trail = remove_trailing_empty_lines
		return lead or trail
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					var skip = line.leading.min(4)
					line.value = line.value.substring_from(skip)
				end
				# the value changed, so did its leading spaces
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line, even an empty one, ends with a `\n`.
	fun text: String do
		var buf = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then buf.append line.text
			buf.append "\n"
			line = line.next
		end
		return buf.write_to_string
	end
end
1151
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default, only the content of `self` is output.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are only emitted when `block` has no line.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are gathered without their leading and trailing spaces,
	# then the result is emitted as markdown text.
	# A line break is added after the lines ending with 2 spaces or more.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var value = line.value
				# `substring` expects a number of chars, not an end index:
				# the leading spaces must be deduced too or the trailing
				# spaces are kept when the line is indented.
				var count = value.length - line.leading - line.trailing
				v.add value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then v.addn
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	#
	# The location of each sub-block is pushed on `v` while it is emitted.
	fun emit_blocks(v: MarkdownEmitter) do
		var sub = self.block.first_block
		while sub != null do
			v.push_loc(sub.location)
			sub.kind.emit(v)
			v.pop_loc
			sub = sub.next
		end
	end

	# The raw content of the block as a multi-line string.
	#
	# Except in a `BlockFence`, the 4 spaces indentation of the lines
	# and their trailing spaces are removed.
	fun raw_content: String do
		var infence = self isa BlockFence
		var text = new FlatBuffer
		var line = self.block.first_line
		while line != null do
			if not line.is_empty then
				var str = line.value
				# Only strip the indentation when the 4 removed chars
				# really are the 4 spaces of a code block.
				if not infence and str.has_prefix("    ") then
					# `substring` expects a number of chars, not an end index
					text.append str.substring(4, str.length - 4 - line.trailing)
				else
					text.append str
				end
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1221
# A block without any markdown specificities.
#
# It inherits the default `Block` behaviour unchanged,
# this class is only used for typing purposes
# (e.g. `BlockList::expand_paragraphs` looks for `BlockNone` kinds).
class BlockNone
	super Block
end
1229
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Strip the `>` marker (and one following space, if any)
	# from every non-empty line of `block`.
	#
	# Lines that do not start with `>` after their leading spaces are left untouched.
	# `leading` is recomputed for each modified line.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				var skip = line.leading + 1
				if skip < line.value.length and line.value[skip] == ' ' then skip += 1
				line.value = line.value.substring_from(skip)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1255
# A markdown code block.
class BlockCode
	super Block

	# Any string found after fence token.
	var meta: nullable Text

	# Index of the first char of each line that belongs to the code.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each non-empty line through `append_code`, from `line_start` to its end.
	# Every line, empty or not, is followed by a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				var code = current.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			current = current.next
		end
	end
end
1281
# A markdown code-fence block.
#
# It uses the same implementation as `BlockCode` except for `line_start`,
# this class is mainly used for typing purposes.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces (no indent has to be skipped).
	redef var line_start = 0
end
1292
# A markdown headline.
class BlockHeadline
	super Block

	# Emit the headline with a location shifted by `start` columns,
	# so the location skips the headline marks.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the first char of the headline text in the original line.
	#
	# Set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line contained in `block`.
	#
	# Does nothing if `depth` is already set (> 0).
	# Otherwise the opening `#` marks are counted to compute `depth` (at most 6),
	# then the opening marks, the closing `#` marks and the surrounding spaces
	# are removed from the line value.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null then return
		if line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			var nend = line.value.length - line.trailing - 1
			# Never scan back before `start`: a headline such as `# #` only
			# contains marks and would otherwise yield a negative length.
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			if nend < start then
				# Nothing left but marks: same handling as an empty headline.
				line.is_empty = true
			else
				line.value = line.value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		self.start = start
		depth = level.min(6)
	end
end
1339
# A single item of a markdown list.
class BlockListItem
	super Block

	# Delegate the rendering to `add_listitem` of the decorator.
	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1346
# A markdown list block.
#
# Common behaviour of ordered and unordered lists.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# A new item starts at each list line, and at each non-empty,
	# non-indented line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var first = block.first_line
		if first == null then return
		var line = first.next
		while line != null do
			var starts_item = v.line_kind(line) isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				# Close the previous item just before this line.
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# Remaining lines form the last item.
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If any item of `block` contains a `BlockParagraph`,
	# every `BlockNone` found in the items becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in any list item.
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not found do
					found = child.kind isa BlockParagraph
					child = child.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote plain blocks to paragraphs.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
					sub = sub.next
				end
			end
			item = item.next
		end
	end
end
1405
# A markdown ordered (numbered) list.
class BlockOrderedList
	super BlockList

	# Delegate the rendering to `add_orderedlist` of the decorator.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1412
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	# Delegate the rendering to `add_unorderedlist` of the decorator.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1419
# A markdown paragraph block.
class BlockParagraph
	super Block

	# Delegate the rendering to `add_paragraph` of the decorator.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1426
# A markdown ruler.
class BlockRuler
	super Block

	# Delegate the rendering to `add_ruler` of the decorator.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1433
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Output the lines verbatim: the value of each non-empty line is added
	# as is, and every line is followed by a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1447
# A markdown line.
#
# Lines are chained together with `prev` and `next`.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# Computes `leading`, `trailing` and `is_empty`.
	# Lines made only of spaces are cleared (see `process_leading`).
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# Neighbour lines are notified through `next_empty` and `prev_empty`.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.as(not null).next_empty = true
		if next != null then next.as(not null).prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# If `value` is empty or only contains spaces, the line is cleared
	# (see `clear`) and 0 is returned.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# Compare the freshly computed `count`, not the stale `leading` attribute:
		# `leading` describes the previous `value` and may match by accident.
		if count == value.length then
			clear
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# The bound check avoids reading before the string start on blank values.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment`.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contains valid XML markup?
	#
	# On success `xml_end_line` is set to the line closing the XML block,
	# that may be one of the following lines.
	private fun check_html: Bool do
		# Stack of the HTML block tags currently opened.
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			# `hr` has no closing tag: the block ends on this line.
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				# Look for the next tag opening on the current line.
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# End of line: a `/` found two chars before the end
					# closes the last opened tag.
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								# A closing tag must match the last opened one.
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the index of the `<` opening the comment in `first_line`.
	# Return the position following the closing `-->` (in the line stored in
	# `first_line.xml_end_line`) or -1 if no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.as(not null).value.length then
			# The `--` marks are looked for relatively to `start`,
			# so comments preceded by leading spaces are recognized too.
			if line.as(not null).value[start + 2] == '-' and line.as(not null).value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` takes a start and a *count* (not an end index).
		var count = (value.length - leading - trailing).max(0)
		return value.substring(leading, count)
	end
end
1651
# The kind of a markdown line.
#
# Each implementation knows how to build the block(s) that start
# at the current line of a `MarkdownProcessor`.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1659
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the next one.
	redef fun process(v) do
		var line = v.current_line.as(not null)
		v.current_line = line.next
	end
end
1668
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the lines up to the next empty line or block-level construct
	# in a new block, typed as `BlockNone` or `BlockParagraph`.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.as(not null).prev_empty
		# Look for the first line that does not belong to the block.
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			# `LineHeadline1` and `LineHeadline2` are subclasses of `LineHeadline`.
			if t isa LineHeadline or t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# Split the block and select its kind.
		var current_block = v.current_block.as(not null)
		var block: MDBlock
		var plain: Bool
		if line != null and not line.is_empty then
			# Stopped on another construct: it stays in the current block.
			block = current_block.split(line.prev.as(not null))
			plain = v.in_list and not was_empty
		else
			# Stopped on an empty line or at the end of the current block.
			if line != null then
				block = current_block.split(line)
			else
				block = current_block.split(current_block.last_line.as(not null))
			end
			plain = v.in_list and (line == null or not line.is_empty) and not was_empty
		end
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1719
# A line of markdown code.
class LineCode
	super Line

	# Gather consecutive code lines (and empty lines between them)
	# in a new `BlockCode`.
	redef fun process(v) do
		# Find the first line that is neither empty nor a code line.
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty and not (v.line_kind(stop) isa LineCode) then break
			stop = stop.next
		end
		# Split just before that line, or take everything left.
		var current_block = v.current_block.as(not null)
		var split_at: MDLine
		if stop != null then
			split_at = stop.prev.as(not null)
		else
			split_at = current_block.last_line.as(not null)
		end
		var block = current_block.split(split_at)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = current_block.first_line
	end
end
1743
# A line of raw XML.
class LineXML
	super Line

	# Build a `BlockXML` from the current line up to its `xml_end_line`.
	redef fun process(v) do
		var first = v.current_line
		if first == null then return
		var current_block = v.current_block.as(not null)
		# Detach the lines found before the XML, if any.
		var before = first.prev
		if before != null then
			current_block.split(before)
		end
		var xml = current_block.split(first.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1760
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Build a `BlockQuote` from the quoted lines and process its content recursively.
	redef fun process(v) do
		var line = v.current_line
		var current_block = v.current_block.as(not null)
		# The quote ends on a non-empty, non-indented line following an empty
		# line, unless that line is itself a blockquote line.
		while line != null do
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				if not (v.line_kind(line) isa LineBlockquote) then break
			end
			line = line.next
		end
		# Split the quote from the current block.
		var split_at: MDLine
		if line != null then
			split_at = line.prev.as(not null)
		else
			split_at = current_block.last_line.as(not null)
		end
		var block = current_block.split(split_at)
		var quote = new BlockQuote(block)
		block.kind = quote
		block.remove_surrounding_empty_lines
		quote.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = current_block.first_line
	end
end
1791
# A markdown ruler line.
class LineHR
	super Line

	# Build a `BlockRuler` containing only the current line.
	redef fun process(v) do
		var ruler_line = v.current_line
		if ruler_line == null then return
		var current_block = v.current_block.as(not null)
		# Detach the lines found before the ruler, if any.
		var before = ruler_line.prev
		if before != null then
			current_block.split(before)
		end
		var ruler = current_block.split(ruler_line)
		ruler.kind = new BlockRuler(ruler)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1807
# A markdown fence code line.
class LineFence
	super Line

	# Build a `BlockFence` from the opening fence up to the closing one
	# (or up to the end of the current block if the fence is never closed).
	redef fun process(v) do
		# Look for the closing fence, starting after the opening one.
		var line = v.current_line.as(not null).next
		var current_block = v.current_block.as(not null)
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# The closing fence belongs to the block: step over it.
		if line != null then line = line.next
		# Split the fence block.
		var split_at: MDLine
		if line != null then
			split_at = line.prev.as(not null)
		else
			split_at = current_block.last_line.as(not null)
		end
		var block = current_block.split(split_at)
		block.remove_surrounding_empty_lines
		# The opening fence line carries the meta string, then it is erased.
		var opening = block.first_line.as(not null)
		var meta = opening.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.as(not null).clear
		# Erase the closing fence line too, if there is one.
		var closing = block.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			block.last_line.as(not null).clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1842
# A markdown headline.
class LineHeadline
	super Line

	# Build a `BlockHeadline` from the current line and strip its headline marks.
	redef fun process(v) do
		var headline = v.current_line
		if headline == null then return
		var current_block = v.current_block.as(not null)
		# Detach the lines found before the headline, if any.
		var before = headline.prev
		if before != null then
			current_block.split(before)
		end
		var block = current_block.split(headline)
		var kind = new BlockHeadline(block)
		block.kind = kind
		kind.transform_headline(block)
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1861
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Build a `BlockHeadline` of depth 1 from the current line.
	#
	# The next line is cleared before the block is split.
	redef fun process(v) do
		var headline = v.current_line
		if headline == null then return
		var current_block = v.current_block.as(not null)
		# Detach the lines found before the headline, if any.
		var before = headline.prev
		if before != null then
			current_block.split(before)
		end
		var underline = headline.next.as(not null)
		underline.clear
		var block = current_block.split(headline)
		var kind = new BlockHeadline(block)
		kind.depth = 1
		kind.transform_headline(block)
		block.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1882
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Build a `BlockHeadline` of depth 2 from the current line.
	#
	# The next line is cleared before the block is split.
	redef fun process(v) do
		var headline = v.current_line
		if headline == null then return
		var current_block = v.current_block.as(not null)
		# Detach the lines found before the headline, if any.
		var before = headline.prev
		if before != null then
			current_block.split(before)
		end
		var underline = headline.next.as(not null)
		underline.clear
		var block = current_block.split(headline)
		var kind = new BlockHeadline(block)
		kind.depth = 2
		kind.transform_headline(block)
		block.kind = kind
		current_block.remove_leading_empty_lines
		v.current_line = current_block.first_line
	end
end
1903
# A markdown list line.
#
# Common behaviour of ordered and unordered list lines.
abstract class LineList
	super Line

	# Build a list block from the current line, split it into items
	# and process each item recursively.
	redef fun process(v) do
		var line = v.current_line
		# The list ends on a non-empty, non-indented line following an empty
		# line, unless that line is itself a list line.
		while line != null do
			var t = v.line_kind(line)
			var can_stop = not line.is_empty and line.prev_empty and line.leading == 0
			if can_stop and not (t isa LineList) then break
			line = line.next
		end
		# Split the list from the current block.
		var current_block = v.current_block.as(not null)
		var split_at: MDLine
		if line != null then
			split_at = line.prev.as(not null)
		else
			split_at = current_block.last_line.as(not null)
		end
		var list = current_block.split(split_at)
		var kind = block_kind(list)
		list.kind = kind
		# Reset the boundary flags before and after the empty lines removal.
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.as(not null).prev_empty = false
		list.last_line.as(not null).next_empty = false
		kind.init_block(v)
		# Process the content of each item.
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1950
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two chars after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1961
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two chars after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1972
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled by subclasses.
abstract class Token

	# Location of `self` in the original input.
	var location: nullable MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default the character is simply added through `add_char`.
	fun emit(v: MarkdownEmitter) do
		v.decorator.add_char(v, char)
	end
end
1989
# A token without a specific meaning.
#
# It keeps the default `Token::emit` behaviour.
class TokenNone
	super Token
end
1994
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# If no matching token is found, only `char` is added.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
2011
# An emphasis star token.
#
# Behaves as `TokenEm`, this class is only used for typing purposes.
class TokenEmStar
	super TokenEm
end
2016
# An emphasis underscore token.
#
# Behaves as `TokenEm`, this class is only used for typing purposes.
class TokenEmUnderscore
	super TokenEm
end
2021
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# The content starts 2 chars after `pos`.
	# If no matching token is found, only `char` is added.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end
2038
# A strong star token.
#
# Behaves as `TokenStrong`, this class is only used for typing purposes.
class TokenStrongStar
	super TokenStrong
end
2043
# A strong underscore token.
#
# Behaves as `TokenStrong`, this class is only used for typing purposes.
class TokenStrongUnderscore
	super TokenStrong
end
2048
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces surrounding the code are dropped, and nothing is added
	# if the span only contains spaces.
	# If no matching token is found, only `char` is added.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var from = pos + next_pos + 1
		var stop = v.processor.find_token(text, from, self)
		if stop <= 0 then
			v.addc char
			return
		end
		v.current_pos = stop + next_pos
		while from < stop and text[from] == ' ' do from += 1
		if from >= stop then return
		while text[stop - 1] == ' ' do stop -= 1
		v.decorator.add_span_code(v, text, from, stop)
	end

	# Number of chars to skip in addition to the one at `pos`.
	private fun next_pos: Int is abstract
end
2072
# A span code token.
class TokenCodeSingle
	super TokenCode

	# No additional char to skip after the one at `pos`.
	redef fun next_pos do return 0
end
2079
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# One additional char to skip after the one at `pos`.
	redef fun next_pos do return 1
end
2086
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if `check_link` accepts it, `char` alone otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# On success `name`, `link` and `comment` (and `is_abbrev` for some
	# references) are set and the position reached in the text is returned.
	# Return -1 if the construct is not a valid link.
	#
	# Three forms are handled after the `[name]` part:
	# `(link "comment")`, `[id]` and a bare `[name]` looked up in
	# `v.processor.link_refs`.
	#
	# NOTE: `out` is not used by this method.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		if md == null then return -1
		var pos
		# The name starts 1 char after `start` for links, 2 chars for other tokens.
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing but spaces after the name: it must be a known reference.
			var tid = name.as(not null).write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form: `(link)`, `(<link>)` or `(link "comment")`.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				if pos > start and md[pos] == '"' then
					# Optional quoted title.
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if pos < start then return -1
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[id]`, an empty id means that `name` is the id.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.as(not null).write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Bare `[name]`: it must be a known reference.
			# Newlines of the name are replaced by spaces for the lookup.
			var tid = name.as(not null).write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		# An unknown reference leaves `link` unset.
		if link == null then return -1
		return pos
	end
end
2207
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation if the link is one and has a `comment`,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if as_abbr then
			v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
			return
		end
		v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
	end
end
2220
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image through `add_image` of the decorator.
	redef fun emit_hyper(v) do
		var source = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, source, label, comment)
	end
end
2229
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the HTML found at the current position,
	# or escape `char` if no valid HTML is found.
	redef fun emit(v) do
		var html = new FlatBuffer
		var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add html
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links are emitted directly through `add_link`.
	# Inline HTML is appended to `out`.
	# Return the position reached in `md` or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var url = new FlatBuffer
		var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
		var is_auto_link = pos != -1 and md[pos] == ':' and url.is_link_prefix
		if is_auto_link then
			# Read the remaining of the link after its prefix.
			pos = md.read_until(url, pos, '>')
			if pos != -1 then
				var link = url.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2267
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity if it is well formed, or escape `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# The entity is read from `start` in `md` up to the next `;`
	# and appended to `out` (`;` included when the entity is accepted).
	# Accepted forms are `&#x` followed by hexadecimal digits,
	# `&#` followed by decimal digits, and `&` followed by letters or digits.
	#
	# Return the position of the `;` or -1 if the entity is not valid.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# Hexadecimal character reference: at least one digit is needed.
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				# Decimal character reference.
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# Named entity.
			# TODO check that the entity name is a valid one.
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
		end
		out.add ';'
		return pos
	end
end
2321
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape char and add the char that follows it as is.
	redef fun emit(v) do
		v.current_pos += 1
		var text = v.current_text.as(not null)
		v.addc text[v.current_pos]
	end
end
2331
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as a strike.
	#
	# The content starts 2 chars after `pos`.
	# If no matching token is found, only `char` is added.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = stop + 1
	end
end
2350
2351 redef class Text
2352
# Get the position of the next char that is neither a space nor a newline.
#
# The search starts at `start`.
# Return -1 if the end of `self` is reached.
private fun skip_spaces(start: Int): Int do
	var pos = start
	loop
		if pos < 0 or pos >= length then break
		var c = self[pos]
		if c != ' ' and c != '\n' then break
		pos += 1
	end
	if pos >= length then return -1
	return pos
end
2362
# Read `self` from `start` until one of the `nend` chars
# and append what is read to the `out` buffer.
#
# Chars following a backslash are handled by `escape`.
# Return the position of the end char found or -1 if the end of `self` is reached.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			pos = escape(out, self[pos + 1], pos)
		else
			var end_reached = false
			for n in nend do
				if c == n then
					end_reached = true
					break
				end
			end
			if end_reached then break
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2380
# Read `self` as raw text from `start` until one of the `nend` chars
# and append what is read to the `out` buffer.
#
# No escape is made.
# Return the position of the end char found or -1 if the end of `self` is reached.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2401
# Read `self` as XML from `from` until one of the `to` chars
# and append what is read to the `out` buffer.
#
# End chars found inside single or double quoted strings are ignored.
# Inside a string, a backslash and the char that follows it are copied as is.
#
# Return the position of the end char found or -1 if the end of `self` is reached.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# Copy the escaped char itself (not the backslash again).
					out.add self[pos]
					pos += 1
				end
				continue
			end
			# Only the quote that opened the string can close it:
			# the other quote char is a regular char of the string.
			if c == str_char then in_str = false
		else if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2447
# Read the XML tag whose opening `<` is at `start` and append it to the `out` buffer.
#
# * `<!` is appended as is and `start + 1` is returned right away.
# * With `safe_mode`, the tag name is read first:
#   a name rejected by `is_valid_html_tag` makes the reading fail
#   (after `&lt;` was appended), and a name accepted by `is_html_unsafe`
#   gets its `<` and `>` written as `&lt;` and `&gt;`.
# * A `/` found before the closing `>` is written as ` /`.
#
# Return the index of the closing `>`, or -1 when the tag cannot be read.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	var next = start + 1
	if next >= length then return -1
	var marker = self[next]
	if marker == '!' then
		out.append "<!"
		return next
	end
	var closing = marker == '/'
	var cursor = next
	if closing then cursor = start + 2
	# Becomes false when the angle brackets must be written as entities.
	var keep_markup = true
	if safe_mode then
		var name = new FlatBuffer
		cursor = read_xml_until(name, cursor, ' ', '/', '>')
		if cursor == -1 then return -1
		var tag = name.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			keep_markup = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if closing then out.add '/'
		out.append name
	else
		out.add '<'
		if closing then out.add '/'
		cursor = read_xml_until(out, cursor, ' ', '/', '>')
		if cursor == -1 then return -1
	end
	# Remaining content of the tag, up to a `/` or the closing `>`.
	cursor = read_xml_until(out, cursor, '/', '>')
	if cursor == -1 then return -1
	if self[cursor] == '/' then
		out.append " /"
		cursor = read_xml_until(out, cursor + 1, '>')
		if cursor == -1 then return -1
	end
	if self[cursor] != '>' then return -1
	if keep_markup then
		out.add '>'
	else
		out.append "&gt;"
	end
	return cursor
end
2506
# Read a markdown link address from `start` and append it to the `out` buffer.
#
# The reading starts at a parenthesis depth of 1.
# It stops on a space met at depth 1, or on the `)` that brings the
# depth back to 0.
# A backslash followed by at least one more char is passed to `escape`.
#
# Return the index of the char that stopped the reading,
# or -1 when the end of `self` is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var depth = 1
	var i = start
	while i < length do
		var c = self[i]
		if c == '\\' and i + 1 < length then
			i = escape(out, self[i + 1], i) + 1
			continue
		end
		if c == '(' then
			depth += 1
		else if c == ')' then
			depth -= 1
			if depth == 0 then break
		else if c == ' ' and depth == 1 then
			break
		end
		out.add c
		i += 1
	end
	if i == length then return -1
	return i
end
2533
# Read a markdown link text from `start` and append it to the `out` buffer.
#
# The reading starts at a bracket depth of 1 and stops on the `]` that
# brings the depth back to 0; nested brackets are copied to `out`.
#
# Return the index of that closing `]`,
# or -1 when the end of `self` is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var depth = 1
	var i = start
	while i < length do
		var c = self[i]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add c
		i += 1
	end
	if i == length then return -1
	return i
end
2560
# Lowercased name of the XML tag held by `self`.
#
# The first char is skipped, as well as a `/` in second position.
# The name is made of the letters and digits that follow;
# the last char of `self` is never part of it.
private fun xml_tag: String do
	var name = new FlatBuffer
	var i = 1
	if length > 1 and self[1] == '/' then i = 2
	var last = length - 1
	loop
		if i >= last then break
		var c = self[i]
		if not c.is_digit and not c.is_letter then break
		name.add c
		i += 1
	end
	return name.write_to_string.to_lower
end
2572
# Does `self` look like a HTML tag name?
#
# A name is accepted when it is not empty, starts with a letter and
# contains only letters and digits, so that names such as `h1` are valid.
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	# Digits are allowed, but not in first position.
	if not self[0].is_alpha then return false
	for c in chars do
		if not c.is_alpha and not c.is_digit then return false
	end
	return true
end
2580
# Handle the backslash found at `pos` that is followed by the char `c`.
#
# When `c` is a markdown special char, `c` alone is appended to `out`
# and `pos + 1` is returned.
# Otherwise a backslash is appended to `out` and `pos` is returned.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'', '.',
		'<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
	if not escapable.has(c) then
		# Not an escape sequence: keep the backslash itself.
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2593
# Extract the string found after the opening chars of a fence.
#
# Leading spaces, backticks and tildes are skipped;
# what follows is returned trimmed.
# Return `null` when `self` contains nothing else.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var c = chars[i]
		if c == ' ' or c == '`' or c == '~' then
			i += 1
			continue
		end
		return substring_from(i).trim
	end
	return null
end
2604
# Is `self` listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var name = self.write_to_string
	return html_unsafe_tags.has(name)
end
2607
# Is `self` a HTML block element, i.e. listed in `html_block_tags`?
private fun is_html_block: Bool do
	var name = self.write_to_string
	return html_block_tags.has(name)
end
2610
# Is `self` a link prefix, i.e. listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefix = self.write_to_string
	return html_link_prefixes.has(prefix)
end
2613
# Tag names reported as unsafe by `is_html_unsafe`.
#
# The array is built once and shared between calls.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
2615
# Tag names reported as block elements by `is_html_block`.
#
# The array is built once and shared between calls.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
2617
# Prefixes accepted by `is_link_prefix`.
#
# The array is built once and shared between calls.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2619 end
2620
redef class String

	# Process `self` as markdown with a new `MarkdownProcessor` and
	# return the result of the processing.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		return (new MarkdownProcessor).process(self)
	end
end