3c19b0bfe5e10faf35781673afeba24e3ad654ad
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # `MarkdownEmitter` used for output.
34 var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # This is a paragraph
45 # * and this is not a list
46 #
47 # Will produce:
48 #
49 # <p>This is a paragraph
50 # * and this is not a list</p>
51 #
52 # When using extended mode this changes to:
53 #
54 # <p>This is a paragraph</p>
55 # <ul>
56 # <li>and this is not a list</li>
57 # </ul>
58 #
59 # * Fenced code blocks
60 #
61 # If you don't want to indent all your code with 4 spaces,
62 # you can wrap your code in ``` ``` ``` or `~~~`.
63 #
64 # Here's an example:
65 #
66 # ```
67 # fun test do
68 # print "Hello World!"
69 # end
70 # ```
71 #
72 # * Code blocks meta
73 #
74 # If you want to use syntax highlighting tools, most of them need to know what kind
75 # of language they are highlighting.
76 # You can add an optional language identifier after the fence declaration to output
77 # it in the HTML render.
78 #
79 # ```nit
80 # import markdown
81 #
82 # print "# Hello World!".md_to_html
83 # ```
84 #
85 # Becomes
86 #
87 # <pre class="nit"><code>import markdown
88 #
89 # print "Hello World!".md_to_html
90 # </code></pre>
91 #
92 # * Underscores (Emphasis)
93 #
94 # Underscores in the middle of a word like:
95 #
96 # Con_cat_this
97 #
98 # normally produces this:
99 #
100 # <p>Con<em>cat</em>this</p>
101 #
102 # With extended mode they don't result in emphasis.
103 #
104 # <p>Con_cat_this</p>
105 #
106 # * Strikethrough
107 #
108 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
109 # a strikethrough span is marked with `~~`.
110 #
111 # ~~Mistaken text.~~
112 #
113 # becomes
114 #
115 # <del>Mistaken text.</del>
116 var ext_mode = true
117
118 init do self.emitter = new MarkdownEmitter(self)
119
# Render the markdown `input` string and return the emitted output.
#
# The parsing state kept by the processor (link references, pending link
# reference, current line and current block) is reset before parsing.
fun process(input: String): Streamable do
	# Forget whatever a previous run left behind.
	current_block = null
	current_line = null
	last_link_ref = null
	link_refs.clear

	# Build the root block, trim its empty borders, then split it in sub-blocks.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)

	# Let the emitter render the resulting block tree.
	var output = emitter.emit(root.kind)
	return output
end
134
# Cut the `input` string into `MDLine`s gathered in a new parent `MDBlock`.
#
# Lines are delimited by `'\n'`. Every tab is replaced by spaces up to the
# next column that is a multiple of 4. Lines for which `check_link_ref`
# returns `true` are not added to the returned block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock
	var buf = new FlatBuffer
	var len = input.length
	var idx = 0
	while idx < len do
		buf.clear
		# Column reached in the expanded line, used to locate tab stops.
		var col = 0
		while idx < len do
			var ch = input[idx]
			idx += 1
			# The end-of-line character is consumed but not stored.
			if ch == '\n' then break
			if ch == '\t' then
				var stop = col + (4 - (col.bin_and(3)))
				while col < stop do
					buf.add ' '
					col += 1
				end
			else
				buf.add ch
				col += 1
			end
		end

		var md_line = new MDLine(buf.write_to_string)
		# Lines consumed by `check_link_ref` are skipped.
		if not check_link_ref(md_line) then root.add_line md_line
	end
	return root
end
170
# Check if `line` is a block link definition (`[id]: url "title"`).
#
# Return `true` when the line has been consumed, that is when:
#
# * it contains a valid link ref, which is saved with `add_link_ref`;
# * or it contains the title of the link ref defined just before
#   (two-line definition, see `last_link_ref`).
#
# Return `false` otherwise.
#
# A title is only looked for on the first non-empty line that follows a
# definition without title: `last_link_ref` is cleared once that line has
# been examined, so later lines starting with a quote or a parenthesis are
# kept as regular content.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = line.leading + 1
		pos = md.read_until(id, pos, ']')
		if not id.is_empty and pos + 2 < md.length then
			if md[pos + 1] == ':' then
				pos += 2
				pos = md.skip_spaces(pos)
				# NOTE(review): `skip_spaces` is defined outside this method; the
				# `pos > 0` test below suggests it can return a negative position
				# (e.g. when only spaces follow the colon) -- confirm. Bounds are
				# checked before indexing so such a line is simply not a link ref.
				if pos >= 0 and pos < md.length then
					if md[pos] == '<' then
						pos += 1
						pos = md.read_until(link, pos, '>')
						pos += 1
					else
						pos = md.read_until(link, pos, ' ', '\n')
					end
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < md.length then
						# Optional title, delimited by quotes or parentheses.
						var c = md[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						# Nothing after the link: definition without title.
						is_link_ref = true
					end
				end
			end
		end
	end
	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without title can be completed by the next line.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a definition: the line may hold the title of the pending link ref.
	comment = new FlatBuffer
	var pending = last_link_ref
	if not line.is_empty and pending != null then
		pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if not comment.is_empty then pending.title = comment.write_to_string
		# The pending link ref had its chance: without this reset, any later
		# quoted line of the document would be swallowed as a title.
		last_link_ref = null
	end
	return not comment.is_empty
end
238
239 # Known link refs
240 # This list will be needed during output to expand links.
241 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
242
243 # Last encountered link ref (for multiline definitions)
244 #
245 # Markdown allows link refs to be defined over two lines:
246 #
247 # [id]: http://example.com/longish/path/to/resource/here
248 # "Optional Title Here"
249 #
250 private var last_link_ref: nullable LinkRef = null
251
# Register the link `ref` under `key`.
# The key is stored in lower case.
fun add_link_ref(key: String, ref: LinkRef) do
	var normalized = key.to_lower
	link_refs[normalized] = ref
end
254
# Recursively split a `block`.
#
# The block is split according to the kind of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# The previous `in_list` mode and `current_block` are restored before
# returning, including when `root` contains only empty lines.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip leading empty lines.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Nothing but empty lines: restore the list mode set above
			# instead of leaking it to the caller.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# Each line kind consumes one or more lines through `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
280
281 # Currently processed line.
282 # Used when visiting blocks with `recurse`.
283 var current_line: nullable MDLine = null is writable
284
285 # Currently processed block.
286 # Used when visiting blocks with `recurse`.
287 var current_block: nullable MDBlock = null is writable
288
289 # Is the current recursion in list mode?
290 # Used when visiting blocks with `recurse`
291 private var in_list = false
292
# Classify the line `md` and return the matching `Line` kind.
#
# Checks are done in this order: empty line, indented code, `#` headline,
# blockquote, fence (extended mode only), horizontal rule, unordered list,
# ordered list, XML, then underlined headlines detected from the next line.
# Anything else is a `LineOther`.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty
	var leading = md.leading
	if leading > 3 then return new LineCode

	var value = md.value
	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Length of the line once leading and trailing spaces are ignored.
	var content_len = value.length - leading - md.trailing
	if content_len > 2 then
		if ext_mode and (first == '`' or first == '~') then
			if md.count_chars_start(first) >= 3 then return new LineFence
		end
		if first == '*' or first == '-' or first == '_' then
			if md.count_chars(first) >= 3 then return new LineHR
		end
	end

	# Unordered list: a marker followed by a space.
	var rest = value.length - leading
	if rest >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	# Ordered list: digits followed by a dot and a space.
	if rest >= 3 and first.is_digit then
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# Underlined headlines are detected by looking at the following line.
	var following = md.next
	if following != null and not following.is_empty then
		if following.count_chars('=') > 0 then return new LineHeadline1
		if following.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
348
# Return the kind of token starting at position `pos` of `text`.
#
# The decision is based on the character at `pos`, the one before it and
# the two after it. Characters outside of `text` are treated as spaces.
fun token_at(text: Text, pos: Int): Token do
	var len = text.length
	var c0 = ' '
	var c1 = ' '
	var c2 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	if pos + 1 < len then c1 = text[pos + 1]
	if pos + 2 < len then c2 = text[pos + 2]

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
			return new TokenEmStar(pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
			return new TokenEmUnderscore(pos, c)
		end
		if ext_mode then
			# In extended mode an underscore inside a word is plain text.
			var inside_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
				(c1.is_letter or c1.is_digit)
			if inside_word then return new TokenNone(pos, c)
			return new TokenEmUnderscore(pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(pos, c)
		return new TokenNone(pos, c)
	end

	if c == '[' then return new TokenLink(pos, c)
	if c == ']' then return new TokenNone(pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(pos, c)
		return new TokenCodeSingle(pos, c)
	end

	if c == '\\' then
		# A backslash is an escape only in front of these characters.
		if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or
		   c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or
		   c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
		   c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or
		   c1 == '^' then
			return new TokenEscape(pos, c)
		end
		return new TokenNone(pos, c)
	end

	if c == '<' then return new TokenHTML(pos, c)
	if c == '&' then return new TokenEntity(pos, c)

	# Strikethrough (`~~`) exists in extended mode only.
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(pos, c)
	return new TokenNone(pos, c)
end
439
# Find the position of a `token` in `text`.
#
# Return the first position, at or after `start`, where `token_at` gives
# a token of the same type as `token`, or `-1` if there is none.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
451 end
452
# Emit the output corresponding to the content of blocks.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor used to tokenize the text (see `emit_text_until`).
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`.
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Emit `block` in a new buffer and return this buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do emit_text_until(text, 0, null)

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of text if `token` is null.
	#
	# Return the position of the stop token, or `-1` when the end of `text`
	# is reached. The previous `current_text` and `current_pos` are restored
	# only in the latter case; when a stop token is found they are left as is.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var found = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				# A "strong" token also closes the matching emphasis token.
				if found.is_same_type(token) or
				   (token isa TokenEmStar and found isa TokenStrongStar) or
				   (token isa TokenEmUnderscore and found isa TokenStrongUnderscore) then
					return current_pos
				end
			end
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer (the last pushed one).
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to the current buffer.
	fun add(e: Streamable) do
		var out = current_buffer
		if e isa Text then
			out.append e
		else
			out.append e.write_to_string
		end
	end

	# Append `c` to the current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		current_buffer.add '\n'
	end
end
554
# A link reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Link href.
	var link: String

	# Optional link title.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link with its `title`.
	init with_title(link: String, title: nullable String) do
		self.title = title
		self.link = link
	end
end
578
# A `Decorator` is used to emit markdown into a specific format.
# The default decorator is `HTMLDecorator`.
#
# Every service receives the `MarkdownEmitter` `v` to write its output to.
interface Decorator

	# Render the ruler `block`.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Render the headline `block` with its corresponding level.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Render the paragraph `block`.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Render the code or fence `block`.
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Render the blockquote `block`.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Render the unordered list `block`.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Render the ordered list `block`.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Render the list item `block`.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Render `text` as an emphasis.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as a strong emphasis.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as a strike.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`)
	fun add_strike(v: MarkdownEmitter, text: Text) is abstract

	# Render a link to `link` labelled `name`, with the optional title `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image located at `link`, with the optional title `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Render a code span reading from a `buffer`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render the text `value` and escape it.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Render code text from `buffer` and escape it.
	fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render the escaped form of the character `char`.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Render a line break.
	fun add_line_break(v: MarkdownEmitter) is abstract

	# Generate a new valid HTML id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
648
# A markdown headline, as recorded by a `Decorator`.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
662
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do v.add "<hr/>\n"

	# Render the headline and record it in `headlines` under a unique id
	# computed by `strip_id` from the text of its first line.
	redef fun add_headline(v, block) do
		# save headline
		var txt = block.block.first_line.value
		var id = strip_id(txt)
		var lvl = block.depth
		headlines[id] = new HeadLine(id, txt, lvl)
		# output it
		v.add "<h{lvl} id=\"{id}\">"
		v.emit_in block
		v.add "</h{lvl}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# Render a code block.
	# For a fence with a meta, the meta is used as the `class` of the `pre` tag.
	redef fun add_code(v, block) do
		if block isa BlockFence and block.meta != null then
			# The meta is raw text coming from the document: escape it like any
			# other attribute value (see `add_link`) so that a quote or a `<`
			# cannot break the tag.
			v.add "<pre class=\""
			append_value(v, block.meta.to_s)
			v.add "\"><code>"
		else
			v.add "<pre><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# Render an `img` tag; `link`, `name` and `comment` are escaped.
	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add "/>"
	end

	# Render an `a` tag; `link` and `comment` are escaped while `name` is
	# emitted as markdown text.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	redef fun append_value(v, text) do for c in text do escape_char(v, c)

	# Output `c`, replacing `&`, `<`, `>`, `"` and `'` by HTML entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Output the characters of `buffer` in `[from..to[`.
	# Only `&`, `<` and `>` are replaced by entities; quotes are kept as is.
	redef fun append_code(v, buffer, from, to) do
		for i in [from..to[ do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
		end
	end

	# Build an id from `txt`: spaces become `_`, letters, digits and
	# `allowed_id_chars` are kept, everything else is dropped.
	# A numeric suffix (`_1`, `_2`...) is appended while the id is
	# already a key of `headlines`.
	redef fun strip_id(txt) do
		# strip id
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else
				if not c.is_letter and
				   not c.is_digit and
				   not allowed_id_chars.has(c) then continue
				b.add c
			end
		end
		var res = b.to_s
		var key = res
		# check for multiple id definitions
		if headlines.has_key(key) then
			var i = 1
			key = "{res}_{i}"
			while headlines.has_key(key) do
				i += 1
				key = "{res}_{i}"
			end
		end
		return key
	end

	# Non-alphanumeric characters kept by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
849
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do
		return first_block != null
	end

	# Count the sub-blocks by walking the `next` chain from `first_block`.
	fun count_blocks: Int do
		var total = 0
		var cursor = first_block
		while cursor != null do
			total += 1
			cursor = cursor.next
		end
		return total
	end

	# Does this block contain lines?
	fun has_lines: Bool do
		return first_line != null
	end

	# Count the lines by walking the `next` chain from `first_line`.
	fun count_lines: Int do
		var total = 0
		var cursor = first_line
		while cursor != null do
			total += 1
			cursor = cursor.next
		end
		return total
	end

	# Split `self`, creating a new sub-block having `line` as `last_line`.
	#
	# The new sub-block takes the lines from the current `first_line` up to
	# `line`; `self` keeps the lines after `line`. The sub-block is appended
	# to the sub-blocks of `self` and returned.
	fun split(line: MDLine): MDBlock do
		var sub = new MDBlock
		sub.first_line = first_line
		sub.last_line = line
		# Detach the remaining lines from the ones given to `sub`.
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
		end
		# Append `sub` to the chain of sub-blocks.
		if first_block == null then
			first_block = sub
		else
			last_block.next = sub
		end
		last_block = sub
		return sub
	end

	# Append `line` to the lines of this block.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
		else
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
		end
		last_line = line
	end

	# Unlink `line` from the lines of this block.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		var head = first_line
		while head != null and head.is_empty do
			remove_line head
			removed = true
			head = first_line
		end
		return removed
	end

	# Remove trailing empty lines.
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		var tail = last_line
		while tail != null and tail.is_empty do
			remove_line tail
			removed = true
			tail = last_line
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var leading_removed = remove_leading_empty_lines
		var trailing_removed = remove_trailing_empty_lines
		return leading_removed or trailing_removed
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var cursor = first_line
		while cursor != null do
			if not cursor.is_empty then
				var kind = v.line_kind(cursor)
				if kind isa LineList then
					cursor.value = kind.extract_value(cursor)
				else
					cursor.value = cursor.value.substring_from(cursor.leading.min(4))
				end
				# The value changed: refresh the count of leading spaces.
				cursor.leading = cursor.process_leading
			end
			cursor = cursor.next
		end
	end

	# Collect the text of the lines of this block.
	# Each line, empty or not, ends with a "\n".
	fun text: String do
		var buf = new FlatBuffer
		var cursor = first_line
		while cursor != null do
			if not cursor.is_empty then buf.append cursor.text
			buf.append "\n"
			cursor = cursor.next
		end
		return buf.write_to_string
	end
end
1019
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first. Lines are emitted if the
	# block has some, sub-blocks are emitted otherwise.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first gathered in a temporary buffer, without their leading
	# and trailing spaces and separated by "\n", then the whole buffer is
	# emitted as markdown text. A line break is added by the decorator after
	# each line ending with 2 spaces or more.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` expects a start and a *count*: the leading spaces
				# must be subtracted too, otherwise as many trailing spaces as
				# there are leading ones end up in the output.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			block.kind.emit(v)
			block = block.next
		end
	end
end
1067
# A block without any markdown specificities.
#
# It redefines nothing: the implementation inherited from `Block` is used
# as is. This class is only used for typing purposes.
class BlockNone
	super Block
end
1075
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove blockquote markers.
	#
	# For each non-empty line of `block` starting with `>`, drop everything
	# up to the marker and one following space if any, then refresh the
	# count of leading spaces of the line.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				# Number of characters to drop: leading spaces and marker...
				var rem = line.leading + 1
				# ... plus the space following the marker.
				if rem < line.value.length and line.value[rem] == ' ' then rem += 1
				line.value = line.value.substring_from(rem)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1101
# A markdown code block.
class BlockCode
	super Block

	# Number of chars to skip at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line as escaped code, skipping its first `line_start` chars.
	# Every line, empty or not, is followed by a "\n".
	redef fun emit_lines(v) do
		var cursor = block.first_line
		while cursor != null do
			if not cursor.is_empty then
				var code = cursor.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			cursor = cursor.next
		end
	end
end
1124
# A markdown code-fence block.
#
# The implementation of `BlockCode` is reused; only the start of the lines
# differs and an optional `meta` is kept.
class BlockFence
	super BlockCode

	# Any string found after the fence token.
	var meta: nullable Text

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1138
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block` and set `depth`.
	#
	# Nothing is done if `depth` is already set. Otherwise the leading `#`
	# are counted (the depth is capped to 6), then the leading marks, the
	# closing `#` and the surrounding spaces are removed from the line.
	# A line containing only marks is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			var nend = line.value.length - line.trailing - 1
			# Strip the closing marks but never scan back past `start`:
			# a title made of `#` only (like `# #`) would otherwise move
			# `nend` before `start` and give a negative count to `substring`.
			while nend > start and line.value[nend] == '#' do nend -= 1
			while nend > start and line.value[nend] == ' ' do nend -= 1
			line.value = line.value.substring(start, nend - start + 1)
			line.leading = 0
			line.trailing = 0
		end
		depth = level.min(6)
	end
end
1175
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1182
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split the list block into list item sub-blocks.
	#
	# Starting from the second line, a new item begins at each list line
	# and at each non-empty, non-indented line preceded by an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		line = line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList or
				(not line.is_empty and line.prev_empty and line.leading == 0)
			if starts_item then
				# Everything before `line` becomes a list item.
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# The remaining lines form the last item.
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If any list item of `block` contains a paragraph, every `BlockNone`
	# found in the list items is turned into a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in any list item.
		var has_paragraph = false
		var item = block.first_block
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not has_paragraph do
					if child.kind isa BlockParagraph then has_paragraph = true
					child = child.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return

		# Second pass: promote the blocks without kind.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1240
# An ordered (numbered) markdown list.
class BlockOrderedList
	super BlockList

	# Delegate the rendering of this list to the decorator.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1247
# An unordered (bulleted) markdown list.
class BlockUnorderedList
	super BlockList

	# Delegate the rendering of this list to the decorator.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1254
# A paragraph of markdown text.
class BlockParagraph
	super Block

	# Delegate the rendering of this paragraph to the decorator.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1261
# A markdown horizontal ruler.
class BlockRuler
	super Block

	# Delegate the rendering of this ruler to the decorator.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1268
# Raw XML block found in the markdown markup.
class BlockXML
	super Block

	# Output each line of the block as is.
	#
	# Empty lines only produce a newline.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1282
# A line of markdown input.
#
# Lines are chained together with `prev` and `next`.
class MDLine

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# `trailing` is only computed for lines containing something else than
	# spaces; such lines are also flagged as not empty.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbour lines, if any, are told that their sibling is now empty.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.next_empty = true
		if next != null then next.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# Returns the number of spaces found at the beginning of `value`.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# NOTE(review): this compares the *stored* `leading`, not `count`.
		# It is kept as is because `init` relies on the current behavior
		# (clearing here on `count` would make `init` call `process_trailing`
		# on an emptied value) -- confirm the intent before changing it.
		if leading == value.length then clear
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns the number of spaces found at the end of `value`.
	# For an empty value or a value made only of spaces the whole length is
	# returned (the scan never reads before the first character).
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			# Any other character disqualifies the whole line
			if c != ch then return 0
			count += 1
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then break
			count += 1
		end
		return count
	end

	# Last XML line if any.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# On success `xml_end_line` is set to the line where the markup started
	# on `self` ends (comment end, `hr` tag or matching closing tag).
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			# `hr` has no closing tag: the block is this single line
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				# Look for the next tag opening on the current line
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					# End of line reached: a `/` right before the last char
					# closes the innermost tag
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and tag != "hr" then
							if tmp[1] == '/' then
								# Closing tags must match the innermost open tag
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the position of the `<` opening the comment in `first_line`.
	# Returns the position following the closing `-->` (in the line where it
	# was found) and sets `first_line.xml_end_line`, or returns `-1` if no
	# complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.value.length then
			# The two dashes are looked up relatively to `start` so that
			# comments preceded by leading spaces are recognized too.
			if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` expects a length, not an end position
		var count = value.length - leading - trailing
		if count <= 0 then return ""
		return value.substring(leading, count)
	end
end
1483
# A kind of markdown line.
#
# Each kind knows how to build blocks from the lines it recognizes.
interface Line

	# Parse the line.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1491
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: move the processor to the next one.
	redef fun process(v) do
		var following = v.current_line.next
		v.current_line = following
	end
end
1500
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather consecutive lines into a `BlockParagraph` or a `BlockNone`.
	#
	# The block ends at the first empty line, at the end of the input, or
	# before a line starting another construct (headline, ruler, blockquote,
	# XML, and depending on the mode: list, code or fence).
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# Find the line ending the block
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if t isa LineList and (v.in_list or v.ext_mode) then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
				t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# Choose where to cut the current block
		var cut: MDLine
		if line == null then
			# Input exhausted: take everything
			cut = v.current_block.last_line.as(not null)
		else if line.is_empty then
			# The empty line is part of the new block
			cut = line
		else
			# Stopped on another construct: cut just before it
			cut = line.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		# Inside lists, text not preceded nor followed by an empty line stays
		# unwrapped
		var stopped_on_empty = line != null and line.is_empty
		if v.in_list and not stopped_on_empty and not was_empty then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1550
# A line of markdown code.
class LineCode
	super Line

	# Gather code lines, and the empty lines between them, into a `BlockCode`.
	redef fun process(v) do
		# Find the first line that is neither empty nor code
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty and not (v.line_kind(stop) isa LineCode) then break
			stop = stop.next
		end
		# Cut just before that line, or take everything left
		var cut: MDLine
		if stop == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = stop.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1573
# A line of raw XML.
class LineXML
	super Line

	# Isolate the lines up to `xml_end_line` into a `BlockXML`.
	#
	# Lines located before the current one are split apart first.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		var last_xml_line = line.xml_end_line.as(not null)
		var block = v.current_block.split(last_xml_line)
		block.kind = new BlockXML(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1588
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the quoted lines into a `BlockQuote` and process its content.
	#
	# The quote ends before the first non-empty, unindented line that follows
	# an empty line and is not itself a blockquote line.
	redef fun process(v) do
		# Find the first line after the quote
		var stop = v.current_line
		while stop != null do
			var ends_quote = not stop.is_empty and stop.prev_empty and
				stop.leading == 0 and
				not (v.line_kind(stop) isa LineBlockquote)
			if ends_quote then break
			stop = stop.next
		end
		# Cut just before that line, or take everything left
		var cut: MDLine
		if stop == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = stop.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		var quote = new BlockQuote(block)
		block.kind = quote
		block.remove_surrounding_empty_lines
		quote.remove_block_quote_prefix(block)
		v.current_line = stop
		# Parse the quoted content as markdown
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1618
# A markdown ruler line.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	#
	# Lines located before the current one are split apart first.
	redef fun process(v) do
		var line = v.current_line
		if line.prev != null then
			v.current_block.split(line.prev.as(not null))
		end
		var ruler = v.current_block.split(line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1632
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines from the current fence to the next one into a `BlockFence`.
	#
	# If no other fence line is found, the block extends to the last line.
	# The fence lines themselves are cleared.
	redef fun process(v) do
		# Look for the next fence line
		var line = v.current_line.next
		while line != null do
			if v.line_kind(line) isa LineFence then break
			line = line.next
		end
		# Step past it, so `line` is the first line after the block
		if line != null then line = line.next
		# Cut just before that line, or take everything left
		var cut: MDLine
		if line == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = line.prev.as(not null)
		end
		var block = v.current_block.split(cut)
		# Text following the opening fence marks is kept as meta information
		var meta = block.first_line.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		block.first_line.clear
		var last = block.last_line
		if last != null and v.line_kind(last) isa LineFence then
			block.last_line.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1665
# A markdown headline line.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline`.
	#
	# Lines located before the current one are split apart first.
	# The level is computed from the line by `BlockHeadline::transform_headline`.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		block.kind = headline
		headline.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1682
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	#
	# The line following the current one is cleared.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 1
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1701
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	#
	# The line following the current one is cleared.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		if before != null then
			v.current_block.split(before)
		end
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 2
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1720
# A markdown list line.
# Common behavior of ordered and unordered list lines.
class LineList
	super Line

	# Gather the lines of the list into a list block and process its items.
	#
	# The list ends before the first non-empty, unindented line that follows
	# an empty line and is not itself a list line.
	redef fun process(v) do
		# Find the first line after the list
		var stop = v.current_line
		while stop != null do
			var t = v.line_kind(stop)
			var ends_list = not stop.is_empty and stop.prev_empty and
				stop.leading == 0 and not (t isa LineList)
			if ends_list then break
			stop = stop.next
		end
		# Cut just before that line, or take everything left
		var cut: MDLine
		if stop == null then
			cut = v.current_block.last_line.as(not null)
		else
			cut = stop.prev.as(not null)
		end
		var list = v.current_block.split(cut)
		var kind = block_kind(list)
		list.kind = kind
		# Reset the boundary flags before and after trimming the block
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		# Split into items then parse each of them
		kind.init_block(v)
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = stop
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1766
# A line of an ordered list.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1777
# A line of an unordered list.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two characters after the leading spaces.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1788
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled by subclasses.
abstract class Token

	# Position of `self` in markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default the character is output as is.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1802
# A token with no markup meaning.
#
# It keeps the default `Token::emit` behavior.
class TokenNone
	super Token
end
1807
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# If no matching token is found, the character is output as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var closing = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = closing
	end
end
1824
# An emphasis token written with a star.
class TokenEmStar
	super TokenEm
end
1829
# An emphasis token written with an underscore.
class TokenEmUnderscore
	super TokenEm
end
1834
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# If no matching token is found, the character is output as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = closing + 1
	end
end
1851
# A strong token written with stars.
class TokenStrongStar
	super TokenStrong
end
1856
# A strong token written with underscores.
class TokenStrongUnderscore
	super TokenStrong
end
1861
# A code token.
# Common behavior of single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces surrounding the code are ignored and nothing is output for a
	# span containing only spaces.
	# If no matching token is found, the character is output as is.
	redef fun emit(v) do
		var code_start = pos + next_pos + 1
		var code_end = v.processor.find_token(v.current_text.as(not null), code_start, self)
		if code_end <= 0 then
			v.addc char
			return
		end
		v.current_pos = code_end + next_pos
		# Trim leading spaces
		while code_start < code_end and v.current_text[code_start] == ' ' do
			code_start += 1
		end
		if code_start >= code_end then return
		# Trim trailing spaces
		while v.current_text[code_end - 1] == ' ' do
			code_end -= 1
		end
		v.decorator.add_span_code(v, v.current_text.as(not null), code_start, code_end)
	end

	# Number of extra characters used by the delimiter.
	private fun next_pos: Int is abstract
end
1884
# A span code token.
class TokenCodeSingle
	super TokenCode

	# No extra delimiter character.
	redef fun next_pos do
		return 0
	end
end
1891
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# One extra delimiter character.
	redef fun next_pos do
		return 1
	end
end
1898
# A link or image token.
# Common behavior of images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the construct as a link or an image if it is valid.
	#
	# Otherwise the character is output as is.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parses the construct starting at `start` in `v.current_text` and fills
	# `name`, `link`, `comment` and `is_abbrev`.
	#
	# Three forms are handled after the `[name]` part:
	#
	# * `(address "title")`: inline address, optionally between `<` and `>`
	# * `[id]`: reference looked up in `v.processor.link_refs`
	# * nothing: `name` itself is used as the reference id
	#
	# Returns the position of the last character of the construct or `-1` if
	# the construct is not a valid link.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		var pos
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing follows the name: it must be a known reference
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# The text may end right after the closing `>`
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# Only spaces were left: the closing `)` is missing
				if pos < start then return -1
				if md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			# An empty id means that the name is the id
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2017
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is one and has a title,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if not as_abbr then
			v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
			return
		end
		v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
	end
end
2030
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the construct as an image.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var text = name.as(not null)
		v.decorator.add_image(v, address, text, comment)
	end
end
2039
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup found at the current position.
	#
	# If the markup is not valid, the character is escaped.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var last = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if last <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = last
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links (`<prefix:...>` with a known link prefix) are emitted
	# directly through the decorator; inline XML is appended to `out`.
	# Returns the position of the closing `>` or `-1`.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var url = new FlatBuffer
		var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
		var is_auto_link = pos != -1 and md[pos] == ':' and url.is_link_prefix
		if is_auto_link then
			pos = md.read_until(url, pos, '>')
			if pos != -1 then
				var link = url.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2077
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity found at `pos`.
	#
	# If the entity is not valid, the character is escaped.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`.
	# Three forms are accepted:
	#
	# * `&#xHHHH;`: hexadecimal digits only
	# * `&#DDDD;`: decimal digits only
	# * `&name;`: letters and digits only
	#
	# Returns the position of the `;` or `-1` if the entity is not valid.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# Reject anything outside of 0-9, a-f and A-F
					var is_dec = c >= '0' and c <= '9'
					var is_lower_hex = c >= 'a' and c <= 'f'
					var is_upper_hex = c >= 'A' and c <= 'F'
					if not is_dec and not is_lower_hex and not is_upper_hex then
						return -1
					end
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
			out.add ';'
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			out.add ';'
			# TODO check entity is valid
			# if out.is_entity then
			return pos
			# else
			# return -1
			# end
		end
		return pos
	end
end
2131
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape character and output the character that follows as is.
	redef fun emit(v) do
		v.current_pos += 1
		var escaped = v.current_text[v.current_pos]
		v.addc escaped
	end
end
2141
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as a strike.
	#
	# If no matching token is found, the character is output as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = closing + 1
	end
end
2160
redef class Text

	# Get the position of the next non-space character.
	#
	# Spaces and newlines are skipped from `start`.
	# Returns `-1` if the end of `self` is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
			pos += 1
		end
		if pos < length then return pos
		return -1
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Returns the position of the end character found or `-1` if the end of
	# `self` is reached first (this includes a `start` located after the end).
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if nend.has(c) then break
				out.add c
			end
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Returns the position of the end character found or `-1` if the end of
	# `self` is reached first (this includes a `start` located after the end).
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if nend.has(c) then break
			out.add c
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Characters of `to` found inside single or double quoted strings do not
	# stop the reading. Inside a string, a backslash and the character that
	# follows are copied verbatim, and only the quote that opened the string
	# can close it.
	#
	# Returns the position of the end character found or `-1` if the end of
	# `self` is reached first.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					# Copy the backslash then the escaped character
					out.add c
					pos += 1
					if pos < length then
						out.add self[pos]
						pos += 1
					end
					continue
				end
				if c == str_char then
					in_str = false
					out.add c
					pos += 1
					continue
				end
			else if c == '"' or c == '\'' then
				# Open a string; the quote itself is copied below
				in_str = true
				str_char = c
			else if to.has(c) then
				break
			end
			out.add c
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the `<` opening the tag.
	# In safe mode, tags with an invalid name are rejected and unsafe tags
	# (see `is_html_unsafe`) get their `<` and `>` escaped.
	#
	# Returns the position of the closing `>` or `-1` if no valid tag can be
	# read. For `<!` constructs only `<!` is appended and `start + 1` is
	# returned.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		if start + 1 >= length then return -1
		var pos = start + 1
		var is_valid = true
		var is_close_tag = false
		if self[start + 1] == '/' then
			is_close_tag = true
			pos = start + 2
		else if self[start + 1] == '!' then
			out.append "<!"
			return start + 1
		end
		if safe_mode then
			# Read the tag name apart to check it before output
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if not tag.is_valid_html_tag then
				out.append "&lt;"
				return -1
			end
			if tag.is_html_unsafe then
				is_valid = false
				out.append "&lt;"
			else
				out.append "<"
			end
			if is_close_tag then out.add '/'
			out.append tmp
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# Read the attributes
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = self.read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			if is_valid then
				out.add '>'
			else
				out.append "&gt;"
			end
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# The address ends at the first space found outside of nested
	# parentheses or at the parenthesis closing the link.
	# Returns the position of that character or `-1` if the end of `self` is
	# reached first.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				var end_reached = false
				if c == '(' then
					counter += 1
				else if c == ' ' then
					if counter == 1 then end_reached = true
				else if c == ')' then
					counter -= 1
					if counter == 0 then end_reached = true
				end
				if end_reached then break
				out.add c
			end
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Nested brackets are kept in the text.
	# Returns the position of the closing `]` or `-1` if the end of `self` is
	# reached first.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		var counter = 1
		while pos < length do
			var c = self[pos]
			var end_reached = false
			if c == '[' then
				counter += 1
				out.add c
			else if c == ']' then
				counter -= 1
				if counter == 0 then
					end_reached = true
				else
					out.add c
				end
			else
				out.add c
			end
			if end_reached then break
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Extract the XML tag name from a XML tag.
	#
	# The first character (and a `/` that may follow it) is skipped, then
	# letters and digits are collected. The last character of `self` is
	# never part of the name. The name is returned in lower case.
	private fun xml_tag: String do
		var tpl = new FlatBuffer
		var pos = 1
		if pos < length and self[1] == '/' then pos += 1
		while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
			tpl.add self[pos]
			pos += 1
		end
		return tpl.write_to_string.to_lower
	end

	# Is `self` made of alphabetic characters only (and not empty)?
	private fun is_valid_html_tag: Bool do
		if is_empty then return false
		for c in self do
			if not c.is_alpha then return false
		end
		return true
	end

	# Read and escape the markdown contained in `self`.
	#
	# `c` is the character following the backslash located at `pos`.
	# If `c` can be escaped, it is appended to `out` and the position of `c`
	# is returned. Otherwise the backslash is appended and `pos` is returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
			c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
			c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
			c == '`' or c == '~' or c == '^' then
			out.add c
			return pos + 1
		end
		out.add '\\'
		return pos
	end

	# Extract string found at end of fence opening.
	#
	# Returns the trimmed text starting at the first character that is not a
	# space, a backquote or a tilde, or `null` if there is no such character.
	private fun meta_from_fence: nullable Text do
		for i in [0..chars.length[ do
			var c = chars[i]
			if c != ' ' and c != '`' and c != '~' then
				return substring_from(i).trim
			end
		end
		return null
	end

	# Is `self` an unsafe HTML element?
	private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

	# Is `self` a HTML block element?
	private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

	# Is `self` a link prefix?
	private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

	# Names of the HTML elements considered unsafe.
	private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

	# Names of the HTML block elements.
	private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

	# Prefixes recognized in auto links.
	private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2437
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do
		return (new MarkdownProcessor).process(self)
	end
end