Merge branch 'master' into polymorphic_extern_classes
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
# Parse a markdown string and split it in blocks.
#
# Blocks are then outputted by a `MarkdownEmitter`.
#
# Usage:
#
#     var proc = new MarkdownProcessor
#     var html = proc.process("**Hello World!**")
#     assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor

	# Emitter used by `process` to output the parsed blocks.
	var emitter: MarkdownEmitter is noinit

	init do self.emitter = new MarkdownEmitter(self)

	# Process the markdown `input` string and return the processed output.
	fun process(input: String): Streamable do
		# init processor
		link_refs.clear
		last_link_ref = null
		current_line = null
		current_block = null
		# parse markdown
		var parent = read_lines(input)
		parent.remove_surrounding_empty_lines
		recurse(parent, false)
		# output processed text
		return emitter.emit(parent.kind)
	end

	# Split `input` string into `MDLines` and create a parent `MDBlock` with it.
	#
	# Tabs are expanded with spaces up to the next multiple of 4 columns.
	# Lines accepted by `check_link_ref` are not added to the block.
	private fun read_lines(input: String): MDBlock do
		var block = new MDBlock
		var value = new FlatBuffer
		var i = 0
		while i < input.length do
			value.clear
			var pos = 0
			var eol = false
			while not eol and i < input.length do
				var c = input[i]
				if c == '\n' then
					i += 1
					eol = true
				else if c == '\t' then
					# pad to the next tab stop (every 4 columns)
					var np = pos + (4 - (pos.bin_and(3)))
					while pos < np do
						value.add ' '
						pos += 1
					end
					i += 1
				else
					pos += 1
					value.add c
					i += 1
				end
			end

			var line = new MDLine(value.write_to_string)
			var is_link_ref = check_link_ref(line)
			# Skip link refs
			if not is_link_ref then block.add_line line
		end
		return block
	end

	# Check if line is a block link definition.
	#
	# Return `true` if `line` must be dropped from the document, that is if:
	#
	# * it contains a valid link ref (which is saved into `link_refs`), or
	# * it is the title line of a two-line link ref started on a previous line.
	private fun check_link_ref(line: MDLine): Bool do
		var md = line.value
		var is_link_ref = false
		var id = new FlatBuffer
		var link = new FlatBuffer
		var comment = new FlatBuffer
		var pos = -1
		if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
			pos = md.read_until(id, line.leading + 1, ']')
			if not id.is_empty and pos > 0 and pos + 2 < md.length and md[pos + 1] == ':' then
				pos = md.skip_spaces(pos + 2)
				# NOTE(review): `skip_spaces` is defined outside this view. The title
				# lookup below already treats a non-positive or out-of-range result
				# as "nothing left", so apply the same check before indexing.
				if pos > 0 and pos < md.length then
					if md[pos] == '<' then
						pos = md.read_until(link, pos + 1, '>')
						pos += 1
					else
						pos = md.read_until(link, pos, ' ', '\n')
					end
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < md.length then
						# something follows the link: it must be a well-formed title
						var c = md[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
		if is_link_ref and not id.is_empty and not link.is_empty then
			var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
			add_link_ref(id.write_to_string, lr)
			# Only a definition without title can be completed by the next line.
			if comment.is_empty then
				last_link_ref = lr
			else
				last_link_ref = null
			end
			return true
		end

		# Not a definition: maybe the title line of a two-line definition.
		var title = new FlatBuffer
		var pending = last_link_ref
		if not line.is_empty and pending != null then
			pos = line.leading
			var c = md[pos]
			if c == '\"' or c == '\'' or c == '(' then
				pos += 1
				if c == '(' then
					pos = md.read_until(title, pos, ')')
				else
					pos = md.read_until(title, pos, c)
				end
				# Same rule as for same-line titles: a non-positive position
				# means the closing delimiter was not found.
				if pos <= 0 then title.clear
			end
			if not title.is_empty then pending.title = title.write_to_string
			# Whatever this non-empty line was, it ends the pending definition.
			# Without this reset every later line starting with a quote or a
			# parenthesis would be swallowed as a title.
			last_link_ref = null
		end
		return not title.is_empty
	end

	# Known link refs
	# This list will be needed during output to expand links.
	var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

	# Last encountered link ref (for multiline definitions)
	#
	# Markdown allows link refs to be defined over two lines:
	#
	#     [id]: http://example.com/longish/path/to/resource/here
	#         "Optional Title Here"
	#
	private var last_link_ref: nullable LinkRef = null

	# Add a link ref to the list
	#
	# Keys are stored in lower case.
	fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref

	# Recursively split a `block`.
	#
	# The block is split according to the type of lines it contains.
	# Some blocks can be split again recursively like lists.
	# The `in_list` mode is used to recurse on list and build
	# nested paragraphs or code blocks.
	fun recurse(root: MDBlock, in_list: Bool) do
		var old_mode = self.in_list
		var old_root = self.current_block
		self.in_list = in_list

		var line = root.first_line
		while line != null and line.is_empty do
			line = line.next
			if line == null then
				# Only empty lines: nothing to process, but do not leak
				# the list mode of this call to the caller.
				self.in_list = old_mode
				return
			end
		end

		current_line = line
		current_block = root
		# Each line kind consumes one or more lines and moves `current_line` forward.
		while current_line != null do
			current_line.kind(self).process(self)
		end
		self.in_list = old_mode
		self.current_block = old_root
	end

	# Currently processed line.
	# Used when visiting blocks with `recurse`.
	var current_line: nullable MDLine = null is writable

	# Currently processed block.
	# Used when visiting blocks with `recurse`.
	var current_block: nullable MDBlock = null is writable

	# Is the current recursion in list mode?
	# Used when visiting blocks with `recurse`
	private var in_list = false
end
210
# Emit output corresponding to blocks content.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor containing link refs.
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using the default `HTMLDecorator`
	init(processor: MarkdownProcessor) do
		self.processor = processor
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Output `block` using `decorator` in a fresh buffer and return that buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit markdown text
	fun emit_text(text: Text) do emit_text_until(text, 0, null)

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of text if `token` is null.
	#
	# Return the position of the matching token, or -1 if the end of `text`
	# was reached. `current_text` and `current_pos` are restored only in the
	# latter case.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		loop
			if current_pos >= text.length then break
			var mt = text.token_at(current_pos)
			if token != null and not token isa TokenNone then
				# a strong marker also closes the matching emphasis marker
				var closing = mt.is_same_type(token) or
					(token isa TokenEmStar and mt isa TokenStrongStar) or
					(token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)
				if closing then return current_pos
			end
			mt.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to current buffer.
	fun add(e: Streamable) do
		var target = current_buffer
		if e isa Text then
			target.append e
		else
			target.append e.write_to_string
		end
	end

	# Append `c` to current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		current_buffer.add '\n'
	end
end
317
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# Example:
#
#     [1]: http://example.com/ "Optional title"
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link ref pointing to `link` with the given `title`.
	init with_title(link: String, title: nullable String) do
		self.title = title
		self.link = link
	end
end
340
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Every service receives the `MarkdownEmitter` `v` it must write to.
interface Decorator

	# Output a ruler `block`.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Output a headline `block`.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Output a paragraph `block`.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Output a code `block` (indented code or fence).
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Output a blockquote `block`.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Output an unordered list `block`.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Output an ordered list `block`.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Output a list item `block`.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Output `text` as an emphasis.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Output `text` as a strong emphasis.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Output `text` as a superscript.
	fun add_super(v: MarkdownEmitter, text: Text) is abstract

	# Output a link to `link` labelled `name`, with an optional title `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Output an image located at `link`, named `name`, with an optional title `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Output the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Output a code span made of the characters of `buffer` between `from` and `to`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Output `value` with escaping.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Output the characters of `buffer` between `from` and `to` as escaped code text.
	fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Output the escaped form of `char`.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Output a line break
	fun add_line_break(v: MarkdownEmitter) is abstract
end
402
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	redef fun add_headline(v, block) do
		# the same depth is used for the opening and the closing tag
		v.add "<h{block.depth}>"
		v.emit_in block
		v.add "</h{block.depth}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		v.add "<pre><code>"
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	# `text` is added as is: no escaping is done here.
	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	# `text` is added as is: no escaping is done here.
	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	# `text` is added as is: no escaping is done here.
	redef fun add_super(v, text) do
		v.add "<sup>"
		v.add text
		v.add "</sup>"
	end

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		# the title attribute is only output for a non-empty comment
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		# the title attribute is only output for a non-empty comment
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		# the label may itself contain inline markdown
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	# Escape each character of `text` with `escape_char`.
	redef fun append_value(v, text) do
		for c in text do
			escape_char(v, c)
		end
	end

	# Replace `&`, `<`, `>`, `"` and `'` by an entity; other characters are kept.
	redef fun escape_char(v, c) do
		var entity: nullable String = null
		if c == '&' then
			entity = "&amp;"
		else if c == '<' then
			entity = "&lt;"
		else if c == '>' then
			entity = "&gt;"
		else if c == '"' then
			entity = "&quot;"
		else if c == '\'' then
			entity = "&apos;"
		end
		if entity == null then
			v.addc c
		else
			v.add entity
		end
	end

	# Only `&`, `<` and `>` are replaced in code; quotes are kept as is.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end
end
548
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are both kept as doubly linked lists.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new sub-block
	# that is appended to the sub-blocks of `self` and returned.
	# The lines following `line` remain in `self`.
	fun split(line: MDLine): MDBlock do
		var block = new MDBlock
		block.first_line = first_line
		block.last_line = line
		# detach the moved lines from the remaining ones
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
		end
		if first_block == null then
			first_block = block
			last_block = block
		else
			# keep the sub-block list linked in both directions
			block.prev = last_block
			last_block.next = block
			last_block = block
		end
		return block
	end

	# Add a `line` at the end of this block.
	#
	# The `prev_empty` and `next_empty` flags of the linked lines are updated.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
			last_line = line
		else
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
			last_line = line
		end
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbors.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var was_empty = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			line = first_line
			was_empty = true
		end
		return was_empty
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var was_empty = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			line = last_line
			was_empty = true
		end
		return was_empty
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var was_empty = false
		if remove_leading_empty_lines then was_empty = true
		if remove_trailing_empty_lines then was_empty = true
		return was_empty
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = line.kind(v)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# the value changed: the indentation must be computed again
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line, even an empty one, is followed by a "\n".
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
718
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is emitted, see `emit_in`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are only emitted when no line remains in `block`.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first gathered in a temporary buffer without their leading
	# and trailing spaces, then the whole text is emitted as inline markdown.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` expects a number of characters, not an end index:
				# the leading spaces must be subtracted too or trailing spaces
				# leak in the output of indented lines.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count.max(0))
				# two trailing spaces or more ask for a hard line break
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			block.kind.emit(v)
			block = block.next
		end
	end
end
766
# A block without any markdown specificities.
#
# Nothing is redefined here: the default behavior of `Block` is used as is.
# This class is only used for typing purposes.
class BlockNone
	super Block
end
774
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove blockquote markers.
	#
	# For each non-empty line of `block` starting with a `>`, the marker and
	# one following space (if any) are removed, then `leading` is updated.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			var value = line.value
			var marker = line.leading
			if not line.is_empty and value[marker] == '>' then
				var rem = marker + 1
				if rem < value.length and value[rem] == ' ' then rem += 1
				line.value = value.substring_from(rem)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
800
# A markdown code block.
class BlockCode
	super Block

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Lines are emitted as escaped code, without their first 4 characters.
	# Each line, even an empty one, is followed by a line break.
	redef fun emit_lines(v) do
		var current = block.first_line
		loop
			if current == null then break
			if not current.is_empty then
				var code = current.value
				v.decorator.append_code(v, code, 4, code.length)
			end
			v.addn
			current = current.next
		end
	end
end
818
# A markdown code-fence block.
#
# Everything is inherited from `BlockCode`,
# this class is only used for typing purposes.
class BlockFence
	super BlockCode
end
826
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# The leading `#` (and following spaces) and the closing `#` (and
	# preceding spaces) are removed, then `depth` is set to the number of
	# leading `#`, 6 at most.
	# Nothing is done if `depth` is already known or if there is no
	# non-empty first line.
	# A headline without any text (like `##` or `# #`) is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		var start = line.leading
		while start < value.length and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < value.length and value[start] == ' ' do
			start += 1
		end
		# Scan back from the last non-space character, never going before
		# `start`: otherwise a headline made only of marks gives a negative
		# length to `substring`.
		var nend = value.length - line.trailing - 1
		while nend >= start and value[nend] == '#' do nend -= 1
		while nend >= start and value[nend] == ' ' do nend -= 1
		if nend < start then
			line.is_empty = true
		else
			line.value = value.substring(start, nend - start + 1)
			line.leading = 0
			line.trailing = 0
		end
		depth = level.min(6)
	end
end
863
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
870
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# A new item starts at each list line and at each non-indented,
	# non-empty line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		# the first line always belongs to the first item
		line = line.next
		while line != null do
			var t = line.kind(v)
			var starts_item = t isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				# close the current item just before `line`
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# the remaining lines form the last item
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If a sub-block of any list item of `block` is a paragraph, then the
	# `BlockNone` sub-blocks of all the list items become paragraphs.
	private fun expand_paragraphs(block: MDBlock) do
		# look for a paragraph in the items
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockParagraph then
						found = true
						break
					end
					sub = sub.next
				end
			end
			item = item.next
		end
		if not found then return

		# promote the plain sub-blocks of the items
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
					sub = sub.next
				end
			end
			item = item.next
		end
	end
end
928
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
935
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
942
# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
949
# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
956
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Lines are emitted as is, each one followed by a line break.
	redef fun emit_lines(v) do
		var current = block.first_line
		loop
			if current == null then break
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
970
971 # A markdown line.
972 class MDLine
973
974 # Text contained in this line.
975 var value: String is writable
976
977 # Is this line empty?
978 # Lines containing only spaces are considered empty.
979 var is_empty: Bool = true is writable
980
981 # Previous line in `MDBlock` or null if first line.
982 var prev: nullable MDLine = null is writable
983
984 # Next line in `MDBlock` or null if last line.
985 var next: nullable MDLine = null is writable
986
987 # Is the previous line empty?
988 var prev_empty: Bool = false is writable
989
990 # Is the next line empty?
991 var next_empty: Bool = false is writable
992
993 init(value: String) do
994 self.value = value
995 self.leading = process_leading
996 if leading != value.length then
997 self.is_empty = false
998 self.trailing = process_trailing
999 end
1000 end
1001
1002 # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1003 fun clear do
1004 value = ""
1005 leading = 0
1006 trailing = 0
1007 is_empty = true
1008 if prev != null then prev.next_empty = true
1009 if next != null then next.prev_empty = true
1010 end
1011
1012 # The type of line.
1013 # see `md_line_*`
1014 fun kind(v: MarkdownProcessor): Line do
1015 var value = self.value
1016 if is_empty then return new LineEmpty
1017 if leading > 3 then return new LineCode
1018 if value[leading] == '#' then return new LineHeadline
1019 if value[leading] == '>' then return new LineBlockquote
1020
1021 if value.length - leading - trailing > 2 then
1022 if value[leading] == '`' and count_chars_start('`') >= 3 then
1023 return new LineFence
1024 end
1025 if value[leading] == '~' and count_chars_start('~') >= 3 then
1026 return new LineFence
1027 end
1028 end
1029
1030 if value.length - leading - trailing > 2 and
1031 (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
1032 if count_chars(value[leading]) >= 3 then
1033 return new LineHR
1034 end
1035 end
1036
1037 if value.length - leading >= 2 and value[leading + 1] == ' ' then
1038 var c = value[leading]
1039 if c == '*' or c == '-' or c == '+' then return new LineUList
1040 end
1041
1042 if value.length - leading >= 3 and value[leading].is_digit then
1043 var i = leading + 1
1044 while i < value.length and value[i].is_digit do i += 1
1045 if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
1046 return new LineOList
1047 end
1048 end
1049
1050 if value[leading] == '<' and check_html then return new LineXML
1051
1052 if next != null and not next.is_empty then
1053 if next.count_chars('=') > 0 then
1054 return new LineHeadline1
1055 end
1056 if next.count_chars('-') > 0 then
1057 return new LineHeadline2
1058 end
1059 end
1060 return new LineOther
1061 end
1062
1063 # Number or leading spaces on this line.
1064 var leading: Int = 0 is writable
1065
1066 # Compute `leading` depending on `value`.
1067 fun process_leading: Int do
1068 var count = 0
1069 var value = self.value
1070 while count < value.length and value[count] == ' ' do count += 1
1071 if leading == value.length then clear
1072 return count
1073 end
1074
1075 # Number of trailing spaces on this line.
1076 var trailing: Int = 0 is writable
1077
1078 # Compute `trailing` depending on `value`.
1079 fun process_trailing: Int do
1080 var count = 0
1081 var value = self.value
1082 while value[value.length - count - 1] == ' ' do
1083 count += 1
1084 end
1085 return count
1086 end
1087
1088 # Count the amount of `ch` in this line.
1089 # Return A value > 0 if this line only consists of `ch` end spaces.
1090 fun count_chars(ch: Char): Int do
1091 var count = 0
1092 for c in value do
1093 if c == ' ' then
1094 continue
1095 end
1096 if c == ch then
1097 count += 1
1098 continue
1099 end
1100 count = 0
1101 break
1102 end
1103 return count
1104 end
1105
1106 # Count the amount of `ch` at the start of this line ignoring spaces.
1107 fun count_chars_start(ch: Char): Int do
1108 var count = 0
1109 for c in value do
1110 if c == ' ' then
1111 continue
1112 end
1113 if c == ch then
1114 count += 1
1115 else
1116 break
1117 end
1118 end
1119 return count
1120 end
1121
1122 # Last XML line if any.
1123 private var xml_end_line: nullable MDLine = null
1124
1125 # Does `value` contains valid XML markup?
1126 private fun check_html: Bool do
1127 var tags = new Array[String]
1128 var tmp = new FlatBuffer
1129 var pos = leading
1130 if pos + 1 < value.length and value[pos + 1] == '!' then
1131 if read_xml_comment(self, pos) > 0 then return true
1132 end
1133 pos = value.read_xml(tmp, pos, false)
1134 var tag: String
1135 if pos > -1 then
1136 tag = tmp.xml_tag
1137 if not tag.is_html_block then
1138 return false
1139 end
1140 if tag == "hr" then
1141 xml_end_line = self
1142 return true
1143 end
1144 tags.add tag
1145 var line: nullable MDLine = self
1146 while line != null do
1147 while pos < line.value.length and line.value[pos] != '<' do
1148 pos += 1
1149 end
1150 if pos >= line.value.length then
1151 if line.value[pos - 2] == '/' then
1152 tags.pop
1153 if tags.is_empty then
1154 xml_end_line = line
1155 break
1156 end
1157 end
1158 line = line.next
1159 pos = 0
1160 else
1161 tmp = new FlatBuffer
1162 var new_pos = line.value.read_xml(tmp, pos, false)
1163 if new_pos > 0 then
1164 tag = tmp.xml_tag
1165 if tag.is_html_block and not tag == "hr" then
1166 if tmp[1] == '/' then
1167 if tags.last != tag then
1168 return false
1169 end
1170 tags.pop
1171 else
1172 tags.add tag
1173 end
1174 end
1175 if tags.is_empty then
1176 xml_end_line = line
1177 break
1178 end
1179 pos = new_pos
1180 else
1181 pos += 1
1182 end
1183 end
1184 end
1185 return tags.is_empty
1186 end
1187 return false
1188 end
1189
# Read an XML comment (`<!-- ... -->`) opening at index `start` of `first_line`.
#
# The comment may span several lines: the scan follows `MDLine::next` until
# the closing `-->` is met. On success `first_line.xml_end_line` is set to the
# line holding the terminator and the index just after `-->` in that line is
# returned. Returns `-1` when `first_line` does not open a comment at `start`
# or when the comment is never closed.
#
# Used by `check_html`.
private fun read_xml_comment(first_line: MDLine, start: Int): Int do
	var opening = first_line.value
	# Not enough room for `<!--` at `start`.
	if start + 3 >= opening.length then return -1
	# The two dashes are looked up relative to `start` (not at the absolute
	# indexes 2 and 3) so a comment opening after leading spaces is recognized.
	if opening[start + 2] != '-' or opening[start + 3] != '-' then return -1
	var line: nullable MDLine = first_line
	var pos = start + 4
	while line != null do
		var content = line.value
		# Jump to the next dash of the current line.
		while pos < content.length and content[pos] != '-' do pos += 1
		if pos == content.length then
			# No dash left here: carry on with the following line.
			line = line.next
			pos = 0
		else
			if pos + 2 < content.length and content[pos + 1] == '-' and content[pos + 2] == '>' then
				first_line.xml_end_line = line
				return pos + 3
			end
			pos += 1
		end
	end
	return -1
end
1218
# Extract the text of `self` without leading and trailing.
#
# NOTE(review): `leading` and `trailing` are presumably the numbers of
# characters to drop at each end of `value` -- confirm against their
# declarations.
fun text: String do
	# `substring` expects a start index and a character *count* (not an end
	# index): the kept span is what remains once both margins are removed.
	var count = value.length - leading - trailing
	# Guard against overlapping margins, a negative count is not accepted.
	if count < 0 then count = 0
	return value.substring(leading, count)
end
1221 end
1222
# Kind of a markdown line.
#
# Each implementation handles the lines found at the current position
# of a `MarkdownProcessor`.
interface Line

	# Handle the current line of `v`.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1230
# A markdown line without content.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the following one.
	redef fun process(v) do
		var skipped = v.current_line
		v.current_line = skipped.next
	end
end
1239
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the lines starting at the current one into a paragraph block
	# (or a `BlockNone` block inside a list) and split it from the current block.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end: stop on the first empty line or on the first line
		# opening another construct
		while line != null and not line.is_empty do
			var t = line.kind(v)
			if v.in_list and t isa LineList then break
			if t isa LineCode or t isa LineFence then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
				t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# split the block at its last line
		var block: MDBlock
		if line == null then
			# end of input reached: the block takes everything left
			block = v.current_block.split(v.current_block.last_line.as(not null))
		else if line.is_empty then
			# the terminating empty line belongs to the block
			block = v.current_block.split(line)
		else
			# stopped on another construct: it stays out of the block
			block = v.current_block.split(line.prev.as(not null))
		end
		# inside a list, text that is neither preceded nor followed by an
		# empty line is not wrapped in a paragraph
		if v.in_list and not was_empty and (line == null or not line.is_empty) then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1290
# A line that belongs to a markdown code block.
class LineCode
	super Line

	# Split the code block starting at the current line from the current block.
	redef fun process(v) do
		# lookup block end: empty lines and code lines are part of the block
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty and not stop.kind(v) isa LineCode then break
			stop = stop.next
		end
		# split at block end line
		var last: nullable MDLine
		if stop == null then
			last = v.current_block.last_line
		else
			last = stop.prev
		end
		var code = v.current_block.split(last.as(not null))
		code.kind = new BlockCode(code)
		code.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1313
# A line that starts a raw XML section.
class LineXML
	super Line

	# Split the XML section from the current block as a `BlockXML`.
	redef fun process(v) do
		var first = v.current_line
		# close what precedes the XML, if anything
		var before = first.prev
		if before != null then v.current_block.split(before)
		# the section runs up to the `xml_end_line` of its first line
		var xml = v.current_block.split(first.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1328
# A line that opens a markdown blockquote.
class LineBlockquote
	super Line

	# Split the quoted lines from the current block and process them recursively.
	redef fun process(v) do
		# go to bquote end: a non-empty, non-indented line that follows an
		# empty line and is not itself a blockquote line
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
				not stop.kind(v) isa LineBlockquote then break
			stop = stop.next
		end
		# build sub block
		var last: nullable MDLine
		if stop == null then
			last = v.current_block.last_line
		else
			last = stop.prev
		end
		var quote = v.current_block.split(last.as(not null))
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		v.current_line = stop
		v.recurse(quote, false)
		v.current_line = v.current_block.first_line
	end
end
1358
# A horizontal ruler line.
class LineHR
	super Line

	# Split the ruler line alone in a `BlockRuler` block.
	redef fun process(v) do
		var ruler_line = v.current_line
		# close what precedes the ruler, if anything
		var before = ruler_line.prev
		if before != null then v.current_block.split(before)
		var ruler = v.current_block.split(ruler_line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1372
# A line that delimits a fenced code block.
class LineFence
	super Line

	# Split the fenced lines from the current block as a `BlockFence`.
	redef fun process(v) do
		# go to fence end, starting after the opening fence
		var after = v.current_line.next
		while after != null and not after.kind(v) isa LineFence do
			after = after.next
		end
		# a closing fence was found: it is part of the block, step past it
		if after != null then after = after.next
		# build fence block
		var last: nullable MDLine
		if after == null then
			last = v.current_block.last_line
		else
			last = after.prev
		end
		var fence = v.current_block.split(last.as(not null))
		fence.kind = new BlockFence(fence)
		# the fence markers themselves are not content
		fence.first_line.clear
		if fence.last_line.kind(v) isa LineFence then fence.last_line.clear
		fence.remove_surrounding_empty_lines
		v.current_line = after
	end
end
1403
# A headline line.
class LineHeadline
	super Line

	# Split the headline line alone in a `BlockHeadline` block.
	redef fun process(v) do
		var head = v.current_line
		# close what precedes the headline, if anything
		var before = head.prev
		if before != null then v.current_block.split(before)
		var title = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(title)
		title.kind = kind
		# TODO block ID
		# block.id = block.first_line.strip_id
		kind.transform_headline(title)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1422
# A headline of level 1.
class LineHeadline1
	super LineHeadline

	# Split the headline in a `BlockHeadline` block of depth 1.
	redef fun process(v) do
		var head = v.current_line
		# close what precedes the headline, if anything
		var before = head.prev
		if before != null then v.current_block.split(before)
		# the line that follows the title is blanked out
		head.next.clear
		var title = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(title)
		kind.depth = 1
		# TODO block ID
		# block.id = block.first_line.strip_id
		kind.transform_headline(title)
		title.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1443
# A headline of level 2.
class LineHeadline2
	super LineHeadline

	# Split the headline in a `BlockHeadline` block of depth 2.
	redef fun process(v) do
		var head = v.current_line
		# close what precedes the headline, if anything
		var before = head.prev
		if before != null then v.current_block.split(before)
		# the line that follows the title is blanked out
		head.next.clear
		var title = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(title)
		kind.depth = 2
		# TODO block ID
		# block.id = block.first_line.strip_id
		kind.transform_headline(title)
		title.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1464
# A line that opens a markdown list.
# Factorizes the processing shared by ordered and unordered lists.
class LineList
	super Line

	# Split the whole list from the current block, then process each of
	# its items recursively.
	redef fun process(v) do
		# go to list end: a non-empty, non-indented line that follows an
		# empty line and is not a list line
		var stop = v.current_line
		while stop != null do
			var t = stop.kind(v)
			if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
				not t isa LineList then break
			stop = stop.next
		end
		# build list block
		var last: nullable MDLine
		if stop == null then
			last = v.current_block.last_line
		else
			last = stop.prev
		end
		var list = v.current_block.split(last.as(not null))
		var kind = block_kind(list)
		list.kind = kind
		# the flags are reset before and after the trimming, as the first
		# and last lines may change in between
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# process the sub blocks of the list one after the other
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = stop
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract the content of `line` located after the list marker.
	protected fun extract_value(line: MDLine): String is abstract
end
1509
# A line of an ordered list.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The content starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1520
# A line of an unordered list.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The content starts two characters after the `leading` of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1531
# A character of the markdown input seen as a token.
# Tokens with a specific markup behaviour redefine `emit`.
abstract class Token

	# Index of `self` in the markdown input.
	var pos: Int

	# Character located at `pos` in the markdown input.
	var char: Char

	# Output `self` with `v`; by default `char` is added as is.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1545
# A token with no markup meaning; it keeps the default `Token::emit`.
class TokenNone
	super Token
end
1550
# A token that opens an emphasis.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis, or emit the
	# raw `char` when no matching token is found.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, tmp)
		v.current_pos = stop
	end
end
1567
# An emphasis token written with a star (`*`).
class TokenEmStar
	super TokenEm
end
1572
# An emphasis token written with an underscore (`_`).
class TokenEmUnderscore
	super TokenEm
end
1577
# A token that opens a strong emphasis.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as strong, or emit the raw
	# `char` when no matching token is found.
	redef fun emit(v) do
		var tmp = v.push_buffer
		# the marker is two characters wide
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, tmp)
		v.current_pos = stop + 1
	end
end
1594
# A strong token written with stars (`**`).
class TokenStrongStar
	super TokenStrong
end
1599
# A strong token written with underscores (`__`).
class TokenStrongUnderscore
	super TokenStrong
end
1604
# A token that opens a span of code.
# Factorizes the work shared by single and double backquoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code, without
	# its surrounding spaces, or emit the raw `char` when no matching
	# token is found.
	redef fun emit(v) do
		var first = pos + next_pos + 1
		var close = v.current_text.find_token(first, self)
		if close <= 0 then
			v.addc char
			return
		end
		v.current_pos = close + next_pos
		# trim the spaces at the beginning of the span
		while first < close and v.current_text[first] == ' ' do first += 1
		# nothing but spaces: nothing to output
		if first >= close then return
		# trim the spaces at the end of the span
		var last = close
		while v.current_text[last - 1] == ' ' do last -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), first, last)
	end

	# Number of extra characters of the marker (after the first one).
	private fun next_pos: Int is abstract
end
1627
# A span code token written with a single backquote.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end
1634
# A span code token written with two backquotes.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end
1641
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image when `check_link` accepts the construct,
	# emit the raw `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parse the construct opened at `start` in the current text of `v` and
	# fill `name`, `link`, `comment` and `is_abbrev` on the way.
	# The text between brackets may be followed by:
	#
	# * `(address "title")` or `(<address> "title")`: an inline address,
	# * `[id]`: a reference looked up in the `link_refs` of the processor
	#   (the link text is used as id when `id` is empty),
	# * anything else: the link text itself is looked up in `link_refs`.
	#
	# `token` tells links (`TokenLink`) from images, whose marker is one
	# character longer. `out` is not used.
	#
	# Return the index of the last character of the construct, or `-1`
	# if the construct is not a valid link.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		# skip the marker: `[` for a link, `![` for an image
		var pos
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		# read the text between brackets
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# nothing left after the text: it must be a known reference
			var tid = name.write_to_string.to_lower
			if not v.processor.link_refs.has_key(tid) then return -1
			var lr = v.processor.link_refs[tid]
			is_abbrev = lr.is_abbrev
			link = lr.link
			comment = lr.title
			pos = old_pos
		else if md[pos] == '(' then
			# inline address
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# the `>` closing `<address>` may be the last character of the
			# input: there is nothing left to close the construct
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# only spaces left: the construct is never closed
				if pos < start then return -1
				if md[pos] == '"' then
					# optional title
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# explicit reference
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# implicit reference: the text itself is the id
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if not v.processor.link_refs.has_key(tid) then return -1
			var lr = v.processor.link_refs[tid]
			link = lr.link
			comment = lr.title
			pos = old_pos
		end
		# an unknown explicit reference leaves `link` unset
		if link == null then return -1
		return pos
	end
end
1760
# A token that opens a markdown link.
class TokenLink
	super TokenLinkOrImage

	# Output an abbreviation when the link is one and has a title,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
		else
			v.decorator.add_link(v, link.as(not null), name.as(not null), title)
		end
	end
end
1773
# A token that opens a markdown image.
class TokenImage
	super TokenLinkOrImage

	# Output the image with the decorator of `v`.
	redef fun emit_hyper(v) do
		var decorator = v.decorator
		decorator.add_image(v, link.as(not null), name.as(not null), comment)
	end
end
1782
# A token that opens an HTML/XML tag.
class TokenHTML
	super Token

	# Emit what `check_html` recognized, or the escaped `char` when
	# nothing was recognized.
	redef fun emit(v) do
		var html = new FlatBuffer
		var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add html
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An auto link (`<prefix:...>`) is output directly with the decorator
	# of `v`; inline HTML is appended to `out`.
	# Return the index of the closing `>` in `md`, or `-1`.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var url = new FlatBuffer
		var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and url.is_link_prefix then
			# the rest of the address is appended to the prefix
			pos = md.read_until(url, pos, '>')
			if pos != -1 then
				var link = url.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
1820
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity as is when `check_entity` accepts it, emit the
	# escaped `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Read `md` from `start` up to the next `;` and accept three forms:
	# hexadecimal (`&#x1F;`), decimal (`&#31;`) and named (`&amp;`).
	# On success the entity, `;` included, is left in `out` and the index
	# of the `;` in `md` is returned. Return `-1` otherwise.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then return -1
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# hexadecimal: at least one digit is required
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					# reject anything that is not an hexadecimal digit
					if not is_hex then return -1
				end
			else
				# decimal
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# named entity
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
			# if out.is_entity then
			# else
			# return -1
			# end
		end
		out.add ';'
		return pos
	end
end
1874
# A backslash that escapes the character following it.
class TokenEscape
	super Token

	# Step over the backslash and emit the next character as is.
	redef fun emit(v) do
		var escaped_at = v.current_pos + 1
		v.current_pos = escaped_at
		v.addc v.current_text[escaped_at]
	end
end
1884
# A token that opens a superscript.
class TokenSuper
	super Token

	# Emit the text up to the matching token as superscript, or emit the
	# raw `char` when no matching token is found.
	redef fun emit(v) do
		var tmp = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_super(v, tmp)
		v.current_pos = stop
	end
end
1901
1902 redef class Text
1903
# Get the token kind at `pos`.
#
# The kind depends on the character at `pos` and on its neighbours:
# the character before and the two characters after.
private fun token_at(pos: Int): Token do
	# Neighbours of the character; a space stands for whatever falls
	# outside of `self`.
	var c0 = ' '
	if pos > 0 then c0 = self[pos - 1]
	var c = self[pos]
	var c1 = ' '
	if pos + 1 < length then c1 = self[pos + 1]
	var c2 = ' '
	if pos + 2 < length then c2 = self[pos + 2]

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
			return new TokenEmStar(pos, c)
		end
		# a star surrounded by spaces is a plain character
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
		return new TokenNone(pos, c)
	end
	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
			return new TokenEmUnderscore(pos, c)
		end
		# an underscore surrounded by spaces is a plain character
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
		return new TokenNone(pos, c)
	end
	if c == '!' then
		if c1 == '[' then return new TokenImage(pos, c)
		return new TokenNone(pos, c)
	end
	if c == '[' then return new TokenLink(pos, c)
	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(pos, c)
		return new TokenCodeSingle(pos, c)
	end
	if c == '\\' then
		# a backslash only escapes markdown special characters
		if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or
			c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or
			c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or
			c1 == '^' then return new TokenEscape(pos, c)
		return new TokenNone(pos, c)
	end
	if c == '<' then return new TokenHTML(pos, c)
	if c == '&' then return new TokenEntity(pos, c)
	if c == '^' then
		# doubled carets are plain characters
		if c0 == '^' or c1 == '^' then return new TokenNone(pos, c)
		return new TokenSuper(pos, c)
	end
	# any other character, `]` included, has no specific meaning
	return new TokenNone(pos, c)
end
1993
# Find the first position, from `start`, where `token_at` gives a token
# of the same type as `token`.
# Return `-1` if there is none.
private fun find_token(start: Int, token: Token): Int do
	for pos in [start..length[ do
		if token_at(pos).is_same_type(token) then return pos
	end
	return -1
end
2005
# Get the position of the first character, from `start`, that is neither
# a space nor a new line.
# Return `-1` if the end of `self` is reached first.
private fun skip_spaces(start: Int): Int do
	var pos = start
	while pos > -1 and pos < length do
		var c = self[pos]
		if c != ' ' and c != '\n' then break
		pos += 1
	end
	if pos >= length then return -1
	return pos
end
2015
# Append `self` to `out`, from `start` up to the first character found
# in `nend`, handling backslash escapes with `escape`.
# Return the position of the terminator, or `-1` if there is none.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			# `escape` tells where the reading goes on
			pos = escape(out, self[pos + 1], pos)
		else if nend.has(c) then
			break
		else
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2040
# Append `self` to `out` verbatim, from `start` up to the first character
# found in `nend`.
# Return the position of the terminator, or `-1` if there is none.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2061
# Read `self` as XML until `to` and append it to the `out` buffer.
#
# The reading starts at `from` and stops on the first character of `to`
# found outside of a quoted string (`"..."` or `'...'`). Inside a
# string, a backslash and the character it escapes are copied verbatim.
# Return the position of the terminator, or `-1` if there is none.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# copy the escaped character itself, not the backslash again
					out.add self[pos]
					pos += 1
				end
				continue
			end
			# only the quote that opened the string closes it: the other
			# kind of quote is plain content (`"it's"`)
			if c == str_char then in_str = false
			out.add c
			pos += 1
			continue
		end
		if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2107
# Read the XML tag opened at `start` and append it to `out`.
#
# With `safe_mode`, the `<` of tags accepted by `is_html_unsafe` is
# appended as `&lt;`.
# Return the position of the closing `>`, `start + 1` for a `<!`
# construct, or `-1` if no complete tag is found.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	# tag name
	if safe_mode then
		var tmp = new FlatBuffer
		pos = read_xml_until(tmp, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = tmp.write_to_string.trim.to_lower
		if tag.is_html_unsafe then
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append tmp
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# attributes
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	# self closing tag
	if self[pos] == '/' then
		out.append " /"
		pos = self.read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	out.add '>'
	return pos
end
2157
# Read a markdown link address from `start` and append it to `out`.
#
# The address ends on a space found outside of nested parentheses or on
# the parenthesis closing the link. Backslash escapes are handled with
# `escape`.
# Return the position of the terminator, or `-1` if there is none.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var pos = start
	# the opening parenthesis of the link counts for one
	var depth = 1
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			pos = escape(out, self[pos + 1], pos)
		else
			if c == '(' then
				depth += 1
			else if c == ' ' then
				if depth == 1 then break
			else if c == ')' then
				depth -= 1
				if depth == 0 then break
			end
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2184
# Read a markdown link text from `start` and append it to `out`.
#
# Nested brackets are part of the text: the reading stops on the `]`
# that matches the bracket opening the text.
# Return the position of that `]`, or `-1` if there is none.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var pos = start
	# the bracket opening the text counts for one
	var depth = 1
	while pos < length do
		var c = self[pos]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2211
# Extract the lower-cased tag name of `self`, seen as an XML tag.
#
# The first character and a `/` right after it are skipped, then letters
# and digits are collected. The last character of `self` is never read.
private fun xml_tag: String do
	var first = 1
	if length > 1 and self[1] == '/' then first = 2
	var tag = new FlatBuffer
	for i in [first..length - 1[ do
		var c = self[i]
		if not c.is_digit and not c.is_letter then break
		tag.add c
	end
	return tag.write_to_string.to_lower
end
2223
# Handle a backslash located at `pos` and followed by `c`.
#
# If `c` is a markdown special character, `c` is appended to `out` and
# `pos + 1` is returned. Otherwise the backslash is appended to `out`
# and `pos` is returned.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = c == '\\' or c == '[' or c == ']' or c == '(' or
		c == ')' or c == '{' or c == '}' or c == '#' or c == '"' or
		c == '\'' or c == '.' or c == '<' or c == '>' or c == '*' or
		c == '+' or c == '-' or c == '_' or c == '!' or c == '`' or
		c == '~' or c == '^'
	if not escapable then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2236
# Is `self` one of the `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var tag = self.write_to_string
	return html_unsafe_tags.has(tag)
end
2239
# Is `self` one of the `html_block_tags` (an HTML block element)?
private fun is_html_block: Bool do
	var tag = self.write_to_string
	return html_block_tags.has(tag)
end
2242
# Is `self` one of the `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefix = self.write_to_string
	return html_link_prefixes.has(prefix)
end
2245
# Names of the HTML elements accepted by `is_html_unsafe`.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
2247
# Names of the HTML elements accepted by `is_html_block`.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
2249
# Prefixes accepted by `is_link_prefix`.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2251 end
2252
redef class String

	# Process `self` as markdown with a new `MarkdownProcessor` and
	# return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do
		return (new MarkdownProcessor).process(self)
	end
end