4efde743cb06bac409f5d34edaedc1ed0716fe91
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
# Parse a markdown string and split it into blocks.
#
# Blocks are then output by a `MarkdownEmitter`.
#
# Usage:
#
#     var proc = new MarkdownProcessor
#     var html = proc.process("**Hello World!**")
#     assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor

	# Emitter used by `process` to render the parsed blocks.
	var emitter: MarkdownEmitter is noinit

	init do self.emitter = new MarkdownEmitter(self)

	# Process the markdown `input` string and return the processed output.
	fun process(input: String): Streamable do
		# Forget everything a previous run may have left behind
		link_refs.clear
		last_link_ref = null
		current_line = null
		current_block = null
		# Parse the markdown
		var root = read_lines(input)
		root.remove_surrounding_empty_lines
		recurse(root, false)
		# Render the parsed blocks
		return emitter.emit(root.kind)
	end

	# Split `input` into `MDLine`s and return a parent `MDBlock` holding them.
	#
	# Tabs are expanded to spaces up to the next multiple of 4 columns.
	# Lines recognized by `check_link_ref` are not added to the block.
	private fun read_lines(input: String): MDBlock do
		var root = new MDBlock
		var text = new FlatBuffer
		var len = input.length
		var i = 0
		while i < len do
			text.clear
			# Column reached in the expanded line, needed to align tabs
			var col = 0
			while i < len do
				var c = input[i]
				i += 1
				if c == '\n' then break
				if c == '\t' then
					var stop = col + (4 - col.bin_and(3))
					while col < stop do
						text.add ' '
						col += 1
					end
				else
					text.add c
					col += 1
				end
			end
			var line = new MDLine(text.write_to_string)
			# Link definitions are stored in `link_refs`, not kept as content
			if not check_link_ref(line) then root.add_line line
		end
		return root
	end

	# Check if `line` is a block link definition.
	#
	# Return `true` if `line` contains a valid link ref (saved into `link_refs`)
	# or the title of the link ref defined on the line just before.
	#
	# A title line is only accepted directly after a title-less definition:
	# `last_link_ref` is dropped as soon as any other line is seen, so a quoted
	# or parenthesized line further down the document is kept as content.
	private fun check_link_ref(line: MDLine): Bool do
		var md = line.value
		var id = new FlatBuffer
		var link = new FlatBuffer
		var comment = new FlatBuffer
		var is_link_ref = false
		if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
			var pos = md.read_until(id, line.leading + 1, ']')
			if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
				pos = md.skip_spaces(pos + 2)
				# `skip_spaces` is defined elsewhere; its result is range-checked
				# before the title below, so it is checked before indexing here too
				# (a line such as `[id]:   ` has nothing after the spaces).
				if pos >= 0 and pos < md.length then
					if md[pos] == '<' then
						pos = md.read_until(link, pos + 1, '>')
						pos += 1
					else
						pos = md.read_until(link, pos, ' ', '\n')
					end
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < md.length then
						# Something follows the link: it must be a delimited title
						var c = md[pos]
						if c == '\"' or c == '\'' or c == '(' then
							pos += 1
							if c == '(' then
								pos = md.read_until(comment, pos, ')')
							else
								pos = md.read_until(comment, pos, c)
							end
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
		if is_link_ref and not id.is_empty and not link.is_empty then
			var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
			add_link_ref(id.write_to_string, lr)
			# Only a definition without title can be completed by the next line
			if comment.is_empty then
				last_link_ref = lr
			else
				last_link_ref = null
			end
			return true
		end
		# Not a definition: maybe the title of the definition on the previous line.
		var pending = last_link_ref
		# Whatever this line is, the previous definition cannot be completed later.
		last_link_ref = null
		if pending == null or line.is_empty then return false
		comment = new FlatBuffer
		var pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if comment.is_empty then return false
		pending.title = comment.write_to_string
		return true
	end

	# Known link refs
	# This list will be needed during output to expand links.
	var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

	# Last encountered link ref without title (for multiline definitions)
	#
	# Markdown allows link refs to be defined over two lines:
	#
	#     [id]: http://example.com/longish/path/to/resource/here
	#         "Optional Title Here"
	#
	private var last_link_ref: nullable LinkRef = null

	# Add a link ref to the list, keys are stored in lower case.
	fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref

	# Recursively split a `block`.
	#
	# The block is split according to the kind of lines it contains.
	# Some blocks, like lists, can be split again recursively.
	# The `in_list` mode is used to recurse on lists and build
	# nested paragraphs or code blocks.
	#
	# `in_list` and `current_block` are restored to their previous values
	# before returning, including when `root` only contains empty lines.
	fun recurse(root: MDBlock, in_list: Bool) do
		var saved_mode = self.in_list
		var saved_block = self.current_block
		self.in_list = in_list

		# Skip the leading empty lines
		var line = root.first_line
		while line != null and line.is_empty do
			line = line.next
			if line == null then
				# Nothing to process, but the mode must not leak to the caller
				self.in_list = saved_mode
				return
			end
		end

		current_line = line
		current_block = root
		# Each line kind consumes one or more lines and moves `current_line` forward
		while current_line != null do
			current_line.kind(self).process(self)
		end
		self.in_list = saved_mode
		self.current_block = saved_block
	end

	# Currently processed line.
	# Used when visiting blocks with `recurse`.
	var current_line: nullable MDLine = null is writable

	# Currently processed block.
	# Used when visiting blocks with `recurse`.
	var current_block: nullable MDBlock = null is writable

	# Is the current recursion in list mode?
	# Used when visiting blocks with `recurse`
	private var in_list = false
end
210
# Emit the output corresponding to the content of blocks.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor containing link refs.
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using the default `HTMLDecorator`
	init(processor: MarkdownProcessor) do
		self.processor = processor
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Output `block` using `decorator` in a fresh buffer and return that buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown `text` starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of text if `token` is null.
	#
	# Return the position of the closing token, or -1 if the end of `text`
	# was reached. `current_text` and `current_pos` are restored only in
	# the latter case.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var found = text.token_at(current_pos)
			if token != null and not token isa TokenNone then
				# A strong marker also closes the emphasis of the same family
				var closes = found.is_same_type(token)
				if not closes then closes = token isa TokenEmStar and found isa TokenStrongStar
				if not closes then closes = token isa TokenEmUnderscore and found isa TokenStrongUnderscore
				if closes then return current_pos
			end
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers, the last one receives the output.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to the current buffer.
	fun add(e: Streamable) do
		if not e isa Text then
			current_buffer.append e.write_to_string
		else
			current_buffer.append e
		end
	end

	# Append `c` to the current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		current_buffer.add '\n'
	end
end
317
# A link reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# Example:
#
#     [1]: http://example.com/ "Optional title"
class LinkRef

	# Target of the link (href).
	var link: String

	# Optional title of the link.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a reference to `link` carrying `title`.
	init with_title(link: String, title: nullable String) do
		self.title = title
		self.link = link
	end
end
340
# A `Decorator` is used to emit markdown into a specific format.
# The default decorator is `HTMLDecorator`.
#
# Every service receives the `MarkdownEmitter` `v` in which the output is appended.
interface Decorator

	# Render a ruler `block`.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Render a headline `block` with its corresponding level.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Render a paragraph `block`.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Render a code or fence `block`.
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Render a blockquote `block`.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Render an unordered list `block`.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Render an ordered list `block`.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Render a list item `block`.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Render `text` as emphasis.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as strong.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as superscript.
	fun add_super(v: MarkdownEmitter, text: Text) is abstract

	# Render a link to `link` labelled `name` with an optional `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image at `link` named `name` with an optional `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Render a code span reading `buffer` between `from` and `to`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render `value` and escape it.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Render code text read in `buffer` between `from` and `to` and escape it.
	fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render the escaped form of `char`.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Render a line break.
	fun add_line_break(v: MarkdownEmitter) is abstract

	# Generate a new valid html id from `txt`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labelled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
408
# A headline found in a markdown document.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
422
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	# The raw text of the first line is recorded in `headlines`
	# under the id computed by `strip_id`.
	redef fun add_headline(v, block) do
		# Register the headline
		var title = block.block.first_line.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		# Render it
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		v.add "<pre><code>"
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_super(v, text) do
		v.add "<sup>"
		v.add text
		v.add "</sup>"
	end

	# The `title` attribute is only written for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		var titled = comment != null and not comment.is_empty
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		if titled then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# The `title` attribute is only written for a non-empty `comment`.
	# `name` is emitted as markdown text.
	redef fun add_link(v, link, name, comment) do
		var titled = comment != null and not comment.is_empty
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		if titled then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	redef fun append_value(v, text) do
		for c in text do
			escape_char(v, c)
		end
	end

	# Replace `&`, `<`, `>`, `"` and `'` by their entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Only `&`, `<` and `>` are replaced by their entities, quotes are kept.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Spaces become `_`; letters, digits and `allowed_id_chars` are kept;
	# everything else is dropped.
	# A numeric suffix is appended if the id is already used in `headlines`.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var res = b.to_s
		var key = res
		# Look for the first free `res_1`, `res_2`... when `res` is taken
		var i = 0
		while headlines.has_key(key) do
			i += 1
			key = "{res}_{i}"
		end
		return key
	end

	# Punctuation kept in ids by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
605
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are both stored as doubly linked chains.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var total = 0
		var sub = first_block
		while sub != null do
			total += 1
			sub = sub.next
		end
		return total
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var total = 0
		var line = first_line
		while line != null do
			total += 1
			line = line.next
		end
		return total
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new sub-block,
	# which is appended after `last_block` and returned.
	fun split(line: MDLine): MDBlock do
		var sub = new MDBlock
		# Hand the lines up to `line` over to the sub-block
		sub.first_line = first_line
		sub.last_line = line
		first_line = line.next
		line.next = null
		var head = first_line
		if head == null then
			last_line = null
		else
			head.prev = null
		end
		# Append the sub-block to the chain of children, linked both ways
		if first_block == null then
			first_block = sub
		else
			var tail = last_block.as(not null)
			tail.next = sub
			sub.prev = tail
		end
		last_block = sub
		return sub
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			removed = true
			line = first_line
		end
		return removed
	end

	# Remove trailing empty lines.
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			removed = true
			line = last_line
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		# Both ends are always cleaned, hence no lazy `or` between the calls
		var at_start = remove_leading_empty_lines
		var at_end = remove_trailing_empty_lines
		return at_start or at_end
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = line.kind(v)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect the text of the lines, each one followed by a "\n".
	fun text: String do
		var buf = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then buf.append line.text
			buf.append "\n"
			line = line.next
		end
		return buf.write_to_string
	end
end
775
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`: its lines if any, its sub-blocks otherwise.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines are first gathered, without their leading and trailing spaces,
	# in a temporary buffer that is then emitted as markdown text.
	fun emit_lines(v: MarkdownEmitter) do
		var raw = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` expects a length, not an end index
				var len = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, len)
				# Two or more trailing spaces request a line break
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then v.addn
			line = line.next
		end
		v.pop_buffer
		v.emit_text(raw)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var sub = self.block.first_block
		while sub != null do
			sub.kind.emit(v)
			sub = sub.next
		end
	end
end
823
# A block without any markdown specificities.
#
# It uses the implementation inherited from `Block` as is,
# this class is only used for typing purposes.
class BlockNone
	super Block
end
831
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove the blockquote marker `>`, and the space following it if any,
	# from the lines of `block`.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			var text = line.value
			var marker = line.leading
			if not line.is_empty and text[marker] == '>' then
				var cut = marker + 1
				if cut < text.length and text[cut] == ' ' then cut += 1
				line.value = text.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
857
# A markdown code block.
class BlockCode
	super Block

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line escaped as code, without its first 4 columns,
	# and followed by a line break.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			var text = line.value
			if not line.is_empty then v.decorator.append_code(v, text, 4, text.length)
			v.addn
			line = line.next
		end
	end
end
875
# A markdown code-fence block.
#
# Rendered like a `BlockCode`, except that lines are emitted from their
# first column: `BlockCode::emit_lines` skips the 4 columns of indentation
# that mark an indented code block, which would truncate fenced lines.
#
# NOTE(review): assumes the lines of a fence block are the raw source lines,
# confirm against `LineFence::process`.
class BlockFence
	super BlockCode

	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				v.decorator.append_code(v, line.value, 0, line.value.length)
			end
			v.addn
			line = line.next
		end
	end
end
883
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block` and set `depth`.
	#
	# Do nothing if `depth` is already set or if `block` has no content.
	# The opening `#` are counted to compute `depth` (at most 6), then the
	# opening marks, the closing `#` and the surrounding spaces are removed.
	# A line left without any text is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		var len = value.length
		# Count and skip the opening marks
		var level = 0
		var start = line.leading
		while start < len and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < len and value[start] == ' ' do start += 1
		if start >= len then
			line.is_empty = true
		else
			# Walk back over the closing marks, never before `start`:
			# a line such as `# #` has no text between its marks
			var last = len - line.trailing - 1
			while last >= start and value[last] == '#' do last -= 1
			while last >= start and value[last] == ' ' do last -= 1
			if last < start then
				line.is_empty = true
			else
				line.value = value.substring(start, last - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		depth = level.min(6)
	end
end
920
# An item of a markdown list.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
927
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split the list block into list item sub-blocks.
	#
	# A new item starts on each list line, and on each non-empty and
	# non-indented line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		# The first line always belongs to the first item
		line = line.next
		while line != null do
			var t = line.kind(v)
			var starts_item = t isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				# Everything before `line` is the previous item
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# The remaining lines are the last item
		var item = block.split(block.last_line.as(not null))
		item.kind = new BlockListItem(item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If one item of `block` directly contains a paragraph, the untyped
	# (`BlockNone`) sub-blocks of all the items become paragraphs.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph inside an item
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not found do
					found = child.kind isa BlockParagraph
					child = child.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote the untyped sub-blocks
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then child.kind = new BlockParagraph(child)
					child = child.next
				end
			end
			item = item.next
		end
	end
end
985
# An ordered markdown list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
992
# An unordered markdown list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
999
# A paragraph of markdown text.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1006
# A horizontal ruler in a markdown document.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1013
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit each line as is, without any transformation nor escaping,
	# followed by a line break.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				v.add line.value
			end
			v.addn
			line = line.next
		end
	end
end
1027
1028 # A markdown line.
1029 class MDLine
1030
1031 # Text contained in this line.
1032 var value: String is writable
1033
1034 # Is this line empty?
1035 # Lines containing only spaces are considered empty.
1036 var is_empty: Bool = true is writable
1037
1038 # Previous line in `MDBlock` or null if first line.
1039 var prev: nullable MDLine = null is writable
1040
1041 # Next line in `MDBlock` or null if last line.
1042 var next: nullable MDLine = null is writable
1043
1044 # Is the previous line empty?
1045 var prev_empty: Bool = false is writable
1046
1047 # Is the next line empty?
1048 var next_empty: Bool = false is writable
1049
1050 init(value: String) do
1051 self.value = value
1052 self.leading = process_leading
1053 if leading != value.length then
1054 self.is_empty = false
1055 self.trailing = process_trailing
1056 end
1057 end
1058
1059 # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1060 fun clear do
1061 value = ""
1062 leading = 0
1063 trailing = 0
1064 is_empty = true
1065 if prev != null then prev.next_empty = true
1066 if next != null then next.prev_empty = true
1067 end
1068
1069 # The type of line.
1070 # see `md_line_*`
1071 fun kind(v: MarkdownProcessor): Line do
1072 var value = self.value
1073 if is_empty then return new LineEmpty
1074 if leading > 3 then return new LineCode
1075 if value[leading] == '#' then return new LineHeadline
1076 if value[leading] == '>' then return new LineBlockquote
1077
1078 if value.length - leading - trailing > 2 then
1079 if value[leading] == '`' and count_chars_start('`') >= 3 then
1080 return new LineFence
1081 end
1082 if value[leading] == '~' and count_chars_start('~') >= 3 then
1083 return new LineFence
1084 end
1085 end
1086
1087 if value.length - leading - trailing > 2 and
1088 (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
1089 if count_chars(value[leading]) >= 3 then
1090 return new LineHR
1091 end
1092 end
1093
1094 if value.length - leading >= 2 and value[leading + 1] == ' ' then
1095 var c = value[leading]
1096 if c == '*' or c == '-' or c == '+' then return new LineUList
1097 end
1098
1099 if value.length - leading >= 3 and value[leading].is_digit then
1100 var i = leading + 1
1101 while i < value.length and value[i].is_digit do i += 1
1102 if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
1103 return new LineOList
1104 end
1105 end
1106
1107 if value[leading] == '<' and check_html then return new LineXML
1108
1109 if next != null and not next.is_empty then
1110 if next.count_chars('=') > 0 then
1111 return new LineHeadline1
1112 end
1113 if next.count_chars('-') > 0 then
1114 return new LineHeadline2
1115 end
1116 end
1117 return new LineOther
1118 end
1119
1120 # Number or leading spaces on this line.
1121 var leading: Int = 0 is writable
1122
1123 # Compute `leading` depending on `value`.
1124 fun process_leading: Int do
1125 var count = 0
1126 var value = self.value
1127 while count < value.length and value[count] == ' ' do count += 1
1128 if leading == value.length then clear
1129 return count
1130 end
1131
1132 # Number of trailing spaces on this line.
1133 var trailing: Int = 0 is writable
1134
1135 # Compute `trailing` depending on `value`.
1136 fun process_trailing: Int do
1137 var count = 0
1138 var value = self.value
1139 while value[value.length - count - 1] == ' ' do
1140 count += 1
1141 end
1142 return count
1143 end
1144
1145 # Count the amount of `ch` in this line.
1146 # Return A value > 0 if this line only consists of `ch` end spaces.
1147 fun count_chars(ch: Char): Int do
1148 var count = 0
1149 for c in value do
1150 if c == ' ' then
1151 continue
1152 end
1153 if c == ch then
1154 count += 1
1155 continue
1156 end
1157 count = 0
1158 break
1159 end
1160 return count
1161 end
1162
1163 # Count the amount of `ch` at the start of this line ignoring spaces.
1164 fun count_chars_start(ch: Char): Int do
1165 var count = 0
1166 for c in value do
1167 if c == ' ' then
1168 continue
1169 end
1170 if c == ch then
1171 count += 1
1172 else
1173 break
1174 end
1175 end
1176 return count
1177 end
1178
# Last XML line if any.
private var xml_end_line: nullable MDLine = null

# Does `value` contain valid XML markup?
#
# The markup is read from the `leading` position of this line:
#
# * a `!` as second character is first tried as an XML comment;
# * otherwise the first tag must be an HTML block tag (see `is_html_block`);
# * an `hr` tag ends on this very line;
# * any other block tag is pushed on a stack, then this line and the next
#   ones are scanned until every opened block tag has been closed.
#
# On success `xml_end_line` is set to the line where the markup ends.
# Returns `false` if a closing tag does not match the last opened one or if
# some tags are still opened once all the lines have been read.
private fun check_html: Bool do
	var start = leading
	if start + 1 < value.length and value[start + 1] == '!' then
		if read_xml_comment(self, start) > 0 then return true
	end
	var buf = new FlatBuffer
	var pos = value.read_xml(buf, start, false)
	if pos < 0 then return false
	var tag = buf.xml_tag
	if not tag.is_html_block then return false
	if tag == "hr" then
		xml_end_line = self
		return true
	end
	# stack of the block tags still waiting for their closing tag
	var open_tags = new Array[String]
	open_tags.add tag
	var line: nullable MDLine = self
	while line != null do
		var text = line.value
		# move to the next tag opening on the current line
		while pos < text.length and text[pos] != '<' do pos += 1
		if pos >= text.length then
			# End of line reached: a `/` two characters before the end
			# is handled as the closing of the last opened tag.
			if pos - 2 >= 0 and text[pos - 2] == '/' then
				open_tags.pop
				if open_tags.is_empty then
					xml_end_line = line
					break
				end
			end
			line = line.next
			pos = 0
			continue
		end
		buf = new FlatBuffer
		var new_pos = text.read_xml(buf, pos, false)
		if new_pos <= 0 then
			# not a readable tag, try one character further
			pos += 1
			continue
		end
		tag = buf.xml_tag
		if tag.is_html_block and tag != "hr" then
			if buf[1] == '/' then
				# a closing tag must match the last opened one
				if open_tags.last != tag then return false
				open_tags.pop
			else
				open_tags.add tag
			end
		end
		if open_tags.is_empty then
			xml_end_line = line
			break
		end
		pos = new_pos
	end
	return open_tags.is_empty
end
1246
# Read a XML comment.
# Used by `check_html`.
#
# `start` is the position of the `<` opening the comment in `first_line`.
# The two characters following `<!` must be `--`; they are looked up
# relatively to `start` so a comment preceded by leading spaces is
# recognized too.
#
# The closing `-->` is searched on `first_line` then on the next lines.
# When found, `first_line.xml_end_line` is set to the line holding it and
# the position right after the `-->` is returned.
# Returns -1 if `first_line` does not open a comment or if the comment is
# never closed.
private fun read_xml_comment(first_line: MDLine, start: Int): Int do
	var first = first_line.value
	# not enough room for `<!--`
	if start + 3 >= first.length then return -1
	if first[start + 2] != '-' or first[start + 3] != '-' then return -1
	var line: nullable MDLine = first_line
	var pos = start + 4
	while line != null do
		var text = line.value
		while pos < text.length and text[pos] != '-' do pos += 1
		if pos == text.length then
			# nothing on this line, continue on the next one
			line = line.next
			pos = 0
		else
			if pos + 2 < text.length then
				if text[pos + 1] == '-' and text[pos + 2] == '>' then
					first_line.xml_end_line = line
					return pos + 3
				end
			end
			pos += 1
		end
	end
	return -1
end
1275
# Extract the text of `self` without its leading and trailing spaces.
#
# `substring` expects a start index and a number of characters, so the
# count is the length of `value` minus both `leading` and `trailing`.
# The count is clamped to 0 in case `leading` and `trailing` overlap.
fun text: String do
	var count = value.length - leading - trailing
	if count < 0 then count = 0
	return value.substring(leading, count)
end
1278 end
1279
# A kind of markdown line.
#
# Each implementation knows how to turn the lines starting at the current
# line of a `MarkdownProcessor` into blocks.
interface Line

	# Process the current line of `v`.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1287
# A markdown line without content.
class LineEmpty
	super Line

	# Skip the empty line: the processor simply moves to the next line.
	redef fun process(v) do
		var skipped = v.current_line
		v.current_line = skipped.next
	end
end
1296
# A line without any specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the following lines into a `BlockParagraph` or a `BlockNone`.
	#
	# Lines are consumed until an empty line, the end of the block or a line
	# starting another construct (code, fence, headline, ruler, blockquote,
	# XML, or a list item while inside a list).
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end
		while line != null and not line.is_empty do
			var t = line.kind(v)
			var stops = (v.in_list and t isa LineList) or
				t isa LineCode or t isa LineFence or
				t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
				t isa LineHR or t isa LineBlockquote or t isa LineXML
			if stops then break
			line = line.next
		end
		# build block
		var block: MDBlock
		var as_none: Bool
		if line == null then
			# no more lines: take everything that remains
			block = v.current_block.split(v.current_block.last_line.as(not null))
			as_none = v.in_list and not was_empty
		else if line.is_empty then
			# stopped on an empty line: it belongs to the block, which is
			# then always a paragraph
			block = v.current_block.split(line)
			as_none = false
		else
			# stopped on another construct: it stays out of the block
			block = v.current_block.split(line.prev.as(not null))
			as_none = v.in_list and not was_empty
		end
		if as_none then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1347
# A line belonging to a markdown code block.
class LineCode
	super Line

	# Gather the consecutive code lines, empty lines included, into a `BlockCode`.
	redef fun process(v) do
		# lookup block end: first line neither empty nor code
		var stop = v.current_line
		loop
			if stop == null then break
			if not stop.is_empty and not (stop.kind(v) isa LineCode) then break
			stop = stop.next
		end
		# split at block end line
		var cut: MDLine
		if stop != null then
			cut = stop.prev.as(not null)
		else
			cut = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(cut)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1370
# A line starting raw XML markup.
class LineXML
	super Line

	# Isolate the lines up to the `xml_end_line` of the current line into a `BlockXML`.
	#
	# Lines located before the current one are split first into their own block.
	redef fun process(v) do
		var first = v.current_line
		var before = first.prev
		if before != null then v.current_block.split(before)
		var xml = v.current_block.split(first.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1385
# A line of a markdown blockquote.
class LineBlockquote
	super Line

	# Gather the lines of the quote into a `BlockQuote` and process its content.
	#
	# The quote ends before the first non-empty line that follows an empty
	# line, has no leading space and is not a blockquote line itself.
	redef fun process(v) do
		# go to bquote end
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty then
				if stop.prev_empty and stop.leading == 0 and
					not (stop.kind(v) isa LineBlockquote) then break
			end
			stop = stop.next
		end
		# build sub block
		var cut: MDLine
		if stop != null then
			cut = stop.prev.as(not null)
		else
			cut = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(cut)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = stop
		# the content of the quote is markdown too
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1415
# A markdown horizontal ruler line.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	#
	# Lines located before the current one are split first into their own block.
	redef fun process(v) do
		var ruler = v.current_line
		var before = ruler.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(ruler.as(not null))
		block.kind = new BlockRuler(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1429
# A line delimiting a markdown fenced code block.
class LineFence
	super Line

	# Gather the lines up to the closing fence into a `BlockFence`.
	#
	# If no closing fence is found, all the remaining lines are taken.
	# The fence lines themselves are cleared.
	redef fun process(v) do
		# go to fence end
		var stop = v.current_line.next
		while stop != null and not (stop.kind(v) isa LineFence) do
			stop = stop.next
		end
		# the closing fence is part of the block
		if stop != null then stop = stop.next
		# build fence block
		var cut: MDLine
		if stop != null then
			cut = stop.prev.as(not null)
		else
			cut = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(cut)
		block.kind = new BlockFence(block)
		block.first_line.clear
		if block.last_line.kind(v) isa LineFence then block.last_line.clear
		block.remove_surrounding_empty_lines
		v.current_line = stop
	end
end
1460
# A markdown headline line.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline`.
	#
	# Lines located before the current one are split first into their own block.
	redef fun process(v) do
		var head = v.current_line
		var before = head.prev
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(block)
		block.kind = kind
		kind.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1477
# A markdown headline of depth 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	#
	# The line following the headline is cleared.
	redef fun process(v) do
		var head = v.current_line
		var before = head.prev
		if before != null then v.current_block.split(before)
		head.next.clear
		var block = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(block)
		kind.depth = 1
		kind.transform_headline(block)
		block.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1496
# A markdown headline of depth 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	#
	# The line following the headline is cleared.
	redef fun process(v) do
		var head = v.current_line
		var before = head.prev
		if before != null then v.current_block.split(before)
		head.next.clear
		var block = v.current_block.split(head.as(not null))
		var kind = new BlockHeadline(block)
		kind.depth = 2
		kind.transform_headline(block)
		block.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1515
# A line of a markdown list.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list into a list block and process each item.
	#
	# The list ends before the first non-empty line that follows an empty
	# line, has no leading space and is not a list line itself.
	redef fun process(v) do
		# go to list end
		var stop = v.current_line
		while stop != null do
			var t = stop.kind(v)
			if not stop.is_empty then
				if stop.prev_empty and stop.leading == 0 and not (t isa LineList) then break
			end
			stop = stop.next
		end
		# build list block
		var cut: MDLine
		if stop != null then
			cut = stop.prev.as(not null)
		else
			cut = v.current_block.last_line.as(not null)
		end
		var list = v.current_block.split(cut)
		var kind = block_kind(list)
		list.kind = kind
		# the flags are reset before and after the removal of the
		# surrounding empty lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# process each sub block of the list
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = stop
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract the content of the list item held by `line`.
	protected fun extract_value(line: MDLine): String is abstract
end
1560
# A line of an ordered list.
class LineOList
	super LineList

	redef fun block_kind(block) do return new BlockOrderedList(block)

	# The item content starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var text = line.value
		var dot = text.index_of('.')
		return text.substring_from(dot + 2)
	end
end
1571
# A line of an unordered list.
class LineUList
	super LineList

	redef fun block_kind(block) do return new BlockUnorderedList(block)

	# The item content starts two characters after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1582
# A token represents a character of the markdown input.
# Tokens with a specific markup behaviour redefine `emit` to handle it.
abstract class Token

	# Position of `self` in the markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token with the emitter `v`.
	#
	# By default `char` is added as is.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1596
# A token with no markup behaviour of its own.
# It does not redefine anything from `Token`.
class TokenNone
	super Token
end
1601
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var close = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if close <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = close
	end
end
1618
# An emphasis token written with a star (`*`).
class TokenEmStar
	super TokenEm
end
1623
# An emphasis token written with an underscore (`_`).
class TokenEmUnderscore
	super TokenEm
end
1628
# A strong emphasis token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# The token is two characters wide, so the content starts at `pos + 2`
	# and the emitter resumes one character after the matching position.
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var close = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if close <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = close + 1
	end
end
1645
# A strong emphasis token written with stars (`**`).
class TokenStrongStar
	super TokenStrong
end
1650
# A strong emphasis token written with underscores (`__`).
class TokenStrongUnderscore
	super TokenStrong
end
1655
# A span code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span code.
	#
	# Spaces surrounding the code are dropped; nothing is emitted for a
	# span made only of spaces.
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var extra = next_pos
		var from = pos + extra + 1
		var to = v.current_text.find_token(from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + extra
		# trim the spaces on both sides of the code
		while from < to and v.current_text[from] == ' ' do from += 1
		if from >= to then return
		while v.current_text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
	end

	# Offset added to `pos` to locate the last character of the token.
	private fun next_pos: Int is abstract
end
1678
# A span code token made of a single backquote.
class TokenCodeSingle
	super TokenCode

	# The token is one character wide.
	redef fun next_pos do
		return 0
	end
end
1685
# A span code token made of two backquotes.
class TokenCodeDouble
	super TokenCode

	# The token is two characters wide.
	redef fun next_pos do
		return 1
	end
end
1692
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if the construct is valid.
	#
	# If `check_link` rejects the construct, `char` is emitted as is.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parses the construct starting at `start` in the current text of `v`
	# and fills `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Three forms are accepted after the `[name]` part:
	#
	# * `(address "title")`: inline address, the title is optional;
	# * `[id]`: reference looked up in the link references of the processor,
	#   an empty `id` means that `name` is used;
	# * nothing: `name` itself is looked up in the link references.
	#
	# Returns the position of the last character of the construct,
	# or -1 if the construct is not a valid link.
	# The `out` buffer is not used.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		var pos
		# a link starts with `[`, anything else is handled as `![`
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# nothing follows the name: lookup the name as a reference
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# inline address
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then
				pos += 1
				# the text ends right after `>`: no closing `)`
				if pos >= md.length then return -1
			end
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# only spaces up to the end of the text: no closing `)`
				if pos < start then return -1
				if pos > start and md[pos] == '"' then
					# optional title
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# explicit reference
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# implicit reference: the name is the id
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
1811
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation if the link is one and has a title, a regular link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
			return
		end
		v.decorator.add_link(v, link.as(not null), name.as(not null), title)
	end
end
1824
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image using `link` as address, `name` as text and `comment` as title.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var text = name.as(not null)
		v.decorator.add_image(v, address, text, comment)
	end
end
1833
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup starting at the current position of `v`.
	#
	# If `check_html` rejects the markup, `char` is escaped by the decorator.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An auto link such as `<http://...>` is directly added to `v` as a link
	# and nothing is appended to `out`.
	# Otherwise the markup is read into `out` with `read_xml` in safe mode.
	#
	# Returns the position of the closing `>`, or -1 if nothing valid is found.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var address = new FlatBuffer
		var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and address.is_link_prefix then
			# the rest of the address is appended after the prefix
			pos = md.read_until(address, pos, '>')
			if pos != -1 then
				var link = address.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
1871
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity starting at `pos` as is.
	#
	# If `check_entity` rejects the entity, `char` is escaped by the decorator.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# The entity is read from `start` up to the next `;` and appended to `out`.
	# Three forms are accepted:
	#
	# * `&#xHHHH;`: hexadecimal reference, only hexadecimal digits are allowed;
	# * `&#DDDD;`: decimal reference, only decimal digits are allowed;
	# * `&name;`: named entity, only letters and digits are allowed.
	#
	# Returns the position of the `;`, or -1 if the entity is not valid.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# reject anything outside of `0-9`, `a-f` and `A-F`
					if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
						return -1
					end
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
			out.add ';'
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			out.add ';'
			# TODO check entity is valid
			# if out.is_entity then
			return pos
			# else
			# return -1
			# end
		end
		return pos
	end
end
1925
# A markdown escape token (backslash).
class TokenEscape
	super Token

	# Skip the backslash and emit the character that follows it as is.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text[escaped]
	end
end
1935
# A markdown superscript token.
class TokenSuper
	super Token

	# Emit the text up to the matching token as a superscript.
	#
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var close = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if close <= 0 then
			v.addc char
			return
		end
		v.decorator.add_super(v, content)
		v.current_pos = close
	end
end
1952
redef class Text

	# Get the token kind at `pos`.
	#
	# The kind depends on the character at `pos` and on its neighbours:
	# the previous character and the two next ones.
	# Neighbours located outside of `self` are handled as spaces.
	private fun token_at(pos: Int): Token do
		var c0 = ' '
		if pos > 0 then c0 = self[pos - 1]
		var c = self[pos]
		var c1 = ' '
		if pos + 1 < length then c1 = self[pos + 1]
		var c2 = ' '
		if pos + 2 < length then c2 = self[pos + 2]

		if c == '*' then
			if c1 == '*' then
				if c0 != ' ' or c2 != ' ' then
					return new TokenStrongStar(pos, c)
				else
					return new TokenEmStar(pos, c)
				end
			end
			if c0 != ' ' or c1 != ' ' then
				return new TokenEmStar(pos, c)
			else
				return new TokenNone(pos, c)
			end
		else if c == '_' then
			if c1 == '_' then
				if c0 != ' ' or c2 != ' ' then
					return new TokenStrongUnderscore(pos, c)
				else
					return new TokenEmUnderscore(pos, c)
				end
			end
			if c0 != ' ' or c1 != ' ' then
				return new TokenEmUnderscore(pos, c)
			else
				return new TokenNone(pos, c)
			end
		else if c == '!' then
			if c1 == '[' then return new TokenImage(pos, c)
			return new TokenNone(pos, c)
		else if c == '[' then
			return new TokenLink(pos, c)
		else if c == ']' then
			return new TokenNone(pos, c)
		else if c == '`' then
			if c1 == '`' then
				return new TokenCodeDouble(pos, c)
			else
				return new TokenCodeSingle(pos, c)
			end
		else if c == '\\' then
			# a backslash is an escape only before an escapable character
			if is_md_escapable(c1) then
				return new TokenEscape(pos, c)
			else
				return new TokenNone(pos, c)
			end
		else if c == '<' then
			return new TokenHTML(pos, c)
		else if c == '&' then
			return new TokenEntity(pos, c)
		else if c == '^' then
			if c0 == '^' or c1 == '^' then
				return new TokenNone(pos, c)
			else
				return new TokenSuper(pos, c)
			end
		else
			return new TokenNone(pos, c)
		end
	end

	# Find the position of a `token` in `self`.
	#
	# Returns the first position from `start` where the token kind has the
	# same type as `token`, or -1 if there is none.
	private fun find_token(start: Int, token: Token): Int do
		var pos = start
		while pos < length do
			if token_at(pos).is_same_type(token) then
				return pos
			end
			pos += 1
		end
		return -1
	end

	# Get the position of the next non-space character.
	#
	# Spaces and new lines are skipped from `start`.
	# Returns -1 if the end of `self` is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
			pos += 1
		end
		if pos < length then return pos
		return -1
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Returns the position of the first character of `nend` found,
	# or -1 if the end of `self` is reached first.
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if nend.has(c) then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Returns the position of the first character of `nend` found,
	# or -1 if the end of `self` is reached first.
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if nend.has(c) then break
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Characters of `to` located inside a quoted string (`"` or `'`) do not
	# stop the reading. Inside a string, a backslash and the character that
	# follows it are copied as is.
	#
	# Returns the position of the first character of `to` found,
	# or -1 if the end of `self` is reached first.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					out.add c
					pos += 1
					if pos < length then
						# copy the escaped character, not the backslash again
						out.add self[pos]
						pos += 1
					end
					continue
				end
				if c == str_char then
					in_str = false
					out.add c
					pos += 1
					continue
				end
			end
			if c == '"' or c == '\'' then
				in_str = true
				str_char = c
			end
			if not in_str then
				if to.has(c) then break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the `<` opening the tag.
	# In safe mode, the `<` of an unsafe tag (see `is_html_unsafe`) is
	# replaced by `&lt;`.
	# For a `<!` construct, only `<!` is appended and `start + 1` is returned.
	#
	# Returns the position of the closing `>`, or -1 if the tag is not closed.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		if start + 1 >= length then return -1
		var pos: Int
		var is_close_tag = false
		var c1 = self[start + 1]
		if c1 == '/' then
			is_close_tag = true
			pos = start + 2
		else if c1 == '!' then
			out.append "<!"
			return start + 1
		else
			pos = start + 1
		end
		if safe_mode then
			# read the tag name apart to check it before the output
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if tag.is_html_unsafe then
				out.append "&lt;"
			else
				out.append "<"
			end
			if is_close_tag then out.add '/'
			out.append tmp
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# read the attributes
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			out.add '>'
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# The address ends at the first space found outside of nested
	# parentheses or at the closing parenthesis of the link.
	# Returns the position of that character, or -1 if the end of `self` is
	# reached first.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		# depth of the parentheses, the one opening the address is counted
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				var end_reached = false
				if c == '(' then
					counter += 1
				else if c == ' ' then
					if counter == 1 then end_reached = true
				else if c == ')' then
					counter -= 1
					if counter == 0 then end_reached = true
				end
				if end_reached then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Nested brackets are kept in the text.
	# Returns the position of the `]` closing the text, or -1 if the end of
	# `self` is reached first.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		# depth of the brackets, the one opening the text is counted
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '[' then
				counter += 1
			else if c == ']' then
				counter -= 1
				if counter == 0 then break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Extract the XML tag name from a XML tag.
	#
	# `self` is expected to start with `<` or `</`.
	# The name is made of the following letters and digits, the last
	# character of `self` is never read. The name is returned in lower case.
	private fun xml_tag: String do
		var tpl = new FlatBuffer
		var pos = 1
		if pos < length and self[1] == '/' then pos += 1
		while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
			tpl.add self[pos]
			pos += 1
		end
		return tpl.write_to_string.to_lower
	end

	# Read and escape the markdown contained in `self`.
	#
	# `c` is the character following the backslash located at `pos`.
	# If `c` can be escaped, it is appended to `out` and `pos + 1` is
	# returned. Otherwise the backslash is appended to `out` and `pos` is
	# returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		if is_md_escapable(c) then
			out.add c
			return pos + 1
		end
		out.add '\\'
		return pos
	end

	# Can `c` be escaped with a backslash in markdown?
	private fun is_md_escapable(c: Char): Bool do
		return c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
			c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
			c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
			c == '`' or c == '~' or c == '^'
	end

	# Is `self` an unsafe HTML element?
	private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

	# Is `self` a HTML block element?
	private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

	# Is `self` a link prefix?
	private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

	# Names of the HTML tags considered as unsafe.
	private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

	# Names of the HTML tags considered as block elements.
	private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

	# Prefixes accepted for auto links.
	private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2303
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do
		return (new MarkdownProcessor).process(self)
	end
end