bd5e3beae212bc2ae13ace9cc1224d67dd435484
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
# Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
# Emitter in charge of rendering the parsed blocks.
#
# Assigned by `init` since its constructor takes `self`.
var emitter: MarkdownEmitter is noinit

# Attach a fresh `MarkdownEmitter` to this processor.
init do
	self.emitter = new MarkdownEmitter(self)
end
36
# Process the markdown `input` string and return the processed output.
#
# The parsing state (`link_refs`, `last_link_ref`, `current_line` and
# `current_block`) is reset before `input` is read.
fun process(input: String): Streamable do
	# reset the parsing state
	current_block = null
	current_line = null
	last_link_ref = null
	link_refs.clear
	# build the block tree
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)
	# render the tree
	return emitter.emit(root.kind)
end
51
# Split `input` string into `MDLine`s and create a parent `MDBlock` with it.
#
# Lines are separated by `'\n'`.
# Each tab is replaced by spaces up to the next multiple of 4 columns.
# Lines accepted by `check_link_ref` are not added to the returned block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock
	var buf = new FlatBuffer
	var len = input.length
	var idx = 0
	while idx < len do
		buf.clear
		# column reached in the expanded line
		var col = 0
		loop
			if idx >= len then break
			var ch = input[idx]
			idx += 1
			if ch == '\n' then break
			if ch == '\t' then
				var stop = col + (4 - (col.bin_and(3)))
				while col < stop do
					buf.add ' '
					col += 1
				end
			else
				buf.add ch
				col += 1
			end
		end

		var line = new MDLine(buf.write_to_string)
		# Skip link refs
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
87
# Check if line is a block link definition.
#
# Return `true` when `line` must be left out of the document, that is when:
#
# * `line` contains a valid link ref, which is then saved into `link_refs`;
# * or `line` is the quoted title of the link ref stored in `last_link_ref`,
#   in which case the title is attached to that link ref.
#
# Only the first non-empty line following a title-less link ref is
# considered as a possible title.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = md.read_until(id, line.leading + 1, ']')
		if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
			pos = md.skip_spaces(pos + 2)
			# When only spaces follow the colon there is no link to read and
			# `pos` is not a valid index anymore: leave `link` empty.
			if pos >= 0 and pos < md.length then
				if md[pos] == '<' then
					pos = md.read_until(link, pos + 1, '>')
					pos += 1
				else
					pos = md.read_until(link, pos, ' ', '\n')
				end
			end
			if not link.is_empty then
				pos = md.skip_spaces(pos)
				if pos > 0 and pos < md.length then
					# Something follows the link: it must be a complete title.
					var c = md[pos]
					if c == '\"' or c == '\'' or c == '(' then
						pos += 1
						if c == '(' then
							pos = md.read_until(comment, pos, ')')
						else
							pos = md.read_until(comment, pos, c)
						end
						if pos > 0 then is_link_ref = true
					end
				else
					is_link_ref = true
				end
			end
		end
	end
	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a link ref without title can be completed by the next line.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a link ref: the line may still hold the title of the previous one.
	if line.is_empty then return false
	var pending = last_link_ref
	if pending == null then return false
	# Whatever this line is, later lines cannot be a title anymore.
	last_link_ref = null
	var c = md[line.leading]
	if c != '\"' and c != '\'' and c != '(' then return false
	var closer = c
	if c == '(' then closer = ')'
	var title = new FlatBuffer
	var stop = md.read_until(title, line.leading + 1, closer)
	# Same success test as for inline titles above
	# (assumes `read_until` returns a value <= 0 when `closer` is missing).
	if stop <= 0 or title.is_empty then return false
	pending.title = title.write_to_string
	return true
end
155
156 # Known link refs
157 # This list will be needed during output to expand links.
158 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
159
160 # Last encountered link ref (for multiline definitions)
161 #
162 # Markdown allows link refs to be defined over two lines:
163 #
164 # [id]: http://example.com/longish/path/to/resource/here
165 # "Optional Title Here"
166 #
167 private var last_link_ref: nullable LinkRef = null
168
# Register `ref` in `link_refs`.
#
# The entry is stored under the lower-cased `key`.
fun add_link_ref(key: String, ref: LinkRef) do
	var id = key.to_lower
	link_refs[id] = ref
end
171
# Recursively split a `block`.
#
# The block is split according to the type of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# The previous `in_list` mode and `current_block` are restored before
# returning, including when `root` contains nothing but empty lines.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip the leading empty lines.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: nothing to process, but the mode changed
			# above must not leak into the caller.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# Each line kind consumes the lines it handles and moves `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
197
198 # Currently processed line.
199 # Used when visiting blocks with `recurse`.
200 var current_line: nullable MDLine = null is writable
201
202 # Currently processed block.
203 # Used when visiting blocks with `recurse`.
204 var current_block: nullable MDBlock = null is writable
205
206 # Is the current recursion in list mode?
207 # Used when visiting blocks with `recurse`
208 private var in_list = false
209
# Find the kind of the line `md`.
#
# The checks are applied in this order: empty line, indented code,
# `#` headline, blockquote, code fence, horizontal rule, unordered list,
# ordered list, XML block, then underlined headlines (looking at the
# next line). Anything else is a `LineOther`.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty
	var leading = md.leading
	if leading > 3 then return new LineCode
	var value = md.value
	var first = value[leading]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Fences and rulers need at least 3 chars between the surrounding spaces.
	var content = value.length - leading - md.trailing
	if content > 2 then
		if (first == '`' or first == '~') and md.count_chars_start(first) >= 3 then
			return new LineFence
		end
		if (first == '*' or first == '-' or first == '_') and md.count_chars(first) >= 3 then
			return new LineHR
		end
	end

	# Unordered list: a marker followed by a space.
	var rest = value.length - leading
	if rest >= 2 and value[leading + 1] == ' ' then
		if first == '*' or first == '-' or first == '+' then return new LineUList
	end

	# Ordered list: digits followed by ". ".
	if rest >= 3 and first.is_digit then
		var i = leading + 1
		while i < value.length and value[i].is_digit do i += 1
		if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# Headlines underlined with `=` or `-` on the next line.
	var next = md.next
	if next != null and not next.is_empty then
		if next.count_chars('=') > 0 then return new LineHeadline1
		if next.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
263
264 end
265
# Emit output corresponding to blocks content.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor containing link refs.
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using the default `HTMLDecorator`
	init(processor: MarkdownProcessor) do
		self.processor = processor
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Output `block` using `decorator` and return the produced text.
	#
	# The output is collected in a dedicated buffer pushed for the call.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit markdown text
	fun emit_text(text: Text) do emit_text_until(text, 0, null)

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of text if `token` is null.
	#
	# Return the position of the closing token, or -1 if the end of `text`
	# was reached. In the latter case only, the previous `current_text` and
	# `current_pos` are restored.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var found = text.token_at(current_pos)
			if token != null and not token isa TokenNone then
				# A strong token also closes the emphasis of the same family.
				if found.is_same_type(token) or
				(token isa TokenEmStar and found isa TokenStrongStar) or
				(token isa TokenEmUnderscore and found isa TokenStrongUnderscore) then
					return current_pos
				end
			end
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	#
	# At least one buffer must have been pushed.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to current buffer.
	fun add(e: Streamable) do
		var out = current_buffer
		if e isa Text then
			out.append e
		else
			out.append e.write_to_string
		end
	end

	# Append `c` to current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		current_buffer.add '\n'
	end
end
372
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# Example:
#
#     [1]: http://example.com/ "Optional title"
class LinkRef

	# Target of the link (the `href`).
	var link: String

	# Title of the link, if any.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link to `link` with the given `title`.
	init with_title(link: String, title: nullable String) do
		self.link = link
		self.title = title
	end
end
396
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# All the rendering services receive the emitter `v` to write into.
interface Decorator

	# Render a ruler block.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Render an emphasized `text`.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Render a strong `text`.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Render a superscript `text`.
	fun add_super(v: MarkdownEmitter, text: Text) is abstract

	# Render a link to `link` labeled `name` with an optional `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image located at `link`, named `name`, with an optional `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Render a code span reading `buffer` between `from` and `to`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from: Int, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Render code text from `buffer` between `from` and `to` and escape it.
	fun append_code(v: MarkdownEmitter, buffer: Text, from: Int, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: MarkdownEmitter) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Found headlines during the processing labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
464
# A headline found in a markdown document.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
478
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	redef fun add_headline(v, block) do
		# Register the headline under its generated id before rendering it.
		var title = block.block.first_line.value
		var anchor = strip_id(title)
		var depth = block.depth
		headlines[anchor] = new HeadLine(anchor, title, depth)
		v.add "<h{depth} id=\"{anchor}\">"
		v.emit_in block
		v.add "</h{depth}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		v.add "<pre><code>"
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_super(v, text) do
		v.add "<sup>"
		v.add text
		v.add "</sup>"
	end

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		# The title attribute is only written for a non-empty comment.
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		# The title attribute is only written for a non-empty comment.
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	redef fun append_value(v, text) do
		for c in text do
			escape_char(v, c)
		end
	end

	# Write `c`, replaced by its entity if it is one of `&<>"'`.
	redef fun escape_char(v, c) do
		var entity: nullable String = null
		if c == '&' then
			entity = "&amp;"
		else if c == '<' then
			entity = "&lt;"
		else if c == '>' then
			entity = "&gt;"
		else if c == '"' then
			entity = "&quot;"
		else if c == '\'' then
			entity = "&apos;"
		end
		if entity == null then
			v.addc c
		else
			v.add entity
		end
	end

	# Write the chars of `buffer` in `[from..to[`, escaping only `&`, `<` and `>`.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Build an id from `txt` that is not yet a key of `headlines`.
	#
	# Spaces become `_`; other chars are kept only if they are letters,
	# digits or one of `allowed_id_chars`.
	# If the id is already used, `_1`, `_2`... is appended.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var base = b.to_s
		var key = base
		# check for multiple id definitions
		var n = 0
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Punctuation chars kept in ids by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
661
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are both kept as linked lists.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do
		return first_block != null
	end

	# Count sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var cur = first_block
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do
		return first_line != null
	end

	# Count block lines.
	fun count_lines: Int do
		var n = 0
		var cur = first_line
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new sub-block,
	# which is appended to the sub-blocks of `self` and returned.
	fun split(line: MDLine): MDBlock do
		var sub = new MDBlock
		sub.first_line = first_line
		sub.last_line = line
		# Detach the remaining lines from the moved ones.
		var rest = line.next
		first_line = rest
		line.next = null
		if rest == null then
			last_line = null
		else
			rest.prev = null
		end
		# Append the sub-block.
		if first_block == null then
			first_block = sub
		else
			last_block.next = sub
		end
		last_block = sub
		return sub
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		loop
			var line = first_line
			if line == null or not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		loop
			var line = last_line
			if line == null or not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var lead = remove_leading_empty_lines
		var trail = remove_trailing_empty_lines
		return lead or trail
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				var stripped: String
				if kind isa LineList then
					stripped = kind.extract_value(line)
				else
					stripped = line.value.substring_from(line.leading.min(4))
				end
				line.value = stripped
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line contributes its `text` (nothing for empty lines) followed by "\n".
	fun text: String do
		var out = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then out.append line.text
			out.append "\n"
			line = line.next
		end
		return out.write_to_string
	end
end
831
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is emitted, see `emit_in`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Sub-blocks are emitted only if `block` has no lines.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first joined in a temporary buffer, without their leading
	# and trailing spaces, then the whole text is emitted with `emit_text`.
	# A line break is added after lines ending with 2 spaces or more.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` expects a length, not an end index:
				# the leading spaces must be subtracted too.
				var len = line.value.length - line.leading - line.trailing
				if len > 0 then v.add line.value.substring(line.leading, len)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			block.kind.emit(v)
			block = block.next
		end
	end
end
879
# A block without any markdown specificities.
#
# Nothing is redefined here: the behavior is the one inherited from `Block`.
# This class is only used for typing purposes.
class BlockNone
	super Block
end
887
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove blockquote markers.
	#
	# For each non-empty line of `block` starting with `>`, the marker and
	# one following space (if any) are removed, then `leading` is recomputed.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				var cut = line.leading + 1
				if cut < line.value.length and line.value[cut] == ' ' then cut += 1
				line.value = line.value.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
913
# A markdown code block.
class BlockCode
	super Block

	# Number of char to skip at the beginning of the line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line from `line_start`, escaped as code and followed by a newline.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end
936
# A markdown code-fence block.
#
# Same implementation as `BlockCode`, except that lines are emitted
# from their first char.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
947
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# The leading `#` marks are counted to set `depth` (6 at most), then the
	# marks, the closing `#` marks and the spaces around the title are removed.
	# A headline without any title is flagged as empty.
	#
	# Nothing is done if `depth` is already set.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		# Opening marks.
		var level = 0
		var start = line.leading
		while start < value.length and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < value.length and value[start] == ' ' do
			start += 1
		end
		# Closing marks: never go back before `start`, so a headline made
		# only of marks (like `# #`) cannot yield a negative length.
		var nend = value.length - line.trailing - 1
		while nend >= start and value[nend] == '#' do nend -= 1
		while nend >= start and value[nend] == ' ' do nend -= 1
		if nend < start then
			line.is_empty = true
		else
			line.value = value.substring(start, nend - start + 1)
			line.leading = 0
			line.trailing = 0
		end
		depth = level.min(6)
	end
end
984
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
991
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# A new item starts on each list line, and on each non-empty line
	# without indentation that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		line = line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				# Everything before `line` belongs to the previous item.
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# The remaining lines are the last item.
		var last = block.split(block.last_line.as(not null))
		last.kind = new BlockListItem(last)
	end

	# Expand list items as paragraphs if needed.
	#
	# If any list item of `block` contains a paragraph, all the `BlockNone`
	# found in the list items are turned into paragraphs.
	private fun expand_paragraphs(block: MDBlock) do
		# First pass: look for a paragraph in the items.
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null and not found do
					if sub.kind isa BlockParagraph then found = true
					sub = sub.next
				end
			end
			item = item.next
		end
		if not found then return
		# Second pass: promote the untyped blocks.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
					sub = sub.next
				end
			end
			item = item.next
		end
	end
end
1049
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1056
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1063
# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1070
# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1077
# Xml blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit each line as is (nothing for empty lines), followed by a newline.
	redef fun emit_lines(v) do
		var line = block.first_line
		loop
			if line == null then break
			if not line.is_empty then v.add line.value
			v.addn
			line = line.next
		end
	end
end
1091
1092 # A markdown line.
1093 class MDLine
1094
1095 # Text contained in this line.
1096 var value: String is writable
1097
1098 # Is this line empty?
1099 # Lines containing only spaces are considered empty.
1100 var is_empty: Bool = true is writable
1101
1102 # Previous line in `MDBlock` or null if first line.
1103 var prev: nullable MDLine = null is writable
1104
1105 # Next line in `MDBlock` or null if last line.
1106 var next: nullable MDLine = null is writable
1107
1108 # Is the previous line empty?
1109 var prev_empty: Bool = false is writable
1110
1111 # Is the next line empty?
1112 var next_empty: Bool = false is writable
1113
# Initialize a new MDLine from its string value
#
# `leading` is always computed. The line is flagged as non-empty, and
# `trailing` is computed, only if `value` is not made of spaces only.
init(value: String) do
	self.value = value
	var lead = process_leading
	self.leading = lead
	if lead == value.length then return
	self.is_empty = false
	self.trailing = process_trailing
end
1123
# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
#
# The `next_empty` and `prev_empty` flags of the surrounding lines are
# updated too.
fun clear do
	value = ""
	leading = 0
	trailing = 0
	is_empty = true
	var before = prev
	if before != null then before.next_empty = true
	var after = next
	if after != null then after.prev_empty = true
end
1133
# Number of leading spaces on this line.
1135 var leading: Int = 0 is writable
1136
# Compute `leading` depending on `value`.
#
# Return the number of spaces at the beginning of `value`.
# The `leading` attribute itself is not modified, the caller is expected
# to store the result.
#
# If `value` contains nothing but spaces, the line is flagged as empty:
# `is_empty` is set, `trailing` is reset and the surrounding lines are
# notified. `value` is left untouched so the returned count stays
# consistent with `value.length`.
fun process_leading: Int do
	var count = 0
	var value = self.value
	while count < value.length and value[count] == ' ' do count += 1
	# The freshly computed count is the one to test: the `leading`
	# attribute still describes the previous content of `value`.
	if count == value.length then
		is_empty = true
		trailing = 0
		var before = prev
		if before != null then before.next_empty = true
		var after = next
		if after != null then after.prev_empty = true
	end
	return count
end
1145
1146 # Number of trailing spaces on this line.
1147 var trailing: Int = 0 is writable
1148
# Compute `trailing` depending on `value`.
#
# Return the number of spaces at the end of `value`.
# For an empty `value` the result is 0; for a `value` made only of
# spaces it is `value.length`.
fun process_trailing: Int do
	var count = 0
	var value = self.value
	var last = value.length - 1
	# Stop at the beginning of `value` instead of reading index -1.
	while count <= last and value[last - count] == ' ' do
		count += 1
	end
	return count
end
1158
# Count the amount of `ch` in this line.
# Return a value > 0 if this line only consists of `ch` and spaces.
#
# Return 0 as soon as another char is found.
fun count_chars(ch: Char): Int do
	var count = 0
	for c in value do
		if c == ' ' then continue
		if c != ch then return 0
		count += 1
	end
	return count
end
1176
# Count the amount of `ch` at the start of this line ignoring spaces.
#
# The count stops at the first char that is neither `ch` nor a space.
fun count_chars_start(ch: Char): Int do
	var count = 0
	for c in value do
		if c == ' ' then continue
		if c != ch then break
		count += 1
	end
	return count
end
1192
1193 # Last XML line if any.
1194 private var xml_end_line: nullable MDLine = null
1195
# Does `value` contain valid XML markup?
#
# The check starts at `leading` and may continue on the following lines
# until every opened HTML block tag is closed.
# On success, `xml_end_line` is set to the line where the markup ends.
private fun check_html: Bool do
	var start = leading
	# Comments are handled apart.
	if start + 1 < value.length and value[start + 1] == '!' then
		if read_xml_comment(self, start) > 0 then return true
	end
	var buf = new FlatBuffer
	var pos = value.read_xml(buf, start, false)
	if pos <= -1 then return false
	var tag = buf.xml_tag
	if not tag.is_html_block then return false
	# A ruler stands on its own line.
	if tag == "hr" then
		xml_end_line = self
		return true
	end
	# Stack of the block tags currently opened.
	var tags = new Array[String]
	tags.add tag
	var line: nullable MDLine = self
	while line != null do
		var text = line.value
		# Look for the next tag on this line.
		while pos < text.length and text[pos] != '<' do
			pos += 1
		end
		if pos >= text.length then
			# End of line reached: a `/` two chars before closes the last tag.
			if pos - 2 >= 0 and text[pos - 2] == '/' then
				tags.pop
				if tags.is_empty then
					xml_end_line = line
					break
				end
			end
			line = line.next
			pos = 0
		else
			buf = new FlatBuffer
			var new_pos = text.read_xml(buf, pos, false)
			if new_pos > 0 then
				tag = buf.xml_tag
				if tag.is_html_block and not tag == "hr" then
					if buf[1] == '/' then
						# A closing tag must match the last opened one.
						if tags.last != tag then return false
						tags.pop
					else
						tags.add tag
					end
				end
				if tags.is_empty then
					xml_end_line = line
					break
				end
				pos = new_pos
			else
				pos += 1
			end
		end
	end
	return tags.is_empty
end
1260
# Read an XML comment.
1262 # Used by `check_html`.
private fun read_xml_comment(first_line: MDLine, start: Int): Int do
	# `start` is the index of the `<` of a candidate `<!--` in `first_line`.
	# Returns the index just after the closing `-->` (on the line stored in
	# `first_line.xml_end_line`), or -1 when no complete comment is found.
	var opening = first_line.value
	# The four chars of `<!--` must fit in the line.
	if start + 3 >= opening.length then return -1
	# The dashes are looked up relative to `start`, so that a comment
	# preceded by leading spaces is recognised too.
	if opening[start + 2] != '-' or opening[start + 3] != '-' then return -1
	var line: nullable MDLine = first_line
	var pos = start + 4
	while line != null do
		var text = line.value
		# Jump to the next dash of the current line.
		while pos < text.length and text[pos] != '-' do pos += 1
		if pos == text.length then
			# No dash left here: continue on the following line.
			line = line.next
			pos = 0
		else
			if pos + 2 < text.length then
				if text[pos + 1] == '-' and text[pos + 2] == '>' then
					first_line.xml_end_line = line
					return pos + 3
				end
			end
			pos += 1
		end
	end
	return -1
end
1289
# Extract the text of `self` without leading and trailing spaces.
fun text: String do
	# `substring` takes a start index and a *count* of chars, so the
	# leading spaces have to be subtracted from the length as well.
	var count = value.length - leading - trailing
	# Guard against counters that overlap (e.g. a line made only of spaces).
	if count < 0 then count = 0
	return value.substring(leading, count)
end
1292 end
1293
1294 # A markdown line.
interface Line

	# Parse the line with the processor `v`.
	#
	# The implementations in this file read `v.current_line` and move it
	# forward once their block is built.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1301
1302 # An empty markdown line.
class LineEmpty
	super Line

	# Nothing to build for an empty line: just step to the following one.
	redef fun process(v) do
		var line = v.current_line
		v.current_line = line.next
	end
end
1310
1311 # A non-specific markdown construction.
1312 # Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# Walk down to the first line that cannot belong to this block:
		# an empty line or a line that starts another construct.
		while line != null and not line.is_empty do
			var kind = v.line_kind(line)
			if v.in_list and kind isa LineList then break
			if kind isa LineCode or kind isa LineFence then break
			if kind isa LineHeadline or kind isa LineHeadline1 or
			kind isa LineHeadline2 or kind isa LineHR or
			kind isa LineBlockquote or kind isa LineXML then break
			line = line.next
		end
		# Cut the block out of the current one.
		var block: MDBlock
		# A block that stops on an empty line is always a paragraph.
		var may_be_plain = true
		if line == null then
			block = v.current_block.split(v.current_block.last_line.as(not null))
		else if line.is_empty then
			block = v.current_block.split(line)
			may_be_plain = false
		else
			block = v.current_block.split(line.prev.as(not null))
		end
		# Inside a list, text that directly follows its item stays unwrapped.
		if may_be_plain and v.in_list and not was_empty then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1361
1362 # A line of markdown code.
class LineCode
	super Line

	redef fun process(v) do
		# Skip every line that is empty or still code.
		var stop = v.current_line
		while stop != null and (stop.is_empty or v.line_kind(stop) isa LineCode) do
			stop = stop.next
		end
		# The block ends just before `stop`, or with the current block.
		var last: MDLine
		if stop != null then
			last = stop.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1384
1385 # A line of raw XML.
class LineXML
	super Line

	redef fun process(v) do
		var line = v.current_line
		# Detach whatever precedes the markup.
		var before = line.prev
		if before != null then v.current_block.split(before)
		# The markup runs up to the end line recorded on its first line.
		var xml_end = line.xml_end_line.as(not null)
		var block = v.current_block.split(xml_end)
		block.kind = new BlockXML(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1399
1400 # A markdown blockquote line.
class LineBlockquote
	super Line

	redef fun process(v) do
		# The quote stops at the first non-empty, unindented line that
		# follows an empty one and is not itself a blockquote line.
		var line = v.current_line
		while line != null do
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				if not v.line_kind(line) isa LineBlockquote then break
			end
			line = line.next
		end
		# Cut the quoted lines out of the current block.
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last)
		var quote = new BlockQuote(block)
		block.kind = quote
		block.remove_surrounding_empty_lines
		quote.remove_block_quote_prefix(block)
		# Parse the content of the quote as markdown of its own.
		v.current_line = line
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1429
1430 # A markdown ruler line.
class LineHR
	super Line

	redef fun process(v) do
		var line = v.current_line
		# Detach whatever precedes the ruler.
		var before = line.prev
		if before != null then v.current_block.split(before)
		# The ruler is a block made of this single line.
		var block = v.current_block.split(line.as(not null))
		block.kind = new BlockRuler(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1443
1444 # A markdown fence code line.
class LineFence
	super Line

	redef fun process(v) do
		# Look for the closing fence below the opening one.
		var line = v.current_line.next
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# The block stops right after the closing fence, when there is one.
		if line != null then line = line.next
		var stop: MDLine
		if line != null then
			stop = line.prev.as(not null)
		else
			stop = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(stop)
		block.kind = new BlockFence(block)
		# Blank the fence markers so that only the fenced content remains.
		block.first_line.clear
		var last = block.last_line
		if last != null and v.line_kind(last) isa LineFence then
			block.last_line.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1475
1476 # A markdown headline.
class LineHeadline
	super Line

	redef fun process(v) do
		var line = v.current_line
		# Detach whatever precedes the headline.
		var before = line.prev
		if before != null then v.current_block.split(before)
		# The headline is a block made of this single line.
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		block.kind = headline
		headline.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1492
1493 # A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	redef fun process(v) do
		var line = v.current_line
		# Detach whatever precedes the headline.
		var before = line.prev
		if before != null then v.current_block.split(before)
		# Blank the line that follows the title.
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 1
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1511
1512 # A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	redef fun process(v) do
		var line = v.current_line
		# Detach whatever precedes the headline.
		var before = line.prev
		if before != null then v.current_block.split(before)
		# Blank the line that follows the title.
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 2
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1530
1531 # A markdown list line.
1532 # Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	redef fun process(v) do
		# The list stops at the first non-empty, unindented line that
		# follows an empty one and is not itself a list item.
		var line = v.current_line
		while line != null do
			# The kind is computed for every visited line.
			var line_type = v.line_kind(line)
			var in_list = line.is_empty or not line.prev_empty or
			line.leading != 0 or line_type isa LineList
			if not in_list then break
			line = line.next
		end
		# Cut the list out of the current block.
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var list = v.current_block.split(last)
		var list_kind = block_kind(list)
		list.kind = list_kind
		# The flags are reset both before and after the trimming, since the
		# trimming can change which lines are the first and last ones.
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list_kind.init_block(v)
		# Parse each sub-block of the list as a list item.
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		list_kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1576
1577 # An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two chars after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1587
1588 # An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two chars after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1598
# A token represents a character in the markdown input.
1600 # Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Position of `self` in markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token with the emitter `v`.
	#
	# By default the raw `char` is appended to `v`.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1612
1613 # A token without a specific meaning.
class TokenNone
	# Nothing is redefined: the behaviour is the one inherited from `Token`.
	super Token
end
1617
1618 # An emphasis token.
abstract class TokenEm
	super Token

	redef fun emit(v) do
		# Emit what follows the marker into a separate buffer.
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			# No usable closing marker: output the char as is.
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
1634
1635 # An emphasis star token.
class TokenEmStar
	# Distinct type for the `*` marker; the behaviour comes from `TokenEm`.
	super TokenEm
end
1639
1640 # An emphasis underscore token.
class TokenEmUnderscore
	# Distinct type for the `_` marker; the behaviour comes from `TokenEm`.
	super TokenEm
end
1644
1645 # A strong token.
abstract class TokenStrong
	super Token

	redef fun emit(v) do
		# Emit what follows the two-char marker into a separate buffer.
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			# No usable closing marker: output the char as is.
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		# One more than `stop`, the marker being two chars long.
		v.current_pos = stop + 1
	end
end
1661
1662 # A strong star token.
class TokenStrongStar
	# Distinct type for the `**` marker; the behaviour comes from `TokenStrong`.
	super TokenStrong
end
1666
1667 # A strong underscore token.
class TokenStrongUnderscore
	# Distinct type for the `__` marker; the behaviour comes from `TokenStrong`.
	super TokenStrong
end
1671
1672 # A code token.
1673 # This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	redef fun emit(v) do
		var text = v.current_text
		# First char after the opening marker.
		var from = pos + next_pos + 1
		# Position of the closing marker, a token of the same type as `self`.
		var to = text.find_token(from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		# Trim the spaces that surround the code.
		while from < to and text[from] == ' ' do from += 1
		# Nothing is emitted when the span is empty or made only of spaces.
		if from >= to then return
		while text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, text.as(not null), from, to)
	end

	# Number of extra marker chars: 0 in `TokenCodeSingle`, 1 in `TokenCodeDouble`.
	private fun next_pos: Int is abstract
end
1694
1695 # A span code token.
class TokenCodeSingle
	super TokenCode

	# The marker is a single char: there is nothing more to skip.
	redef fun next_pos do
		return 0
	end
end
1701
1702 # A doubled span code token.
class TokenCodeDouble
	super TokenCode

	# The marker is two chars long: one more char has to be skipped.
	redef fun next_pos do
		return 1
	end
end
1708
1709 # A link or image token.
1710 # This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			# Not a valid link construct: output the char as is.
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# `start` is the position of the token in the current text of `v` and
	# `token` tells links (`[`) from images (`![`).
	# On success `name`, `link`, `comment` and possibly `is_abbrev` are
	# filled and the position of the last char of the construct is returned.
	# Returns -1 when the construct is not a valid link.
	# `out` is not used by this method.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		# Skip `[` for a link, `![` for an image.
		var pos = start + 2
		if token isa TokenLink then pos = start + 1
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		# Position of the `]` that closes the link text.
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# Nothing follows the text: it must be the id of a known reference.
			var tid = name.write_to_string.to_lower
			if not v.processor.link_refs.has_key(tid) then return -1
			var lr = v.processor.link_refs[tid]
			is_abbrev = lr.is_abbrev
			link = lr.link
			comment = lr.title
			pos = old_pos
		else if md[pos] == '(' then
			# Inline form: `(address "optional title")`.
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			link = tmp.write_to_string
			# The `>` may be the last char of the text (e.g. `[a](<b>`).
			if pos >= md.length then return -1
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# Only spaces were left (e.g. `[a](b `): the `)` is missing.
				if pos < start then return -1
				if md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[id]`, an empty id meaning the link text itself.
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Something else follows: the text itself may be a reference id.
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if not v.processor.link_refs.has_key(tid) then return -1
			var lr = v.processor.link_refs[tid]
			link = lr.link
			comment = lr.title
			pos = old_pos
		end
		# An unknown reference leaves `link` unset.
		if link == null then return -1
		return pos
	end
end
1827
1828 # A markdown link token.
class TokenLink
	super TokenLinkOrImage

	redef fun emit_hyper(v) do
		var title = comment
		# An abbreviation needs a title; anything else is a regular link.
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
		else
			v.decorator.add_link(v, link.as(not null), name.as(not null), title)
		end
	end
end
1840
1841 # A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Pass the address, the text and the optional title to the decorator.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
1849
# An HTML/XML token.
class TokenHTML
	super Token

	redef fun emit(v) do
		var markup = new FlatBuffer
		var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			# Neither an auto link nor readable markup: escape the char.
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An auto link is emitted directly through the decorator and nothing is
	# written to `out`; inline markup is read into `out`.
	# Returns the position reached in `md`, or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var url = new FlatBuffer
		var pos = md.read_until(url, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and url.is_link_prefix then
			# Read the rest of the address, up to the closing `>`.
			pos = md.read_until(url, pos, '>')
			if pos != -1 then
				var link = url.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
1887
1888 # An HTML entity token.
class TokenEntity
	super Token

	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			# Not an entity: the char is escaped through the decorator.
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`.
	# Three forms are accepted: `&#x` followed by hexadecimal digits,
	# `&#` followed by decimal digits, and `&` followed by letters or digits.
	# Returns the position of the `;` (which is appended to `out`),
	# or -1 when the text is not an entity.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then return -1
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# Hexadecimal reference: at least one digit is required.
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# Reject any char that is outside of all three hex ranges.
					if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
						return -1
					end
				end
			else
				# Decimal reference.
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# Named entity.
			# TODO check entity is valid
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
		end
		out.add ';'
		return pos
	end
end
1941
1942 # A markdown escape token.
class TokenEscape
	super Token

	# Step over the backslash and output the char that follows it.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text[escaped]
	end
end
1951
1952 # A markdown super token.
class TokenSuper
	super Token

	redef fun emit(v) do
		# Emit what follows the marker into a separate buffer.
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			# No usable closing marker: output the char as is.
			v.addc char
			return
		end
		v.decorator.add_super(v, content)
		v.current_pos = stop
	end
end
1968
redef class Text

	# Get the token kind at `pos`.
	private fun token_at(pos: Int): Token do
		# Neighbours of the char at `pos`; a space stands for "outside of the text".
		var c0 = ' '
		if pos > 0 then c0 = self[pos - 1]
		var c = self[pos]
		var c1 = ' '
		if pos + 1 < length then c1 = self[pos + 1]
		var c2 = ' '
		if pos + 2 < length then c2 = self[pos + 2]

		if c == '*' then
			if c1 == '*' then
				if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
				return new TokenEmStar(pos, c)
			end
			if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
		else if c == '_' then
			if c1 == '_' then
				if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
				return new TokenEmUnderscore(pos, c)
			end
			if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
		else if c == '!' then
			if c1 == '[' then return new TokenImage(pos, c)
		else if c == '[' then
			return new TokenLink(pos, c)
		else if c == '`' then
			if c1 == '`' then return new TokenCodeDouble(pos, c)
			return new TokenCodeSingle(pos, c)
		else if c == '\\' then
			if is_md_escapable(c1) then return new TokenEscape(pos, c)
		else if c == '<' then
			return new TokenHTML(pos, c)
		else if c == '&' then
			return new TokenEntity(pos, c)
		else if c == '^' then
			# A `^` next to another `^` is not a marker.
			if c0 != '^' and c1 != '^' then return new TokenSuper(pos, c)
		end
		# Everything else, `]` included, has no specific meaning.
		return new TokenNone(pos, c)
	end

	# Find the position of a `token` in `self`.
	#
	# Returns the first position, from `start`, whose token has the same
	# type as `token`, or -1.
	private fun find_token(start: Int, token: Token): Int do
		for pos in [start..length[ do
			if token_at(pos).is_same_type(token) then return pos
		end
		return -1
	end

	# Get the position of the next non-space character.
	#
	# Newlines are skipped like spaces.
	# Returns -1 when the end of the text is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		while pos > -1 and pos < length do
			var c = self[pos]
			if c != ' ' and c != '\n' then break
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Returns the position of the char of `nend` that was met, or -1 when
	# the end of the text is reached first.
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if nend.has(c) then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Returns the position of the char of `nend` that was met, or -1 when
	# the end of the text is reached first.
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if nend.has(c) then break
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Chars of `to` found inside a single or double quoted string do not
	# stop the reading; inside a string, a backslash and the char that
	# follows it are copied as they are.
	# Returns the position of the char of `to` that was met, or -1 when the
	# end of the text is reached first.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					out.add c
					pos += 1
					if pos < length then
						# Copy the escaped char itself, not the backslash again.
						out.add self[pos]
						pos += 1
					end
					continue
				end
				# Only the quote that opened the string can close it: the
				# other kind of quote is plain content.
				if c == str_char then in_str = false
			else if c == '"' or c == '\'' then
				in_str = true
				str_char = c
			else if to.has(c) then
				break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the `<`.
	# In safe mode the `<` of a tag listed in `html_unsafe_tags` is written
	# as `&lt;`.
	# Returns the position of the closing `>`, or -1 when no tag is read.
	# For `<!` only `<!` is written and `start + 1` is returned.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		if start + 1 >= length then return -1
		var is_close_tag = false
		var pos = start + 1
		var second = self[start + 1]
		if second == '/' then
			is_close_tag = true
			pos = start + 2
		else if second == '!' then
			out.append "<!"
			return start + 1
		end
		if safe_mode then
			# Read the tag name apart to check it before writing anything.
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if tag.is_html_unsafe then
				out.append "&lt;"
			else
				out.append "<"
			end
			if is_close_tag then out.add '/'
			out.append tmp
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# Read what remains of the tag, up to `/` or `>`.
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = self.read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			out.add '>'
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# The reading stops on a space met outside of nested parentheses or on
	# the `)` that closes the construct.
	# Returns the position of that char, or -1 when the end of the text is
	# reached first.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		# Nesting depth of the parentheses; the opening one is already read.
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if c == '(' then
					counter += 1
				else if c == ' ' then
					if counter == 1 then break
				else if c == ')' then
					counter -= 1
					if counter == 0 then break
				end
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Nested brackets are kept in `out`.
	# Returns the position of the closing `]`, or -1 when the end of the
	# text is reached first.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		# Nesting depth of the brackets; the opening one is already read.
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '[' then
				counter += 1
			else if c == ']' then
				counter -= 1
				if counter == 0 then break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Extract the XML tag name from an XML tag.
	#
	# The name is read after the leading `<` (and `/` if any), is made of
	# letters and digits only and is returned in lower case.
	# The last char of `self` is never part of the name.
	private fun xml_tag: String do
		var name = new FlatBuffer
		var pos = 1
		if pos < length and self[1] == '/' then pos += 1
		while pos < length - 1 do
			var c = self[pos]
			if not c.is_digit and not c.is_letter then break
			name.add c
			pos += 1
		end
		return name.write_to_string.to_lower
	end

	# Read and escape the markdown contained in `self`.
	#
	# `c` is the char that follows the backslash found at `pos`.
	# If `c` can be escaped it is appended to `out` and `pos + 1` is returned;
	# otherwise the backslash is appended and `pos` is returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		if is_md_escapable(c) then
			out.add c
			return pos + 1
		end
		out.add '\\'
		return pos
	end

	# Can `c` be escaped with a backslash in markdown?
	private fun is_md_escapable(c: Char): Bool do
		return c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or
			c == '{' or c == '}' or c == '#' or c == '"' or c == '\'' or
			c == '.' or c == '<' or c == '>' or c == '*' or c == '+' or
			c == '-' or c == '_' or c == '!' or c == '`' or c == '~' or
			c == '^'
	end

	# Is `self` an unsafe HTML element?
	private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

	# Is `self` an HTML block element?
	private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

	# Is `self` a link prefix?
	private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

	# Tag names reported by `is_html_unsafe`.
	private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

	# Tag names reported by `is_html_block`.
	private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

	# Prefixes reported by `is_link_prefix`.
	private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2319
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do
		# A fresh processor is used for each call.
		return (new MarkdownProcessor).process(self)
	end
end