lib/markdown: move line_kind to MarkdownProcessor
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
# Parse a markdown string and split it in blocks.
#
# Blocks are then outputted by a `MarkdownEmitter`.
#
# Usage:
#
#     var proc = new MarkdownProcessor
#     var html = proc.process("**Hello World!**")
#     assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor

	# Emitter used by `process` to render the parsed blocks.
	var emitter: MarkdownEmitter is noinit

	init do self.emitter = new MarkdownEmitter(self)

	# Process the markdown `input` string and return the processed output.
	fun process(input: String): Streamable do
		# init processor
		link_refs.clear
		last_link_ref = null
		current_line = null
		current_block = null
		# parse markdown
		var parent = read_lines(input)
		parent.remove_surrounding_empty_lines
		recurse(parent, false)
		# output processed text
		return emitter.emit(parent.kind)
	end

	# Split `input` string into `MDLines` and create a parent `MDBlock` with it.
	#
	# Tabs are expanded with spaces up to the next multiple-of-4 column.
	# Lines accepted by `check_link_ref` are not added to the block.
	private fun read_lines(input: String): MDBlock do
		var block = new MDBlock
		var value = new FlatBuffer
		var i = 0
		while i < input.length do
			value.clear
			var pos = 0
			var eol = false
			while not eol and i < input.length do
				var c = input[i]
				if c == '\n' then
					eol = true
				else if c == '\t' then
					# Expand the tab up to the next tab stop.
					var np = pos + (4 - (pos.bin_and(3)))
					while pos < np do
						value.add ' '
						pos += 1
					end
				else
					pos += 1
					value.add c
				end
				# Every kind of character consumes exactly one input position.
				i += 1
			end

			var line = new MDLine(value.write_to_string)
			var is_link_ref = check_link_ref(line)
			# Skip link refs
			if not is_link_ref then block.add_line line
		end
		return block
	end

	# Check if line is a block link definition.
	#
	# Return `true` if `line` contains a valid link ref (saved into `link_refs`)
	# or if it is the title of the link ref defined on the previous line.
	private fun check_link_ref(line: MDLine): Bool do
		var md = line.value
		var is_link_ref = false
		var id = new FlatBuffer
		var link = new FlatBuffer
		var comment = new FlatBuffer
		var pos = -1
		if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
			pos = line.leading + 1
			pos = md.read_until(id, pos, ']')
			if not id.is_empty and pos + 2 < md.length then
				if md[pos + 1] == ':' then
					pos += 2
					pos = md.skip_spaces(pos)
					# `skip_spaces` is defined elsewhere; never index the line with
					# a position it may have moved outside of the string.
					if pos >= 0 and pos < md.length then
						if md[pos] == '<' then
							pos += 1
							pos = md.read_until(link, pos, '>')
							pos += 1
						else
							pos = md.read_until(link, pos, ' ', '\n')
						end
					end
					if not link.is_empty then
						pos = md.skip_spaces(pos)
						if pos > 0 and pos < md.length then
							var c = md[pos]
							if c == '\"' or c == '\'' or c == '(' then
								pos += 1
								if c == '(' then
									pos = md.read_until(comment, pos, ')')
								else
									pos = md.read_until(comment, pos, c)
								end
								if pos > 0 then is_link_ref = true
							end
						else
							is_link_ref = true
						end
					end
				end
			end
		end
		if is_link_ref and not id.is_empty and not link.is_empty then
			var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
			add_link_ref(id.write_to_string, lr)
			# Only a definition without title can be completed by the next line.
			if comment.is_empty then
				last_link_ref = lr
			else
				last_link_ref = null
			end
			return true
		end

		# Not a definition: the line may hold the title of the link ref defined
		# on the line just before. The pending link ref is forgotten in any case
		# so that a quoted line found later in the document is not swallowed.
		var pending = last_link_ref
		last_link_ref = null
		if line.is_empty or pending == null then return false
		var title = new FlatBuffer
		pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(title, pos, ')')
			else
				pos = md.read_until(title, pos, c)
			end
		end
		if title.is_empty then return false
		pending.title = title.write_to_string
		return true
	end

	# Known link refs
	# This list will be needed during output to expand links.
	var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

	# Last encountered link ref (for multiline definitions)
	#
	# Markdown allows link refs to be defined over two lines:
	#
	#     [id]: http://example.com/longish/path/to/resource/here
	#         "Optional Title Here"
	#
	private var last_link_ref: nullable LinkRef = null

	# Add a link ref to the list (keys are stored in lower case).
	fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref

	# Recursively split a `block`.
	#
	# The block is split according to the type of lines it contains.
	# Some blocks can be split again recursively like lists.
	# The `in_list` mode is used to recurse on list and build
	# nested paragraphs or code blocks.
	#
	# `in_list` and `current_block` are restored before returning.
	fun recurse(root: MDBlock, in_list: Bool) do
		var old_mode = self.in_list
		var old_root = self.current_block
		self.in_list = in_list

		var line = root.first_line
		while line != null and line.is_empty do
			line = line.next
			if line == null then
				# Only empty lines: nothing to process, but the list mode set
				# above must not leak to the caller.
				self.in_list = old_mode
				return
			end
		end

		current_line = line
		current_block = root
		# Each `Line::process` is in charge of moving `current_line` forward.
		while current_line != null do
			line_kind(current_line.as(not null)).process(self)
		end
		self.in_list = old_mode
		self.current_block = old_root
	end

	# Currently processed line.
	# Used when visiting blocks with `recurse`.
	var current_line: nullable MDLine = null is writable

	# Currently processed block.
	# Used when visiting blocks with `recurse`.
	var current_block: nullable MDBlock = null is writable

	# Is the current recursion in list mode?
	# Used when visiting blocks with `recurse`
	private var in_list = false

	# The type of line.
	#
	# Tests are applied in this order: empty, code (more than 3 leading spaces),
	# headline, blockquote, fence, ruler, unordered list, ordered list, XML,
	# underlined headlines, and finally `LineOther`.
	fun line_kind(md: MDLine): Line do
		if md.is_empty then return new LineEmpty
		var value = md.value
		var leading = md.leading
		var trailing = md.trailing
		if leading > 3 then return new LineCode
		var first = value[leading]
		if first == '#' then return new LineHeadline
		if first == '>' then return new LineBlockquote

		# Fences and rulers need at least 3 significant characters.
		if value.length - leading - trailing > 2 then
			if first == '`' and md.count_chars_start('`') >= 3 then
				return new LineFence
			end
			if first == '~' and md.count_chars_start('~') >= 3 then
				return new LineFence
			end
			if (first == '*' or first == '-' or first == '_') and
			   md.count_chars(first) >= 3 then
				return new LineHR
			end
		end

		# Unordered list: a marker followed by a space.
		if value.length - leading >= 2 and value[leading + 1] == ' ' then
			if first == '*' or first == '-' or first == '+' then return new LineUList
		end

		# Ordered list: digits followed by ". ".
		if value.length - leading >= 3 and first.is_digit then
			var i = leading + 1
			while i < value.length and value[i].is_digit do i += 1
			if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
				return new LineOList
			end
		end

		if first == '<' and md.check_html then return new LineXML

		# Headlines underlined with `=` or `-` on the next line.
		var next = md.next
		if next != null and not next.is_empty then
			if next.count_chars('=') > 0 then return new LineHeadline1
			if next.count_chars('-') > 0 then return new LineHeadline2
		end
		return new LineOther
	end

end
265
# Emit output corresponding to blocks content.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor containing link refs.
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using the default `HTMLDecorator`
	init(processor: MarkdownProcessor) do
		self.processor = processor
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Render `block` in a fresh buffer and return that buffer.
	fun emit(block: Block): Text do
		var output = push_buffer
		block.emit(self)
		pop_buffer
		return output
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do block.emit_in(self)

	# Transform and emit markdown text
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of text if `token` is null.
	#
	# Return the position of the matching token, or -1 if the end of `text`
	# was reached. `current_text` and `current_pos` are restored only in the
	# latter case; when a matching token is found they are left as they are.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var found = text.token_at(current_pos)
			# A null or `TokenNone` stop token never ends the scan.
			if token != null and not token isa TokenNone then
				# A strong token also closes an emphasis of the same family.
				if found.is_same_type(token) or
				   (token isa TokenEmStar and found isa TokenStrongStar) or
				   (token isa TokenEmUnderscore and found isa TokenStrongUnderscore) then
					return current_pos
				end
			end
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do buffer_stack.pop

	# Current output buffer, the stack must not be empty.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to current buffer.
	fun add(e: Streamable) do
		var target = current_buffer
		if e isa Text then
			target.append(e)
		else
			# Anything that is not a `Text` is rendered to a string first.
			target.append(e.write_to_string)
		end
	end

	# Append `c` to current buffer.
	fun addc(c: Char) do current_buffer.add c

	# Append a "\n" line break.
	fun addn do current_buffer.add '\n'
end
372
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# Example:
#
#     [1]: http://example.com/ "Optional title"
class LinkRef

	# Target of the link (href).
	var link: String

	# Title of the link, if any.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link reference to `link` with an optional `title`.
	init with_title(link: String, title: nullable String) do
		self.title = title
		self.link = link
	end
end
395
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Every service receives the `MarkdownEmitter` `v` that collects the output.
interface Decorator

	# Render a ruler block.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Render `text` as an emphasis.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as a strong emphasis.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as a superscript.
	fun add_super(v: MarkdownEmitter, text: Text) is abstract

	# Render a link to `link` labeled `name`, with an optional title `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image found at `link`, described by `name`,
	# with an optional title `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Render a code span reading `buffer` between `from` and `to`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Render code text read in `buffer` between `from` and `to` and escape it.
	fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: MarkdownEmitter) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
463
# Class representing a markdown headline.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
477
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do v.add "<hr/>\n"

	# Register the headline in `headlines` then render it as `<hN id="...">`.
	redef fun add_headline(v, block) do
		# The raw text of the first line gives both the title and the id.
		var txt = block.block.first_line.value
		var id = strip_id(txt)
		var lvl = block.depth
		headlines[id] = new HeadLine(id, txt, lvl)
		v.add "<h{lvl} id=\"{id}\">"
		v.emit_in block
		v.add "</h{lvl}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		v.add "<pre><code>"
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_super(v, text) do
		v.add "<sup>"
		v.add text
		v.add "</sup>"
	end

	# Emit a ` title="..."` attribute unless `comment` is null or empty.
	private fun add_title_attribute(v: MarkdownEmitter, comment: nullable Text) do
		if comment == null or comment.is_empty then return
		v.add " title=\""
		append_value(v, comment)
		v.add "\""
	end

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		add_title_attribute(v, comment)
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		add_title_attribute(v, comment)
		v.add ">"
		# The label can itself contain markdown.
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	# Emit `text` with each character passed through `escape_char`.
	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# Emit `c`, replacing `&`, `<`, `>`, `"` and `'` by HTML entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Emit the characters of `buffer` from `from` (included) to `to` (excluded),
	# replacing only `&`, `<` and `>` by HTML entities.
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Build an id from `txt`: spaces become `_`, and only letters, digits and
	# `allowed_id_chars` are kept. A `_N` suffix is added when the id is
	# already a key of `headlines`.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var res = b.to_s
		var key = res
		# Try `res`, then `res_1`, `res_2`... until a free key is found.
		var i = 0
		while headlines.has_key(key) do
			i += 1
			key = "{res}_{i}"
		end
		return key
	end

	# Characters kept by `strip_id` in addition to letters and digits.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
660
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved into the new sub-block
	# that is appended to the sub-blocks of `self` and returned.
	fun split(line: MDLine): MDBlock do
		var block = new MDBlock
		block.first_line = first_line
		block.last_line = line
		# Detach the moved lines from the remaining ones.
		first_line = line.next
		line.next = null
		var head = first_line
		if head == null then
			last_line = null
		else
			head.prev = null
		end
		# Append the sub-block, keeping `prev` and `next` in sync.
		if first_block == null then
			first_block = block
		else
			var tail = last_block.as(not null)
			tail.next = block
			block.prev = tail
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var was_empty = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			line = first_line
			was_empty = true
		end
		return was_empty
	end

	# Remove trailing empty lines.
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var was_empty = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			line = last_line
			was_empty = true
		end
		return was_empty
	end

	# Remove leading and trailing empty lines.
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var was_empty = false
		if remove_leading_empty_lines then was_empty = true
		if remove_trailing_empty_lines then was_empty = true
		return was_empty
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line contributes its `MDLine::text` (nothing for empty lines)
	# followed by a "\n".
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
830
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first.
	# Lines, when there are some, take precedence over sub-blocks.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines are first gathered in a temporary buffer, without their leading
	# and trailing spaces, then the buffer is emitted as markdown text.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a count, not an end position: the leading
				# spaces have to be subtracted too or trailing spaces leak.
				var count = line.value.length - line.leading - line.trailing
				if count < 0 then count = 0
				v.add line.value.substring(line.leading, count)
				# Two or more trailing spaces ask for a line break.
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			block.kind.emit(v)
			block = block.next
		end
	end
end
878
# A block without any markdown specificities.
#
# Nothing is redefined here: the default `Block` implementation is used,
# this class is only used for typing purposes.
class BlockNone
	super Block
end
886
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do v.decorator.add_blockquote(v, self)

	# Remove blockquote markers.
	#
	# For each non-empty line of `block` starting with `>`, drop the marker
	# and one following space if any, then update the leading of the line.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var value = line.value
				var marker = line.leading
				if value[marker] == '>' then
					var rem = marker + 1
					if rem < value.length and value[rem] == ' ' then rem += 1
					line.value = value.substring_from(rem)
					line.leading = line.process_leading
				end
			end
			line = line.next
		end
	end
end
912
# A markdown code block.
class BlockCode
	super Block

	redef fun emit(v) do v.decorator.add_code(v, self)

	# Emit each line as escaped code followed by a line break.
	#
	# The first 4 characters of non-empty lines are skipped.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				var content = current.value
				v.decorator.append_code(v, content, 4, content.length)
			end
			v.addn
			current = current.next
		end
	end
end
930
# A markdown code-fence block.
#
# Uses the same implementation as `BlockCode`,
# this class is only used for typing purposes.
#
# NOTE(review): the inherited `emit_lines` skips the first 4 characters of
# each line; confirm that fenced lines are expected to be indented that way.
class BlockFence
	super BlockCode
end
938
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from lines contained in `self`.
	#
	# Only the first line of `block` is considered: the leading `#` are counted
	# to set `depth` (6 at most) then the opening marks, the closing marks and
	# the surrounding spaces are removed from the line.
	# Nothing is done if `depth` is already set.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		var level = 0
		var start = line.leading
		while start < value.length and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < value.length and value[start] == ' ' do
			start += 1
		end
		if start >= value.length then
			line.is_empty = true
		else
			# Look backward for the end of the title, never before `start`:
			# a title made only of closing marks (like `# #`) would otherwise
			# produce a negative length.
			var nend = value.length - line.trailing - 1
			while nend >= start and value[nend] == '#' do nend -= 1
			while nend >= start and value[nend] == ' ' do nend -= 1
			if nend < start then
				# Nothing left between the opening and the closing marks.
				line.is_empty = true
			else
				line.value = value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		depth = level.min(6)
	end
end
975
# A markdown list item block.
class BlockListItem
	super Block

	# Rendering is delegated to `Decorator::add_listitem`.
	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
982
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# Starting from the second line, a new item begins on each list line and
	# on each non-empty, non-indented line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		line = line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList or
				(not line.is_empty and line.prev_empty and line.leading == 0)
			if starts_item then
				# Everything before `line` becomes a list item.
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# The remaining lines are the last item.
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# When one list item of `block` contains a `BlockParagraph`, every
	# `BlockNone` found in the list items is turned into a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		var item = block.first_block
		var child: nullable MDBlock
		# First pass: look for a paragraph in any of the list items.
		var has_paragraph = false
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				child = item.first_block
				while child != null and not has_paragraph do
					if child.kind isa BlockParagraph then has_paragraph = true
					child = child.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return
		# Second pass: promote the untyped blocks.
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1040
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	# Rendering is delegated to `Decorator::add_orderedlist`.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1047
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	# Rendering is delegated to `Decorator::add_unorderedlist`.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1054
# A markdown paragraph block.
class BlockParagraph
	super Block

	# Rendering is delegated to `Decorator::add_paragraph`.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1061
# A markdown ruler.
class BlockRuler
	super Block

	# Rendering is delegated to `Decorator::add_ruler`.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1068
# Xml blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the lines as they are, each one followed by a line break.
	# Nothing but the line break is emitted for empty lines.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1082
1083 # A markdown line.
1084 class MDLine
1085
1086 # Text contained in this line.
1087 var value: String is writable
1088
1089 # Is this line empty?
1090 # Lines containing only spaces are considered empty.
1091 var is_empty: Bool = true is writable
1092
1093 # Previous line in `MDBlock` or null if first line.
1094 var prev: nullable MDLine = null is writable
1095
1096 # Next line in `MDBlock` or null if last line.
1097 var next: nullable MDLine = null is writable
1098
1099 # Is the previous line empty?
1100 var prev_empty: Bool = false is writable
1101
1102 # Is the next line empty?
1103 var next_empty: Bool = false is writable
1104
1105 init(value: String) do
1106 self.value = value
1107 self.leading = process_leading
1108 if leading != value.length then
1109 self.is_empty = false
1110 self.trailing = process_trailing
1111 end
1112 end
1113
1114 # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1115 fun clear do
1116 value = ""
1117 leading = 0
1118 trailing = 0
1119 is_empty = true
1120 if prev != null then prev.next_empty = true
1121 if next != null then next.prev_empty = true
1122 end
1123
1124 # Number or leading spaces on this line.
1125 var leading: Int = 0 is writable
1126
1127 # Compute `leading` depending on `value`.
1128 fun process_leading: Int do
1129 var count = 0
1130 var value = self.value
1131 while count < value.length and value[count] == ' ' do count += 1
1132 if leading == value.length then clear
1133 return count
1134 end
1135
1136 # Number of trailing spaces on this line.
1137 var trailing: Int = 0 is writable
1138
1139 # Compute `trailing` depending on `value`.
1140 fun process_trailing: Int do
1141 var count = 0
1142 var value = self.value
1143 while value[value.length - count - 1] == ' ' do
1144 count += 1
1145 end
1146 return count
1147 end
1148
1149 # Count the amount of `ch` in this line.
1150 # Return A value > 0 if this line only consists of `ch` end spaces.
1151 fun count_chars(ch: Char): Int do
1152 var count = 0
1153 for c in value do
1154 if c == ' ' then
1155 continue
1156 end
1157 if c == ch then
1158 count += 1
1159 continue
1160 end
1161 count = 0
1162 break
1163 end
1164 return count
1165 end
1166
1167 # Count the amount of `ch` at the start of this line ignoring spaces.
1168 fun count_chars_start(ch: Char): Int do
1169 var count = 0
1170 for c in value do
1171 if c == ' ' then
1172 continue
1173 end
1174 if c == ch then
1175 count += 1
1176 else
1177 break
1178 end
1179 end
1180 return count
1181 end
1182
1183 # Last XML line if any.
1184 private var xml_end_line: nullable MDLine = null
1185
# Does `value` open a valid block of XML markup?
#
# The markup is read from position `leading` and may span the following
# lines. On success, `xml_end_line` is set to the line where the block ends.
private fun check_html: Bool do
	var start = leading
	# `<!` may open a comment, which has its own reader.
	if start + 1 < value.length and value[start + 1] == '!' then
		if read_xml_comment(self, start) > 0 then return true
	end
	var buf = new FlatBuffer
	var pos = value.read_xml(buf, start, false)
	if pos <= -1 then return false
	var tag = buf.xml_tag
	if not tag.is_html_block then return false
	if tag == "hr" then
		# `hr` is accepted without looking for a closing tag.
		xml_end_line = self
		return true
	end
	# Stack of the block tags that are still open.
	var open_tags = new Array[String]
	open_tags.add tag
	var line: nullable MDLine = self
	while line != null do
		var text = line.value
		# move to the next `<` of the current line
		while pos < text.length and text[pos] != '<' do pos += 1
		if pos >= text.length then
			# End of line reached. A `/` two characters before `pos`
			# (as in a trailing `/>`) closes the innermost open tag.
			if pos - 2 >= 0 and text[pos - 2] == '/' then
				open_tags.pop
				if open_tags.is_empty then
					xml_end_line = line
					break
				end
			end
			line = line.next
			pos = 0
		else
			buf = new FlatBuffer
			var new_pos = text.read_xml(buf, pos, false)
			if new_pos > 0 then
				tag = buf.xml_tag
				if tag.is_html_block and tag != "hr" then
					if buf[1] == '/' then
						# a closing tag must match the innermost open one
						if open_tags.last != tag then return false
						open_tags.pop
					else
						open_tags.add tag
					end
				end
				if open_tags.is_empty then
					xml_end_line = line
					break
				end
				pos = new_pos
			else
				pos += 1
			end
		end
	end
	return open_tags.is_empty
end
1250
# Read an XML comment opened at position `start` of `first_line`.
# Used by `check_html`.
#
# The comment may span the lines following `first_line`.
# On success, the line holding the closing `-->` is stored in
# `first_line.xml_end_line` and the position right after `-->` in that line
# is returned.
# Returns `-1` when the characters at `start + 2` and `start + 3` are not
# both `-`, or when no closing `-->` is found.
private fun read_xml_comment(first_line: MDLine, start: Int): Int do
	var opening = first_line.value
	if start + 3 >= opening.length then return -1
	# The dashes of `<!--` are located relative to `start`, so that a
	# comment preceded by leading spaces is recognized too.
	if opening[start + 2] != '-' or opening[start + 3] != '-' then return -1
	var line: nullable MDLine = first_line
	var pos = start + 4
	while line != null do
		var text = line.value
		# move to the next `-` of the current line
		while pos < text.length and text[pos] != '-' do pos += 1
		if pos == text.length then
			line = line.next
			pos = 0
		else
			if pos + 2 < text.length and text[pos + 1] == '-' and text[pos + 2] == '>' then
				first_line.xml_end_line = line
				return pos + 3
			end
			pos += 1
		end
	end
	return -1
end
1279
# Extract the text of `self` without its leading and trailing spaces.
#
# `leading` and `trailing` are used as stored, they are not recomputed.
# Returns an empty string when they cover the whole `value`.
fun text: String do
	# `substring` expects a start index and a number of characters,
	# not an end index.
	var count = value.length - leading - trailing
	if count <= 0 then return ""
	return value.substring(leading, count)
end
1282 end
1283
# A kind of markdown line.
#
# Each kind implements `process` to handle the lines it stands for.
interface Line

	# Process the current line of `v` according to this kind.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1291
# An empty markdown line.
class LineEmpty
	super Line

	# Nothing to build: just step over the line.
	redef fun process(v) do
		var current = v.current_line
		v.current_line = current.next
	end
end
1300
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the following lines into a `BlockParagraph` or a `BlockNone`.
	#
	# The block stops at the first empty line, or at the first line whose
	# kind starts another construct. `BlockNone` is only used inside a list
	# (`v.in_list`), when the first line was not preceded by an empty line
	# and the block was not ended by an empty line.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if v.in_list and t isa LineList then break
			if t isa LineCode or t isa LineFence then break
			# `LineHeadline1` and `LineHeadline2` are subclasses of
			# `LineHeadline`, so a single test covers the three kinds.
			if t isa LineHeadline or t isa LineHR or
			   t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# build block
		var block: MDBlock
		var plain: Bool
		if line == null then
			# no more lines: the block takes everything that is left
			block = v.current_block.split(v.current_block.last_line.as(not null))
			plain = v.in_list and not was_empty
		else if line.is_empty then
			# ended by an empty line: always a paragraph
			block = v.current_block.split(line)
			plain = false
		else
			# ended by another construct, which is left out of the block
			block = v.current_block.split(line.prev.as(not null))
			plain = v.in_list and not was_empty
		end
		if plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1351
# A line of markdown code.
class LineCode
	super Line

	# Gather the code lines, and the empty lines between them, into a `BlockCode`.
	redef fun process(v) do
		# lookup block end: first line that is neither empty nor code
		var line = v.current_line
		while line != null and (line.is_empty or v.line_kind(line) isa LineCode) do
			line = line.next
		end
		# split at block end line
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var code = v.current_block.split(last)
		code.kind = new BlockCode(code)
		code.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1374
# A line of raw XML.
class LineXML
	super Line

	# Isolate the XML markup starting at the current line into a `BlockXML`.
	redef fun process(v) do
		var line = v.current_line
		# detach the lines preceding the markup, if any
		var before = line.prev
		if before != null then v.current_block.split(before)
		# the markup runs up to the end line recorded on its first line
		var xml = v.current_block.split(line.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1389
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the quoted lines into a `BlockQuote` and process its content.
	redef fun process(v) do
		var line = v.current_line
		# go to bquote end: a non-empty, unindented line that follows an
		# empty line and is not itself a blockquote line
		while line != null do
			if not line.is_empty and line.prev_empty and line.leading == 0 and
			   not v.line_kind(line) isa LineBlockquote then break
			line = line.next
		end
		# build sub block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var quote = v.current_block.split(last)
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		v.current_line = line
		# the content of the quote is markdown too
		v.recurse(quote, false)
		v.current_line = v.current_block.first_line
	end
end
1419
# A markdown ruler line.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	redef fun process(v) do
		var line = v.current_line
		# detach the lines preceding the ruler, if any
		var before = line.prev
		if before != null then v.current_block.split(before)
		var ruler = v.current_block.split(line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1433
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the next fence line into a `BlockFence`.
	#
	# When no other fence line follows, the block takes all remaining lines.
	redef fun process(v) do
		# go to fence end, starting after the opening fence
		var line = v.current_line.next
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# the closing fence, when found, belongs to the block
		if line != null then line = line.next
		# build fence block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var fence = v.current_block.split(last)
		fence.kind = new BlockFence(fence)
		# blank the opening fence line
		fence.first_line.clear
		# blank the last line too, but only if it is a fence line
		var tail = fence.last_line
		if tail != null and v.line_kind(tail) isa LineFence then tail.clear
		fence.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1465
# A markdown headline.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline`.
	redef fun process(v) do
		var line = v.current_line
		# detach the lines preceding the headline, if any
		var before = line.prev
		if before != null then v.current_block.split(before)
		var headline = v.current_block.split(line.as(not null))
		var kind = new BlockHeadline(headline)
		headline.kind = kind
		kind.transform_headline(headline)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1482
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	redef fun process(v) do
		var line = v.current_line
		# detach the lines preceding the headline, if any
		var before = line.prev
		if before != null then v.current_block.split(before)
		# The line after the headline is blanked.
		# NOTE(review): presumably the underline that marks the level - confirm.
		line.next.clear
		var headline = v.current_block.split(line.as(not null))
		var kind = new BlockHeadline(headline)
		kind.depth = 1
		kind.transform_headline(headline)
		headline.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1501
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	redef fun process(v) do
		var line = v.current_line
		# detach the lines preceding the headline, if any
		var before = line.prev
		if before != null then v.current_block.split(before)
		# The line after the headline is blanked.
		# NOTE(review): presumably the underline that marks the level - confirm.
		line.next.clear
		var headline = v.current_block.split(line.as(not null))
		var kind = new BlockHeadline(headline)
		kind.depth = 2
		kind.transform_headline(headline)
		headline.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1520
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list into a list block and process its items.
	redef fun process(v) do
		var line = v.current_line
		# go to list end: a non-empty, unindented line that follows an
		# empty line and is not a list line
		while line != null do
			var t = v.line_kind(line)
			if not line.is_empty and line.prev_empty and line.leading == 0 and
			   not t isa LineList then break
			line = line.next
		end
		# build list block
		var last: MDLine
		if line == null then
			last = v.current_block.last_line.as(not null)
		else
			last = line.prev.as(not null)
		end
		var list = v.current_block.split(last)
		var kind = block_kind(list)
		list.kind = kind
		# the boundary flags are reset both before and after the trimming
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# process each sub block of the list
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract the part of `line` that follows its list marker.
	protected fun extract_value(line: MDLine): String is abstract
end
1565
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do return new BlockOrderedList(block)

	# The value starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var value = line.value
		var dot = value.index_of('.')
		return value.substring_from(dot + 2)
	end
end
1576
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do return new BlockUnorderedList(block)

	# The value starts two characters after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1587
# A token represents a character of the markdown input.
# Tokens with a specific markup behaviour redefine `emit` to handle it.
abstract class Token

	# Position of `self` in the markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output this token with the emitter `v`.
	# By default, `char` is added as is.
	fun emit(v: MarkdownEmitter) do v.addc char
end
1601
# A token without any markup meaning.
# It keeps the default `emit`, so its character is output as is.
class TokenNone
	super Token
end
1606
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as emphasis.
	# Without a positive match position, `char` is output as is.
	redef fun emit(v) do
		# the emphasized text is rendered in its own buffer
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, tmp)
		v.current_pos = b
	end
end
1623
# An emphasis token written with a star (`*`).
class TokenEmStar
	super TokenEm
end
1628
# An emphasis token written with an underscore (`_`).
class TokenEmUnderscore
	super TokenEm
end
1633
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as strong text.
	# Without a positive match position, `char` is output as is.
	redef fun emit(v) do
		# the strong text starts two characters after `pos` and is
		# rendered in its own buffer
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, tmp)
		v.current_pos = b + 1
	end
end
1650
# A strong token written with stars (`**`).
class TokenStrongStar
	super TokenStrong
end
1655
# A strong token written with underscores (`__`).
class TokenStrongUnderscore
	super TokenStrong
end
1660
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the next token of the same type as span code.
	#
	# Spaces at both ends of the span are left out, and a span made only of
	# spaces outputs nothing. Without a positive match position, `char` is
	# output as is.
	redef fun emit(v) do
		var from = pos + next_pos + 1
		var to = v.current_text.find_token(from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		# skip the spaces at the start of the span
		while from < to and v.current_text[from] == ' ' do from += 1
		if from >= to then return
		# then the ones at its end
		while v.current_text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
	end

	# Number of characters of the delimiter, in addition to the first one.
	private fun next_pos: Int is abstract
end
1683
# A span code token made of a single backtick.
class TokenCodeSingle
	super TokenCode

	# The delimiter has no extra character.
	redef fun next_pos do return 0
end
1690
# A span code token made of two backticks.
class TokenCodeDouble
	super TokenCode

	# The delimiter has one extra character.
	redef fun next_pos do return 1
end
1697
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if the construct is valid, `char` otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# The construct is read from `start` in `v.current_text`. Three forms
	# are handled: `[name](address "title")`, `[name][id]` and `[name]`,
	# the last two being resolved with `v.processor.link_refs`.
	#
	# On success `name`, `link`, `comment` and possibly `is_abbrev` are set
	# and the position of the last character of the construct is returned.
	# Returns `-1` otherwise.
	#
	# `out` is not used.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		# skip the opening `[` (links) or `![` (images)
		var pos = start + 2
		if token isa TokenLink then pos = start + 1
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# end of text right after `[name]`: reference by name
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# inline form: `(address "title")`
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# `<address>` may end the text: nothing is left for the `)`
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# only spaces up to the end of the text: no closing `)`
				if pos < start then return -1
				if md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# reference form: `[id]`, an empty id stands for the name
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# `[name]` followed by other text: reference by name
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				# keep the abbreviation flag, as done when `[name]` ends the text
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
1816
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when `is_abbrev` is set and a title is known,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
		else
			v.decorator.add_link(v, link.as(not null), name.as(not null), title)
		end
	end
end
1829
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image with its address, its text and its optional title.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var text = name.as(not null)
		v.decorator.add_image(v, address, text, comment)
	end
end
1838
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup found at the current position.
	# When `check_html` finds nothing valid, `char` is escaped instead.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_html(v, tmp, v.current_text.as(not null), v.current_pos)
		if b <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add tmp
		v.current_pos = b
	end

	# Is the HTML valid?
	# Also take care of link shortcuts.
	#
	# A link shortcut (`<prefix:...>`) is emitted right away as a link.
	# Other markup is read into `out`.
	# Returns the position of the closing `>`, or `-1`.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var tmp = new FlatBuffer
		var pos = md.read_until(tmp, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and tmp.is_link_prefix then
			pos = md.read_until(tmp, pos, '>')
			if pos != -1 then
				var link = tmp.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
1876
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity starting at `pos` as is.
	# When `check_entity` rejects it, `char` is escaped instead.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# The entity is read from `start` in `md` up to the next `;`.
	# Three forms are accepted: `&#x` followed by hexadecimal digits,
	# `&#` followed by decimal digits, and `&` followed by letters and digits.
	#
	# On success, the entity (with its `;`) is in `out` and the position of
	# the `;` is returned. Returns `-1` otherwise.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then return -1
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# hexadecimal reference: at least one digit is required
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				# decimal reference
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# named entity
			# TODO check entity is valid
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
		end
		out.add ';'
		return pos
	end
end
1930
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the backslash and output the character that follows it.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text[escaped]
	end
end
1940
# A markdown super token.
class TokenSuper
	super Token

	# Emit the text up to the matching token as superscript.
	# Without a positive match position, `char` is output as is.
	redef fun emit(v) do
		# the raised text is rendered in its own buffer
		var tmp = v.push_buffer
		var b = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if b <= 0 then
			v.addc char
			return
		end
		v.decorator.add_super(v, tmp)
		v.current_pos = b
	end
end
1957
1958 redef class Text
1959
# Get the token kind at `pos`.
#
# The kind depends on the character at `pos` and on its neighbours: the
# previous character and the next two. A neighbour that falls outside of
# `self` counts as a space.
private fun token_at(pos: Int): Token do
	var c0 = ' '
	if pos > 0 then c0 = self[pos - 1]
	var c = self[pos]
	var c1 = ' '
	if pos + 1 < length then c1 = self[pos + 1]
	var c2 = ' '
	if pos + 2 < length then c2 = self[pos + 2]

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
			return new TokenEmStar(pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
		return new TokenNone(pos, c)
	else if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
			return new TokenEmUnderscore(pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
		return new TokenNone(pos, c)
	else if c == '!' then
		if c1 == '[' then return new TokenImage(pos, c)
		return new TokenNone(pos, c)
	else if c == '[' then
		return new TokenLink(pos, c)
	else if c == ']' then
		return new TokenNone(pos, c)
	else if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(pos, c)
		return new TokenCodeSingle(pos, c)
	else if c == '\\' then
		# a backslash only escapes the characters listed here
		if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or
		   c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or
		   c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
		   c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or
		   c1 == '^' then
			return new TokenEscape(pos, c)
		end
		return new TokenNone(pos, c)
	else if c == '<' then
		return new TokenHTML(pos, c)
	else if c == '&' then
		return new TokenEntity(pos, c)
	else if c == '^' then
		# a `^` next to another `^` has no markup meaning
		if c0 == '^' or c1 == '^' then return new TokenNone(pos, c)
		return new TokenSuper(pos, c)
	end
	return new TokenNone(pos, c)
end
2049
# Find the position of the first token of the same type as `token`.
#
# The search starts at `start`. Returns `-1` when there is no such token.
private fun find_token(start: Int, token: Token): Int do
	for i in [start..length[ do
		if token_at(i).is_same_type(token) then return i
	end
	return -1
end
2061
# Get the position of the next character that is neither a space nor a new line.
#
# The search starts at `start`.
# Returns `-1` when the end of `self` is reached first.
private fun skip_spaces(start: Int): Int do
	var pos = start
	while pos > -1 and pos < length do
		var c = self[pos]
		if c != ' ' and c != '\n' then break
		pos += 1
	end
	if pos >= length then return -1
	return pos
end
2071
# Read `self` from `start` until one of the `nend` characters and append
# what is read to the `out` buffer.
# Backslash sequences are handled by `escape`.
#
# Returns the position of the end character found, or `-1` when the end of
# `self` is reached first.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			pos = escape(out, self[pos + 1], pos)
		else if nend.has(c) then
			break
		else
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2096
# Read `self` as raw text from `start` until one of the `nend` characters
# and append what is read to the `out` buffer.
# No escape is made.
#
# Returns the position of the end character found, or `-1` when the end of
# `self` is reached first.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var pos = start
	while pos < length do
		var c = self[pos]
		if nend.has(c) then break
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2117
# Read `self` as XML from `from` until one of the `to` characters and
# append what is read to the `out` buffer.
#
# Quoted strings (`"..."` or `'...'`) are copied as a whole: the `to`
# characters they contain do not stop the reading. Inside a string, a
# backslash is copied together with the character that follows it.
#
# Returns the position of the end character found, or `-1` when the end of
# `self` is reached first.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				# copy the backslash, then the character it escapes
				out.add c
				pos += 1
				if pos < length then
					out.add self[pos]
					pos += 1
				end
				continue
			end
			# only the quote that opened the string can close it
			if c == str_char then in_str = false
		else if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2163
# Read the XML tag starting at `start` and append it to the `out` buffer.
#
# With `safe_mode`, the opening `<` of a tag listed in `html_unsafe_tags`
# is written as `&lt;`.
# For a tag starting with `<!`, only `<!` is appended and `start + 1` is
# returned.
#
# Returns the position of the closing `>`, or `-1` when the tag is not valid.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	if safe_mode then
		# read the tag name aside, to check it before writing it
		var tmp = new FlatBuffer
		pos = read_xml_until(tmp, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = tmp.write_to_string.trim.to_lower
		if tag.is_html_unsafe then
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append tmp
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
	end
	if pos == -1 then return -1
	# then the rest of the tag
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	out.add '>'
	return pos
end
2213
# Read a markdown link address from `start` and append it to the `out` buffer.
#
# The address ends at the first space found outside of nested parentheses,
# or at the `)` that closes the parenthesis considered open at `start`.
# Backslash sequences are handled by `escape`.
#
# Returns the position of that end character, or `-1` when the end of
# `self` is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var pos = start
	# nesting level of the parentheses; the link itself counts for one
	var depth = 1
	while pos < length do
		var c = self[pos]
		if c == '\\' and pos + 1 < length then
			pos = escape(out, self[pos + 1], pos)
		else
			if c == '(' then
				depth += 1
			else if c == ' ' then
				if depth == 1 then break
			else if c == ')' then
				depth -= 1
				if depth == 0 then break
			end
			out.add c
		end
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2240
# Read a markdown link text from `start` and append it to the `out` buffer.
#
# The text ends at the `]` that closes the bracket considered open at
# `start`; nested brackets are part of the text.
#
# Returns the position of that `]`, or `-1` when the end of `self` is
# reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var pos = start
	# nesting level of the brackets; the link text itself counts for one
	var depth = 1
	while pos < length do
		var c = self[pos]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2267
# Extract the XML tag name from a XML tag.
#
# The name is read after the first character of `self` (and after a `/`
# found at index 1), as long as letters or digits are found. The last
# character of `self` is never read. The name is returned in lower case.
private fun xml_tag: String do
	var tag = new FlatBuffer
	var pos = 1
	if pos < length and self[pos] == '/' then pos = 2
	var last = length - 1
	while pos < last do
		var c = self[pos]
		if not c.is_digit and not c.is_letter then break
		tag.add c
		pos += 1
	end
	return tag.write_to_string.to_lower
end
2279
# Handle a backslash found at `pos`, followed by the character `c`.
#
# If `c` can be escaped, `c` is appended to `out` and `pos + 1` (the
# position of `c`) is returned. Otherwise the backslash itself is appended
# and `pos` is returned.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = once ['\\', '[', ']', '(', ')', '{', '}', '#', '"', '\'',
		'.', '<', '>', '*', '+', '-', '_', '!', '`', '~', '^']
	if not escapable.has(c) then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2292
# Is `self` the name of an unsafe HTML element?
# See `html_unsafe_tags`.
private fun is_html_unsafe: Bool do
	var tag = write_to_string
	return html_unsafe_tags.has(tag)
end

# Is `self` the name of an HTML block element?
# See `html_block_tags`.
private fun is_html_block: Bool do
	var tag = write_to_string
	return html_block_tags.has(tag)
end

# Is `self` a link prefix?
# See `html_link_prefixes`.
private fun is_link_prefix: Bool do
	var prefix = write_to_string
	return html_link_prefixes.has(prefix)
end
2301
# Names of the HTML elements considered unsafe.
private fun html_unsafe_tags: Array[String] do
	return once ["applet", "head", "body", "frame", "frameset", "iframe",
		"script", "object"]
end

# Names of the HTML block elements.
private fun html_block_tags: Array[String] do
	return once ["address", "article", "aside", "audio", "blockquote",
		"canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure",
		"footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header",
		"hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section",
		"table", "tfoot", "ul", "video"]
end

# Prefixes recognized for link shortcuts.
private fun html_link_prefixes: Array[String] do
	return once ["http", "https", "ftp", "ftps"]
end
2307 end
2308
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do return (new MarkdownProcessor).process(self)
end