lib/markdown: suppress super notation with `^`.
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
# Parse a markdown string and split it into blocks.
#
# Blocks are then output by a `MarkdownEmitter`.
#
# Usage:
#
#     var proc = new MarkdownProcessor
#     var html = proc.process("**Hello World!**")
#     assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor

	# `MarkdownEmitter` used for output.
	var emitter: MarkdownEmitter is noinit

	init do self.emitter = new MarkdownEmitter(self)

	# Process the markdown `input` string and return the processed output.
	#
	# All parsing state (`link_refs`, `last_link_ref`, `current_line` and
	# `current_block`) is reset first, so a processor can be reused.
	fun process(input: String): Streamable do
		# reset the parsing state
		link_refs.clear
		last_link_ref = null
		current_line = null
		current_block = null
		# build the block tree
		var root = read_lines(input)
		root.remove_surrounding_empty_lines
		recurse(root, false)
		# render it
		return emitter.emit(root.kind)
	end

	# Split `input` into `MDLine`s and return a root `MDBlock` holding them.
	#
	# Lines are cut on `'\n'`. Tabs are expanded with spaces up to the next
	# column that is a multiple of 4. Lines recognized by `check_link_ref`
	# are recorded as link references and not added to the block.
	private fun read_lines(input: String): MDBlock do
		var root = new MDBlock
		var buf = new FlatBuffer
		var len = input.length
		var i = 0
		while i < len do
			buf.clear
			# column reached in the expanded line
			var col = 0
			while i < len do
				var c = input[i]
				i += 1
				if c == '\n' then break
				if c == '\t' then
					var stop = col + (4 - col.bin_and(3))
					while col < stop do
						buf.add ' '
						col += 1
					end
				else
					buf.add c
					col += 1
				end
			end
			var line = new MDLine(buf.write_to_string)
			if not check_link_ref(line) then root.add_line line
		end
		return root
	end

	# Check if `line` is a block link definition.
	#
	# Return `true` if `line` holds a valid link ref, which is then saved
	# into `link_refs`, or if `line` only holds the quoted title of the
	# previous link ref (see `last_link_ref`).
	#
	# NOTE(review): the position returned by `skip_spaces` is used as an
	# index without a range check, while the `pos > 0` tests below suggest
	# that the string helpers may return a non-positive value on failure.
	# Confirm against their definitions.
	private fun check_link_ref(line: MDLine): Bool do
		var md = line.value
		var id = new FlatBuffer
		var link = new FlatBuffer
		var comment = new FlatBuffer
		var is_link_ref = false
		if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
			var pos = md.read_until(id, line.leading + 1, ']')
			if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
				pos = md.skip_spaces(pos + 2)
				if md[pos] == '<' then
					# `<link>` form
					pos = md.read_until(link, pos + 1, '>') + 1
				else
					pos = md.read_until(link, pos, ' ', '\n')
				end
				if not link.is_empty then
					pos = md.skip_spaces(pos)
					if pos > 0 and pos < md.length then
						# something follows the link: it must be a title
						var open = md[pos]
						if open == '\"' or open == '\'' or open == '(' then
							var close = open
							if open == '(' then close = ')'
							pos = md.read_until(comment, pos + 1, close)
							if pos > 0 then is_link_ref = true
						end
					else
						is_link_ref = true
					end
				end
			end
		end
		if is_link_ref and not id.is_empty and not link.is_empty then
			var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
			add_link_ref(id.write_to_string, lr)
			# without a title, the next line may still provide one
			if comment.is_empty then last_link_ref = lr
			return true
		end
		# Not a definition: maybe the title of the previous link ref.
		var last = last_link_ref
		if line.is_empty or last == null then return false
		var mark = md[line.leading]
		if mark != '\"' and mark != '\'' and mark != '(' then return false
		var stop = mark
		if mark == '(' then stop = ')'
		var title = new FlatBuffer
		md.read_until(title, line.leading + 1, stop)
		if title.is_empty then return false
		last.title = title.write_to_string
		return true
	end

	# Known link refs, indexed by their lowercased id (see `add_link_ref`).
	# This list will be needed during output to expand links.
	var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

	# Last encountered link ref (for multiline definitions)
	#
	# Markdown allows link refs to be defined over two lines:
	#
	#     [id]: http://example.com/longish/path/to/resource/here
	#         "Optional Title Here"
	#
	private var last_link_ref: nullable LinkRef = null

	# Register `ref` in `link_refs` under the lowercased `key`.
	fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref

	# Recursively split a `block`.
	#
	# The block is split according to the type of lines it contains.
	# Some blocks, like lists, can be split again recursively.
	# The `in_list` mode is used to recurse on lists and build
	# nested paragraphs or code blocks.
	#
	# `in_list` and `current_block` are restored before returning.
	fun recurse(root: MDBlock, in_list: Bool) do
		var old_mode = self.in_list
		var old_root = self.current_block
		self.in_list = in_list

		var line = root.first_line
		while line != null and line.is_empty do
			line = line.next
			if line == null then
				# Only empty lines: nothing to process, but the list mode
				# changed above must not leak to the caller.
				self.in_list = old_mode
				return
			end
		end

		current_line = line
		current_block = root
		# each line kind consumes lines and advances `current_line`
		while current_line != null do
			line_kind(current_line.as(not null)).process(self)
		end
		self.in_list = old_mode
		self.current_block = old_root
	end

	# Currently processed line.
	# Used when visiting blocks with `recurse`.
	var current_line: nullable MDLine = null is writable

	# Currently processed block.
	# Used when visiting blocks with `recurse`.
	var current_block: nullable MDBlock = null is writable

	# Is the current recursion in list mode?
	# Used when visiting blocks with `recurse`
	private var in_list = false

	# The kind of the line `md`.
	#
	# Checks are done in this order: empty, code (4+ leading spaces),
	# headline (`#`), blockquote (`>`), fence, ruler, unordered list,
	# ordered list, XML, then setext headlines (decided from the next line).
	# Anything else is a `LineOther`.
	fun line_kind(md: MDLine): Line do
		if md.is_empty then return new LineEmpty
		var value = md.value
		var leading = md.leading
		if leading > 3 then return new LineCode
		var first = value[leading]
		if first == '#' then return new LineHeadline
		if first == '>' then return new LineBlockquote

		# fences and rulers need at least 3 significant characters
		if value.length - leading - md.trailing > 2 then
			if (first == '`' or first == '~') and md.count_chars_start(first) >= 3 then
				return new LineFence
			end
			if (first == '*' or first == '-' or first == '_') and md.count_chars(first) >= 3 then
				return new LineHR
			end
		end

		var rest = value.length - leading
		if rest >= 2 and value[leading + 1] == ' ' then
			if first == '*' or first == '-' or first == '+' then return new LineUList
		end

		if rest >= 3 and first.is_digit then
			# digits followed by ". "
			var i = leading + 1
			while i < value.length and value[i].is_digit do i += 1
			if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
				return new LineOList
			end
		end

		if first == '<' and md.check_html then return new LineXML

		var next = md.next
		if next != null and not next.is_empty then
			if next.count_chars('=') > 0 then return new LineHeadline1
			if next.count_chars('-') > 0 then return new LineHeadline2
		end
		return new LineOther
	end

	# Get the token kind at `pos` in `text`.
	#
	# The decision uses the previous character and the two next ones;
	# a space stands for any of them that falls outside of `text`.
	fun token_at(text: Text, pos: Int): Token do
		var len = text.length
		var c0 = ' '
		if pos > 0 then c0 = text[pos - 1]
		var c = text[pos]
		var c1 = ' '
		if pos + 1 < len then c1 = text[pos + 1]
		var c2 = ' '
		if pos + 2 < len then c2 = text[pos + 2]

		if c == '*' then
			if c1 == '*' then
				if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
				return new TokenEmStar(pos, c)
			end
			if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
			return new TokenNone(pos, c)
		end
		if c == '_' then
			if c1 == '_' then
				if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
				return new TokenEmUnderscore(pos, c)
			end
			if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
			return new TokenNone(pos, c)
		end
		if c == '!' and c1 == '[' then return new TokenImage(pos, c)
		if c == '[' then return new TokenLink(pos, c)
		if c == '`' then
			if c1 == '`' then return new TokenCodeDouble(pos, c)
			return new TokenCodeSingle(pos, c)
		end
		if c == '\\' then
			# a backslash is an escape only before one of these characters
			var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
				c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
				c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
				c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
				c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
				c1 == '~' or c1 == '^'
			if escapable then return new TokenEscape(pos, c)
			return new TokenNone(pos, c)
		end
		if c == '<' then return new TokenHTML(pos, c)
		if c == '&' then return new TokenEntity(pos, c)
		return new TokenNone(pos, c)
	end

	# Find in `text` the first position, from `start`, of a token with
	# the same type as `token`.
	#
	# Return `-1` if there is none.
	fun find_token(text: Text, start: Int, token: Token): Int do
		for pos in [start..text.length[ do
			if token_at(text, pos).is_same_type(token) then return pos
		end
		return -1
	end
end
355
# Emit the output corresponding to the content of blocks.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor containing link refs.
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Output `block` using `decorator` and return what was written.
	#
	# The output is collected in a fresh buffer pushed for the duration
	# of the call.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown `text` from position `start` until
	# a token with the same type as `token` is found.
	#
	# Go until the end of `text` if `token` is null or a `TokenNone`.
	# An emphasis `token` is also closed by the strong token using the
	# same character.
	#
	# Return the position of the closing token, or `-1` if the end of
	# `text` was reached.
	#
	# NOTE(review): `current_text` and `current_pos` are restored only
	# when the end of `text` is reached, not when a closing token is found.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var mt = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				var closes = mt.is_same_type(token) or
					(token isa TokenEmStar and mt isa TokenStrongStar) or
					(token isa TokenEmUnderscore and mt isa TokenStrongUnderscore)
				if closes then return current_pos
			end
			mt.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new empty buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer: the last one of the stack.
	#
	# The stack must not be empty.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to the current buffer.
	fun add(e: Streamable) do
		var out = current_buffer
		if e isa Text then
			out.append e
		else
			out.append e.write_to_string
		end
	end

	# Append `c` to the current buffer.
	fun addc(c: Char) do
		current_buffer.add c
	end

	# Append a "\n" line break.
	fun addn do
		current_buffer.add '\n'
	end
end
457
# A link reference.
#
# Link references are defined somewhere in the markdown document
# and reused as shortcuts.
#
# Example:
#
#     [1]: http://example.com/ "Optional title"
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link to `link` with the given `title`.
	init with_title(link: String, title: nullable String) do
		self.link = link
		self.title = title
	end
end
481
# A `Decorator` is used to emit markdown into a specific format.
#
# The default decorator is `HTMLDecorator`.
# Every service writes its output through the `MarkdownEmitter` `v`.
interface Decorator

	# Render the ruler `block`.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Render the headline `block` with its corresponding level.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Render the paragraph `block`.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Render the code or fence `block`.
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Render the blockquote `block`.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Render the unordered list `block`.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Render the ordered list `block`.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Render the list item `block`.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Render `text` as an emphasis.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Render `text` as a strong emphasis.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Render a link to `link` labeled `name`, with an optional `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image at `link` named `name`, with an optional `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Render a code span read from `buffer` between `from` and `to`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render the text `value` and escape it.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Render the code text read from `buffer` between `from` and `to`
	# and escape it.
	fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render the escaped form of `char`.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Render a line break.
	fun add_line_break(v: MarkdownEmitter) is abstract

	# Generate a new valid HTML id from the string `txt`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
546
# A markdown headline, as recorded by a `Decorator`.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
560
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	# Record the headline in `headlines`, then render it as `<hN id="...">`.
	redef fun add_headline(v, block) do
		# the id is computed from the text of the first line
		var title = block.block.first_line.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		v.add "<pre><code>"
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	# `link`, `name` and `comment` are escaped; the `title` attribute is
	# written only for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# `link` and `comment` are escaped, `name` is emitted as markdown text.
	# The `title` attribute is written only for a non-empty `comment`.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do v.add "<br/>"

	redef fun append_value(v, text) do
		for c in text do
			escape_char(v, c)
		end
	end

	# Write `c`, replacing `&`, `<`, `>`, `"` and `'` by HTML entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
			return
		end
		if c == '<' then
			v.add "&lt;"
			return
		end
		if c == '>' then
			v.add "&gt;"
			return
		end
		if c == '"' then
			v.add "&quot;"
			return
		end
		if c == '\'' then
			v.add "&apos;"
			return
		end
		v.addc c
	end

	# Write `buffer[from..to[`, replacing only `&`, `<` and `>` by
	# HTML entities (quotes are kept as is).
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			i += 1
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
		end
	end

	# Spaces become `_`; letters, digits and `allowed_id_chars` are kept;
	# anything else is dropped.
	#
	# If the result is already a key of `headlines`, the first free
	# `_1`, `_2`, ... suffix is appended.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var base = b.to_s
		var key = base
		var n = 0
		while headlines.has_key(key) do
			n += 1
			key = "{base}_{n}"
		end
		return key
	end

	# Punctuation characters kept by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
737
# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks.
# Lines and sub-blocks are kept as doubly linked lists.
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var sub = first_block
		while sub != null do
			n += 1
			sub = sub.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var n = 0
		var cur = first_line
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved to the new block,
	# which is appended to the sub-blocks of `self` and returned.
	# `self` keeps the lines following `line`.
	fun split(line: MDLine): MDBlock do
		var block = new MDBlock
		# hand the head of the line list over to the new block
		block.first_line = first_line
		block.last_line = line
		var rest = line.next
		first_line = rest
		line.next = null
		if rest == null then
			last_line = null
		else
			rest.prev = null
		end
		# append the new block to the sub-blocks
		var tail = last_block
		if first_block == null then
			first_block = block
		else
			tail.next = block
			# keep the sibling list doubly linked, as `prev` documents
			block.prev = tail
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	#
	# The `prev_empty` and `next_empty` flags of `line` and of the former
	# last line are updated.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbors; the `prev_empty` and
	# `next_empty` flags are not updated.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		loop
			var line = first_line
			if line == null or not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		loop
			var line = last_line
			if line == null or not line.is_empty then break
			remove_line line
			removed = true
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var head = remove_leading_empty_lines
		var tail = remove_trailing_empty_lines
		return head or tail
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect the text of the lines of this block.
	#
	# Each line contributes its `text` (nothing if empty) followed by "\n".
	fun text: String do
		var out = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then out.append line.text
			out.append "\n"
			line = line.next
		end
		return out.write_to_string
	end
end
907
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`: its lines if any, else its sub-blocks.
	#
	# Surrounding empty lines are removed from `block` first.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first joined, without their leading and trailing spaces,
	# in a temporary buffer. The result is then emitted as markdown text.
	# A line break is added after lines ending with 2 or more spaces.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a count, not an end index: the leading
				# spaces must be subtracted too, else trailing spaces are kept.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then v.addn
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var sub = self.block.first_block
		while sub != null do
			sub.kind.emit(v)
			sub = sub.next
		end
	end
end
955
# A block without any markdown specificities.
#
# It uses the implementation of `Block` unchanged;
# this class is only used for typing purposes.
class BlockNone
	super Block
end
963
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove blockquote markers from the lines of `block`.
	#
	# For each non-empty line starting with `>` (after its leading spaces),
	# the marker and one optional following space are removed.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var value = line.value
				var mark = line.leading
				if value[mark] == '>' then
					var rem = mark + 1
					if rem < value.length and value[rem] == ' ' then rem += 1
					line.value = value.substring_from(rem)
					line.leading = line.process_leading
				end
			end
			line = line.next
		end
	end
end
989
# A markdown code block.
class BlockCode
	super Block

	# Number of chars to skip at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line as escaped code, from `line_start`, followed by
	# a line break. Empty lines only produce the line break.
	redef fun emit_lines(v) do
		var cur = block.first_line
		while cur != null do
			if not cur.is_empty then
				var code = cur.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			cur = cur.next
		end
	end
end
1012
# A markdown code-fence block.
#
# It uses the same implementation as `BlockCode`, except for `line_start`;
# this class is mostly used for typing purposes.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1023
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block` and set `depth`.
	#
	# Nothing is done if `depth` is already set, or if `block` has no
	# first line or an empty one.
	# Else, the opening `#` marks are counted to compute `depth` (6 at most)
	# and the line is reduced to the text between the opening marks and the
	# optional closing ones. A line holding nothing but marks and spaces
	# is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var line = block.first_line
		if line == null or line.is_empty then return
		var value = line.value
		var len = value.length
		# count the opening marks
		var level = 0
		var start = line.leading
		while start < len and value[start] == '#' do
			level += 1
			start += 1
		end
		while start < len and value[start] == ' ' do start += 1
		if start >= len then
			line.is_empty = true
		else
			# Strip the closing marks. The scan never goes before `start`,
			# so that lines like `# #` do not yield a negative length.
			var nend = len - line.trailing - 1
			while nend >= start and value[nend] == '#' do nend -= 1
			while nend >= start and value[nend] == ' ' do nend -= 1
			if nend < start then
				line.is_empty = true
			else
				line.value = value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		depth = level.min(6)
	end
end
1060
# A markdown list item block.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1067
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split the list block into list item sub-blocks.
	#
	# Starting from the second line, a new item begins at each list line,
	# and at each non-empty, non-indented line that follows an empty one.
	# The remaining lines make the last item.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		line = line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList or
				(not line.is_empty and line.prev_empty and line.leading == 0)
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If any list item of `block` contains a `BlockParagraph`, then every
	# `BlockNone` found in the list items becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# look for a paragraph in any list item
		var has_paragraph = false
		var item = block.first_block
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null and not has_paragraph do
					if sub.kind isa BlockParagraph then has_paragraph = true
					sub = sub.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return

		# promote plain blocks to paragraphs
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
					sub = sub.next
				end
			end
			item = item.next
		end
	end
end
1125
# A markdown ordered list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1132
# A markdown unordered list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1139
# A markdown paragraph block.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1146
# A markdown ruler.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1153
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit each line as is (no escaping, no markdown transformation)
	# followed by a line break. Empty lines only produce the line break.
	redef fun emit_lines(v) do
		var cur = block.first_line
		while cur != null do
			if not cur.is_empty then
				v.add cur.value
			end
			v.addn
			cur = cur.next
		end
	end
end
1167
1168 # A markdown line.
1169 class MDLine
1170
1171 # Text contained in this line.
1172 var value: String is writable
1173
1174 # Is this line empty?
1175 # Lines containing only spaces are considered empty.
1176 var is_empty: Bool = true is writable
1177
1178 # Previous line in `MDBlock` or null if first line.
1179 var prev: nullable MDLine = null is writable
1180
1181 # Next line in `MDBlock` or null if last line.
1182 var next: nullable MDLine = null is writable
1183
1184 # Is the previous line empty?
1185 var prev_empty: Bool = false is writable
1186
1187 # Is the next line empty?
1188 var next_empty: Bool = false is writable
1189
# Initialize a new MDLine from its string value.
#
# `leading` is computed first. A line made only of spaces stays empty;
# otherwise the line is flagged as non-empty and `trailing` is computed.
init do
	leading = process_leading
	if leading == value.length then return
	is_empty = false
	trailing = process_trailing
end
1198
# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
#
# The neighbor lines, if any, are told that this line is now empty.
fun clear do
	value = ""
	leading = 0
	trailing = 0
	is_empty = true
	var before = prev
	if before != null then before.next_empty = true
	var after = next
	if after != null then after.prev_empty = true
end
1208
1209 # Number or leading spaces on this line.
1210 var leading: Int = 0 is writable
1211
1212 # Compute `leading` depending on `value`.
1213 fun process_leading: Int do
1214 var count = 0
1215 var value = self.value
1216 while count < value.length and value[count] == ' ' do count += 1
1217 if leading == value.length then clear
1218 return count
1219 end
1220
1221 # Number of trailing spaces on this line.
1222 var trailing: Int = 0 is writable
1223
1224 # Compute `trailing` depending on `value`.
1225 fun process_trailing: Int do
1226 var count = 0
1227 var value = self.value
1228 while value[value.length - count - 1] == ' ' do
1229 count += 1
1230 end
1231 return count
1232 end
1233
1234 # Count the amount of `ch` in this line.
1235 # Return A value > 0 if this line only consists of `ch` end spaces.
1236 fun count_chars(ch: Char): Int do
1237 var count = 0
1238 for c in value do
1239 if c == ' ' then
1240 continue
1241 end
1242 if c == ch then
1243 count += 1
1244 continue
1245 end
1246 count = 0
1247 break
1248 end
1249 return count
1250 end
1251
1252 # Count the amount of `ch` at the start of this line ignoring spaces.
1253 fun count_chars_start(ch: Char): Int do
1254 var count = 0
1255 for c in value do
1256 if c == ' ' then
1257 continue
1258 end
1259 if c == ch then
1260 count += 1
1261 else
1262 break
1263 end
1264 end
1265 return count
1266 end
1267
1268 # Last XML line if any.
1269 private var xml_end_line: nullable MDLine = null
1270
1271 # Does `value` contains valid XML markup?
1272 private fun check_html: Bool do
1273 var tags = new Array[String]
1274 var tmp = new FlatBuffer
1275 var pos = leading
1276 if pos + 1 < value.length and value[pos + 1] == '!' then
1277 if read_xml_comment(self, pos) > 0 then return true
1278 end
1279 pos = value.read_xml(tmp, pos, false)
1280 var tag: String
1281 if pos > -1 then
1282 tag = tmp.xml_tag
1283 if not tag.is_html_block then
1284 return false
1285 end
1286 if tag == "hr" then
1287 xml_end_line = self
1288 return true
1289 end
1290 tags.add tag
1291 var line: nullable MDLine = self
1292 while line != null do
1293 while pos < line.value.length and line.value[pos] != '<' do
1294 pos += 1
1295 end
1296 if pos >= line.value.length then
1297 if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1298 tags.pop
1299 if tags.is_empty then
1300 xml_end_line = line
1301 break
1302 end
1303 end
1304 line = line.next
1305 pos = 0
1306 else
1307 tmp = new FlatBuffer
1308 var new_pos = line.value.read_xml(tmp, pos, false)
1309 if new_pos > 0 then
1310 tag = tmp.xml_tag
1311 if tag.is_html_block and not tag == "hr" then
1312 if tmp[1] == '/' then
1313 if tags.last != tag then
1314 return false
1315 end
1316 tags.pop
1317 else
1318 tags.add tag
1319 end
1320 end
1321 if tags.is_empty then
1322 xml_end_line = line
1323 break
1324 end
1325 pos = new_pos
1326 else
1327 pos += 1
1328 end
1329 end
1330 end
1331 return tags.is_empty
1332 end
1333 return false
1334 end
1335
1336 # Read a XML comment.
1337 # Used by `check_html`.
1338 private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1339 var line: nullable MDLine = first_line
1340 if start + 3 < line.value.length then
1341 if line.value[2] == '-' and line.value[3] == '-' then
1342 var pos = start + 4
1343 while line != null do
1344 while pos < line.value.length and line.value[pos] != '-' do
1345 pos += 1
1346 end
1347 if pos == line.value.length then
1348 line = line.next
1349 pos = 0
1350 else
1351 if pos + 2 < line.value.length then
1352 if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1353 first_line.xml_end_line = line
1354 return pos + 3
1355 end
1356 end
1357 pos += 1
1358 end
1359 end
1360 end
1361 end
1362 return -1
1363 end
1364
1365 # Extract the text of `self` without leading and trailing.
1366 fun text: String do return value.substring(leading, value.length - trailing)
1367 end
1368
# A kind of markdown line.
#
# Each kind implements how the lines found at the current position of
# the processor are handled.
interface Line

	# Process the current line of `v` according to this kind.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1376
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the next one.
	redef fun process(v) do
		var line = v.current_line
		v.current_line = line.next
	end
end
1385
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the following lines until an empty line or a line of another
	# construct is found, then `split` them from the current block.
	# The new block is a `BlockNone` or a `BlockParagraph`.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			# `LineHeadline` also covers its subclasses `LineHeadline1` and `LineHeadline2`
			var stop = (v.in_list and t isa LineList) or
				t isa LineCode or t isa LineFence or
				t isa LineHeadline or t isa LineHR or
				t isa LineBlockquote or t isa LineXML
			if stop then break
			line = line.next
		end
		# build block
		var block: MDBlock
		var tight = false
		if line == null then
			# no more line: take everything that remains
			block = v.current_block.split(v.current_block.last_line.as(not null))
			tight = v.in_list and not was_empty
		else if line.is_empty then
			# stopped on an empty line: it goes with the block
			block = v.current_block.split(line)
		else
			# stopped on another construct: it stays in the current block
			block = v.current_block.split(line.prev.as(not null))
			tight = v.in_list and not was_empty
		end
		if tight then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1435
# A line of markdown code.
class LineCode
	super Line

	# Gather the consecutive code lines (empty lines included) in a `BlockCode`.
	redef fun process(v) do
		# lookup block end
		var line = v.current_line
		while line != null do
			if not line.is_empty and not v.line_kind(line) isa LineCode then break
			line = line.next
		end
		# split at block end line
		var last_of_block: MDLine
		if line != null then
			last_of_block = line.prev.as(not null)
		else
			last_of_block = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last_of_block)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1458
# A line of raw XML.
class LineXML
	super Line

	# Build a `BlockXML` that ends at the `xml_end_line` of the current line.
	redef fun process(v) do
		var first = v.current_line
		var before = first.prev
		# lines found before the XML are split apart first
		if before != null then
			v.current_block.split(before)
		end
		var xml_block = v.current_block.split(first.xml_end_line.as(not null))
		xml_block.kind = new BlockXML(xml_block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1473
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote in a `BlockQuote`, then process its content.
	redef fun process(v) do
		var line = v.current_line
		# go to bquote end:
		# a non-indented, non-quote line that follows an empty line
		while line != null do
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				if not v.line_kind(line) isa LineBlockquote then break
			end
			line = line.next
		end
		# build sub block
		var last_of_quote: MDLine
		if line != null then
			last_of_quote = line.prev.as(not null)
		else
			last_of_quote = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last_of_quote)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = line
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1503
# A markdown ruler line.
class LineHR
	super Line

	# Put the current line alone in a `BlockRuler`.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		# lines found before the ruler are split apart first
		if before != null then
			v.current_block.split(before)
		end
		var ruler = v.current_block.split(line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1517
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the next fence line in a `BlockFence`.
	# If no other fence line is found, all the remaining lines are taken.
	redef fun process(v) do
		# go to fence end
		var line = v.current_line.next
		while line != null and not v.line_kind(line) isa LineFence do
			line = line.next
		end
		# the closing fence belongs to the block: step past it
		if line != null then line = line.next
		# build fence block
		var last_of_block: MDLine
		if line != null then
			last_of_block = line.prev.as(not null)
		else
			last_of_block = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last_of_block)
		block.kind = new BlockFence(block)
		# the fence lines themselves are blanked
		block.first_line.clear
		var last = block.last_line
		if last != null and v.line_kind(last) isa LineFence then
			last.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1549
# A markdown headline.
class LineHeadline
	super Line

	# Put the current line alone in a `BlockHeadline`.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		# lines found before the headline are split apart first
		if before != null then
			v.current_block.split(before)
		end
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		block.kind = headline
		headline.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1566
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Put the current line alone in a `BlockHeadline` of depth 1.
	# The line that follows the headline is cleared.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		# lines found before the headline are split apart first
		if before != null then
			v.current_block.split(before)
		end
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 1
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1585
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Put the current line alone in a `BlockHeadline` of depth 2.
	# The line that follows the headline is cleared.
	redef fun process(v) do
		var line = v.current_line
		var before = line.prev
		# lines found before the headline are split apart first
		if before != null then
			v.current_block.split(before)
		end
		line.next.clear
		var block = v.current_block.split(line.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 2
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1604
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list in a block whose kind is given by
	# `block_kind`, then process each of its sub-blocks.
	redef fun process(v) do
		var line = v.current_line
		# go to list end:
		# a non-indented, non-list line that follows an empty line
		while line != null do
			var t = v.line_kind(line)
			if not line.is_empty and line.prev_empty and line.leading == 0 then
				if not t isa LineList then break
			end
			line = line.next
		end
		# build list block
		var last_of_list: MDLine
		if line != null then
			last_of_list = line.prev.as(not null)
		else
			last_of_list = v.current_block.last_line.as(not null)
		end
		var list = v.current_block.split(last_of_list)
		var kind = block_kind(list)
		list.kind = kind
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		# the flags are reset again after the surrounding empty lines are removed
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# process each sub-block of the list
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = line
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1650
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# Return what follows the first `.` of the line and the character after it.
	redef fun extract_value(line) do
		var content = line.value
		var dot = content.index_of('.')
		return content.substring_from(dot + 2)
	end
end
1661
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# Return what follows the leading spaces and the next two characters of the line.
	redef fun extract_value(line) do
		var content_start = line.leading + 2
		return line.value.substring_from(content_start)
	end
end
1672
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Position of `self` in markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default, `char` is written as is.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1686
# A token without a specific meaning.
#
# Nothing is redefined: the behaviour of `Token` is used as is.
class TokenNone
	super Token
end
1691
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token with `add_em`.
	# If `emit_text_until` does not return a positive position, `char` is written as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
1708
# An emphasis star token.
#
# Behaves as any `TokenEm`.
class TokenEmStar
	super TokenEm
end
1713
# An emphasis underscore token.
#
# Behaves as any `TokenEm`.
class TokenEmUnderscore
	super TokenEm
end
1718
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token with `add_strong`.
	# If `emit_text_until` does not return a positive position, `char` is written as is.
	redef fun emit(v) do
		var content = v.push_buffer
		# the content starts 2 characters after `pos`
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end
1735
# A strong star token.
#
# Behaves as any `TokenStrong`.
class TokenStrongStar
	super TokenStrong
end
1740
# A strong underscore token.
#
# Behaves as any `TokenStrong`.
class TokenStrongUnderscore
	super TokenStrong
end
1745
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token with `add_span_code`.
	#
	# Spaces around the code are not part of the emitted range.
	# If `find_token` does not return a positive position, `char` is written as is.
	redef fun emit(v) do
		var code_start = pos + next_pos + 1
		var code_end = v.processor.find_token(v.current_text.as(not null), code_start, self)
		if code_end <= 0 then
			v.addc char
			return
		end
		v.current_pos = code_end + next_pos
		# skip the spaces at the beginning of the code
		while code_start < code_end and v.current_text[code_start] == ' ' do code_start += 1
		# nothing but spaces: nothing to emit
		if code_start >= code_end then return
		# skip the spaces at the end of the code
		while v.current_text[code_end - 1] == ' ' do code_end -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), code_start, code_end)
	end

	# Offset added to `pos` to find the start of the code, and to the
	# position of the matching token to compute the new current position.
	private fun next_pos: Int is abstract
end
1768
# A span code token.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end
1775
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end
1782
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image with `emit_hyper` if `check_link` accepts it.
	# Otherwise `char` is written as is.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# On success `name`, `link`, `comment` and `is_abbrev` are filled and the
	# position of the last character of the construct is returned.
	# Return -1 if the text at `start` is not a complete link.
	#
	# Recognized forms are `[name]` (looked up in the link references of the
	# processor), `[name](address "title")` and `[name][id]`.
	# `out` is not used.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text.as(not null)
		var pos
		# the name starts after `[` for a link, one more character further otherwise
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		var link_name = tmp
		name = link_name
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# end of text reached: `[name]` can only be a reference
			var tid = link_name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# inline form: `(address "title")`
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# `<address>` may be the last thing of the text: `)` is missing
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# only spaces are left: `)` is missing
				if pos < start then return -1
				if md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# reference form: `[id]`, the name is used if the id is empty
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id: Text = link_name
			if tmp.length > 0 then id = tmp
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# `[name]` followed by something else: can only be a reference
			var tid = link_name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
1901
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation if the link is one and has a title, a link otherwise.
	redef fun emit_hyper(v) do
		var title = comment
		if is_abbrev and title != null then
			v.decorator.add_abbr(v, name.as(not null), title)
			return
		end
		v.decorator.add_link(v, link.as(not null), name.as(not null), title)
	end
end
1914
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image with `add_image`.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
1923
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit what `check_html` recognized.
	# Otherwise `char` is escaped with `escape_char`.
	redef fun emit(v) do
		var html = new FlatBuffer
		var stop = check_html(v, html, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add html
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# An auto link is emitted directly with `add_link`, an inline tag is appended to `out`.
	# Return the position of the closing `>` in `md` or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var address = new FlatBuffer
		var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and address.is_link_prefix then
			# the rest of the address is appended after the prefix
			pos = md.read_until(address, pos, '>')
			if pos != -1 then
				var link = address.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
1961
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity as is if `check_entity` accepts it.
	# Otherwise `char` is escaped with `escape_char`.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# The entity is read from `start` up to the next `;` and appended to `out`.
	# Accepted forms are `&#` followed by decimal digits, `&#x` (or `&#X`)
	# followed by hexadecimal digits, and `&` followed by letters and digits.
	# Return the position of the `;` in `md` or -1 if the entity is rejected.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# at least one digit is expected after `&#x`
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
		end
		out.add ';'
		return pos
	end
end
2015
# A markdown escape token.
class TokenEscape
	super Token

	# Move to the next character and write it as is.
	redef fun emit(v) do
		var escaped_pos = v.current_pos + 1
		v.current_pos = escaped_pos
		v.addc v.current_text[escaped_pos]
	end
end
2025
redef class Text

	# Get the position of the next non-space character.
	#
	# Spaces and line breaks are skipped starting at `start`.
	# Return -1 if the end of `self` is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
			pos += 1
		end
		if pos < length then return pos
		return -1
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Return the position of the character of `nend` that was found
	# or -1 if the end of `self` is reached first.
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if nend.has(c) then break
				out.add c
			end
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Return the position of the character of `nend` that was found
	# or -1 if the end of `self` is reached first.
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if nend.has(c) then break
			out.add c
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Characters of `to` found inside a quoted string (`"` or `'`) do not stop the reading.
	# Inside a string, a backslash and the character that follows it are copied as is.
	#
	# Return the position of the character of `to` that was found
	# or -1 if the end of `self` is reached first.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					out.add c
					pos += 1
					if pos < length then
						# copy the escaped character itself, not the backslash again
						out.add self[pos]
						pos += 1
					end
					continue
				end
				if c == str_char then
					in_str = false
					out.add c
					pos += 1
					continue
				end
			else if c == '"' or c == '\'' then
				# a quote only opens a string outside of a string:
				# the other kind of quote is plain content inside of it
				in_str = true
				str_char = c
			end
			if not in_str and to.has(c) then break
			out.add c
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the opening `<`.
	# In safe mode, the `<` of the tags listed in `html_unsafe_tags` is replaced by `&lt;`.
	# For `<!`, only these two characters are appended and `start + 1` is returned.
	#
	# Return the position of the closing `>` or -1 if no complete tag can be read.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		var pos = 0
		var is_close_tag = false
		if start + 1 >= length then return -1
		if self[start + 1] == '/' then
			is_close_tag = true
			pos = start + 2
		else if self[start + 1] == '!' then
			out.append "<!"
			return start + 1
		else
			is_close_tag = false
			pos = start + 1
		end
		if safe_mode then
			# read the tag name apart to decide how `<` is written
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if tag.is_html_unsafe then
				out.append "&lt;"
			else
				out.append "<"
			end
			if is_close_tag then out.add '/'
			out.append tmp
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# attributes if any
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = self.read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			out.add '>'
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# Reading stops on a space found outside of nested parentheses or on the
	# `)` that closes the parenthesis opened before `start`.
	# Return the position of that character or -1 if the end of `self` is reached first.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		# number of parentheses currently open
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if c == '(' then
					counter += 1
				else if c == ' ' then
					if counter == 1 then break
				else if c == ')' then
					counter -= 1
					if counter == 0 then break
				end
				out.add c
			end
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Reading stops on the `]` that closes the bracket opened before `start`,
	# nested brackets are part of the text.
	# Return the position of that `]` or -1 if the end of `self` is reached first.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		# number of brackets currently open
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '[' then
				counter += 1
			else if c == ']' then
				counter -= 1
				if counter == 0 then break
			end
			out.add c
			pos += 1
		end
		if pos >= length then return -1
		return pos
	end

	# Extract the XML tag name from a XML tag.
	#
	# The name is made of the letters and digits found after `<` or `</`.
	# It is returned in lower case.
	private fun xml_tag: String do
		var tpl = new FlatBuffer
		var pos = 1
		if pos < length and self[1] == '/' then pos += 1
		while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
			tpl.add self[pos]
			pos += 1
		end
		return tpl.write_to_string.to_lower
	end

	# Read and escape the markdown contained in `self`.
	#
	# `pos` is the position of a backslash and `c` the character that follows it.
	# If `c` can be escaped, only `c` is appended to `out` and `pos + 1` is returned.
	# Otherwise the backslash is appended and `pos` is returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
		   c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
		   c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
		   c == '`' or c == '~' or c == '^' then
			out.add c
			return pos + 1
		end
		out.add '\\'
		return pos
	end

	# Is `self` an unsafe HTML element?
	private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

	# Is `self` a HTML block element?
	private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

	# Is `self` a link prefix?
	private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

	# Names of the tags considered as unsafe by `is_html_unsafe`.
	private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

	# Names of the tags considered as block elements by `is_html_block`.
	private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

	# Prefixes accepted by `is_link_prefix`.
	private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2274
redef class String

	# Parse `self` as markdown and return the HTML representation
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do
		return (new MarkdownProcessor).process(self)
	end
end