Merge: nitdoc: Do not overwrite search results.
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
# Parse a markdown string and split it in blocks.
#
# Blocks are then output by a `MarkdownEmitter`.
#
# Usage:
#
#     var proc = new MarkdownProcessor
#     var html = proc.process("**Hello World!**")
#     assert html == "<p><strong>Hello World!</strong></p>\n"
#
# SEE: `String::md_to_html` for a shortcut.
class MarkdownProcessor

	# `MarkdownEmitter` used for output.
	var emitter: MarkdownEmitter is noinit

	init do self.emitter = new MarkdownEmitter(self)

	# Process the markdown `input` string and return the processed output.
	#
	# The per-document state (`link_refs`, `last_link_ref`, `current_line`
	# and `current_block`) is reset first, then `input` is split into lines,
	# the resulting block is recursively split, and the root block kind is
	# handed to `emitter`.
	fun process(input: String): Streamable do
		# init processor
		link_refs.clear
		last_link_ref = null
		current_line = null
		current_block = null
		# parse markdown
		var parent = read_lines(input)
		parent.remove_surrounding_empty_lines
		recurse(parent, false)
		# output processed text
		return emitter.emit(parent.kind)
	end

	# Split `input` string into `MDLine`s and create a parent `MDBlock` with them.
	#
	# Lines are cut on `'\n'` and tabs are expanded with spaces up to the next
	# multiple of 4 columns.
	# Lines recognized by `check_link_ref` are not added to the block.
	private fun read_lines(input: String): MDBlock do
		var block = new MDBlock
		var value = new FlatBuffer
		var i = 0
		while i < input.length do
			value.clear
			var pos = 0
			var eol = false
			while not eol and i < input.length do
				var c = input[i]
				if c == '\n' then
					eol = true
				else if c == '\t' then
					# pad with spaces up to the next tab stop
					var np = pos + (4 - (pos.bin_and(3)))
					while pos < np do
						value.add ' '
						pos += 1
					end
				else
					pos += 1
					value.add c
				end
				# every branch consumes exactly one input character
				i += 1
			end

			var line = new MDLine(value.write_to_string)
			# Skip link refs
			if not check_link_ref(line) then block.add_line line
		end
		return block
	end

	# Check if line is a block link definition.
	#
	# Return `true` if `line` contains a valid link ref (saved into `link_refs`)
	# or the title of the link ref defined on the previous line.
	#
	# A title is only looked for on the line that immediately follows
	# a definition without title: `last_link_ref` is forgotten as soon as
	# another line has been checked, so later lines that merely start with
	# a quote or a parenthesis are kept as regular content.
	private fun check_link_ref(line: MDLine): Bool do
		var md = line.value
		var is_link_ref = false
		var id = new FlatBuffer
		var link = new FlatBuffer
		var comment = new FlatBuffer
		var pos = -1
		if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
			pos = line.leading + 1
			pos = md.read_until(id, pos, ']')
			if not id.is_empty and pos + 2 < md.length then
				if md[pos + 1] == ':' then
					pos += 2
					pos = md.skip_spaces(pos)
					# NOTE(review): `skip_spaces` is defined elsewhere; the guard
					# protects the access below in case it yields a position
					# outside of `md` (e.g. `[id]:` followed only by spaces).
					if pos >= 0 and pos < md.length then
						if md[pos] == '<' then
							pos += 1
							pos = md.read_until(link, pos, '>')
							pos += 1
						else
							pos = md.read_until(link, pos, ' ', '\n')
						end
					end
					if not link.is_empty then
						pos = md.skip_spaces(pos)
						if pos > 0 and pos < md.length then
							# something follows the link: it must be a title
							var c = md[pos]
							if c == '\"' or c == '\'' or c == '(' then
								pos += 1
								if c == '(' then
									pos = md.read_until(comment, pos, ')')
								else
									pos = md.read_until(comment, pos, c)
								end
								if pos > 0 then is_link_ref = true
							end
						else
							is_link_ref = true
						end
					end
				end
			end
		end

		if is_link_ref and not id.is_empty and not link.is_empty then
			var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
			add_link_ref(id.write_to_string, lr)
			# Only a definition without title may be completed by the next line.
			if comment.is_empty then
				last_link_ref = lr
			else
				last_link_ref = null
			end
			return true
		end

		# Not a definition: maybe the title of the previous definition.
		var pending = last_link_ref
		last_link_ref = null
		if pending == null or line.is_empty then return false
		comment = new FlatBuffer
		pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if comment.is_empty then return false
		pending.title = comment.write_to_string
		return true
	end

	# Known link refs
	# This list will be needed during output to expand links.
	var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

	# Last encountered link ref (for multiline definitions)
	#
	# Markdown allows link refs to be defined over two lines:
	# a first line like `[id]: http://example.com/longish/path/to/resource/here`
	# followed by a second line like `"Optional Title Here"`.
	private var last_link_ref: nullable LinkRef = null

	# Add a link ref to the list
	#
	# The `key` is stored in lower case.
	fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref

	# Recursively split a `block`.
	#
	# The block is split according to the type of lines it contains.
	# Some blocks can be split again recursively like lists.
	# The `in_list` mode is used to recurse on list and build
	# nested paragraphs or code blocks.
	#
	# The previous `in_list` mode is restored before returning, and so is
	# `current_block` once lines have been processed.
	fun recurse(root: MDBlock, in_list: Bool) do
		var old_mode = self.in_list
		var old_root = self.current_block
		self.in_list = in_list

		# skip leading empty lines
		var line = root.first_line
		while line != null and line.is_empty do
			line = line.next
			if line == null then
				# Only empty lines: nothing to process, but the list mode
				# must still be handed back to the caller.
				self.in_list = old_mode
				return
			end
		end

		current_line = line
		current_block = root
		# The loop only ends once `current_line` is null
		# (presumably each `Line::process` advances it, defined elsewhere).
		while current_line != null do
			line_kind(current_line.as(not null)).process(self)
		end
		self.in_list = old_mode
		self.current_block = old_root
	end

	# Currently processed line.
	# Used when visiting blocks with `recurse`.
	var current_line: nullable MDLine = null is writable

	# Currently processed block.
	# Used when visiting blocks with `recurse`.
	var current_block: nullable MDBlock = null is writable

	# Is the current recursion in list mode?
	# Used when visiting blocks with `recurse`
	private var in_list = false

	# The type of line.
	#
	# Checks are performed in this order: empty line, indented code, headline
	# (`#`), blockquote (`>`), fence, ruler, unordered list, ordered list, XML,
	# then underlined headlines (depending on the next line).
	# Anything else is a `LineOther`.
	fun line_kind(md: MDLine): Line do
		if md.is_empty then return new LineEmpty
		var leading = md.leading
		if leading > 3 then return new LineCode
		var value = md.value
		# first non-space character of the line
		var first = value[leading]
		if first == '#' then return new LineHeadline
		if first == '>' then return new LineBlockquote

		# length of the line without its leading spaces
		var remaining = value.length - leading
		if remaining - md.trailing > 2 then
			# fences
			if first == '`' and md.count_chars_start('`') >= 3 then return new LineFence
			if first == '~' and md.count_chars_start('~') >= 3 then return new LineFence
			# rulers
			if (first == '*' or first == '-' or first == '_') and
			md.count_chars(first) >= 3 then
				return new LineHR
			end
		end

		if remaining >= 2 and value[leading + 1] == ' ' then
			if first == '*' or first == '-' or first == '+' then return new LineUList
		end

		if remaining >= 3 and first.is_digit then
			# digits followed by ". "
			var i = leading + 1
			while i < value.length and value[i].is_digit do i += 1
			if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
				return new LineOList
			end
		end

		if first == '<' and md.check_html then return new LineXML

		var next = md.next
		if next != null and not next.is_empty then
			if next.count_chars('=') > 0 then return new LineHeadline1
			if next.count_chars('-') > 0 then return new LineHeadline2
		end
		return new LineOther
	end

	# Get the token kind at `pos`.
	#
	# The decision is based on the character at `pos`, the one before and
	# the two after it. Positions outside of `text` are read as spaces.
	fun token_at(text: Text, pos: Int): Token do
		var c0 = ' '
		if pos > 0 then c0 = text[pos - 1]
		var c = text[pos]
		var c1 = ' '
		if pos + 1 < text.length then c1 = text[pos + 1]
		var c2 = ' '
		if pos + 2 < text.length then c2 = text[pos + 2]

		if c == '*' then
			if c1 == '*' then
				if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(pos, c)
				return new TokenEmStar(pos, c)
			end
			if c0 != ' ' or c1 != ' ' then return new TokenEmStar(pos, c)
			return new TokenNone(pos, c)
		end
		if c == '_' then
			if c1 == '_' then
				if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(pos, c)
				return new TokenEmUnderscore(pos, c)
			end
			if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(pos, c)
			return new TokenNone(pos, c)
		end
		if c == '!' then
			if c1 == '[' then return new TokenImage(pos, c)
			return new TokenNone(pos, c)
		end
		if c == '[' then return new TokenLink(pos, c)
		if c == ']' then return new TokenNone(pos, c)
		if c == '`' then
			if c1 == '`' then return new TokenCodeDouble(pos, c)
			return new TokenCodeSingle(pos, c)
		end
		if c == '\\' then
			# a backslash is an escape only in front of these characters
			if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or
			c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\'' or
			c1 == '.' or c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or c1 == '~' or
			c1 == '^' then
				return new TokenEscape(pos, c)
			end
			return new TokenNone(pos, c)
		end
		if c == '<' then return new TokenHTML(pos, c)
		if c == '&' then return new TokenEntity(pos, c)
		if c == '^' then
			# doubled carets are plain text
			if c0 == '^' or c1 == '^' then return new TokenNone(pos, c)
			return new TokenSuper(pos, c)
		end
		return new TokenNone(pos, c)
	end

	# Find the position of a `token` in `text`, searching from `start`.
	#
	# Return the first position whose token has the same type as `token`,
	# or `-1` if there is none.
	fun find_token(text: Text, start: Int, token: Token): Int do
		var pos = start
		var limit = text.length
		while pos < limit do
			if token_at(text, pos).is_same_type(token) then return pos
			pos += 1
		end
		return -1
	end
end
361
# Emit output corresponding to blocks content.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format.
class MarkdownEmitter

	# Processor containing link refs.
	var processor: MarkdownProcessor

	# Decorator used for output.
	# Default is `HTMLDecorator`
	var decorator: Decorator = new HTMLDecorator is writable

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: MarkdownProcessor, decorator: Decorator) do
		init processor
		self.decorator = decorator
	end

	# Output `block` using `decorator`.
	#
	# The output is written in a fresh buffer that is returned once
	# the block has been emitted.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do block.emit_in(self)

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown text starting at `start` and
	# until a token with the same type as `token` is found.
	# Go until the end of text if `token` is null.
	#
	# Return the position of the matching token, or `-1` when the end of
	# `text` is reached.
	#
	# NOTE: `current_text` and `current_pos` are only restored when the end
	# of `text` is reached; when a matching token is found they are left
	# as they are at the match.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var mt = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				# a strong mark also closes the emphasis of the same family
				if mt.is_same_type(token) or
				(token isa TokenEmStar and mt isa TokenStrongStar) or
				(token isa TokenEmUnderscore and mt isa TokenStrongUnderscore) then
					return current_pos
				end
			end
			mt.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new empty buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do buffer_stack.pop

	# Current output buffer, the last one of the stack.
	#
	# The stack must not be empty.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to current buffer.
	#
	# Anything that is not a `Text` is first converted with `write_to_string`.
	fun add(e: Streamable) do
		var out = current_buffer
		if e isa Text then
			out.append e
		else
			out.append e.write_to_string
		end
	end

	# Append `c` to current buffer.
	fun addc(c: Char) do current_buffer.add c

	# Append a "\n" line break.
	fun addn do current_buffer.add '\n'
end
463
# A Link Reference.
# Links that are specified somewhere in the markdown document to be reused as shortcuts.
#
# Example: `[1]: http://example.com/ "Optional title"`
class LinkRef

	# Link href
	var link: String

	# Optional link title
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link to `link` with an optional `title`.
	init with_title(link: String, title: nullable String) do
		self.link = link
		self.title = title
	end
end
487
# A `Decorator` is used to emit markdown into a specific format.
# Default decorator used is `HTMLDecorator`.
#
# Every service receives the `MarkdownEmitter` `v` to write into.
interface Decorator

	# Render a ruler block.
	fun add_ruler(v: MarkdownEmitter, block: BlockRuler) is abstract

	# Render a headline block with corresponding level.
	fun add_headline(v: MarkdownEmitter, block: BlockHeadline) is abstract

	# Render a paragraph block.
	fun add_paragraph(v: MarkdownEmitter, block: BlockParagraph) is abstract

	# Render a code or fence block.
	fun add_code(v: MarkdownEmitter, block: BlockCode) is abstract

	# Render a blockquote.
	fun add_blockquote(v: MarkdownEmitter, block: BlockQuote) is abstract

	# Render an unordered list.
	fun add_unorderedlist(v: MarkdownEmitter, block: BlockUnorderedList) is abstract

	# Render an ordered list.
	fun add_orderedlist(v: MarkdownEmitter, block: BlockOrderedList) is abstract

	# Render a list item.
	fun add_listitem(v: MarkdownEmitter, block: BlockListItem) is abstract

	# Render an emphasized `text`.
	fun add_em(v: MarkdownEmitter, text: Text) is abstract

	# Render a strong `text`.
	fun add_strong(v: MarkdownEmitter, text: Text) is abstract

	# Render a superscript `text`.
	fun add_super(v: MarkdownEmitter, text: Text) is abstract

	# Render a link to `link` labeled `name` with an optional title `comment`.
	fun add_link(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render an image located at `link`, named `name`, with an optional title `comment`.
	fun add_image(v: MarkdownEmitter, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: MarkdownEmitter, name: Text, comment: Text) is abstract

	# Render a code span reading `buffer` between `from` and `to`.
	fun add_span_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render a text and escape it.
	fun append_value(v: MarkdownEmitter, value: Text) is abstract

	# Render code text from `buffer` between `from` and `to` and escape it.
	fun append_code(v: MarkdownEmitter, buffer: Text, from, to: Int) is abstract

	# Render a character escape.
	fun escape_char(v: MarkdownEmitter, char: Char) is abstract

	# Render a line break
	fun add_line_break(v: MarkdownEmitter) is abstract

	# Generate a new html valid id from a `String`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, labeled by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
555
# Class representing a markdown headline.
class HeadLine
	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, level must be in `[1..6]`.
	var level: Int
end
569
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do v.add "<hr/>\n"

	# Register the headline in `headlines` then output it as a `<hN>` tag
	# carrying the id computed by `strip_id`.
	redef fun add_headline(v, block) do
		# save headline
		var txt = block.block.first_line.value
		var id = strip_id(txt)
		var lvl = block.depth
		headlines[id] = new HeadLine(id, txt, lvl)
		# output it
		v.add "<h{lvl} id=\"{id}\">"
		v.emit_in block
		v.add "</h{lvl}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	redef fun add_code(v, block) do
		v.add "<pre><code>"
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_super(v, text) do
		v.add "<sup>"
		v.add text
		v.add "</sup>"
	end

	# Output an `<img>` tag; `link`, `name` and `comment` are escaped.
	# The `title` attribute is only written for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add "/>"
	end

	# Output an `<a>` tag; `link` and `comment` are escaped while `name`
	# is processed as markdown text.
	# The `title` attribute is only written for a non-empty `comment`.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		var has_title = comment != null and not comment.is_empty
		if has_title then
			v.add " title=\""
			append_value(v, comment.as(not null))
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	# Output an `<abbr>` tag; `comment` is escaped while `name`
	# is processed as markdown text.
	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	# Output `text`, each character going through `escape_char`.
	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# Output `c`, replacing `&`, `<`, `>`, `"` and `'` by HTML entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Output the characters of `buffer` in `[from..to[`,
	# replacing `&`, `<` and `>` by HTML entities (quotes are kept as is).
	redef fun append_code(v, buffer, from, to) do
		var i = from
		while i < to do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
			i += 1
		end
	end

	# Build an id from `txt`.
	#
	# Spaces become `_`; letters, digits and `allowed_id_chars` are kept;
	# any other character is dropped.
	# If the id is already a key of `headlines`, a `_1`, `_2`... suffix
	# is appended until an unused key is found.
	redef fun strip_id(txt) do
		# strip id
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var res = b.to_s
		# check for multiple id definitions
		var key = res
		var i = 0
		while headlines.has_key(key) do
			i += 1
			key = "{res}_{i}"
		end
		return key
	end

	# Non-alphanumeric characters kept by `strip_id`.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
752
# A block of markdown lines.
# A `MDBlock` can contain lines and/or sub-blocks.
#
# Lines and sub-blocks are both stored as linked lists
# (`first_line`..`last_line` and `first_block`..`last_block`).
class MDBlock
	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var count = 0
		var block = first_block
		while block != null do
			count += 1
			block = block.next
		end
		return count
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var count = 0
		var line = first_line
		while line != null do
			count += 1
			line = line.next
		end
		return count
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved into the new sub-block,
	# which is appended to the sub-blocks of `self` and returned.
	# `self` keeps the lines following `line`, if any.
	fun split(line: MDLine): MDBlock do
		var block = new MDBlock
		block.first_line = first_line
		block.last_line = line
		# detach the moved lines from the remaining ones
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
		end
		# append the new sub-block, keeping both `next` and `prev` links up to date
		if first_block == null then
			first_block = block
		else
			var tail = last_block.as(not null)
			tail.next = block
			block.prev = tail
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	#
	# The `prev_empty` and `next_empty` flags of the linked lines are updated.
	fun add_line(line: MDLine) do
		var tail = last_line
		if tail == null then
			first_line = line
		else
			tail.next_empty = line.is_empty
			line.prev_empty = tail.is_empty
			line.prev = tail
			tail.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	#
	# `line` is unlinked from its neighbours; `first_line` and `last_line`
	# are updated if needed.
	fun remove_line(line: MDLine) do
		var before = line.prev
		var after = line.next
		if before == null then
			first_line = after
		else
			before.next = after
		end
		if after == null then
			last_line = before
		else
			after.prev = before
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			removed = true
			line = first_line
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			removed = true
			line = last_line
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var leading_removed = remove_leading_empty_lines
		var trailing_removed = remove_trailing_empty_lines
		return leading_removed or trailing_removed
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# the value changed: recompute its leading spaces
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line contributes its `text` (nothing for an empty line)
	# followed by a `"\n"`.
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then
				text.append line.text
			end
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
922
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default, only the content of the block is emitted.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`, lines or blocks.
	#
	# Surrounding empty lines are removed first; the lines are emitted
	# if some remain, the sub-blocks otherwise.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# Lines are first gathered, without their leading and trailing spaces,
	# in a temporary buffer that is then emitted as markdown text.
	# A line ending with at least two spaces produces a line break.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a start and a count (not an end index):
				# the count must exclude both leading and trailing spaces.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var block = self.block.first_block
		while block != null do
			block.kind.emit(v)
			block = block.next
		end
	end
end
970
# A block without any markdown specificities.
#
# Nothing is redefined: the implementation is the one inherited from `Block`,
# this class is only used for typing purposes.
class BlockNone
	super Block
end
978
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do v.decorator.add_blockquote(v, self)

	# Remove blockquote markers.
	#
	# For each non-empty line of `block` starting with `>`, the marker and
	# at most one following space are removed, then `leading` is recomputed.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				# index of the first kept character
				var rem = line.leading + 1
				if rem < line.value.length and line.value[rem] == ' ' then
					rem += 1
				end
				line.value = line.value.substring_from(rem)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1004
# A markdown code block.
class BlockCode
	super Block

	# Number of chars to skip at the beginning of the line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do v.decorator.add_code(v, self)

	# Emit each line escaped as code, skipping its first `line_start` chars.
	#
	# Every line, even an empty one, is followed by a line break.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end
1027
# A markdown code-fence block.
#
# The implementation is inherited from `BlockCode`, only `line_start` differs;
# this class is mainly used for typing purposes.
class BlockFence
	super BlockCode

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1038
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from the first line of `block`.
	#
	# Does nothing if `depth` is already set.
	# Otherwise the leading `#` are counted to set `depth` (6 at most), then
	# the leading marks, the closing `#` and the surrounding spaces are removed
	# from the line.
	# A line left without any text is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			# Walk back over the closing marks, never going before `start`
			# (a line like `# #` has no text between its marks).
			var nend = line.value.length - line.trailing - 1
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			if nend < start then
				line.is_empty = true
			else
				line.value = line.value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		depth = level.min(6)
	end
end
1075
# A markdown list item block.
#
# Rendered with `Decorator::add_listitem`.
class BlockListItem
	super Block

	redef fun emit(v) do v.decorator.add_listitem(v, self)
end
1082
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# Starting from the second line, a new item begins on each list line and
	# on each non-empty, non-indented line that follows an empty line.
	# The lines before such a line are split into a `BlockListItem`; the
	# remaining lines form the last item.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList or
				(not line.is_empty and line.prev_empty and line.leading == 0)
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# If at least one list item of `block` contains a `BlockParagraph`, every
	# `BlockNone` found in the list items becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# look for a paragraph in any list item
		var has_paragraph = false
		var item = block.first_block
		while item != null and not has_paragraph do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null and not has_paragraph do
					if child.kind isa BlockParagraph then has_paragraph = true
					child = child.next
				end
			end
			item = item.next
		end
		if not has_paragraph then return

		# turn plain blocks of list items into paragraphs
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1140
# A markdown ordered list.
#
# Rendered with `Decorator::add_orderedlist`.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do v.decorator.add_orderedlist(v, self)
end
1147
# A markdown unordered list.
#
# Rendered with `Decorator::add_unorderedlist`.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do v.decorator.add_unorderedlist(v, self)
end
1154
# A markdown paragraph block.
#
# Rendered with `Decorator::add_paragraph`.
class BlockParagraph
	super Block

	redef fun emit(v) do v.decorator.add_paragraph(v, self)
end
1161
# A markdown ruler.
#
# Rendered with `Decorator::add_ruler`.
class BlockRuler
	super Block

	redef fun emit(v) do v.decorator.add_ruler(v, self)
end
1168
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Emit the raw value of each line, without any escaping.
	#
	# Every line, even an empty one, is followed by a line break.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				v.add line.value
			end
			v.addn
			line = line.next
		end
	end
end
1182
1183 # A markdown line.
1184 class MDLine
1185
# Text contained in this line.
var value: String is writable

# Is this line empty?
# Lines containing only spaces are considered empty.
var is_empty: Bool = true is writable

# Previous line in `MDBlock` or null if first line.
var prev: nullable MDLine = null is writable

# Next line in `MDBlock` or null if last line.
var next: nullable MDLine = null is writable

# Is the previous line empty?
var prev_empty: Bool = false is writable

# Is the next line empty?
var next_empty: Bool = false is writable
1204
1205 # Initialize a new MDLine from its string value
1206 init do
1207 self.leading = process_leading
1208 if leading != value.length then
1209 self.is_empty = false
1210 self.trailing = process_trailing
1211 end
1212 end
1213
1214 # Set `value` as an empty String and update `leading`, `trailing` and is_`empty`.
1215 fun clear do
1216 value = ""
1217 leading = 0
1218 trailing = 0
1219 is_empty = true
1220 if prev != null then prev.next_empty = true
1221 if next != null then next.prev_empty = true
1222 end
1223
1224 # Number or leading spaces on this line.
1225 var leading: Int = 0 is writable
1226
1227 # Compute `leading` depending on `value`.
1228 fun process_leading: Int do
1229 var count = 0
1230 var value = self.value
1231 while count < value.length and value[count] == ' ' do count += 1
1232 if leading == value.length then clear
1233 return count
1234 end
1235
1236 # Number of trailing spaces on this line.
1237 var trailing: Int = 0 is writable
1238
1239 # Compute `trailing` depending on `value`.
1240 fun process_trailing: Int do
1241 var count = 0
1242 var value = self.value
1243 while value[value.length - count - 1] == ' ' do
1244 count += 1
1245 end
1246 return count
1247 end
1248
1249 # Count the amount of `ch` in this line.
1250 # Return A value > 0 if this line only consists of `ch` end spaces.
1251 fun count_chars(ch: Char): Int do
1252 var count = 0
1253 for c in value do
1254 if c == ' ' then
1255 continue
1256 end
1257 if c == ch then
1258 count += 1
1259 continue
1260 end
1261 count = 0
1262 break
1263 end
1264 return count
1265 end
1266
1267 # Count the amount of `ch` at the start of this line ignoring spaces.
1268 fun count_chars_start(ch: Char): Int do
1269 var count = 0
1270 for c in value do
1271 if c == ' ' then
1272 continue
1273 end
1274 if c == ch then
1275 count += 1
1276 else
1277 break
1278 end
1279 end
1280 return count
1281 end
1282
1283 # Last XML line if any.
1284 private var xml_end_line: nullable MDLine = null
1285
1286 # Does `value` contains valid XML markup?
1287 private fun check_html: Bool do
1288 var tags = new Array[String]
1289 var tmp = new FlatBuffer
1290 var pos = leading
1291 if pos + 1 < value.length and value[pos + 1] == '!' then
1292 if read_xml_comment(self, pos) > 0 then return true
1293 end
1294 pos = value.read_xml(tmp, pos, false)
1295 var tag: String
1296 if pos > -1 then
1297 tag = tmp.xml_tag
1298 if not tag.is_html_block then
1299 return false
1300 end
1301 if tag == "hr" then
1302 xml_end_line = self
1303 return true
1304 end
1305 tags.add tag
1306 var line: nullable MDLine = self
1307 while line != null do
1308 while pos < line.value.length and line.value[pos] != '<' do
1309 pos += 1
1310 end
1311 if pos >= line.value.length then
1312 if pos - 2 >= 0 and line.value[pos - 2] == '/' then
1313 tags.pop
1314 if tags.is_empty then
1315 xml_end_line = line
1316 break
1317 end
1318 end
1319 line = line.next
1320 pos = 0
1321 else
1322 tmp = new FlatBuffer
1323 var new_pos = line.value.read_xml(tmp, pos, false)
1324 if new_pos > 0 then
1325 tag = tmp.xml_tag
1326 if tag.is_html_block and not tag == "hr" then
1327 if tmp[1] == '/' then
1328 if tags.last != tag then
1329 return false
1330 end
1331 tags.pop
1332 else
1333 tags.add tag
1334 end
1335 end
1336 if tags.is_empty then
1337 xml_end_line = line
1338 break
1339 end
1340 pos = new_pos
1341 else
1342 pos += 1
1343 end
1344 end
1345 end
1346 return tags.is_empty
1347 end
1348 return false
1349 end
1350
1351 # Read a XML comment.
1352 # Used by `check_html`.
1353 private fun read_xml_comment(first_line: MDLine, start: Int): Int do
1354 var line: nullable MDLine = first_line
1355 if start + 3 < line.value.length then
1356 if line.value[2] == '-' and line.value[3] == '-' then
1357 var pos = start + 4
1358 while line != null do
1359 while pos < line.value.length and line.value[pos] != '-' do
1360 pos += 1
1361 end
1362 if pos == line.value.length then
1363 line = line.next
1364 pos = 0
1365 else
1366 if pos + 2 < line.value.length then
1367 if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
1368 first_line.xml_end_line = line
1369 return pos + 3
1370 end
1371 end
1372 pos += 1
1373 end
1374 end
1375 end
1376 end
1377 return -1
1378 end
1379
1380 # Extract the text of `self` without leading and trailing.
1381 fun text: String do return value.substring(leading, value.length - trailing)
1382 end
1383
# The kind of a markdown line.
#
# Each kind knows how to consume the lines it starts with.
interface Line

	# Parse the line found at the current position of `v`.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1391
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the processor moves on to the following one.
	redef fun process(v) do
		var following = v.current_line.next
		v.current_line = following
	end
end
1400
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the following plain lines into a new block.
	#
	# The block ends at the first empty line, at the first line starting
	# another construct, or at the end of the current block.
	# Its kind is `BlockNone` or `BlockParagraph` (see below).
	redef fun process(v) do
		var cursor = v.current_line
		var after_blank = cursor.prev_empty
		# go to block end
		while cursor != null and not cursor.is_empty do
			var kind = v.line_kind(cursor)
			if v.in_list and kind isa LineList then break
			if kind isa LineCode or kind isa LineFence or
			   kind isa LineHeadline or kind isa LineHeadline1 or kind isa LineHeadline2 or
			   kind isa LineHR or kind isa LineBlockquote or kind isa LineXML then
				break
			end
			cursor = cursor.next
		end
		var stopped_on_blank = cursor != null and cursor.is_empty
		# build block
		var block: MDBlock
		if cursor == null then
			# everything up to the end of the current block
			block = v.current_block.split(v.current_block.last_line.as(not null))
		else if stopped_on_blank then
			# the empty line is taken with the block
			block = v.current_block.split(cursor)
		else
			# stop right before the line starting another construct
			block = v.current_block.split(cursor.prev.as(not null))
		end
		# In a list, text that is not surrounded by empty lines stays
		# a `BlockNone`; anything else is a paragraph.
		if v.in_list and not stopped_on_blank and not after_blank then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1450
# A line of markdown code.
class LineCode
	super Line

	# Gather consecutive code lines (and the empty lines between them)
	# into a `BlockCode`.
	redef fun process(v) do
		# lookup block end
		var cursor = v.current_line
		while cursor != null and (cursor.is_empty or v.line_kind(cursor) isa LineCode) do
			cursor = cursor.next
		end
		# split at block end line
		var last: nullable MDLine
		if cursor != null then
			last = cursor.prev
		else
			last = v.current_block.last_line
		end
		var block = v.current_block.split(last.as(not null))
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1473
# A line of raw XML.
class LineXML
	super Line

	# Isolate the lines from the current one to its `xml_end_line`
	# into a `BlockXML`.
	redef fun process(v) do
		var first = v.current_line
		# detach whatever precedes the XML
		var before = first.prev
		if before != null then
			v.current_block.split(before)
		end
		var xml = v.current_block.split(first.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1488
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote into a `BlockQuote` and parse its content.
	#
	# The quote stops at the first non-empty, non-indented line that follows
	# an empty line and is not itself a blockquote line.
	redef fun process(v) do
		# go to bquote end
		var cursor = v.current_line
		while cursor != null do
			if not cursor.is_empty and cursor.prev_empty and cursor.leading == 0 then
				# the kind is only computed for candidate end lines
				if not (v.line_kind(cursor) isa LineBlockquote) then break
			end
			cursor = cursor.next
		end
		# build sub block
		var last: nullable MDLine
		if cursor != null then
			last = cursor.prev
		else
			last = v.current_block.last_line
		end
		var quote = v.current_block.split(last.as(not null))
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		v.current_line = cursor
		v.recurse(quote, false)
		v.current_line = v.current_block.first_line
	end
end
1518
# A markdown ruler line.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	redef fun process(v) do
		var ruler_line = v.current_line
		# detach whatever precedes the ruler
		if ruler_line.prev != null then
			v.current_block.split(ruler_line.prev.as(not null))
		end
		var ruler = v.current_block.split(ruler_line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1532
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the closing fence (or up to the end of the
	# current block if there is none) into a `BlockFence`.
	redef fun process(v) do
		# go to fence end
		var cursor = v.current_line.next
		while cursor != null and not (v.line_kind(cursor) isa LineFence) do
			cursor = cursor.next
		end
		# step over the closing fence, if one was found
		if cursor != null then cursor = cursor.next
		# build fence block
		var last: nullable MDLine
		if cursor != null then
			last = cursor.prev
		else
			last = v.current_block.last_line
		end
		var fence = v.current_block.split(last.as(not null))
		fence.kind = new BlockFence(fence)
		# the opening fence is not part of the content
		fence.first_line.clear
		# neither is the closing one, when present
		var closing = fence.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			fence.last_line.clear
		end
		fence.remove_surrounding_empty_lines
		v.current_line = cursor
	end
end
1564
# A markdown headline.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline`.
	redef fun process(v) do
		var title_line = v.current_line
		# detach whatever precedes the headline
		var before = title_line.prev
		if before != null then
			v.current_block.split(before)
		end
		var headline = v.current_block.split(title_line.as(not null))
		var kind = new BlockHeadline(headline)
		headline.kind = kind
		kind.transform_headline(headline)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1581
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var title_line = v.current_line
		# detach whatever precedes the headline
		var before = title_line.prev
		if before != null then
			v.current_block.split(before)
		end
		title_line.next.clear
		var headline = v.current_block.split(title_line.as(not null))
		var kind = new BlockHeadline(headline)
		kind.depth = 1
		kind.transform_headline(headline)
		headline.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1600
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var title_line = v.current_line
		# detach whatever precedes the headline
		var before = title_line.prev
		if before != null then
			v.current_block.split(before)
		end
		title_line.next.clear
		var headline = v.current_block.split(title_line.as(not null))
		var kind = new BlockHeadline(headline)
		kind.depth = 2
		kind.transform_headline(headline)
		headline.kind = kind
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1619
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list into a list block and parse its items.
	#
	# The list stops at the first non-empty, non-indented line that follows
	# an empty line and is not itself a list line.
	redef fun process(v) do
		# go to list end
		var cursor = v.current_line
		while cursor != null do
			var cursor_kind = v.line_kind(cursor)
			var has_text = not cursor.is_empty
			if has_text and cursor.prev_empty and cursor.leading == 0 and
			   not (cursor_kind isa LineList) then
				break
			end
			cursor = cursor.next
		end
		# build list block
		var last: nullable MDLine
		if cursor != null then
			last = cursor.prev
		else
			last = v.current_block.last_line
		end
		var list = v.current_block.split(last.as(not null))
		var kind = block_kind(list)
		list.kind = kind
		# reset the boundary flags before and after trimming the block
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# parse each sub block of the list
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = cursor
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1665
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var raw = line.value
		var dot = raw.index_of('.')
		return raw.substring_from(dot + 2)
	end
end
1676
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two characters after the leading spaces of the line.
	redef fun extract_value(line) do
		var content_start = line.leading + 2
		return line.value.substring_from(content_start)
	end
end
1687
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Position of `self` in markdown input.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token using `MarkdownEmitter::decorator`.
	#
	# By default, `char` is simply added to the output of `v`.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1701
# A token without a specific meaning.
#
# It keeps the default `emit` inherited from `Token`.
class TokenNone
	super Token
end
1706
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
1723
# An emphasis token written with a star.
class TokenEmStar
	super TokenEm
end
1728
# An emphasis token written with an underscore.
class TokenEmUnderscore
	super TokenEm
end
1733
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end
1750
# A strong token written with stars.
class TokenStrongStar
	super TokenStrong
end
1755
# A strong token written with underscores.
class TokenStrongUnderscore
	super TokenStrong
end
1760
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces surrounding the code are dropped, and nothing is emitted for
	# a span only made of spaces.
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var from = pos + next_pos + 1
		var to = v.processor.find_token(v.current_text.as(not null), from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		# trim leading spaces
		while from < to and v.current_text[from] == ' ' do
			from += 1
		end
		if from >= to then return
		# trim trailing spaces
		while v.current_text[to - 1] == ' ' do
			to -= 1
		end
		v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
	end

	# Number of additional characters used by the delimiter of this token.
	private fun next_pos: Int is abstract
end
1783
# A span code token.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end
1790
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end
1797
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the construct as a link or an image if it is valid.
	#
	# Otherwise `char` is emitted as is.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parse the construct starting at `start` in the current text of `v`
	# and fill `name`, `link`, `comment` and `is_abbrev` accordingly.
	# Three forms are handled after the `[name]` part: an inline
	# `(address "title")`, a `[reference]`, or nothing (the name itself is
	# then looked up in the link references of the processor).
	#
	# Return the position of the last character of the construct,
	# or -1 if the construct is not a valid link.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		# Skip the opening delimiter: one character for links, two otherwise.
		var pos: Int
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# End of text right after the name: implicit reference.
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# Inline form: `(address "title")`
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# The text may end right after a `<address>`.
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# `skip_spaces` returns -1 when the end of the text is reached.
				if pos < start then return -1
				if md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# Reference form: `[id]`, an empty id means "use the name".
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# Something else follows the name: implicit reference.
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
1916
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is flagged as one and has a title,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var as_abbr = is_abbrev and comment != null
		if not as_abbr then
			v.decorator.add_link(v, link.as(not null), name.as(not null), comment)
			return
		end
		v.decorator.add_abbr(v, name.as(not null), comment.as(not null))
	end
end
1929
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	# Emit the image through the decorator's `add_image`.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var label = name.as(not null)
		v.decorator.add_image(v, address, label, comment)
	end
end
1938
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup starting at the current position of `v`.
	#
	# If it is neither an auto link nor readable XML, `char` is escaped.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links are emitted directly through the decorator of `v`;
	# inline XML is appended to `out`.
	# Return the position of the end of the markup, or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var address = new FlatBuffer
		var pos = md.read_until(address, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and address.is_link_prefix then
			pos = md.read_until(address, pos, '>')
			if pos != -1 then
				var link = address.write_to_string
				v.decorator.add_link(v, link, link, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
1976
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity starting at `pos` as is when it is well formed.
	#
	# Otherwise `char` is escaped.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Read `md` from `start` up to the next `;` into `out` and check that
	# the content is either a decimal entity (`&#123`), an hexadecimal
	# entity (`&#x1F`) or a name made of letters and digits.
	#
	# Return the position of the `;` and complete `out` with it,
	# or return -1 if the entity is not well formed.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# hexadecimal entity: at least one digit is required
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				# decimal entity
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# named entity
			# TODO check entity is valid
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
		end
		out.add ';'
		return pos
	end
end
2030
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape character and emit the character that follows it.
	redef fun emit(v) do
		var escaped_pos = v.current_pos + 1
		v.current_pos = escaped_pos
		v.addc v.current_text[escaped_pos]
	end
end
2040
# A markdown super token.
class TokenSuper
	super Token

	# Emit the text up to the matching token through the decorator's `add_super`.
	#
	# If no matching token is found, `char` is emitted as is.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_super(v, content)
		v.current_pos = stop
	end
end
2057
# Low-level reading services used by the markdown parser.
#
# The `read_*` methods share the same contract: they copy characters of
# `self` into `out` starting at a given position and return the position
# where they stopped, or -1 if the end of `self` was reached first.
redef class Text

	# Get the position of the next non-space character.
	#
	# Spaces and line feeds are skipped.
	# Return -1 if the end of `self` is reached.
	private fun skip_spaces(start: Int): Int do
		var pos = start
		while pos > -1 and pos < length and (self[pos] == ' ' or self[pos] == '\n') do
			pos += 1
		end
		if pos < length then return pos
		return -1
	end

	# Read `self` until `nend` and append it to the `out` buffer.
	# Escape markdown special chars.
	#
	# Return the position of the first character of `nend` found,
	# or -1 if none is found.
	private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				if nend.has(c) then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as raw text until `nend` and append it to the `out` buffer.
	# No escape is made.
	#
	# Return the position of the first character of `nend` found,
	# or -1 if none is found.
	private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
		var pos = start
		while pos < length do
			var c = self[pos]
			if nend.has(c) then break
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML until `to` and append it to the `out` buffer.
	#
	# Characters of `to` found inside a quoted string (`"` or `'`) do not
	# stop the reading, and a backslash inside a string protects the
	# character that follows it.
	#
	# Return the position of the first character of `to` found,
	# or -1 if none is found.
	private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
		var pos = from
		var in_str = false
		var str_char: nullable Char = null
		while pos < length do
			var c = self[pos]
			if in_str then
				if c == '\\' then
					out.add c
					pos += 1
					if pos < length then
						# copy the escaped character itself,
						# not the backslash a second time
						out.add self[pos]
						pos += 1
					end
					continue
				end
				if c == str_char then
					in_str = false
					out.add c
					pos += 1
					continue
				end
			end
			if c == '"' or c == '\'' then
				in_str = true
				str_char = c
			end
			if not in_str then
				if to.has(c) then break
			end
			out.add c
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read `self` as XML and append it to the `out` buffer.
	# Safe mode can be activated to limit reading to valid xml.
	#
	# `start` is the position of the tag opening.
	# In safe mode, the opening of tags listed in `html_unsafe_tags`
	# is written as `&lt;`.
	#
	# Return the position of the closing `>` of the tag, `start + 1` for
	# a `<!` construct, or -1 if no complete tag can be read.
	private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
		var pos = 0
		var is_close_tag = false
		if start + 1 >= length then return -1
		if self[start + 1] == '/' then
			is_close_tag = true
			pos = start + 2
		else if self[start + 1] == '!' then
			out.append "<!"
			return start + 1
		else
			is_close_tag = false
			pos = start + 1
		end
		if safe_mode then
			# read the tag name apart to check it before writing it
			var tmp = new FlatBuffer
			pos = read_xml_until(tmp, pos, ' ', '/', '>')
			if pos == -1 then return -1
			var tag = tmp.write_to_string.trim.to_lower
			if tag.is_html_unsafe then
				out.append "&lt;"
			else
				out.append "<"
			end
			if is_close_tag then out.add '/'
			out.append tmp
		else
			out.add '<'
			if is_close_tag then out.add '/'
			pos = read_xml_until(out, pos, ' ', '/', '>')
		end
		if pos == -1 then return -1
		# read the attributes, if any
		pos = read_xml_until(out, pos, '/', '>')
		if pos == -1 then return -1
		if self[pos] == '/' then
			out.append " /"
			pos = self.read_xml_until(out, pos + 1, '>')
			if pos == -1 then return -1
		end
		if self[pos] == '>' then
			out.add '>'
			return pos
		end
		return -1
	end

	# Read a markdown link address and append it to the `out` buffer.
	#
	# The address ends at the first space found outside nested parentheses
	# or at the closing parenthesis of the link.
	# Return the position of that character, or -1 if none is found.
	private fun read_md_link(out: FlatBuffer, start: Int): Int do
		var pos = start
		# depth of parentheses, the link itself being the first level
		var counter = 1
		while pos < length do
			var c = self[pos]
			if c == '\\' and pos + 1 < length then
				pos = escape(out, self[pos + 1], pos)
			else
				var end_reached = false
				if c == '(' then
					counter += 1
				else if c == ' ' then
					if counter == 1 then end_reached = true
				else if c == ')' then
					counter -= 1
					if counter == 0 then end_reached = true
				end
				if end_reached then break
				out.add c
			end
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Read a markdown link text and append it to the `out` buffer.
	#
	# Nested brackets are kept in the text.
	# Return the position of the closing bracket, or -1 if none is found.
	private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
		var pos = start
		# depth of brackets, the link text itself being the first level
		var counter = 1
		while pos < length do
			var c = self[pos]
			var end_reached = false
			if c == '[' then
				counter += 1
				out.add c
			else if c == ']' then
				counter -= 1
				if counter == 0 then
					end_reached = true
				else
					out.add c
				end
			else
				out.add c
			end
			if end_reached then break
			pos += 1
		end
		if pos == length then return -1
		return pos
	end

	# Extract the XML tag name from a XML tag.
	#
	# `self` is expected to start with the tag opening.
	# The name is returned in lower case.
	private fun xml_tag: String do
		var tpl = new FlatBuffer
		var pos = 1
		# skip the `/` of closing tags
		if pos < length and self[1] == '/' then pos += 1
		while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
			tpl.add self[pos]
			pos += 1
		end
		return tpl.write_to_string.to_lower
	end

	# Read and escape the markdown contained in `self`.
	#
	# `c` is the character following the backslash found at `pos`.
	# If `c` can be escaped, it is added to `out` and `pos + 1` is returned.
	# Otherwise, the backslash is added to `out` and `pos` is returned.
	private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
		if c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
		   c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
		   c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or c == '!' or
		   c == '`' or c == '~' or c == '^' then
			out.add c
			return pos + 1
		end
		out.add '\\'
		return pos
	end

	# Is `self` an unsafe HTML element?
	private fun is_html_unsafe: Bool do return html_unsafe_tags.has(self.write_to_string)

	# Is `self` a HTML block element?
	private fun is_html_block: Bool do return html_block_tags.has(self.write_to_string)

	# Is `self` a link prefix?
	private fun is_link_prefix: Bool do return html_link_prefixes.has(self.write_to_string)

	# Names of the HTML elements considered unsafe.
	private fun html_unsafe_tags: Array[String] do return once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]

	# Names of the HTML elements considered as blocks.
	private fun html_block_tags: Array[String] do return once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]

	# Prefixes recognized at the start of auto links.
	private fun html_link_prefixes: Array[String] do return once ["http", "https", "ftp", "ftps"]
end
2306
redef class String

	# Parse `self` as markdown and return the HTML representation.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Streamable do
		return (new MarkdownProcessor).process(self)
	end
end