854eed5302fc71afc376209cffa1cb9c78015bf1
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # `MarkdownEmitter` used for output.
34 var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # ~~~md
45 # This is a paragraph
46 # * and this is not a list
47 # ~~~
48 #
49 # Will produce:
50 #
51 # ~~~html
52 # <p>This is a paragraph
53 # * and this is not a list</p>
54 # ~~~
55 #
56 # When using extended mode this changes to:
57 #
58 # ~~~html
59 # <p>This is a paragraph</p>
60 # <ul>
61 # <li>and this is not a list</li>
62 # </ul>
63 # ~~~
64 #
65 # * Fences code blocks
66 #
67 # If you don't want to indent all your code with 4 spaces,
68 # you can wrap your code in ``` ``` ``` or `~~~`.
69 #
70 # Here's an example:
71 #
72 # ~~~md
73 # fun test do
74 # print "Hello World!"
75 # end
76 # ~~~
77 #
78 # * Code blocks meta
79 #
80 # If you want to use syntax highlighting tools, most of them need to know what kind
81 # of language they are highlighting.
82 # You can add an optional language identifier after the fence declaration to output
83 # it in the HTML render.
84 #
85 # ```nit
86 # import markdown
87 #
88 # print "# Hello World!".md_to_html
89 # ```
90 #
91 # Becomes
92 #
93 # ~~~html
94 # <pre class="nit"><code>import markdown
95 #
96 # print "Hello World!".md_to_html
97 # </code></pre>
98 # ~~~
99 #
100 # * Underscores (Emphasis)
101 #
102 # Underscores in the middle of a word like:
103 #
104 # ~~~md
105 # Con_cat_this
106 # ~~~
107 #
108 # normally produces this:
109 #
110 # ~~~html
111 # <p>Con<em>cat</em>this</p>
112 # ~~~
113 #
114 # With extended mode they don't result in emphasis.
115 #
116 # ~~~html
117 # <p>Con_cat_this</p>
118 # ~~~
119 #
120 # * Strikethrough
121 #
122 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
123 # a strikethrough span is marked with `~~`.
124 #
125 # ~~~md
126 # ~~Mistaken text.~~
127 # ~~~
128 #
129 # becomes
130 #
131 # ~~~html
132 # <del>Mistaken text.</del>
133 # ~~~
134 var ext_mode = true
135
init do
	# The default emitter is created with a reference back to this processor.
	self.emitter = new MarkdownEmitter(self)
end
137
138 # Process the markdown `input` string and return the processed output.
fun process(input: String): Writable do
	# Forget everything recorded while processing a previous input.
	link_refs.clear
	last_link_ref = null
	current_line = null
	current_block = null

	# Build the block tree: read the lines, trim the empty borders,
	# then classify and split the lines recursively.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)

	# Render the tree through the emitter.
	var output = emitter.emit(root.kind)
	return output
end
152
153 # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var buf = new FlatBuffer
	var len = input.length
	var cursor = 0
	var line_no = 0

	while cursor < len do
		buf.clear
		# `width` is the length of the expanded line built so far,
		# `consumed` counts the raw characters read for this line (newline included).
		var width = 0
		var consumed = 0
		loop
			if cursor >= len then break
			var c = input[cursor]
			cursor += 1
			consumed += 1
			if c == '\n' then break
			if c == '\t' then
				# Replace the tab by spaces up to the next multiple of 4.
				var stop = width + (4 - width.bin_and(3))
				while width < stop do
					buf.add ' '
					width += 1
				end
			else
				buf.add c
				width += 1
			end
		end
		line_no += 1

		var loc = new MDLocation(line_no, 1, line_no, consumed)
		var line = new MDLine(loc, buf.write_to_string)
		# Lines accepted by `check_link_ref` are recorded there and not kept in the block.
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
194
195 # Check if line is a block link definition.
196 # Return `true` if line contains a valid link ref and save it into `link_refs`.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	var pos = -1
	# A definition starts with `[id]:` indented by at most 3 spaces.
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		pos = md.read_until(id, line.leading + 1, ']')
		if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
			pos = md.skip_spaces(pos + 2)
			# Only index `md` when a character was actually found after the colon.
			# NOTE(review): `skip_spaces` is defined outside this block; the title
			# step below already range-checks its result, so the same check is
			# applied here instead of indexing blindly (e.g. for `[id]:   `).
			if pos >= 0 and pos < md.length then
				if md[pos] == '<' then
					# Link written as `<url>`.
					pos += 1
					pos = md.read_until(link, pos, '>')
					pos += 1
				else
					# Bare link: read up to the next space or end of line.
					pos = md.read_until(link, pos, ' ', '\n')
				end
			end
			if not link.is_empty then
				pos = md.skip_spaces(pos)
				if pos > 0 and pos < md.length then
					# Something follows the link: it must be a delimited title.
					var c = md[pos]
					if c == '\"' or c == '\'' or c == '(' then
						pos += 1
						if c == '(' then
							pos = md.read_until(comment, pos, ')')
						else
							pos = md.read_until(comment, pos, c)
						end
						if pos > 0 then is_link_ref = true
					end
				else
					# Nothing after the link: definition without title.
					is_link_ref = true
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Remember title-less refs: the title may be alone on the next line.
		if comment.is_empty then last_link_ref = lr
		return true
	end

	# Not a definition: the line may still hold the title of the last title-less ref.
	comment = new FlatBuffer
	if not line.is_empty and last_link_ref != null then
		pos = line.leading
		var c = md[pos]
		if c == '\"' or c == '\'' or c == '(' then
			pos += 1
			if c == '(' then
				pos = md.read_until(comment, pos, ')')
			else
				pos = md.read_until(comment, pos, c)
			end
		end
		if not comment.is_empty then last_link_ref.title = comment.write_to_string
	end
	return not comment.is_empty
end
262
263 # Known link refs
264 # This list will be needed during output to expand links.
var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]

# Last link ref that was registered without a title.
#
# Markdown allows the title of a link ref to stand alone on the next line:
#
# ~~~md
# [id]: http://example.com/longish/path/to/resource/here
#     "Optional Title Here"
# ~~~
#
# `check_link_ref` attaches such a title to this link ref.
private var last_link_ref: nullable LinkRef = null

# Register `ref` under `key`.
#
# The key is stored in lower case.
fun add_link_ref(key: String, ref: LinkRef) do
	link_refs[key.to_lower] = ref
end
280
281 # Recursively split a `block`.
282 #
283 # The block is split according to the type of lines it contains.
284 # Some blocks, like lists, can be split again recursively.
285 # The `in_list` mode is used to recurse on list and build
286 # nested paragraphs or code blocks.
fun recurse(root: MDBlock, in_list: Bool) do
	# Save the visit state so a nested call does not disturb its caller.
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip the leading empty lines of `root`.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: nothing to process.
			# The list mode still has to be restored before leaving,
			# otherwise the caller keeps running with the callee's mode.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# Each line kind consumes one or more lines and moves `current_line` forward.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end

# Currently processed line.
# Used when visiting blocks with `recurse`.
var current_line: nullable MDLine = null is writable

# Currently processed block.
# Used when visiting blocks with `recurse`.
var current_block: nullable MDBlock = null is writable

# Is the current recursion in list mode?
# Used when visiting blocks with `recurse`
private var in_list = false
318
319 # The type of line.
320 # see: `md_line_*`
fun line_kind(md: MDLine): Line do
	var text = md.value
	var lead = md.leading
	var trail = md.trailing
	if md.is_empty then return new LineEmpty
	# Four or more leading spaces mean an indented code line.
	if lead > 3 then return new LineCode

	# First significant character of the line.
	var head = text[lead]
	if head == '#' then return new LineHeadline
	if head == '>' then return new LineBlockquote

	# Fences and rulers need at least 3 characters between the surrounding spaces.
	if text.length - lead - trail > 2 then
		if ext_mode then
			if head == '`' and md.count_chars_start('`') >= 3 then return new LineFence
			if head == '~' and md.count_chars_start('~') >= 3 then return new LineFence
		end
		if (head == '*' or head == '-' or head == '_') and md.count_chars(head) >= 3 then
			return new LineHR
		end
	end

	# Unordered list item: a marker followed by a space.
	var rest = text.length - lead
	if rest >= 2 and text[lead + 1] == ' ' then
		if head == '*' or head == '-' or head == '+' then return new LineUList
	end

	# Ordered list item: digits followed by ". ".
	if rest >= 3 and head.is_digit then
		var i = lead + 1
		while i < text.length and text[i].is_digit do i += 1
		if i + 1 < text.length and text[i] == '.' and text[i + 1] == ' ' then
			return new LineOList
		end
	end

	if head == '<' and md.check_html then return new LineXML

	# A following line of `=` or `-` turns this line into a headline.
	var following = md.next
	if following != null and not following.is_empty then
		if following.count_chars('=') > 0 then return new LineHeadline1
		if following.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
374
375 # Get the token kind at `pos`.
fun token_at(text: Text, pos: Int): Token do
	# Character at `pos` and its neighbours.
	# Positions outside of `text` are read as a space.
	var c0 = if pos > 0 then text[pos - 1] else ' '
	var c = text[pos]
	var c1 = if pos + 1 < text.length then text[pos + 1] else ' '
	var c2 = if pos + 2 < text.length then text[pos + 2] else ' '

	# The token covers a single position on the current line.
	var line = current_loc.line_start
	var column = current_loc.column_start + pos
	var loc = new MDLocation(line, column, line, column)

	if c == '*' then
		if c1 == '*' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongStar(loc, pos, c)
			return new TokenEmStar(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmStar(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 != ' ' or c2 != ' ' then return new TokenStrongUnderscore(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode an underscore between two alphanumerics is plain text.
			var inside_word = (c0.is_letter or c0.is_digit) and c0 != '_' and
				(c1.is_letter or c1.is_digit)
			if inside_word then return new TokenNone(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 != ' ' or c1 != ' ' then return new TokenEmUnderscore(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# A backslash is an escape only in front of one of these characters.
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)

	# Anything else, a lone `]` included, is plain text.
	return new TokenNone(loc, pos, c)
end
471
472 # Find the position of a `token` in `self`.
fun find_token(text: Text, start: Int, token: Token): Int do
	# Scan forward from `start` and stop at the first token of the same type.
	for i in [start..text.length[ do
		if token_at(text, i).is_same_type(token) then return i
	end
	# No such token up to the end of `text`.
	return -1
end
483
484 # Location used for next parsed token.
485 #
486 # This location can be changed by the emitter to adjust with `\n` found
487 # in the input.
private fun current_loc: MDLocation do
	# The location is owned by the emitter; the processor only reads it.
	var loc = emitter.current_loc
	return loc
end
489 end
490
491 # Emit output corresponding to blocks content.
492 #
493 # Blocks are created by a previous pass in `MarkdownProcessor`.
494 # The emitter use a `Decorator` to select the output format.
class MarkdownEmitter

	# Kind of processor used for parsing.
	type PROCESSOR: MarkdownProcessor

	# Processor asked for the token found at each position of the emitted text.
	var processor: PROCESSOR

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	#
	# An `HTMLDecorator` is created on first access if none was set.
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
		init processor
		self.decorator = decorator
	end

	# Output `block` in a fresh buffer and return that buffer.
	fun emit(block: Block): Text do
		var out = push_buffer
		block.emit(self)
		pop_buffer
		return out
	end

	# Output the content of `block` in the current buffer.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit markdown `text` starting at `start`.
	#
	# Emission stops at the first token with the same type as `token`
	# and the position of that token is returned.
	# A strong token also stops a pending emphasis token of the same family.
	#
	# If `token` is null (or a `TokenNone`), the text is emitted up to its end.
	# Return `-1` when the end of `text` is reached.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			if text[current_pos] == '\n' then
				# Keep the token locations in sync with the line breaks of the input.
				current_loc.line_start += 1
				current_loc.column_start = -current_pos
			end
			var found = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				var closing = found.is_same_type(token) or
					(token isa TokenEmStar and found isa TokenStrongStar) or
					(token isa TokenEmUnderscore and found isa TokenStrongUnderscore)
				# The saved state is not restored here: `current_pos` stays on the stop token.
				if closing then return current_pos
			end
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline production with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline production with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new empty buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer (the last pushed one).
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Stacked locations.
	private var loc_stack = new List[MDLocation]

	# Push a new MDLocation on the stack.
	private fun push_loc(location: MDLocation) do
		loc_stack.add location
	end

	# Pop the last location and return it.
	private fun pop_loc: MDLocation do
		return loc_stack.pop
	end

	# Current location (the last pushed one).
	private fun current_loc: MDLocation do
		assert not loc_stack.is_empty
		return loc_stack.last
	end

	# Append `e` to current buffer.
	fun add(e: Writable) do
		if e isa Text then
			current_buffer.append e
			return
		end
		# Anything that is not already a text is rendered to a string first.
		current_buffer.append e.write_to_string
	end

	# Append `c` to current buffer.
	fun addc(c: Char) do
		add c.to_s
	end

	# Append a "\n" line break.
	fun addn do
		add "\n"
	end
end
617
618 # A Link Reference.
619 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
620 #
621 # ~~~raw
622 # [1]: http://example.com/ "Optional title"
623 # ~~~
624 class LinkRef
625
626 # Link href.
627 var link: String
628
629 # Optional link title (null when the definition has none).
630 var title: nullable String = null
631
632 # Is the link an abbreviation?
633 var is_abbrev = false
634
635 # Create a link to `link` with the given `title`.
636 init with_title(link: String, title: nullable String) do
637 self.link = link
638 self.title = title
639 end
640 end
641
642 # A `Decorator` is used to emit markdown into a specific format.
643 # Default decorator used is `HTMLDecorator`.
644 interface Decorator
645
646 # Kind of emitter this decorator writes to.
647 type EMITTER: MarkdownEmitter
648
649 # Render the ruler `block` into `v`.
650 fun add_ruler(v: EMITTER, block: BlockRuler) is abstract
651
652 # Render the headline `block` into `v`, using the level stored in the block.
653 fun add_headline(v: EMITTER, block: BlockHeadline) is abstract
654
655 # Render the paragraph `block` into `v`.
656 fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract
657
658 # Render a code or fence `block` into `v`.
659 fun add_code(v: EMITTER, block: BlockCode) is abstract
660
661 # Render the blockquote `block` into `v`.
662 fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract
663
664 # Render the unordered list `block` into `v`.
665 fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract
666
667 # Render the ordered list `block` into `v`.
668 fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract
669
670 # Render the list item `block` into `v`.
671 fun add_listitem(v: EMITTER, block: BlockListItem) is abstract
672
673 # Render `text` as an emphasis span.
674 fun add_em(v: EMITTER, text: Text) is abstract
675
676 # Render `text` as a strong span.
677 fun add_strong(v: EMITTER, text: Text) is abstract
678
679 # Render `text` as a strike span.
680 #
681 # Extended mode only (see `MarkdownProcessor::ext_mode`)
682 fun add_strike(v: EMITTER, text: Text) is abstract
683
684 # Render a link to `link` labelled `name`, with the optional title `comment`.
685 fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
686
687 # Render the image at `link` described by `name`, with the optional title `comment`.
688 fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract
689
690 # Render the abbreviation `name` explained by `comment`.
691 fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract
692
693 # Render a code span reading `buffer` between the positions `from` and `to`.
694 fun add_span_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
695
696 # Render the text `value` and escape it.
697 fun append_value(v: EMITTER, value: Text) is abstract
698
699 # Render code text read from `buffer` between `from` and `to` and escape it.
700 fun append_code(v: EMITTER, buffer: Text, from, to: Int) is abstract
701
702 # Render the character `char`, escaped if the output format requires it.
703 fun escape_char(v: EMITTER, char: Char) is abstract
704
705 # Render a line break.
706 fun add_line_break(v: EMITTER) is abstract
707
708 # Generate a new valid HTML id from a `String`.
709 fun strip_id(txt: String): String is abstract
710
711 # Headlines found during the processing, indexed by their ids.
712 fun headlines: ArrayMap[String, HeadLine] is abstract
713 end
714
715 # Class representing a markdown headline.
716 class HeadLine
717 # Unique identifier of this headline.
718 var id: String
719
720 # Text of the headline.
721 var title: String
722
723 # Level of this headline.
724 #
725 # According to the markdown specification, level must be in `[1..6]`.
726 var level: Int
727 end
728
729 # `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do
		v.add "<hr/>\n"
	end

	redef fun add_headline(v, block) do
		# Record the headline under a unique id before rendering it.
		var title = block.block.first_line.value
		var id = strip_id(title)
		var level = block.depth
		headlines[id] = new HeadLine(id, title, level)
		# Render it.
		v.add "<h{level} id=\"{id}\">"
		v.emit_in block
		v.add "</h{level}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# Render a code block inside `<pre><code>`.
	#
	# The meta string of a fence block, if any, becomes the `class` of the `<pre>`.
	# It comes straight from the document, so it is escaped like any other
	# attribute value instead of being pasted verbatim between the quotes.
	redef fun add_code(v, block) do
		var meta: nullable Text = null
		if block isa BlockFence then meta = block.meta
		if meta != null then
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		else
			v.add "<pre><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		# The title attribute is only written for a non-empty comment.
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add "/>"
	end

	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		# The title attribute is only written for a non-empty comment.
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add ">"
		# The label may itself contain inline markdown.
		v.emit_text(name)
		v.add "</a>"
	end

	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# Emit `c`, replacing `&`, `<`, `>`, `"` and `'` by their entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Emit `buffer[from]` up to `buffer[to - 1]`, escaping only `&`, `<` and `>`.
	redef fun append_code(v, buffer, from, to) do
		for i in [from..to[ do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
		end
	end

	# Build an id from `txt` that is not yet used in `headlines`.
	#
	# Spaces become `_`; other characters are kept only if they are letters,
	# digits or one of `allowed_id_chars`.
	# If the id is already taken, `_1`, `_2`... is appended until it is free.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var res = b.to_s
		var key = res
		var i = 0
		while headlines.has_key(key) do
			i += 1
			key = "{res}_{i}"
		end
		return key
	end

	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
915
916 # Location in a Markdown input.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Format as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var start = "{line_start},{column_start}"
		var stop = "{line_end},{column_end}"
		return "{start}--{stop}"
	end

	# Return a new `MDLocation` holding the same four values as `self`.
	fun copy: MDLocation do
		var dup = new MDLocation(line_start, column_start, line_end, column_end)
		return dup
	end
end
938
939 # A block of markdown lines.
940 # A `MDBlock` can contains lines and/or sub-blocks.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block.
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain subblocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var total = 0
		var sub = first_block
		while sub != null do
			total += 1
			sub = sub.next
		end
		return total
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var total = 0
		var line = first_line
		while line != null do
			total += 1
			line = line.next
		end
		return total
	end

	# Split `self` creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` move to the new sub-block,
	# which is appended to the sub-blocks of `self` and returned.
	# `self` keeps the lines following `line`.
	fun split(line: MDLine): MDBlock do
		# The new block spans from the current first line to `line`.
		var new_loc = new MDLocation(
			first_line.location.line_start,
			first_line.location.column_start,
			line.location.line_end,
			line.location.column_end)
		var sub = new MDBlock(new_loc)
		sub.first_line = first_line
		sub.last_line = line

		# Detach the moved lines from the remaining ones.
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
			# `self` now starts at its new first line.
			location.line_start = first_line.location.line_start
			location.column_start = first_line.location.column_start
		end

		# Append the sub-block, keeping the list linked in both directions.
		if first_block == null then
			first_block = sub
		else
			sub.prev = last_block
			last_block.next = sub
		end
		last_block = sub
		return sub
	end

	# Add a `line` at the end of this block.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
		else
			# Link both neighbours and let each one know if the other is empty.
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
		end
		last_line = line
	end

	# Remove `line` from this block.
	fun remove_line(line: MDLine) do
		# Bypass `line` on the `next` side.
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		# Bypass `line` on the `prev` side.
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		var line = first_line
		while line != null and line.is_empty do
			remove_line line
			removed = true
			line = first_line
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		var line = last_line
		while line != null and line.is_empty do
			remove_line line
			removed = true
			line = last_line
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		# Both ends are always cleaned, whatever the first call returns.
		var leading = remove_leading_empty_lines
		var trailing = remove_trailing_empty_lines
		return leading or trailing
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					# List item: the line kind extracts the value of the item.
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect block line text.
	#
	# Each line, empty ones included, contributes a "\n".
	fun text: String do
		var buf = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then buf.append line.text
			buf.append "\n"
			line = line.next
		end
		return buf.write_to_string
	end
end
1122
1123 # Representation of a markdown block in the AST.
1124 # Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is emitted, without any decoration.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`: its lines if it has some, its sub-blocks otherwise.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines are first joined in a temporary buffer, without their
	# surrounding spaces, then the result is emitted as inline markdown.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a length, not an end position: the leading spaces
				# must be subtracted too, or trailing spaces leak into the output.
				var count = line.value.length - line.leading - line.trailing
				v.add line.value.substring(line.leading, count.max(0))
				# Two or more trailing spaces request a hard line break.
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then v.addn
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var sub = self.block.first_block
		while sub != null do
			# Tokens emitted for a sub-block are located relatively to it.
			v.push_loc(sub.location)
			sub.kind.emit(v)
			v.pop_loc
			sub = sub.next
		end
	end
end
1172
1173 # A block without any markdown specificities.
1174 #
1175 # It adds nothing to `Block` and keeps its default emission behavior;
1176 # this class is only used for typing purposes.
1177 class BlockNone
1178 super Block
1179 end
1180
1181 # A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove blockquote markers.
	#
	# On each non-empty line starting with `>`, the marker and one
	# following space (if any) are dropped.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				var text = line.value
				# Cut after the marker, and after the space following it if present.
				var cut = line.leading + 1
				if cut < text.length and text[cut] == ' ' then cut += 1
				line.value = text.substring_from(cut)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1206
1207 # A markdown code block.
class BlockCode
	super Block

	# Number of char to skip at the beginning of the line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Emit each line as escaped code followed by a line break.
	#
	# Empty lines only produce the line break.
	redef fun emit_lines(v) do
		var cur = block.first_line
		while cur != null do
			if not cur.is_empty then
				var code = cur.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			cur = cur.next
		end
	end
end
1229
# A markdown code-fence block.
#
# Everything is inherited from `BlockCode` except `line_start`,
# this class is mainly used for typing purposes.
class BlockFence
	super BlockCode

	# Any string found after fence token.
	var meta: nullable Text

	# Fence code lines are not indented: they start at 0 spaces.
	redef var line_start = 0
end
1243
# A markdown headline.
class BlockHeadline
	super Block

	# Emit the headline with a location shifted by `start` columns.
	redef fun emit(v) do
		var loc = block.location.copy
		loc.column_start += start
		v.push_loc(loc)
		v.decorator.add_headline(v, self)
		v.pop_loc
	end

	# Index of the first char of the title in the original line.
	#
	# Set by `transform_headline`.
	private var start = 0

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove headline marks from lines contained in `self`.
	#
	# Does nothing if `depth` is already set or if the first line is empty.
	# Else the opening `#` marks are counted to set `depth` (6 at most),
	# then the opening marks, the closing marks and the spaces around the
	# title are removed from the first line.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line.is_empty then return
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			var nend = line.value.length - line.trailing - 1
			# Never go before `start`: a title only made of `#` (like `# #`)
			# keeps its first char instead of producing a negative length.
			while nend > start and line.value[nend] == '#' do nend -= 1
			while nend > start and line.value[nend] == ' ' do nend -= 1
			line.value = line.value.substring(start, nend - start + 1)
			line.leading = 0
			line.trailing = 0
		end
		self.start = start
		depth = level.min(6)
	end
end
1289
# An item of a markdown list.
class BlockListItem
	super Block

	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1296
# A markdown list block.
# Can be either an ordered or unordered list, this class is mainly used to factorize code.
abstract class BlockList
	super Block

	# Split list block into list items sub-blocks.
	#
	# Starting from the second line, a new item begins at each list line and
	# at each non-empty, non-indented line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList or
				(not line.is_empty and line.prev_empty and line.leading == 0)
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# what remains is the last item
		var last = block.split(block.last_line.as(not null))
		last.kind = new BlockListItem(last)
	end

	# Expand list items as paragraphs if needed.
	#
	# If at least one list item of `block` contains a `BlockParagraph`,
	# every `BlockNone` found in the list items becomes a `BlockParagraph`.
	private fun expand_paragraphs(block: MDBlock) do
		# first pass: look for a paragraph
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var child: nullable MDBlock = item.first_block
				while child != null and not found do
					if child.kind isa BlockParagraph then found = true
					child = child.next
				end
			end
			item = item.next
		end
		if not found then return
		# second pass: promote plain blocks
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var child: nullable MDBlock = item.first_block
				while child != null do
					if child.kind isa BlockNone then
						child.kind = new BlockParagraph(child)
					end
					child = child.next
				end
			end
			item = item.next
		end
	end
end
1354
# An ordered markdown list.
class BlockOrderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1361
# An unordered markdown list.
class BlockUnorderedList
	super BlockList

	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1368
# A paragraph of markdown text.
class BlockParagraph
	super Block

	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1375
# A horizontal ruler in markdown.
class BlockRuler
	super Block

	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1382
# XML blocks that can be found in markdown markup.
class BlockXML
	super Block

	# Output the raw value of each non-empty line, every line is followed by a new line.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1396
# A markdown line.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The `next_empty` and `prev_empty` flags of the neighbor lines are updated too.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		if prev != null then prev.next_empty = true
		if next != null then next.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# If `value` only contains spaces (or nothing), `self` is cleared
	# and 0 is returned.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# Compare the fresh count, not the `leading` attribute that may be
		# stale (or not set yet) when this method is called.
		if count == value.length then
			clear
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# The bound prevents reading before the first char on blank values.
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	# Return a value > 0 if this line only consists of `ch` and spaces.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
				continue
			end
			count = 0
			break
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then
				continue
			end
			if c == ch then
				count += 1
			else
				break
			end
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment`.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contains valid XML markup?
	#
	# On success, `xml_end_line` is the line where the markup ends.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		var tag: String
		if pos > -1 then
			tag = tmp.xml_tag
			if not tag.is_html_block then
				return false
			end
			if tag == "hr" then
				xml_end_line = self
				return true
			end
			# stack of the block tags waiting for their closing tag
			tags.add tag
			var line: nullable MDLine = self
			while line != null do
				while pos < line.value.length and line.value[pos] != '<' do
					pos += 1
				end
				if pos >= line.value.length then
					if pos - 2 >= 0 and line.value[pos - 2] == '/' then
						tags.pop
						if tags.is_empty then
							xml_end_line = line
							break
						end
					end
					line = line.next
					pos = 0
				else
					tmp = new FlatBuffer
					var new_pos = line.value.read_xml(tmp, pos, false)
					if new_pos > 0 then
						tag = tmp.xml_tag
						if tag.is_html_block and not tag == "hr" then
							if tmp[1] == '/' then
								if tags.last != tag then
									return false
								end
								tags.pop
							else
								tags.add tag
							end
						end
						if tags.is_empty then
							xml_end_line = line
							break
						end
						pos = new_pos
					else
						pos += 1
					end
				end
			end
			return tags.is_empty
		end
		return false
	end

	# Read a XML comment.
	# Used by `check_html`.
	#
	# `start` is the index of the `<` char in `first_line`.
	# Return the index following the closing `-->` (in the line set as
	# `xml_end_line` of `first_line`) or -1 if no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.value.length then
			# The two dashes of `<!--` are relative to `start`,
			# not to the beginning of the line.
			if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` expects a length: remove both `leading` and `trailing`.
		var count = value.length - trailing - leading
		if count < 0 then count = 0
		return value.substring(leading, count)
	end
end
1600
# A kind of markdown line.
#
# Each kind defines how the lines of this kind are processed.
interface Line

	# Process the current line of `v`.
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1608
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: the current line of `v` becomes the next one.
	redef fun process(v) do
		var following = v.current_line.next
		v.current_line = following
	end
end
1617
# A non-specific markdown construction.
# Mainly used as part of another line construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the lines up to the end of the block then split them
	# in a `BlockNone` or a `BlockParagraph`.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# go to block end: an empty line or a line starting another construct
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
				t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# build block
		var stopped_on_content = line != null and not line.is_empty
		var block: MDBlock
		if stopped_on_content then
			# the stop line belongs to the next construct
			block = v.current_block.split(line.prev.as(not null))
		else if line != null then
			block = v.current_block.split(line)
		else
			block = v.current_block.split(v.current_block.last_line.as(not null))
		end
		# in lists, text not preceded by an empty line and not ended by one stays plain
		if v.in_list and (stopped_on_content or line == null) and not was_empty then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1667
# A line of markdown code.
class LineCode
	super Line

	# Gather the following code lines and empty lines in a `BlockCode`.
	redef fun process(v) do
		# lookup block end
		var stop = v.current_line
		while stop != null and (stop.is_empty or v.line_kind(stop) isa LineCode) do
			stop = stop.next
		end
		# split before the stop line, or at the last line if none
		var last: MDLine
		if stop != null then
			last = stop.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last)
		block.kind = new BlockCode(block)
		block.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1690
# A line of raw XML.
class LineXML
	super Line

	# Split the lines up to the `xml_end_line` of the current line in a `BlockXML`.
	redef fun process(v) do
		var first = v.current_line
		var before = first.prev
		# detach what precedes the markup
		if before != null then v.current_block.split(before)
		var xml_block = v.current_block.split(first.xml_end_line.as(not null))
		xml_block.kind = new BlockXML(xml_block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1705
# A markdown blockquote line.
class LineBlockquote
	super Line

	# Gather the lines of the quote in a `BlockQuote` then process its content.
	redef fun process(v) do
		# go to bquote end: a non-empty and non-indented line that follows
		# an empty line and is not a blockquote line itself
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
				not (v.line_kind(stop) isa LineBlockquote) then break
			stop = stop.next
		end
		# build sub block
		var last: MDLine
		if stop != null then
			last = stop.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last)
		var kind = new BlockQuote(block)
		block.kind = kind
		block.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(block)
		v.current_line = stop
		v.recurse(block, false)
		v.current_line = v.current_block.first_line
	end
end
1735
# A markdown ruler line.
class LineHR
	super Line

	# Split the current line alone in a `BlockRuler`.
	redef fun process(v) do
		var ruler = v.current_line
		# detach what precedes the ruler
		if ruler.prev != null then
			v.current_block.split(ruler.prev.as(not null))
		end
		var block = v.current_block.split(ruler.as(not null))
		block.kind = new BlockRuler(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1749
# A markdown fence code line.
class LineFence
	super Line

	# Gather the lines up to the closing fence (or the last line) in a `BlockFence`.
	redef fun process(v) do
		# go to fence end
		var line = v.current_line.next
		while line != null and not (v.line_kind(line) isa LineFence) do
			line = line.next
		end
		# step after the closing fence if any
		if line != null then line = line.next
		# build fence block
		var last: MDLine
		if line != null then
			last = line.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var block = v.current_block.split(last)
		block.remove_surrounding_empty_lines
		var meta = block.first_line.value.meta_from_fence
		block.kind = new BlockFence(block, meta)
		# erase the fence lines themselves
		block.first_line.clear
		var closing = block.last_line
		if closing != null and v.line_kind(closing) isa LineFence then
			block.last_line.clear
		end
		block.remove_surrounding_empty_lines
		v.current_line = line
	end
end
1783
# A markdown headline.
class LineHeadline
	super Line

	# Split the current line alone in a `BlockHeadline` and remove its marks.
	redef fun process(v) do
		var title = v.current_line
		var before = title.prev
		# detach what precedes the headline
		if before != null then v.current_block.split(before)
		var block = v.current_block.split(title.as(not null))
		var headline = new BlockHeadline(block)
		block.kind = headline
		headline.transform_headline(block)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1800
# A markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Split the current line in a `BlockHeadline` of depth 1.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var title = v.current_line
		var before = title.prev
		# detach what precedes the headline
		if before != null then v.current_block.split(before)
		title.next.clear
		var block = v.current_block.split(title.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 1
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1819
# A markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Split the current line in a `BlockHeadline` of depth 2.
	#
	# The line following the title is cleared.
	redef fun process(v) do
		var title = v.current_line
		var before = title.prev
		# detach what precedes the headline
		if before != null then v.current_block.split(before)
		title.next.clear
		var block = v.current_block.split(title.as(not null))
		var headline = new BlockHeadline(block)
		headline.depth = 2
		headline.transform_headline(block)
		block.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1838
# A markdown list line.
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list in a block, split it in items
	# then process each item.
	redef fun process(v) do
		# go to list end: a non-empty and non-indented line that follows
		# an empty line and is not a list line
		var stop = v.current_line
		while stop != null do
			var t = v.line_kind(stop)
			if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
				not (t isa LineList) then break
			stop = stop.next
		end
		# build list block
		var last: MDLine
		if stop != null then
			last = stop.prev.as(not null)
		else
			last = v.current_block.last_line.as(not null)
		end
		var list = v.current_block.split(last)
		var kind = block_kind(list)
		list.kind = kind
		# reset the flags of the boundary lines before and after the trimming
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# process each item
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = stop
	end

	# Create a new block kind based on this line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract string value from `MDLine`.
	protected fun extract_value(line: MDLine): String is abstract
end
1884
# An ordered list line.
class LineOList
	super LineList

	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The value starts two chars after the first `.` of the line.
	redef fun extract_value(line) do
		var dot = line.value.index_of('.')
		return line.value.substring_from(dot + 2)
	end
end
1895
# An unordered list line.
class LineUList
	super LineList

	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The value starts two chars after the leading spaces of the line.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1906
# A token represents a character in the markdown input.
# Some tokens have a specific markup behaviour that is handled here.
abstract class Token

	# Location of `self` in the original input.
	var location: MDLocation

	# Position of `self` in input independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output that token with `v`.
	#
	# By default `char` is added as is.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1923
# A token without a specific meaning.
#
# Nothing is redefined from `Token`.
class TokenNone
	super Token
end
1928
# An emphasis token.
abstract class TokenEm
	super Token

	# Emit the text following the token as an emphasis.
	#
	# If `emit_text_until` does not return a position > 0, only `char` is emitted.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = stop
	end
end
1945
# An emphasis star token.
#
# Nothing is redefined from `TokenEm`.
class TokenEmStar
	super TokenEm
end
1950
# An emphasis underscore token.
#
# Nothing is redefined from `TokenEm`.
class TokenEmUnderscore
	super TokenEm
end
1955
# A strong token.
abstract class TokenStrong
	super Token

	# Emit the text found two chars after `pos` as a strong emphasis.
	#
	# If `emit_text_until` does not return a position > 0, only `char` is emitted.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		v.current_pos = stop + 1
	end
end
1972
# A strong star token.
#
# Nothing is redefined from `TokenStrong`.
class TokenStrongStar
	super TokenStrong
end
1977
# A strong underscore token.
#
# Nothing is redefined from `TokenStrong`.
class TokenStrongUnderscore
	super TokenStrong
end
1982
# A code token.
# This class is mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text found between `self` and the matching token as a span code.
	#
	# The spaces surrounding the code are ignored.
	# If no matching token is found, only `char` is emitted.
	redef fun emit(v) do
		var text = v.current_text.as(not null)
		var from = pos + next_pos + 1
		var to = v.processor.find_token(text, from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		# trim left
		while from < to and text[from] == ' ' do from += 1
		if from >= to then return
		# trim right
		while text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, text, from, to)
	end

	# Offset added to `pos` to skip the token mark.
	private fun next_pos: Int is abstract
end
2005
# A span code token.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end
2012
# A doubled span code token.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end
2019
# A link or image token.
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or image if `check_link` accepts it, else only emit `char`.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Read the link construct found at `start` in the current text of `v`
	# and set `name`, `link`, `comment` and `is_abbrev` accordingly.
	#
	# Handled forms are `[name](link "comment")`, `[name][id]` and `[name]`,
	# the last two being resolved with the `link_refs` of the processor.
	#
	# Return the position of the last char of the construct or -1 if the
	# link is not valid. `out` is not used.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		var pos
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# end of text reached: only a reference can match
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# `skip_spaces` returns -1 when only spaces remain:
				# stop here instead of reading `md` at a negative index.
				if pos < start then return -1
				if pos > start and md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			# an empty id means that the name is the id
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2138
# A markdown link token.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation if `is_abbrev` is set and a `comment` is known,
	# else emit a link.
	redef fun emit_hyper(v) do
		var text = name.as(not null)
		if not is_abbrev or comment == null then
			v.decorator.add_link(v, link.as(not null), text, comment)
			return
		end
		v.decorator.add_abbr(v, text, comment.as(not null))
	end
end
2151
# A markdown image token.
class TokenImage
	super TokenLinkOrImage

	redef fun emit_hyper(v) do
		var address = link.as(not null)
		v.decorator.add_image(v, address, name.as(not null), comment)
	end
end
2160
# A HTML/XML token.
class TokenHTML
	super Token

	# Emit the markup accepted by `check_html`, else emit `char` escaped.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var stop = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if stop <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = stop
	end

	# Is the HTML valid?
	# Also take care of link and mailto shortcuts.
	#
	# Auto links are directly emitted with `v`, inline HTML is appended to `out`.
	# Return the position of the closing `>` or -1.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# check for auto links
		var prefix = new FlatBuffer
		var pos = md.read_until(prefix, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and prefix.is_link_prefix then
			# the rest of the address is appended to the prefix
			pos = md.read_until(prefix, pos, '>')
			if pos != -1 then
				var address = prefix.write_to_string
				v.decorator.add_link(v, address, address, null)
				return pos
			end
		end
		# TODO check for mailto
		# check for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2198
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity accepted by `check_entity`, else emit `char` escaped.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Read `md` from `start` up to the next `;` and append the entity to `out`.
	# Accepted forms are hexadecimal (`&#x..;`), decimal (`&#..;`)
	# and named (letters and digits only) entities.
	#
	# Return the position of the `;` or -1 if the entity is not valid.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					# each char must be in one of the three hexadecimal ranges
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
			# TODO check entity is valid
		end
		out.add ';'
		return pos
	end
end
2252
# A markdown escape token.
class TokenEscape
	super Token

	# Skip the escape char and emit the char that follows it as is.
	redef fun emit(v) do
		var escaped_pos = v.current_pos + 1
		v.current_pos = escaped_pos
		v.addc v.current_text[escaped_pos]
	end
end
2262
# A markdown strike token.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text found two chars after `pos` as a strike.
	#
	# If `emit_text_until` does not return a position > 0, only `char` is emitted.
	redef fun emit(v) do
		var content = v.push_buffer
		var stop = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if stop <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		v.current_pos = stop + 1
	end
end
2281
2282 redef class Text
2283
# Get the position of the next char that is neither a space nor a new line.
#
# The search begins at `start`.
# Return -1 if the end of `self` is reached, or if `start` is -1.
private fun skip_spaces(start: Int): Int do
	var idx = start
	while idx > -1 and idx < length do
		var c = self[idx]
		if c != ' ' and c != '\n' then break
		idx += 1
	end
	if idx >= length then return -1
	return idx
end
2293
# Read `self` from `start` until one of the `nend` chars and append it to the `out` buffer.
# Escape markdown special chars.
#
# Return the position of the end char found or -1 if the end of `self` is reached.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var idx = start
	while idx < length do
		var c = self[idx]
		if c == '\\' and idx + 1 < length then
			idx = escape(out, self[idx + 1], idx)
		else
			if nend.has(c) then break
			out.add c
		end
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2318
# Read `self` as raw text from `start` until one of the `nend` chars and append it to the `out` buffer.
# No escape is made.
#
# Return the position of the end char found or -1 if the end of `self` is reached.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var idx = start
	while idx < length do
		var c = self[idx]
		if nend.has(c) then break
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2339
# Read `self` as XML from `from` until one of the `to` chars and append it to the `out` buffer.
#
# The chars found in quoted strings (single or double quotes) are copied
# as is: the `to` chars are ignored in strings and a backslash protects
# the char that follows it.
#
# Return the position of the end char found or -1 if the end of `self` is reached.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# copy the escaped char, not the backslash a second time
					out.add self[pos]
					pos += 1
				end
				continue
			end
			if c == str_char then
				in_str = false
				out.add c
				pos += 1
				continue
			end
		end
		if c == '"' or c == '\'' then
			in_str = true
			str_char = c
		end
		if not in_str then
			var end_reached = false
			for n in [0..to.length[ do
				if c == to[n] then
					end_reached = true
					break
				end
			end
			if end_reached then break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2385
# Read the XML tag starting at `start` in `self` and append it to the `out` buffer.
#
# In `safe_mode`, tags with a non valid name are rejected and tags
# flagged by `is_html_unsafe` are appended with `<` and `>` escaped.
#
# Return the position of the closing `>` or -1 if no tag can be read.
# When `<` is followed by `!`, `<!` is appended and `start + 1` is returned.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var marker = self[start + 1]
	if marker == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = marker == '/'
	var pos = start + 1
	if is_close_tag then pos = start + 2
	var is_valid = true
	if safe_mode then
		# read the tag name apart to check it
		var tmp = new FlatBuffer
		pos = read_xml_until(tmp, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = tmp.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append tmp
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# read the attributes
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = self.read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2444
# Read a markdown link address and append it to the `out` buffer.
#
# The reading begins at `start` and stops at the first space found outside
# nested parentheses or at the `)` closing the link.
# Escape markdown special chars.
#
# Return the position of the stop char or -1 if the end of `self` is reached.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var idx = start
	# number of parentheses still open, the one of the link included
	var depth = 1
	while idx < length do
		var c = self[idx]
		if c == '\\' and idx + 1 < length then
			idx = escape(out, self[idx + 1], idx)
		else
			if c == '(' then
				depth += 1
			else if c == ' ' then
				if depth == 1 then break
			else if c == ')' then
				depth -= 1
				if depth == 0 then break
			end
			out.add c
		end
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2471
# Copy the bracketed markdown link text starting at `start` into `out`.
#
# Nested `[` `]` pairs are copied verbatim; reading stops on the `]` that
# balances the bracket opened before `start` (that `]` is not copied).
#
# Returns the position of that closing `]`, or `-1` if the end of `self`
# is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	# Nesting depth; starts at 1 for the already opened bracket.
	var depth = 1
	var i = start
	while i < length do
		var ch = self[i]
		if ch == '[' then
			depth += 1
		else if ch == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add ch
		i += 1
	end
	if i == length then return -1
	return i
end
2498
# Lower-cased tag name of the XML tag held in `self`.
#
# The first character (expected to be `<`) and an optional `/` after it
# are skipped; letters and digits are then collected until another kind of
# character, or the last character of `self`, is met.
private fun xml_tag: String do
	var tag = new FlatBuffer
	var i = 1
	if length > 1 and self[1] == '/' then i = 2
	# The last character is never part of the name.
	var last = length - 1
	while i < last do
		var ch = self[i]
		if not (ch.is_letter or ch.is_digit) then break
		tag.add ch
		i += 1
	end
	return tag.write_to_string.to_lower
end
2510
# Is `self` a plausible HTML tag name?
#
# A name is accepted when it is not empty, starts with a letter and
# contains only letters and digits, so that `h1` to `h6` are accepted
# while things like `3` (from `<3`) are rejected.
private fun is_valid_html_tag: Bool do
	if is_empty then return false
	# A tag name never starts with a digit.
	if not self[0].is_alpha then return false
	for c in self do
		if not (c.is_alpha or c.is_digit) then return false
	end
	return true
end
2518
# Handle the backslash sequence `\c` found at `pos`.
#
# If `c` is one of the escapable markdown characters, `c` alone is added
# to `out` and `pos + 1` is returned (the position of `c`).
# Otherwise a literal backslash is added to `out` and `pos` is returned
# unchanged.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = c == '\\' or c == '[' or c == ']' or c == '(' or
		c == ')' or c == '{' or c == '}' or c == '#' or c == '"' or
		c == '\'' or c == '.' or c == '<' or c == '>' or c == '*' or
		c == '+' or c == '-' or c == '_' or c == '!' or c == '`' or
		c == '~' or c == '^'
	if not escapable then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2531
# Text following the fence markers on a fence opening line.
#
# Leading spaces, backticks and tildes are skipped; the rest of the line
# is returned trimmed. Returns `null` when the line holds nothing else.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var c = chars[i]
		if not (c == ' ' or c == '`' or c == '~') then return substring_from(i).trim
		i += 1
	end
	return null
end
2542
# Is `self` the name of an HTML element listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var tag = self.write_to_string
	return html_unsafe_tags.has(tag)
end
2545
# Is `self` the name of an HTML block element (see `html_block_tags`)?
private fun is_html_block: Bool do
	var tag = self.write_to_string
	return html_block_tags.has(tag)
end
2548
# Is `self` one of the link prefixes listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefix = self.write_to_string
	return html_link_prefixes.has(prefix)
end
2551
# Names of the HTML elements treated as unsafe (the array is built once).
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
2553
# Names of the HTML block level elements (the array is built once).
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
2555
# URL scheme names recognized as link prefixes (the array is built once).
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2557 end
2558
redef class String

	# Render `self`, read as markdown, to HTML.
	#
	# A fresh `MarkdownProcessor` is used for each call.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do
		return (new MarkdownProcessor).process(self)
	end
end