Merge: More keep going
[nit.git] / lib / markdown / markdown.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Markdown parsing.
16 module markdown
17
18 import template
19
20 # Parse a markdown string and split it in blocks.
21 #
22 # Blocks are then output by a `MarkdownEmitter`.
23 #
24 # Usage:
25 #
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
29 #
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
32
33 # `MarkdownEmitter` used for output.
34 var emitter: MarkdownEmitter is noinit, protected writable
35
36 # Work in extended mode (default).
37 #
38 # Behavior changes when using extended mode:
39 #
40 # * Lists and code blocks end a paragraph
41 #
42 # In normal markdown the following:
43 #
44 # This is a paragraph
45 # * and this is not a list
46 #
47 # Will produce:
48 #
49 # <p>This is a paragraph
50 # * and this is not a list</p>
51 #
52 # When using extended mode this changes to:
53 #
54 # <p>This is a paragraph</p>
55 # <ul>
56 # <li>and this is not a list</li>
57 # </ul>
58 #
59 # * Fences code blocks
60 #
61 # If you don't want to indent all your code with 4 spaces,
62 # you can wrap your code in ``` ``` ``` or `~~~`.
63 #
64 # Here's an example:
65 #
66 # ```
67 # fun test do
68 # print "Hello World!"
69 # end
70 # ```
71 #
72 # * Code blocks meta
73 #
74 # If you want to use syntax highlighting tools, most of them need to know what kind
75 # of language they are highlighting.
76 # You can add an optional language identifier after the fence declaration to output
77 # it in the HTML render.
78 #
79 # ```nit
80 # import markdown
81 #
82 # print "# Hello World!".md_to_html
83 # ```
84 #
85 # Becomes
86 #
87 # <pre class="nit"><code>import markdown
88 #
89 # print "Hello World!".md_to_html
90 # </code></pre>
91 #
92 # * Underscores (Emphasis)
93 #
94 # Underscores in the middle of a word like:
95 #
96 # Con_cat_this
97 #
98 # normally produces this:
99 #
100 # <p>Con<em>cat</em>this</p>
101 #
102 # With extended mode they don't result in emphasis.
103 #
104 # <p>Con_cat_this</p>
105 #
106 # * Strikethrough
107 #
108 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
109 # strikethrough span is marked with `~~`.
110 #
111 # ~~Mistaken text.~~
112 #
113 # becomes
114 #
115 # <del>Mistaken text.</del>
116 var ext_mode = true
117
118 init do self.emitter = new MarkdownEmitter(self)
119
# Process the markdown `input` string and return the rendered output.
#
# The per-document state (known link references, pending multi-line link
# reference, current line and block) is reset first.
# The input is then split into lines, organized into blocks by `recurse`
# and finally rendered by the `emitter`.
fun process(input: String): Writable do
	# Reset the state left by a previous document.
	link_refs.clear
	last_link_ref = null
	current_line = null
	current_block = null

	# Build the tree of blocks.
	var root = read_lines(input)
	root.remove_surrounding_empty_lines
	recurse(root, false)

	# Render the tree.
	var rendered = emitter.emit(root.kind)
	return rendered
end
134
# Split the `input` string into `MDLine`s gathered in a new parent `MDBlock`.
#
# Lines are separated by `'\n'` (the separator is not kept).
# Each tabulation is replaced by spaces up to the next multiple of 4 columns.
# Lines accepted by `check_link_ref` are recorded there and left out of the block.
private fun read_lines(input: String): MDBlock do
	var root = new MDBlock(new MDLocation(1, 1, 1, 1))
	var text = new FlatBuffer
	var total = input.length
	var cursor = 0
	var line_no = 0

	while cursor < total do
		text.clear
		# `width` is the length of the expanded line,
		# `consumed` the number of input characters read for it
		# (the line feed included).
		var width = 0
		var consumed = 0
		loop
			if cursor >= total then break
			var c = input[cursor]
			cursor += 1
			consumed += 1
			if c == '\n' then break
			if c == '\t' then
				# pad up to the next tab stop
				var stop = width + (4 - width % 4)
				while width < stop do
					text.add ' '
					width += 1
				end
			else
				text.add c
				width += 1
			end
		end
		line_no += 1

		var location = new MDLocation(line_no, 1, line_no, consumed)
		var line = new MDLine(location, text.write_to_string)
		# Link reference definitions do not belong to the document body.
		if not check_link_ref(line) then root.add_line line
	end
	return root
end
176
# Check if `line` belongs to a link reference definition.
#
# Two kinds of lines are recognized:
#
# * a definition like `[id]: link "title"` (the link may be wrapped in `<>`,
#   the title may use `"`, `'` or `()`), which is saved into `link_refs`;
# * a title alone, directly following a definition that had no title,
#   which completes `last_link_ref`.
#
# Return `true` if the line was consumed that way.
# Such a line must not be added to the document body.
private fun check_link_ref(line: MDLine): Bool do
	var md = line.value
	var is_link_ref = false
	var id = new FlatBuffer
	var link = new FlatBuffer
	var comment = new FlatBuffer
	if not line.is_empty and line.leading < 4 and md[line.leading] == '[' then
		var pos = md.read_until(id, line.leading + 1, ']')
		if not id.is_empty and pos + 2 < md.length and md[pos + 1] == ':' then
			pos = md.skip_spaces(pos + 2)
			# NOTE(review): `skip_spaces` is presumed to be able to return a
			# position outside of the line when only spaces follow the colon
			# (the second call below is guarded the same way) -- confirm.
			# Such a line is not a definition.
			if pos >= 0 and pos < md.length then
				if md[pos] == '<' then
					pos = md.read_until(link, pos + 1, '>')
					pos += 1
				else
					pos = md.read_until(link, pos, ' ', '\n')
				end
			end
			if not link.is_empty then
				pos = md.skip_spaces(pos)
				if pos > 0 and pos < md.length then
					# Something follows the link: it must be a title.
					var c = md[pos]
					if c == '\"' or c == '\'' or c == '(' then
						pos += 1
						if c == '(' then
							pos = md.read_until(comment, pos, ')')
						else
							pos = md.read_until(comment, pos, c)
						end
						if pos > 0 then is_link_ref = true
					end
				else
					is_link_ref = true
				end
			end
		end
	end

	if is_link_ref and not id.is_empty and not link.is_empty then
		var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
		add_link_ref(id.write_to_string, lr)
		# Only a definition without title can be completed by the next line.
		if comment.is_empty then
			last_link_ref = lr
		else
			last_link_ref = null
		end
		return true
	end

	# Not a definition: maybe the title of the definition found on the
	# previous line. Whatever the answer, the pending definition is dropped
	# so that a later, unrelated line cannot be taken for its title.
	var pending = last_link_ref
	last_link_ref = null
	if pending == null then return false
	if line.is_empty then return false

	var title = new FlatBuffer
	var c = md[line.leading]
	if c == '\"' or c == '\'' or c == '(' then
		var closing = c
		if c == '(' then closing = ')'
		md.read_until(title, line.leading + 1, closing)
	end
	if title.is_empty then return false
	pending.title = title.write_to_string
	return true
end
244
245 # Known link refs
246 # This list will be needed during output to expand links.
247 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
248
249 # Last encountered link ref (for multiline definitions)
250 #
251 # Markdown allows link refs to be defined over two lines:
252 #
253 # [id]: http://example.com/longish/path/to/resource/here
254 # "Optional Title Here"
255 #
256 private var last_link_ref: nullable LinkRef = null
257
# Register the link reference `ref` under `key`.
#
# The key is stored in lower case in `link_refs`.
fun add_link_ref(key: String, ref: LinkRef) do
	var normalized = key.to_lower
	link_refs[normalized] = ref
end
260
# Recursively split a `block`.
#
# The block is split according to the kind of lines it contains.
# Some blocks, like lists, can be split again recursively.
# The `in_list` mode is used to recurse on lists and build
# nested paragraphs or code blocks.
#
# The previous values of `in_list` and `current_block` are restored
# before returning; `current_line` is not.
fun recurse(root: MDBlock, in_list: Bool) do
	var old_mode = self.in_list
	var old_root = self.current_block
	self.in_list = in_list

	# Skip the leading empty lines.
	var line = root.first_line
	while line != null and line.is_empty do
		line = line.next
		if line == null then
			# Only empty lines: nothing to process, but the list mode
			# of the caller must not stay overwritten.
			self.in_list = old_mode
			return
		end
	end

	current_line = line
	current_block = root
	# Each line kind consumes the lines it needs and moves `current_line`.
	while current_line != null do
		line_kind(current_line.as(not null)).process(self)
	end
	self.in_list = old_mode
	self.current_block = old_root
end
286
287 # Currently processed line.
288 # Used when visiting blocks with `recurse`.
289 var current_line: nullable MDLine = null is writable
290
291 # Currently processed block.
292 # Used when visiting blocks with `recurse`.
293 var current_block: nullable MDBlock = null is writable
294
295 # Is the current recursion in list mode?
296 # Used when visiting blocks with `recurse`
297 private var in_list = false
298
# The kind of the line `md`.
#
# Kinds are tested in a fixed order and the first match wins:
# empty, code, headline (`#`), blockquote, fence (extended mode only),
# ruler, unordered list, ordered list, XML, underlined headlines
# (decided by looking at the next line) and finally `LineOther`.
fun line_kind(md: MDLine): Line do
	if md.is_empty then return new LineEmpty
	var indent = md.leading
	if indent > 3 then return new LineCode

	var text = md.value
	var first = text[indent]
	if first == '#' then return new LineHeadline
	if first == '>' then return new LineBlockquote

	# Fences and rulers need at least 3 characters between the surrounding spaces.
	var content_len = text.length - indent - md.trailing
	if content_len > 2 then
		if ext_mode and (first == '`' or first == '~') and
		md.count_chars_start(first) >= 3 then
			return new LineFence
		end
		if (first == '*' or first == '-' or first == '_') and
		md.count_chars(first) >= 3 then
			return new LineHR
		end
	end

	# Unordered list: a marker followed by a space.
	var remaining = text.length - indent
	if remaining >= 2 and text[indent + 1] == ' ' and
	(first == '*' or first == '-' or first == '+') then
		return new LineUList
	end

	# Ordered list: digits followed by a dot and a space.
	if remaining >= 3 and first.is_digit then
		var i = indent + 1
		while i < text.length and text[i].is_digit do i += 1
		if i + 1 < text.length and text[i] == '.' and text[i + 1] == ' ' then
			return new LineOList
		end
	end

	if first == '<' and md.check_html then return new LineXML

	# Underlined headlines.
	var following = md.next
	if following != null and not following.is_empty then
		if following.count_chars('=') > 0 then return new LineHeadline1
		if following.count_chars('-') > 0 then return new LineHeadline2
	end
	return new LineOther
end
354
# Get the kind of token found at `pos` in `text`.
#
# The decision is based on the character at `pos`, the one before it
# and the two after it. Positions outside of `text` count as spaces.
fun token_at(text: Text, pos: Int): Token do
	var len = text.length
	var c0 = ' '
	if pos > 0 then c0 = text[pos - 1]
	var c = text[pos]
	var c1 = ' '
	if pos + 1 < len then c1 = text[pos + 1]
	var c2 = ' '
	if pos + 2 < len then c2 = text[pos + 2]

	var loc = text.pos_to_loc(pos)

	if c == '*' then
		if c1 == '*' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmStar(loc, pos, c)
			return new TokenStrongStar(loc, pos, c)
		end
		# A star surrounded by spaces is a plain character.
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmStar(loc, pos, c)
	end

	if c == '_' then
		if c1 == '_' then
			if c0 == ' ' and c2 == ' ' then return new TokenEmUnderscore(loc, pos, c)
			return new TokenStrongUnderscore(loc, pos, c)
		end
		if ext_mode then
			# In extended mode, an underscore inside a word is a plain character.
			var word_before = (c0.is_letter or c0.is_digit) and c0 != '_'
			var word_after = c1.is_letter or c1.is_digit
			if word_before and word_after then return new TokenNone(loc, pos, c)
			return new TokenEmUnderscore(loc, pos, c)
		end
		if c0 == ' ' and c1 == ' ' then return new TokenNone(loc, pos, c)
		return new TokenEmUnderscore(loc, pos, c)
	end

	if c == '!' then
		if c1 == '[' then return new TokenImage(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '[' then return new TokenLink(loc, pos, c)
	if c == ']' then return new TokenNone(loc, pos, c)

	if c == '`' then
		if c1 == '`' then return new TokenCodeDouble(loc, pos, c)
		return new TokenCodeSingle(loc, pos, c)
	end

	if c == '\\' then
		# Only these characters can be escaped with a backslash.
		var escapable = c1 == '\\' or c1 == '[' or c1 == ']' or
			c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or
			c1 == '#' or c1 == '"' or c1 == '\'' or c1 == '.' or
			c1 == '<' or c1 == '>' or c1 == '*' or c1 == '+' or
			c1 == '-' or c1 == '_' or c1 == '!' or c1 == '`' or
			c1 == '~' or c1 == '^'
		if escapable then return new TokenEscape(loc, pos, c)
		return new TokenNone(loc, pos, c)
	end

	if c == '<' then return new TokenHTML(loc, pos, c)
	if c == '&' then return new TokenEntity(loc, pos, c)

	# Strikethrough only exists in extended mode.
	if ext_mode and c == '~' and c1 == '~' then return new TokenStrike(loc, pos, c)
	return new TokenNone(loc, pos, c)
end
447
# Find the first position in `text`, starting at `start`, where the
# token returned by `token_at` has the same type as `token`.
#
# Return `-1` if there is no such position.
fun find_token(text: Text, start: Int, token: Token): Int do
	for pos in [start..text.length[ do
		if token_at(text, pos).is_same_type(token) then return pos
	end
	return -1
end
459 end
460
# Emit the output corresponding to the content of blocks.
#
# Blocks are created by a previous pass in `MarkdownProcessor`.
# The emitter uses a `Decorator` to select the output format and
# writes into a stack of buffers, the last one being the current output.
class MarkdownEmitter

	# Kind of processor used for parsing.
	type PROCESSOR: MarkdownProcessor

	# Processor containing the link refs; also used to find tokens in texts.
	var processor: PROCESSOR

	# Kind of decorator used for decoration.
	type DECORATOR: Decorator

	# Decorator used for output.
	#
	# A `HTMLDecorator` is created on first access if none was set.
	var decorator: DECORATOR is writable, lazy do
		return new HTMLDecorator
	end

	# Create a new `MarkdownEmitter` using a custom `decorator`.
	init with_decorator(processor: PROCESSOR, decorator: DECORATOR) do
		init processor
		self.decorator = decorator
	end

	# Output `block` in a fresh buffer and return that buffer.
	fun emit(block: Block): Text do
		var output = push_buffer
		block.emit(self)
		pop_buffer
		return output
	end

	# Output the content of `block`.
	fun emit_in(block: Block) do
		block.emit_in(self)
	end

	# Transform and emit the whole markdown `text`.
	fun emit_text(text: Text) do
		emit_text_until(text, 0, null)
	end

	# Transform and emit the markdown `text` from `start` until a token
	# with the same type as `token` is found.
	#
	# When looking for an emphasis token, the matching strong token stops
	# the emission too.
	# Return the position of the stopping token, or `-1` if the end of
	# `text` was reached (always the case when `token` is null or a `TokenNone`).
	#
	# Note: the previous `current_text` and `current_pos` are restored only
	# when the end of `text` is reached.
	fun emit_text_until(text: Text, start: Int, token: nullable Token): Int do
		var saved_text = current_text
		var saved_pos = current_pos
		current_text = text
		current_pos = start
		while current_pos < text.length do
			var found = processor.token_at(text, current_pos)
			if token != null and not token isa TokenNone then
				var stops = found.is_same_type(token) or
					(token isa TokenEmStar and found isa TokenStrongStar) or
					(token isa TokenEmUnderscore and found isa TokenStrongUnderscore)
				if stops then return current_pos
			end
			# Emitting a token may move `current_pos` forward.
			found.emit(self)
			current_pos += 1
		end
		current_text = saved_text
		current_pos = saved_pos
		return -1
	end

	# Currently processed position in `current_text`.
	# Used when visiting inline productions with `emit_text_until`.
	private var current_pos: Int = -1

	# Currently processed text.
	# Used when visiting inline productions with `emit_text_until`.
	private var current_text: nullable Text = null

	# Stacked buffers, the last one receives the output.
	private var buffer_stack = new List[FlatBuffer]

	# Push a new empty buffer on the stack and return it.
	private fun push_buffer: FlatBuffer do
		var fresh = new FlatBuffer
		buffer_stack.add fresh
		return fresh
	end

	# Pop the last buffer.
	private fun pop_buffer do
		buffer_stack.pop
	end

	# Current output buffer.
	#
	# At least one buffer must have been pushed.
	private fun current_buffer: FlatBuffer do
		assert not buffer_stack.is_empty
		return buffer_stack.last
	end

	# Append `e` to the current buffer.
	fun add(e: Writable) do
		var target = current_buffer
		if e isa Text then
			target.append e
		else
			target.append e.write_to_string
		end
	end

	# Append the character `c` to the current buffer.
	fun addc(c: Char) do
		add c.to_s
	end

	# Append a "\n" line break.
	fun addn do
		add "\n"
	end
end
568
# A link reference.
#
# Link references are defined once in the markdown document
# and reused as shortcuts by the links of the text.
#
# ~~~raw
# [1]: http://example.com/ "Optional title"
# ~~~
class LinkRef

	# Target of the link (href).
	var link: String

	# Optional title of the link.
	var title: nullable String = null

	# Is the link an abbreviation?
	var is_abbrev = false

	# Create a link reference to `link` with an optional `title`.
	init with_title(link: String, title: nullable String) do
		self.title = title
		self.link = link
	end
end
592
# A `Decorator` renders markdown elements into a specific output format.
#
# Every service receives the emitter `v` the output must be written to.
# The default decorator is `HTMLDecorator`.
interface Decorator

	# Kind of emitter used for decoration.
	type EMITTER: MarkdownEmitter

	# Render the ruler `block`.
	fun add_ruler(v: EMITTER, block: BlockRuler) is abstract

	# Render the headline `block` with its corresponding level.
	fun add_headline(v: EMITTER, block: BlockHeadline) is abstract

	# Render the paragraph `block`.
	fun add_paragraph(v: EMITTER, block: BlockParagraph) is abstract

	# Render the code or fence `block`.
	fun add_code(v: EMITTER, block: BlockCode) is abstract

	# Render the blockquote `block`.
	fun add_blockquote(v: EMITTER, block: BlockQuote) is abstract

	# Render the unordered list `block`.
	fun add_unorderedlist(v: EMITTER, block: BlockUnorderedList) is abstract

	# Render the ordered list `block`.
	fun add_orderedlist(v: EMITTER, block: BlockOrderedList) is abstract

	# Render the list item `block`.
	fun add_listitem(v: EMITTER, block: BlockListItem) is abstract

	# Render `text` as an emphasis.
	fun add_em(v: EMITTER, text: Text) is abstract

	# Render `text` as a strong emphasis.
	fun add_strong(v: EMITTER, text: Text) is abstract

	# Render `text` as a strikethrough.
	#
	# Extended mode only (see `MarkdownProcessor::ext_mode`).
	fun add_strike(v: EMITTER, text: Text) is abstract

	# Render a link to `link` labeled `name`, with an optional title `comment`.
	fun add_link(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the image at `link` described by `name`, with an optional title `comment`.
	fun add_image(v: EMITTER, link: Text, name: Text, comment: nullable Text) is abstract

	# Render the abbreviation `name` explained by `comment`.
	fun add_abbr(v: EMITTER, name: Text, comment: Text) is abstract

	# Render a code span read from `buffer` between `from` and `to`.
	fun add_span_code(v: EMITTER, buffer: Text, from: Int, to: Int) is abstract

	# Render the text `value` and escape it.
	fun append_value(v: EMITTER, value: Text) is abstract

	# Render the code read from `buffer` between `from` and `to` and escape it.
	fun append_code(v: EMITTER, buffer: Text, from: Int, to: Int) is abstract

	# Render the escaped form of the character `char`.
	fun escape_char(v: EMITTER, char: Char) is abstract

	# Render a line break.
	fun add_line_break(v: EMITTER) is abstract

	# Generate a new valid HTML id from `txt`.
	fun strip_id(txt: String): String is abstract

	# Headlines found during the processing, indexed by their ids.
	fun headlines: ArrayMap[String, HeadLine] is abstract
end
665
# A headline found in a markdown document.
class HeadLine

	# Unique identifier of this headline.
	var id: String

	# Text of the headline.
	var title: String

	# Level of this headline.
	#
	# According to the markdown specification, the level must be in `[1..6]`.
	var level: Int
end
679
# `Decorator` that outputs HTML.
class HTMLDecorator
	super Decorator

	redef var headlines = new ArrayMap[String, HeadLine]

	redef fun add_ruler(v, block) do v.add "<hr/>\n"

	# Record the headline in `headlines` under a fresh id, then output it.
	redef fun add_headline(v, block) do
		# save headline
		var txt = block.block.first_line.value
		var id = strip_id(txt)
		var lvl = block.depth
		headlines[id] = new HeadLine(id, txt, lvl)
		# output it
		v.add "<h{lvl} id=\"{id}\">"
		v.emit_in block
		v.add "</h{lvl}>\n"
	end

	redef fun add_paragraph(v, block) do
		v.add "<p>"
		v.emit_in block
		v.add "</p>\n"
	end

	# The meta string of a fence, if any, becomes the `class` of the `pre` tag.
	redef fun add_code(v, block) do
		var meta: nullable Text = null
		if block isa BlockFence then meta = block.meta
		if meta != null then
			# The meta comes from the document: escape it so it cannot
			# close the attribute and inject markup.
			v.add "<pre class=\""
			append_value(v, meta)
			v.add "\"><code>"
		else
			v.add "<pre><code>"
		end
		v.emit_in block
		v.add "</code></pre>\n"
	end

	redef fun add_blockquote(v, block) do
		v.add "<blockquote>\n"
		v.emit_in block
		v.add "</blockquote>\n"
	end

	redef fun add_unorderedlist(v, block) do
		v.add "<ul>\n"
		v.emit_in block
		v.add "</ul>\n"
	end

	redef fun add_orderedlist(v, block) do
		v.add "<ol>\n"
		v.emit_in block
		v.add "</ol>\n"
	end

	redef fun add_listitem(v, block) do
		v.add "<li>"
		v.emit_in block
		v.add "</li>\n"
	end

	# `text` is added as is: it is expected to be already rendered.
	redef fun add_em(v, text) do
		v.add "<em>"
		v.add text
		v.add "</em>"
	end

	# `text` is added as is: it is expected to be already rendered.
	redef fun add_strong(v, text) do
		v.add "<strong>"
		v.add text
		v.add "</strong>"
	end

	# `text` is added as is: it is expected to be already rendered.
	redef fun add_strike(v, text) do
		v.add "<del>"
		v.add text
		v.add "</del>"
	end

	# The `title` attribute is only output for a non-empty `comment`.
	redef fun add_image(v, link, name, comment) do
		v.add "<img src=\""
		append_value(v, link)
		v.add "\" alt=\""
		append_value(v, name)
		v.add "\""
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add "/>"
	end

	# The `title` attribute is only output for a non-empty `comment`.
	# The label `name` is processed as markdown text.
	redef fun add_link(v, link, name, comment) do
		v.add "<a href=\""
		append_value(v, link)
		v.add "\""
		if comment != null and not comment.is_empty then
			v.add " title=\""
			append_value(v, comment)
			v.add "\""
		end
		v.add ">"
		v.emit_text(name)
		v.add "</a>"
	end

	# The abbreviation `name` is processed as markdown text.
	redef fun add_abbr(v, name, comment) do
		v.add "<abbr title=\""
		append_value(v, comment)
		v.add "\">"
		v.emit_text(name)
		v.add "</abbr>"
	end

	redef fun add_span_code(v, text, from, to) do
		v.add "<code>"
		append_code(v, text, from, to)
		v.add "</code>"
	end

	redef fun add_line_break(v) do
		v.add "<br/>"
	end

	redef fun append_value(v, text) do
		for c in text do escape_char(v, c)
	end

	# Replace `&`, `<`, `>`, `"` and `'` by their entities.
	redef fun escape_char(v, c) do
		if c == '&' then
			v.add "&amp;"
		else if c == '<' then
			v.add "&lt;"
		else if c == '>' then
			v.add "&gt;"
		else if c == '"' then
			v.add "&quot;"
		else if c == '\'' then
			v.add "&apos;"
		else
			v.addc c
		end
	end

	# Only `&`, `<` and `>` are escaped in code; `to` is excluded.
	redef fun append_code(v, buffer, from, to) do
		for i in [from..to[ do
			var c = buffer[i]
			if c == '&' then
				v.add "&amp;"
			else if c == '<' then
				v.add "&lt;"
			else if c == '>' then
				v.add "&gt;"
			else
				v.addc c
			end
		end
	end

	# Spaces become underscores; other characters are kept only if they are
	# letters, digits or one of `allowed_id_chars`.
	# If the id is already a key of `headlines`, the first free
	# `id_1`, `id_2`... is returned instead.
	redef fun strip_id(txt) do
		var b = new FlatBuffer
		for c in txt do
			if c == ' ' then
				b.add '_'
			else if c.is_letter or c.is_digit or allowed_id_chars.has(c) then
				b.add c
			end
		end
		var res = b.to_s
		# check for multiple id definitions
		if not headlines.has_key(res) then return res
		var i = 1
		var key = "{res}_{i}"
		while headlines.has_key(key) do
			i += 1
			key = "{res}_{i}"
		end
		return key
	end

	# Characters, besides letters and digits, that `strip_id` keeps.
	private var allowed_id_chars: Array[Char] = ['-', '_', ':', '.']
end
866
# A location (range of lines and columns) in a markdown input.
class MDLocation

	# Starting line number (starting from 1).
	var line_start: Int

	# Starting column number (starting from 1).
	var column_start: Int

	# Stopping line number (starting from 1).
	var line_end: Int

	# Stopping column number (starting from 1).
	var column_end: Int

	# Formatted as `line_start,column_start--line_end,column_end`.
	redef fun to_s do
		var from = "{line_start},{column_start}"
		var to = "{line_end},{column_end}"
		return "{from}--{to}"
	end
end
884
# A block of markdown lines.
#
# A `MDBlock` can contain lines and/or sub-blocks.
# Lines and sub-blocks are both kept as doubly linked lists.
class MDBlock

	# Position of `self` in the input.
	var location: MDLocation

	# Kind of block (`BlockNone` until a specific kind is set).
	# See `Block`.
	var kind: Block = new BlockNone(self) is writable

	# First line if any.
	var first_line: nullable MDLine = null is writable

	# Last line if any.
	var last_line: nullable MDLine = null is writable

	# First sub-block if any.
	var first_block: nullable MDBlock = null is writable

	# Last sub-block if any.
	var last_block: nullable MDBlock = null is writable

	# Previous block if any.
	var prev: nullable MDBlock = null is writable

	# Next block if any.
	var next: nullable MDBlock = null is writable

	# Does this block contain sub-blocks?
	fun has_blocks: Bool do return first_block != null

	# Count sub-blocks.
	fun count_blocks: Int do
		var n = 0
		var cur = first_block
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Does this block contain lines?
	fun has_lines: Bool do return first_line != null

	# Count block lines.
	fun count_lines: Int do
		var n = 0
		var cur = first_line
		while cur != null do
			n += 1
			cur = cur.next
		end
		return n
	end

	# Split `self`, creating a new sub-block having `line` as `last_line`.
	#
	# The lines from `first_line` to `line` are moved into the new
	# sub-block, which is appended to the sub-blocks of `self` and returned.
	# `self` keeps the lines following `line`.
	fun split(line: MDLine): MDBlock do
		# location for new block
		var new_loc = new MDLocation(
			first_line.location.line_start,
			first_line.location.column_start,
			line.location.line_end,
			line.location.column_end)
		# create block
		var block = new MDBlock(new_loc)
		block.first_line = first_line
		block.last_line = line
		# detach the moved lines from the remaining ones
		first_line = line.next
		line.next = null
		if first_line == null then
			last_line = null
		else
			first_line.prev = null
			# update current block loc
			location.line_start = first_line.location.line_start
			location.column_start = first_line.location.column_start
		end
		# append the new sub-block, linked in both directions
		if first_block == null then
			first_block = block
		else
			block.prev = last_block
			last_block.next = block
		end
		last_block = block
		return block
	end

	# Add a `line` at the end of this block.
	#
	# The `prev_empty` and `next_empty` flags of the two neighbours are updated.
	fun add_line(line: MDLine) do
		if last_line == null then
			first_line = line
			last_line = line
		else
			last_line.next_empty = line.is_empty
			line.prev_empty = last_line.is_empty
			line.prev = last_line
			last_line.next = line
			last_line = line
		end
	end

	# Remove `line` from this block.
	#
	# `line` must belong to `self`; it is left without neighbours.
	fun remove_line(line: MDLine) do
		if line.prev == null then
			first_line = line.next
		else
			line.prev.next = line.next
		end
		if line.next == null then
			last_line = line.prev
		else
			line.next.prev = line.prev
		end
		line.prev = null
		line.next = null
	end

	# Remove leading empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_leading_empty_lines: Bool do
		var removed = false
		var head = first_line
		while head != null and head.is_empty do
			remove_line head
			removed = true
			head = first_line
		end
		return removed
	end

	# Remove trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_trailing_empty_lines: Bool do
		var removed = false
		var tail = last_line
		while tail != null and tail.is_empty do
			remove_line tail
			removed = true
			tail = last_line
		end
		return removed
	end

	# Remove leading and trailing empty lines.
	#
	# Return `true` if at least one line was removed.
	fun remove_surrounding_empty_lines: Bool do
		var leading_removed = remove_leading_empty_lines
		var trailing_removed = remove_trailing_empty_lines
		return leading_removed or trailing_removed
	end

	# Remove list markers and up to 4 leading spaces.
	# Used to clean nested lists.
	fun remove_list_indent(v: MarkdownProcessor) do
		var line = first_line
		while line != null do
			if not line.is_empty then
				var kind = v.line_kind(line)
				if kind isa LineList then
					line.value = kind.extract_value(line)
				else
					line.value = line.value.substring_from(line.leading.min(4))
				end
				# the value changed: count the leading spaces again
				line.leading = line.process_leading
			end
			line = line.next
		end
	end

	# Collect the text of the lines, each one followed by a line feed.
	#
	# Empty lines only contribute their line feed.
	fun text: String do
		var text = new FlatBuffer
		var line = first_line
		while line != null do
			if not line.is_empty then text.append line.text
			text.append "\n"
			line = line.next
		end
		return text.write_to_string
	end
end
1068
# Representation of a markdown block in the AST.
# Each `Block` is linked to a `MDBlock` that contains markdown code.
abstract class Block

	# The markdown block `self` is related to.
	var block: MDBlock

	# Output `self` using `v.decorator`.
	#
	# By default only the content is output, without decoration.
	fun emit(v: MarkdownEmitter) do v.emit_in(self)

	# Emit the contents of `self`: its lines if any, its sub-blocks otherwise.
	fun emit_in(v: MarkdownEmitter) do
		block.remove_surrounding_empty_lines
		if block.has_lines then
			emit_lines(v)
		else
			emit_blocks(v)
		end
	end

	# Emit lines contained in `block`.
	#
	# The lines are first gathered, without their surrounding spaces, in a
	# temporary buffer; the result is then processed as markdown text.
	# A line ending with at least two spaces is followed by a line break.
	fun emit_lines(v: MarkdownEmitter) do
		var tpl = v.push_buffer
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				# `substring` takes a count, not an end position: the
				# leading spaces must be subtracted too, or that many
				# trailing spaces would be kept.
				var count = (line.value.length - line.leading - line.trailing).max(0)
				v.add line.value.substring(line.leading, count)
				if line.trailing >= 2 then v.decorator.add_line_break(v)
			end
			if line.next != null then
				v.addn
			end
			line = line.next
		end
		v.pop_buffer
		v.emit_text(tpl)
	end

	# Emit sub-blocks contained in `block`.
	fun emit_blocks(v: MarkdownEmitter) do
		var sub = self.block.first_block
		while sub != null do
			sub.kind.emit(v)
			sub = sub.next
		end
	end
end
1116
1117 # A block without any markdown specificities.
1118 #
1119 # Uses the default `Block` implementation unchanged;
1120 # this class is only used for typing purposes.
1121 class BlockNone
1122 super Block
1123 end
1124
# A markdown blockquote.
class BlockQuote
	super Block

	redef fun emit(v) do
		v.decorator.add_blockquote(v, self)
	end

	# Remove the blockquote marker of each line of `block`.
	#
	# The marker is the `>` found after the leading spaces, together
	# with the space following it if any.
	# Lines that do not start with a marker are left untouched.
	private fun remove_block_quote_prefix(block: MDBlock) do
		var line = block.first_line
		while line != null do
			if not line.is_empty and line.value[line.leading] == '>' then
				# position of the first character to keep
				var rem = line.leading + 1
				if rem < line.value.length and line.value[rem] == ' ' then rem += 1
				line.value = line.value.substring_from(rem)
				line.leading = line.process_leading
			end
			line = line.next
		end
	end
end
1150
# A markdown code block.
class BlockCode
	super Block

	# Number of chars to skip at the beginning of each line.
	#
	# Block code lines start at 4 spaces.
	protected var line_start = 4

	redef fun emit(v) do
		v.decorator.add_code(v, self)
	end

	# Lines are output as escaped code, without inline markdown processing.
	# Every line, even an empty one, is followed by a line feed.
	redef fun emit_lines(v) do
		var line = block.first_line
		while line != null do
			if not line.is_empty then
				var code = line.value
				v.decorator.append_code(v, code, line_start, code.length)
			end
			v.addn
			line = line.next
		end
	end
end
1173
# A markdown code-fence block.
#
# The lines are output like the ones of a `BlockCode`,
# except that nothing is skipped at the beginning of the lines.
class BlockFence
	super BlockCode

	# Any string found after the fence token.
	var meta: nullable Text

	# Fence code lines start at 0 spaces.
	redef var line_start = 0
end
1187
# A markdown headline.
class BlockHeadline
	super Block

	redef fun emit(v) do v.decorator.add_headline(v, self)

	# Depth of the headline used to determine the headline level.
	var depth = 0

	# Remove the headline marks from the first line of `block` and set `depth`.
	#
	# The opening `#` marks give the level (6 at most); the closing marks
	# and the spaces around the title are removed.
	# Nothing is done if `depth` is already set or if the line is empty.
	# A line without any title is flagged as empty.
	private fun transform_headline(block: MDBlock) do
		if depth > 0 then return
		var level = 0
		var line = block.first_line
		if line.is_empty then return
		# count the opening marks, then skip the spaces following them
		var start = line.leading
		while start < line.value.length and line.value[start] == '#' do
			level += 1
			start += 1
		end
		while start < line.value.length and line.value[start] == ' ' do
			start += 1
		end
		if start >= line.value.length then
			line.is_empty = true
		else
			# drop the closing marks and the spaces before them, without
			# going back before the beginning of the title
			var nend = line.value.length - line.trailing - 1
			while nend >= start and line.value[nend] == '#' do nend -= 1
			while nend >= start and line.value[nend] == ' ' do nend -= 1
			if nend < start then
				# Only marks, like in `# #`: there is no title to extract
				# (and the length given to `substring` would be negative).
				line.is_empty = true
			else
				line.value = line.value.substring(start, nend - start + 1)
				line.leading = 0
				line.trailing = 0
			end
		end
		depth = level.min(6)
	end
end
1224
# An item of a markdown list.
class BlockListItem
	super Block

	# Hand the item over to the decorator.
	redef fun emit(v) do
		v.decorator.add_listitem(v, self)
	end
end
1231
# A markdown list block.
#
# Shared by ordered and unordered lists, mainly to factorize code.
abstract class BlockList
	super Block

	# Split the list block into one `BlockListItem` sub-block per item.
	#
	# Starting from the second line, a new item begins on each list line,
	# and on each non-empty, non-indented line that follows an empty line.
	private fun init_block(v: MarkdownProcessor) do
		var line = block.first_line
		line = line.next
		while line != null do
			var t = v.line_kind(line)
			var starts_item = t isa LineList
			if not starts_item then
				starts_item = not line.is_empty and line.prev_empty and line.leading == 0
			end
			if starts_item then
				var item = block.split(line.prev.as(not null))
				item.kind = new BlockListItem(item)
			end
			line = line.next
		end
		# whatever remains is the last item
		var last_item = block.split(block.last_line.as(not null))
		last_item.kind = new BlockListItem(last_item)
	end

	# Expand list items as paragraphs if needed.
	#
	# As soon as one item of `block` directly contains a paragraph,
	# every `BlockNone` child of every item is turned into a paragraph.
	private fun expand_paragraphs(block: MDBlock) do
		# first pass: look for a paragraph in any item
		var found = false
		var item = block.first_block
		while item != null and not found do
			if item.kind isa BlockListItem then
				var child = item.first_block
				while child != null do
					if child.kind isa BlockParagraph then
						found = true
						break
					end
					child = child.next
				end
			end
			item = item.next
		end
		if not found then return
		# second pass: promote plain children to paragraphs
		item = block.first_block
		while item != null do
			if item.kind isa BlockListItem then
				var sub = item.first_block
				while sub != null do
					if sub.kind isa BlockNone then sub.kind = new BlockParagraph(sub)
					sub = sub.next
				end
			end
			item = item.next
		end
	end
end
1289
# An ordered markdown list.
class BlockOrderedList
	super BlockList

	# Hand the list over to the decorator.
	redef fun emit(v) do
		v.decorator.add_orderedlist(v, self)
	end
end
1296
# An unordered markdown list.
class BlockUnorderedList
	super BlockList

	# Hand the list over to the decorator.
	redef fun emit(v) do
		v.decorator.add_unorderedlist(v, self)
	end
end
1303
# A paragraph of markdown text.
class BlockParagraph
	super Block

	# Hand the paragraph over to the decorator.
	redef fun emit(v) do
		v.decorator.add_paragraph(v, self)
	end
end
1310
# A markdown horizontal ruler.
class BlockRuler
	super Block

	# Hand the ruler over to the decorator.
	redef fun emit(v) do
		v.decorator.add_ruler(v, self)
	end
end
1317
# A block of raw XML found in the markdown markup.
class BlockXML
	super Block

	# Output the lines of the block verbatim, one output line per input line.
	#
	# Empty lines only produce the line break.
	redef fun emit_lines(v) do
		var current = block.first_line
		while current != null do
			if not current.is_empty then
				v.add current.value
			end
			v.addn
			current = current.next
		end
	end
end
1331
# A markdown line.
#
# Lines are chained together through `prev` and `next`.
class MDLine

	# Location of `self` in the original input.
	var location: MDLocation

	# Text contained in this line.
	var value: String is writable

	# Is this line empty?
	#
	# Lines containing only spaces are considered empty.
	var is_empty: Bool = true is writable

	# Previous line in `MDBlock` or null if first line.
	var prev: nullable MDLine = null is writable

	# Next line in `MDBlock` or null if last line.
	var next: nullable MDLine = null is writable

	# Is the previous line empty?
	var prev_empty: Bool = false is writable

	# Is the next line empty?
	var next_empty: Bool = false is writable

	# Initialize a new MDLine from its string value.
	#
	# Computes `leading`, then `is_empty` and `trailing` for non-blank lines.
	init do
		self.leading = process_leading
		if leading != value.length then
			self.is_empty = false
			self.trailing = process_trailing
		end
	end

	# Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
	#
	# The neighbour lines, if any, are told that `self` is now empty.
	fun clear do
		value = ""
		leading = 0
		trailing = 0
		is_empty = true
		var before = prev
		if before != null then before.next_empty = true
		var after = next
		if after != null then after.prev_empty = true
	end

	# Number of leading spaces on this line.
	var leading: Int = 0 is writable

	# Compute `leading` depending on `value`.
	#
	# When `value` only contains spaces (or nothing) the line is cleared
	# (see `clear`) and `0` is returned, so that the result is always
	# consistent with the possibly updated `value`.
	fun process_leading: Int do
		var count = 0
		var value = self.value
		while count < value.length and value[count] == ' ' do count += 1
		# The test must use the freshly computed `count`: the `leading`
		# attribute still holds the value from before `value` was changed.
		if count == value.length then
			clear
			return 0
		end
		return count
	end

	# Number of trailing spaces on this line.
	var trailing: Int = 0 is writable

	# Compute `trailing` depending on `value`.
	#
	# Returns `value.length` when `value` only contains spaces.
	fun process_trailing: Int do
		var count = 0
		var value = self.value
		# bounded scan: never index before the first character
		while count < value.length and value[value.length - count - 1] == ' ' do
			count += 1
		end
		return count
	end

	# Count the amount of `ch` in this line.
	#
	# Return a value > 0 if this line only consists of `ch` and spaces,
	# `0` otherwise.
	fun count_chars(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then return 0
			count += 1
		end
		return count
	end

	# Count the amount of `ch` at the start of this line ignoring spaces.
	fun count_chars_start(ch: Char): Int do
		var count = 0
		for c in value do
			if c == ' ' then continue
			if c != ch then break
			count += 1
		end
		return count
	end

	# Last XML line if any.
	#
	# Set by `check_html` and `read_xml_comment`.
	private var xml_end_line: nullable MDLine = null

	# Does `value` contain valid XML markup?
	#
	# On success `xml_end_line` is the line where the markup ends.
	# The markup may span the following lines.
	private fun check_html: Bool do
		var tags = new Array[String]
		var tmp = new FlatBuffer
		var pos = leading
		# `<!` may start a comment
		if pos + 1 < value.length and value[pos + 1] == '!' then
			if read_xml_comment(self, pos) > 0 then return true
		end
		pos = value.read_xml(tmp, pos, false)
		if pos <= -1 then return false
		var tag = tmp.xml_tag
		if not tag.is_html_block then
			return false
		end
		if tag == "hr" then
			# `hr` never has a closing tag
			xml_end_line = self
			return true
		end
		tags.add tag
		# `tags` is the stack of block tags still waiting for their closing tag
		var line: nullable MDLine = self
		while line != null do
			# jump to the next tag of the line
			while pos < line.value.length and line.value[pos] != '<' do
				pos += 1
			end
			if pos >= line.value.length then
				# end of line: a `/` just before the last character
				# closes the pending tag
				if pos - 2 >= 0 and line.value[pos - 2] == '/' then
					tags.pop
					if tags.is_empty then
						xml_end_line = line
						break
					end
				end
				line = line.next
				pos = 0
			else
				tmp = new FlatBuffer
				var new_pos = line.value.read_xml(tmp, pos, false)
				if new_pos > 0 then
					tag = tmp.xml_tag
					if tag.is_html_block and not tag == "hr" then
						if tmp[1] == '/' then
							# closing tags must match the last opened one
							if tags.last != tag then
								return false
							end
							tags.pop
						else
							tags.add tag
						end
					end
					if tags.is_empty then
						xml_end_line = line
						break
					end
					pos = new_pos
				else
					pos += 1
				end
			end
		end
		return tags.is_empty
	end

	# Read a XML comment.
	#
	# Used by `check_html`.
	# `start` is the position of the `<` in `first_line`.
	# Returns the position after the closing `-->` (in the line where it
	# was found) and sets `first_line.xml_end_line`, or returns `-1` when
	# no complete comment is found.
	private fun read_xml_comment(first_line: MDLine, start: Int): Int do
		var line: nullable MDLine = first_line
		if start + 3 < line.value.length then
			# `<!--` is looked up relative to `start`, the `<` may be indented
			if line.value[start + 2] == '-' and line.value[start + 3] == '-' then
				var pos = start + 4
				while line != null do
					while pos < line.value.length and line.value[pos] != '-' do
						pos += 1
					end
					if pos == line.value.length then
						line = line.next
						pos = 0
					else
						if pos + 2 < line.value.length then
							if line.value[pos + 1] == '-' and line.value[pos + 2] == '>' then
								first_line.xml_end_line = line
								return pos + 3
							end
						end
						pos += 1
					end
				end
			end
		end
		return -1
	end

	# Extract the text of `self` without leading and trailing.
	fun text: String do
		# `substring` takes a length, not an end position
		var count = value.length - trailing - leading
		return value.substring(leading, count.max(0))
	end
end
1535
# A kind of markdown line.
interface Line

	# Parse the line at the current position of `v`.
	#
	# See `MarkdownProcessor::recurse`.
	fun process(v: MarkdownProcessor) is abstract
end
1543
# An empty markdown line.
class LineEmpty
	super Line

	# Skip the line: only move the processor to the next line.
	redef fun process(v) do
		var following = v.current_line.next
		v.current_line = following
	end
end
1552
# A line without a specific markdown construction.
#
# Mainly used as part of another construct such as paragraphs or lists.
class LineOther
	super Line

	# Gather the following lines into a `BlockParagraph` or a `BlockNone`.
	redef fun process(v) do
		var line = v.current_line
		var was_empty = line.prev_empty
		# look for the end of the block: an empty line or the start of
		# another construct
		while line != null and not line.is_empty do
			var t = v.line_kind(line)
			if (v.in_list or v.ext_mode) and t isa LineList then break
			if v.ext_mode and (t isa LineCode or t isa LineFence) then break
			if t isa LineHeadline or t isa LineHeadline1 or t isa LineHeadline2 or
			   t isa LineHR or t isa LineBlockquote or t isa LineXML then break
			line = line.next
		end
		# split the block and choose its kind
		var block: MDBlock
		var is_plain: Bool
		if line != null and not line.is_empty then
			# stopped on another construct: it stays out of the block
			block = v.current_block.split(line.prev.as(not null))
			is_plain = v.in_list and not was_empty
		else
			if line == null then
				block = v.current_block.split(v.current_block.last_line.as(not null))
			else
				block = v.current_block.split(line)
			end
			is_plain = v.in_list and (line == null or not line.is_empty) and not was_empty
		end
		if is_plain then
			block.kind = new BlockNone(block)
		else
			block.kind = new BlockParagraph(block)
		end
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1602
# A line of markdown code.
class LineCode
	super Line

	# Gather the code lines, and the empty lines between them, into a `BlockCode`.
	redef fun process(v) do
		# find the first line that is neither empty nor code
		var stop = v.current_line
		while stop != null and (stop.is_empty or v.line_kind(stop) isa LineCode) do
			stop = stop.next
		end
		# the block ends just before that line, or at the end of the input
		var code: MDBlock
		if stop == null then
			code = v.current_block.split(v.current_block.last_line.as(not null))
		else
			code = v.current_block.split(stop.prev.as(not null))
		end
		code.kind = new BlockCode(code)
		code.remove_surrounding_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1625
# A line of raw XML.
class LineXML
	super Line

	# Isolate the lines up to `xml_end_line` into a `BlockXML`.
	redef fun process(v) do
		var first = v.current_line
		# close what comes before the XML
		var before = first.prev
		if before != null then
			v.current_block.split(before)
		end
		var xml = v.current_block.split(first.xml_end_line.as(not null))
		xml.kind = new BlockXML(xml)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1640
# A line of markdown blockquote.
class LineBlockquote
	super Line

	# Gather the quoted lines into a `BlockQuote` and process its content.
	redef fun process(v) do
		# The quote ends on the first non-empty, non-indented line that
		# follows an empty line and is not itself a quote line.
		var stop = v.current_line
		while stop != null do
			if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
			   not (v.line_kind(stop) isa LineBlockquote) then break
			stop = stop.next
		end
		# build the sub block
		var quote: MDBlock
		if stop == null then
			quote = v.current_block.split(v.current_block.last_line.as(not null))
		else
			quote = v.current_block.split(stop.prev.as(not null))
		end
		var kind = new BlockQuote(quote)
		quote.kind = kind
		quote.remove_surrounding_empty_lines
		kind.remove_block_quote_prefix(quote)
		v.current_line = stop
		# the content of the quote is markdown too
		v.recurse(quote, false)
		v.current_line = v.current_block.first_line
	end
end
1670
# A line of markdown ruler.
class LineHR
	super Line

	# Isolate the current line into a `BlockRuler`.
	redef fun process(v) do
		var ruler_line = v.current_line
		# close what comes before the ruler
		var before = ruler_line.prev
		if before != null then
			v.current_block.split(before)
		end
		var ruler = v.current_block.split(ruler_line.as(not null))
		ruler.kind = new BlockRuler(ruler)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1684
# A line of markdown code fence.
class LineFence
	super Line

	# Gather the lines up to the closing fence into a `BlockFence`.
	#
	# Without a closing fence, the block runs to the end of the current block.
	redef fun process(v) do
		# look for the closing fence
		var after = v.current_line.next
		while after != null and not (v.line_kind(after) isa LineFence) do
			after = after.next
		end
		# step over the closing fence when there is one
		if after != null then after = after.next
		# build the fence block
		var fence: MDBlock
		if after == null then
			fence = v.current_block.split(v.current_block.last_line.as(not null))
		else
			fence = v.current_block.split(after.prev.as(not null))
		end
		# what follows the opening fence token is kept as meta
		var meta = fence.first_line.value.meta_from_fence
		fence.kind = new BlockFence(fence, meta)
		# the fence lines themselves are not part of the code
		fence.first_line.clear
		var last = fence.last_line
		if last != null and v.line_kind(last) isa LineFence then
			last.clear
		end
		fence.remove_surrounding_empty_lines
		v.current_line = after
	end
end
1717
# A line of markdown headline.
class LineHeadline
	super Line

	# Isolate the current line into a `BlockHeadline` and strip its marks.
	redef fun process(v) do
		var title_line = v.current_line
		# close what comes before the headline
		var before = title_line.prev
		if before != null then
			v.current_block.split(before)
		end
		var title = v.current_block.split(title_line.as(not null))
		var headline = new BlockHeadline(title)
		title.kind = headline
		headline.transform_headline(title)
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1734
# A line of markdown headline of level 1.
class LineHeadline1
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 1.
	#
	# The line that follows the title is cleared.
	redef fun process(v) do
		var title_line = v.current_line
		# close what comes before the headline
		var before = title_line.prev
		if before != null then
			v.current_block.split(before)
		end
		title_line.next.clear
		var title = v.current_block.split(title_line.as(not null))
		var headline = new BlockHeadline(title)
		headline.depth = 1
		headline.transform_headline(title)
		title.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1753
# A line of markdown headline of level 2.
class LineHeadline2
	super LineHeadline

	# Isolate the current line into a `BlockHeadline` of depth 2.
	#
	# The line that follows the title is cleared.
	redef fun process(v) do
		var title_line = v.current_line
		# close what comes before the headline
		var before = title_line.prev
		if before != null then
			v.current_block.split(before)
		end
		title_line.next.clear
		var title = v.current_block.split(title_line.as(not null))
		var headline = new BlockHeadline(title)
		headline.depth = 2
		headline.transform_headline(title)
		title.kind = headline
		v.current_block.remove_leading_empty_lines
		v.current_line = v.current_block.first_line
	end
end
1772
# A line of markdown list.
#
# Mainly used to factorize code between ordered and unordered lists.
class LineList
	super Line

	# Gather the lines of the list into a list block, split it into items
	# and process the content of each item.
	redef fun process(v) do
		# The list ends on the first non-empty, non-indented line that
		# follows an empty line and is not a list line.
		var stop = v.current_line
		while stop != null do
			var t = v.line_kind(stop)
			if not stop.is_empty and stop.prev_empty and stop.leading == 0 and
			   not (t isa LineList) then break
			stop = stop.next
		end
		# build the list block
		var list: MDBlock
		if stop == null then
			list = v.current_block.split(v.current_block.last_line.as(not null))
		else
			list = v.current_block.split(stop.prev.as(not null))
		end
		var kind = block_kind(list)
		list.kind = kind
		# the flags are reset before and after the empty lines are removed
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		list.remove_surrounding_empty_lines
		list.first_line.prev_empty = false
		list.last_line.next_empty = false
		kind.init_block(v)
		# each item contains markdown
		var item = list.first_block
		while item != null do
			item.remove_list_indent(v)
			v.recurse(item, true)
			item = item.next
		end
		kind.expand_paragraphs(list)
		v.current_line = stop
	end

	# Create the kind of list block that matches this kind of line.
	protected fun block_kind(block: MDBlock): BlockList is abstract

	# Extract the text of the item from `line`.
	protected fun extract_value(line: MDLine): String is abstract
end
1818
# A line of ordered list.
class LineOList
	super LineList

	# Ordered list lines build `BlockOrderedList` blocks.
	redef fun block_kind(block) do
		return new BlockOrderedList(block)
	end

	# The text starts two characters after the first `.` of the line.
	redef fun extract_value(line) do
		var text = line.value
		var from = text.index_of('.') + 2
		return text.substring_from(from)
	end
end
1829
# A line of unordered list.
class LineUList
	super LineList

	# Unordered list lines build `BlockUnorderedList` blocks.
	redef fun block_kind(block) do
		return new BlockUnorderedList(block)
	end

	# The text starts two characters after the leading spaces.
	redef fun extract_value(line) do
		var from = line.leading + 2
		return line.value.substring_from(from)
	end
end
1840
# A token represents a character of the markdown input.
#
# Some tokens have a specific markup behaviour, handled by the subclasses.
abstract class Token

	# Location of `self` in the original input.
	var location: MDLocation

	# Position of `self` in the input, independent from lines.
	var pos: Int

	# Character found at `pos` in the markdown input.
	var char: Char

	# Output the token; by default its character is output as is.
	fun emit(v: MarkdownEmitter) do
		v.addc char
	end
end
1857
# A token with no specific markup behaviour.
class TokenNone
	super Token
end
1862
# A token of emphasis.
abstract class TokenEm
	super Token

	# Emit the text up to the matching token as an emphasis.
	#
	# When no matching token is found, only the character is output.
	redef fun emit(v) do
		var content = v.push_buffer
		var closing = v.emit_text_until(v.current_text.as(not null), pos + 1, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_em(v, content)
		v.current_pos = closing
	end
end
1879
# A token of emphasis written with a star.
class TokenEmStar
	super TokenEm
end
1884
# A token of emphasis written with an underscore.
class TokenEmUnderscore
	super TokenEm
end
1889
# A token of strong emphasis.
abstract class TokenStrong
	super Token

	# Emit the text up to the matching token as a strong emphasis.
	#
	# When no matching token is found, only the character is output.
	redef fun emit(v) do
		var content = v.push_buffer
		var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strong(v, content)
		# the closing token is two characters long
		v.current_pos = closing + 1
	end
end
1906
# A token of strong emphasis written with stars.
class TokenStrongStar
	super TokenStrong
end
1911
# A token of strong emphasis written with underscores.
class TokenStrongUnderscore
	super TokenStrong
end
1916
# A token of span code.
#
# Mainly used to factorize work between single and double quoted span codes.
abstract class TokenCode
	super Token

	# Emit the text up to the matching token as a span of code.
	#
	# Spaces around the code are dropped; a span with only spaces outputs
	# nothing. When no matching token is found, only the character is output.
	redef fun emit(v) do
		var from = pos + next_pos + 1
		var to = v.processor.find_token(v.current_text.as(not null), from, self)
		if to <= 0 then
			v.addc char
			return
		end
		v.current_pos = to + next_pos
		# trim the spaces on both sides
		while from < to and v.current_text[from] == ' ' do from += 1
		if from >= to then return
		while v.current_text[to - 1] == ' ' do to -= 1
		v.decorator.add_span_code(v, v.current_text.as(not null), from, to)
	end

	# Number of characters of the token after the first one.
	private fun next_pos: Int is abstract
end
1939
# A token of span code written with a single character.
class TokenCodeSingle
	super TokenCode

	redef fun next_pos do
		return 0
	end
end
1946
# A token of span code written with a doubled character.
class TokenCodeDouble
	super TokenCode

	redef fun next_pos do
		return 1
	end
end
1953
# A link or image token.
#
# This class is mainly used to factorize work between images and links.
abstract class TokenLinkOrImage
	super Token

	# Link address
	var link: nullable Text = null

	# Link text
	var name: nullable Text = null

	# Link title
	var comment: nullable Text = null

	# Is the link construct an abbreviation?
	var is_abbrev = false

	# Emit the link or the image when the construct is valid,
	# only the character otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_link(v, tmp, pos, self)
		if b > 0 then
			emit_hyper(v)
			v.current_pos = b
		else
			v.addc char
		end
	end

	# Emit the hyperlink as link or image.
	private fun emit_hyper(v: MarkdownEmitter) is abstract

	# Check if the link is a valid link.
	#
	# Parses the construct that starts at `start` in the current text of `v`
	# and fills `name`, `link`, `comment` and `is_abbrev`.
	# Three forms are recognized: `[name](address "title")`, `[name][id]`
	# and `[name]` alone; the last two are resolved with the link
	# references of the processor.
	#
	# Returns the position of the last character of the construct,
	# or `-1` when the construct is not a valid link.
	private fun check_link(v: MarkdownEmitter, out: FlatBuffer, start: Int, token: Token): Int do
		var md = v.current_text
		var pos: Int
		# a link starts with one character, anything else with two
		if token isa TokenLink then
			pos = start + 1
		else
			pos = start + 2
		end
		var tmp = new FlatBuffer
		pos = md.read_md_link_id(tmp, pos)
		if pos < start then return -1
		name = tmp
		var old_pos = pos
		pos += 1
		pos = md.skip_spaces(pos)
		if pos < start then
			# end of the text: `[name]` alone
			var tid = name.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				is_abbrev = lr.is_abbrev
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		else if md[pos] == '(' then
			# inline form: `(address "title")`
			pos += 1
			pos = md.skip_spaces(pos)
			if pos < start then return -1
			tmp = new FlatBuffer
			var use_lt = md[pos] == '<'
			if use_lt then
				pos = md.read_until(tmp, pos + 1, '>')
			else
				pos = md.read_md_link(tmp, pos)
			end
			if pos < start then return -1
			if use_lt then pos += 1
			# the `>` may be the last character of the text
			if pos >= md.length then return -1
			link = tmp.write_to_string
			if md[pos] == ' ' then
				pos = md.skip_spaces(pos)
				# only spaces until the end of the text: no closing `)`
				if pos < start then return -1
				if md[pos] == '"' then
					pos += 1
					tmp = new FlatBuffer
					pos = md.read_until(tmp, pos, '"')
					if pos < start then return -1
					comment = tmp.write_to_string
					pos += 1
					pos = md.skip_spaces(pos)
					if pos == -1 then return -1
				end
			end
			if md[pos] != ')' then return -1
		else if md[pos] == '[' then
			# reference form: `[id]`, an empty id means the name is the id
			pos += 1
			tmp = new FlatBuffer
			pos = md.read_raw_until(tmp, pos, ']')
			if pos < start then return -1
			var id
			if tmp.length > 0 then
				id = tmp
			else
				id = name
			end
			var tid = id.write_to_string.to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
			end
		else
			# `[name]` followed by something else
			var tid = name.write_to_string.replace("\n", " ").to_lower
			if v.processor.link_refs.has_key(tid) then
				var lr = v.processor.link_refs[tid]
				link = lr.link
				comment = lr.title
				pos = old_pos
			else
				return -1
			end
		end
		if link == null then return -1
		return pos
	end
end
2072
# A token of markdown link.
class TokenLink
	super TokenLinkOrImage

	# Emit an abbreviation when the link is one and has a title,
	# a regular link otherwise.
	redef fun emit_hyper(v) do
		var text = name.as(not null)
		if not is_abbrev or comment == null then
			v.decorator.add_link(v, link.as(not null), text, comment)
		else
			v.decorator.add_abbr(v, text, comment.as(not null))
		end
	end
end
2085
# A token of markdown image.
class TokenImage
	super TokenLinkOrImage

	# Hand the image over to the decorator.
	redef fun emit_hyper(v) do
		var address = link.as(not null)
		var text = name.as(not null)
		v.decorator.add_image(v, address, text, comment)
	end
end
2094
# A token of HTML/XML.
class TokenHTML
	super Token

	# Emit the markup found at the current position,
	# or the escaped character when the markup is not valid.
	redef fun emit(v) do
		var markup = new FlatBuffer
		var last = check_html(v, markup, v.current_text.as(not null), v.current_pos)
		if last <= 0 then
			v.decorator.escape_char(v, char)
			return
		end
		v.add markup
		v.current_pos = last
	end

	# Is the HTML valid?
	#
	# Also take care of link shortcuts (`<prefix:...>`), which are directly
	# emitted as links. The inline markup is appended to `out`.
	# Returns the position of the last character read or `-1`.
	private fun check_html(v: MarkdownEmitter, out: FlatBuffer, md: Text, start: Int): Int do
		# look for an auto link
		var buffer = new FlatBuffer
		var pos = md.read_until(buffer, start + 1, ':', ' ', '>', '\n')
		if pos != -1 and md[pos] == ':' and buffer.is_link_prefix then
			pos = md.read_until(buffer, pos, '>')
			if pos != -1 then
				var address = buffer.write_to_string
				v.decorator.add_link(v, address, address, null)
				return pos
			end
		end
		# TODO check for mailto
		# look for inline html
		if start + 2 >= md.length then return -1
		return md.read_xml(out, start, true)
	end
end
2132
# An HTML entity token.
class TokenEntity
	super Token

	# Emit the entity when it is valid, the escaped character otherwise.
	redef fun emit(v) do
		var tmp = new FlatBuffer
		var b = check_entity(tmp, v.current_text.as(not null), pos)
		if b > 0 then
			v.add tmp
			v.current_pos = b
		else
			v.decorator.escape_char(v, char)
		end
	end

	# Is the entity valid?
	#
	# Reads `md` from `start` up to the next `;` into `out`.
	# Three forms are accepted: hexadecimal (`&#x41;`), decimal (`&#65;`)
	# and named (`&amp;`), the name being made of letters and digits.
	#
	# Returns the position of the `;`, or `-1` when there is no `;`
	# or when the entity is not well formed.
	private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
		var pos = md.read_until(out, start, ';')
		if pos < 0 or out.length < 3 then
			return -1
		end
		if out[1] == '#' then
			if out[2] == 'x' or out[2] == 'X' then
				# hexadecimal: at least one digit is needed
				if out.length < 4 then return -1
				for i in [3..out.length[ do
					var c = out[i]
					var is_hex = (c >= '0' and c <= '9') or
						(c >= 'a' and c <= 'f') or
						(c >= 'A' and c <= 'F')
					if not is_hex then return -1
				end
			else
				# decimal
				for i in [2..out.length[ do
					var c = out[i]
					if c < '0' or c > '9' then return -1
				end
			end
		else
			# named entity
			# TODO check that the name is a known entity
			for i in [1..out.length[ do
				var c = out[i]
				if not c.is_digit and not c.is_letter then return -1
			end
		end
		out.add ';'
		return pos
	end
end
2186
# A token of markdown escape.
class TokenEscape
	super Token

	# Skip the escape character and output the following one as is.
	redef fun emit(v) do
		var escaped = v.current_pos + 1
		v.current_pos = escaped
		v.addc v.current_text[v.current_pos]
	end
end
2196
# A token of markdown strike.
#
# Extended mode only (see `MarkdownProcessor::ext_mode`)
class TokenStrike
	super Token

	# Emit the text up to the matching token as a strike.
	#
	# When no matching token is found, only the character is output.
	redef fun emit(v) do
		var content = v.push_buffer
		var closing = v.emit_text_until(v.current_text.as(not null), pos + 2, self)
		v.pop_buffer
		if closing <= 0 then
			v.addc char
			return
		end
		v.decorator.add_strike(v, content)
		# the closing token is two characters long
		v.current_pos = closing + 1
	end
end
2215
2216 redef class Text
2217
# Position of the first character, from `start`, that is neither a space
# nor a line break.
#
# Returns `-1` when the end of `self` is reached.
private fun skip_spaces(start: Int): Int do
	var idx = start
	loop
		if idx <= -1 or idx >= length then break
		var c = self[idx]
		if c != ' ' and c != '\n' then break
		idx += 1
	end
	if idx >= length then return -1
	return idx
end
2227
# Append `self` to `out`, from `start` up to the first character of `nend`.
#
# Backslash escapes are resolved with `escape`.
# Returns the position of the end character,
# or `-1` when the end of `self` is reached first.
private fun read_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var idx = start
	while idx < length do
		var c = self[idx]
		if c == '\\' and idx + 1 < length then
			idx = escape(out, self[idx + 1], idx)
		else
			if nend.has(c) then break
			out.add c
		end
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2252
# Append `self` to `out`, from `start` up to the first character of `nend`.
#
# The text is copied as is, no escape is resolved.
# Returns the position of the end character,
# or `-1` when the end of `self` is reached first.
private fun read_raw_until(out: FlatBuffer, start: Int, nend: Char...): Int do
	var idx = start
	while idx < length do
		var c = self[idx]
		if nend.has(c) then break
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2273
# Read `self` as XML until `to` and append it to the `out` buffer.
#
# Characters of `to` found inside a quoted string (`"..."` or `'...'`)
# do not end the reading. Inside a string, a backslash and the character
# that follows it are copied as is, and only the quote that opened the
# string can close it.
#
# Returns the position of the end character,
# or `-1` when the end of `self` is reached first.
private fun read_xml_until(out: FlatBuffer, from: Int, to: Char...): Int do
	var pos = from
	var in_str = false
	var str_char: nullable Char = null
	while pos < length do
		var c = self[pos]
		if in_str then
			if c == '\\' then
				out.add c
				pos += 1
				if pos < length then
					# copy the escaped character itself, not the backslash again
					out.add self[pos]
					pos += 1
				end
				continue
			end
			if c == str_char then in_str = false
		else if c == '"' or c == '\'' then
			# the other kind of quote inside a string is regular content
			in_str = true
			str_char = c
		else if to.has(c) then
			break
		end
		out.add c
		pos += 1
	end
	if pos == length then return -1
	return pos
end
2319
# Read the XML tag that starts at `start` and append it to `out`.
#
# With `safe_mode`, tag names that are not valid are refused and unsafe
# tags are output with `<` and `>` escaped.
# For `<!`, only `<!` is output and the position of the `!` is returned.
#
# Returns the position of the closing `>`, or `-1` when no tag is read.
private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
	if start + 1 >= length then return -1
	var second = self[start + 1]
	if second == '!' then
		out.append "<!"
		return start + 1
	end
	var is_close_tag = second == '/'
	var pos: Int
	if is_close_tag then
		pos = start + 2
	else
		pos = start + 1
	end
	var is_valid = true
	# read the tag name
	if safe_mode then
		var name = new FlatBuffer
		pos = read_xml_until(name, pos, ' ', '/', '>')
		if pos == -1 then return -1
		var tag = name.write_to_string.trim.to_lower
		if not tag.is_valid_html_tag then
			out.append "&lt;"
			return -1
		end
		if tag.is_html_unsafe then
			is_valid = false
			out.append "&lt;"
		else
			out.append "<"
		end
		if is_close_tag then out.add '/'
		out.append name
	else
		out.add '<'
		if is_close_tag then out.add '/'
		pos = read_xml_until(out, pos, ' ', '/', '>')
		if pos == -1 then return -1
	end
	# read the rest of the tag
	pos = read_xml_until(out, pos, '/', '>')
	if pos == -1 then return -1
	if self[pos] == '/' then
		out.append " /"
		pos = self.read_xml_until(out, pos + 1, '>')
		if pos == -1 then return -1
	end
	if self[pos] != '>' then return -1
	if is_valid then
		out.add '>'
	else
		out.append "&gt;"
	end
	return pos
end
2378
# Append the markdown link address that starts at `start` to `out`.
#
# The address ends on a space outside of any nested parentheses
# or on the `)` that closes the link. Backslash escapes are resolved
# with `escape`.
# Returns the position of the end character,
# or `-1` when the end of `self` is reached first.
private fun read_md_link(out: FlatBuffer, start: Int): Int do
	var idx = start
	# number of parentheses currently open, the one of the link included
	var depth = 1
	while idx < length do
		var c = self[idx]
		if c == '\\' and idx + 1 < length then
			idx = escape(out, self[idx + 1], idx)
		else
			if c == '(' then
				depth += 1
			else if c == ' ' then
				if depth == 1 then break
			else if c == ')' then
				depth -= 1
				if depth == 0 then break
			end
			out.add c
		end
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2405
# Append the markdown link text that starts at `start` to `out`.
#
# The text ends on the `]` that matches the opening bracket of the link;
# nested brackets are part of the text.
# Returns the position of that `]`,
# or `-1` when the end of `self` is reached first.
private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
	var idx = start
	# number of brackets currently open, the one of the link included
	var depth = 1
	while idx < length do
		var c = self[idx]
		if c == '[' then
			depth += 1
		else if c == ']' then
			depth -= 1
			if depth == 0 then break
		end
		out.add c
		idx += 1
	end
	if idx == length then return -1
	return idx
end
2432
# Name, in lowercase, of the XML tag contained in `self`.
#
# The first character and an optional `/` are skipped, then letters and
# digits are read; the last character of `self` is never read.
private fun xml_tag: String do
	var name = new FlatBuffer
	var idx = 1
	if idx < length and self[1] == '/' then idx = 2
	var last = length - 1
	while idx < last do
		var c = self[idx]
		if not c.is_digit and not c.is_letter then break
		name.add c
		idx += 1
	end
	return name.write_to_string.to_lower
end
2444
# Is `self` a non-empty text that only contains characters accepted
# by `Char::is_alpha`?
private fun is_valid_html_tag: Bool do
	var valid = not is_empty
	for c in self do
		if not valid then break
		valid = c.is_alpha
	end
	return valid
end
2452
# Process a backslash escape whose escaped character is `c`.
#
# `pos` is the index of the backslash.
# If `c` is one of the characters markdown allows to escape, `c` alone is
# appended to `out` and `pos + 1` is returned.
# Otherwise a literal backslash is appended to `out` and `pos` is
# returned unchanged.
private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
	var escapable = c == '\\' or c == '[' or c == ']' or c == '(' or c == ')' or
		c == '{' or c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or
		c == '<' or c == '>' or c == '*' or c == '+' or c == '-' or c == '_' or
		c == '!' or c == '`' or c == '~' or c == '^'
	if not escapable then
		out.add '\\'
		return pos
	end
	out.add c
	return pos + 1
end
2465
# Extract the text that follows the fence markers in `self`.
#
# Leading spaces, backticks and tildes are skipped; the remainder of
# `self` is returned trimmed.
# Returns `null` when `self` contains nothing but those characters.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var ch = chars[i]
		var is_fence_char = ch == ' ' or ch == '`' or ch == '~'
		if not is_fence_char then return substring_from(i).trim
		i += 1
	end
	return null
end
2476
# Init a `MDLocation` instance at `pos` in `self`.
#
# Lines are counted from 1. The column is the number of characters seen
# since the last line feed, so a `pos` located on a `'\n'` is reported
# as column 0 of the following line.
#
# `pos` may be equal to `length`: this designates the position right
# after the last character and is counted without reading `self`.
#
# The returned location starts and ends at the same line and column.
private fun pos_to_loc(pos: Int): MDLocation do
	assert pos <= length
	var line = 1
	var col = 0
	var i = 0
	while i <= pos do
		col += 1
		# Do not index `self` at `length`: there is no character there.
		if i < length and self[i] == '\n' then
			line += 1
			col = 0
		end
		i += 1
	end
	return new MDLocation(line, col, line, col)
end
2494
# Is `self` the name of an HTML element listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var name = write_to_string
	return html_unsafe_tags.has(name)
end
2497
# Is `self` the name of an HTML block element listed in `html_block_tags`?
private fun is_html_block: Bool do
	var name = write_to_string
	return html_block_tags.has(name)
end
2500
# Is `self` one of the link prefixes listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefix = write_to_string
	return html_link_prefixes.has(prefix)
end
2503
# Names of the HTML elements considered unsafe.
#
# The array is built once and shared by all calls.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
2505
# Names of the HTML elements treated as block elements.
#
# The array is built once and shared by all calls.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
2507
# Prefixes recognized as the start of a link.
#
# The array is built once and shared by all calls.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2509 end
2510
redef class String

	# Render `self`, read as markdown, with a fresh `MarkdownProcessor`.
	#
	#     var md = "**Hello World!**"
	#     var html = md.md_to_html
	#     assert html == "<p><strong>Hello World!</strong></p>\n"
	fun md_to_html: Writable do return (new MarkdownProcessor).process(self)
end