1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
20 # Parse a markdown string and split it in blocks.
22 # Blocks are then output by a `MarkdownEmitter`.
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
33 # `MarkdownEmitter` used for output.
34 var emitter
: MarkdownEmitter is noinit
, protected writable
36 # Work in extended mode (default).
38 # Behavior changes when using extended mode:
40 # * Lists and code blocks end a paragraph
42 # In normal markdown the following:
46 # * and this is not a list
52 # <p>This is a paragraph
53 # * and this is not a list</p>
56 # When using extended mode this changes to:
59 # <p>This is a paragraph</p>
61 # <li>and this is not a list</li>
65 # * Fences code blocks
67 # If you don't want to indent all your code with 4 spaces,
68 # you can wrap your code in ``` ``` ``` or `~~~`.
74 # print "Hello World!"
80 # If you want to use syntax highlighting tools, most of them need to know what kind
81 # of language they are highlighting.
82 # You can add an optional language identifier after the fence declaration to output
83 # it in the HTML render.
88 # print "# Hello World!".md_to_html
94 # <pre class="nit"><code>import markdown
96 # print "Hello World!".md_to_html
100 # * Underscores (Emphasis)
102 # Underscores in the middle of a word like:
108 # normally produces this:
111 # <p>Con<em>cat</em>this</p>
114 # With extended mode they don't result in emphasis.
117 # <p>Con_cat_this</p>
122 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
123 # strikethrough span is marked with `~~`.
132 # <del>Mistaken text.</del>
136 init do self.emitter
= new MarkdownEmitter(self)
138 # Process the markdown `input` string and return the processed output.
139 fun process
(input
: String): Writable do
146 var parent
= read_lines
(input
)
147 parent
.remove_surrounding_empty_lines
148 recurse
(parent
, false)
149 # output processed text
150 return emitter
.emit
(parent
.kind
)
153 # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
154 private fun read_lines
(input
: String): MDBlock do
155 var block
= new MDBlock(new MDLocation(1, 1, 1, 1))
156 var value
= new FlatBuffer
162 while i
< input
.length
do
166 while not eol
and i
< input
.length
do
171 else if c
== '\r' then
172 else if c
== '\t' then
173 var np
= pos
+ (4 - (pos
& 3))
186 var loc
= new MDLocation(line_pos
, 1, line_pos
, col_pos
)
187 var line
= new MDLine(loc
, value
.write_to_string
)
188 var is_link_ref
= check_link_ref
(line
)
190 if not is_link_ref
then block
.add_line line
196 # Check if line is a block link definition.
197 # Return `true` if line contains a valid link ref and save it into `link_refs`.
198 private fun check_link_ref
(line
: MDLine): Bool do
200 var is_link_ref
= false
201 var id
= new FlatBuffer
202 var link
= new FlatBuffer
203 var comment
= new FlatBuffer
205 if not line
.is_empty
and line
.leading
< 4 and line
.value
[line
.leading
] == '[' then
206 pos
= line
.leading
+ 1
207 pos
= md
.read_until
(id
, pos
, ']')
208 if not id
.is_empty
and pos
+ 2 < line
.value
.length
then
209 if line
.value
[pos
+ 1] == ':' then
211 pos
= md
.skip_spaces
(pos
)
212 if line
.value
[pos
] == '<' then
214 pos
= md
.read_until
(link
, pos
, '>')
217 pos
= md
.read_until
(link
, pos
, ' ', '\n')
219 if not link
.is_empty
then
220 pos
= md
.skip_spaces
(pos
)
221 if pos
> 0 and pos
< line
.value
.length
then
222 var c
= line
.value
[pos
]
223 if c
== '\"' or c
== '\'' or c == '(' then
226 pos = md.read_until(comment, pos, ')')
228 pos = md.read_until(comment, pos, c)
230 if pos > 0 then is_link_ref = true
239 if is_link_ref and not id.is_empty and not link.is_empty then
240 var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
241 add_link_ref(id.write_to_string, lr)
242 if comment.is_empty then last_link_ref = lr
245 comment = new FlatBuffer
246 if not line.is_empty and last_link_ref != null then
248 var c = line.value[pos]
249 if c == '\
"' or c == '\'' or c == '(' then
252 pos = md.read_until(comment, pos, ')')
254 pos = md.read_until(comment, pos, c)
257 if not comment.is_empty then last_link_ref.title = comment.write_to_string
259 if comment.is_empty then return false
265 # This list will be needed during output to expand links.
266 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
268 # Last encountered link ref (for multiline definitions)
270 # Markdown allows link refs to be defined over two lines:
273 # [id]: http://example.com/longish/path/to/resource/here
274 # "Optional Title Here"
277 private var last_link_ref: nullable LinkRef = null
279 # Add a link ref to the list
# The `key` is lowercased with `to_lower` before `ref` is stored in `link_refs`.
280 fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
282 # Recursively split a `block`.
284 # The block is split according to the type of lines it contains.
285 # Some blocks, like lists, can be split again recursively.
286 # The `in_list` mode is used to recurse on list and build
287 # nested paragraphs or code blocks.
288 fun recurse(root: MDBlock, in_list: Bool) do
289 var old_mode = self.in_list
290 var old_root = self.current_block
291 self.in_list = in_list
293 var line = root.first_line
294 while line != null and line.is_empty do
296 if line == null then return
301 while current_line != null do
302 line_kind(current_line.as(not null)).process(self)
304 self.in_list = old_mode
305 self.current_block = old_root
308 # Currently processed line.
309 # Used when visiting blocks with `recurse`.
310 var current_line: nullable MDLine = null is writable
312 # Currently processed block.
313 # Used when visiting blocks with `recurse`.
314 var current_block: nullable MDBlock = null is writable
316 # Is the current recursion in list mode?
317 # Used when visiting blocks with `recurse`
318 private var in_list = false
322 fun line_kind(md: MDLine): Line do
324 var leading = md.leading
325 var trailing = md.trailing
326 if md.is_empty then return new LineEmpty
327 if md.leading > 3 then return new LineCode
328 if value[leading] == '#' then return new LineHeadline
329 if value[leading] == '>' then return new LineBlockquote
332 if value.length - leading - trailing > 2 then
333 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
336 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
342 if value.length - leading - trailing > 2 and
343 (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
344 if md.count_chars(value[leading]) >= 3 then
349 if value.length - leading >= 2 and value[leading + 1] == ' ' then
350 var c = value[leading]
351 if c == '*' or c == '-' or c == '+' then return new LineUList
354 if value.length - leading >= 3 and value[leading].is_digit then
356 while i < value.length and value[i].is_digit do i += 1
357 if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
362 if value[leading] == '<' and md.check_html then return new LineXML
365 if next != null and not next.is_empty then
366 if next.count_chars('=') > 0 then
367 return new LineHeadline1
369 if next.count_chars('-') > 0 then
370 return new LineHeadline2
376 # Get the token kind at `pos`.
377 fun token_at(text: Text, pos: Int): Token do
389 if pos + 1 < text.length then
394 if pos + 2 < text.length then
400 var loc = new MDLocation(
401 current_loc.line_start,
402 current_loc.column_start + pos,
403 current_loc.line_start,
404 current_loc.column_start + pos)
408 if c0 != ' ' or c2 != ' ' then
409 return new TokenStrongStar(loc, pos, c)
411 return new TokenEmStar(loc, pos, c)
414 if c0 != ' ' or c1 != ' ' then
415 return new TokenEmStar(loc, pos, c)
417 return new TokenNone(loc, pos, c)
419 else if c == '_' then
421 if c0 != ' ' or c2 != ' ' then
422 return new TokenStrongUnderscore(loc, pos, c)
424 return new TokenEmUnderscore(loc, pos, c)
428 if (c0.is_letter or c0.is_digit) and c0 != '_' and
429 (c1.is_letter or c1.is_digit) then
430 return new TokenNone(loc, pos, c)
432 return new TokenEmUnderscore(loc, pos, c)
435 if c0 != ' ' or c1 != ' ' then
436 return new TokenEmUnderscore(loc, pos, c)
438 return new TokenNone(loc, pos, c)
440 else if c == '!' then
441 if c1 == '[' then return new TokenImage(loc, pos, c)
442 return new TokenNone(loc, pos, c)
443 else if c == '[' then
444 return new TokenLink(loc, pos, c)
445 else if c == ']' then
446 return new TokenNone(loc, pos, c)
447 else if c == '`' then
449 return new TokenCodeDouble(loc, pos, c)
451 return new TokenCodeSingle(loc, pos, c)
453 else if c == '\\' then
454 if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\
'' or c1
== '.' or c1
== '<' or c1
== '>' or c1
== '*' or c1
== '+' or c1
== '-' or c1
== '_' or c1
== '!' or c1
== '`' or c1
== '~' or c1
== '^' then
455 return new TokenEscape(loc
, pos
, c
)
457 return new TokenNone(loc
, pos
, c
)
459 else if c
== '<' then
460 return new TokenHTML(loc
, pos
, c
)
461 else if c
== '&' then
462 return new TokenEntity(loc
, pos
, c
)
465 if c
== '~' and c1
== '~' then
466 return new TokenStrike(loc
, pos
, c
)
469 return new TokenNone(loc
, pos
, c
)
473 # Find the position of a `token` in `text`.
474 fun find_token
(text
: Text, start
: Int, token
: Token): Int do
476 while pos
< text
.length
do
477 if token_at
(text
, pos
).is_same_type
(token
) then
485 # Location used for next parsed token.
487 # This location can be changed by the emitter to adjust with `\n` found
489 private fun current_loc
: MDLocation do return emitter
.current_loc
492 # Emit output corresponding to blocks content.
494 # Blocks are created by a previous pass in `MarkdownProcessor`.
495 # The emitter uses a `Decorator` to select the output format.
496 class MarkdownEmitter
498 # Kind of processor used for parsing.
499 type PROCESSOR: MarkdownProcessor
501 # Processor containing link refs.
502 var processor
: PROCESSOR
504 # Kind of decorator used for decoration.
505 type DECORATOR: Decorator
507 # Decorator used for output.
508 # Default is `HTMLDecorator`
509 var decorator
: DECORATOR is writable, lazy
do
510 return new HTMLDecorator
513 # Create a new `MarkdownEmitter` using a custom `decorator`.
514 init with_decorator
(processor
: PROCESSOR, decorator
: DECORATOR) do
516 self.decorator
= decorator
519 # Output `block` using `decorator` in the current buffer.
520 fun emit
(block
: Block): Text do
521 var buffer
= push_buffer
527 # Output the content of `block`.
528 fun emit_in
(block
: Block) do block
.emit_in
(self)
530 # Transform and emit markdown text
531 fun emit_text
(text
: Text) do emit_text_until
(text
, 0, null)
533 # Transform and emit markdown text starting at `start` and
534 # until a token with the same type as `token` is found.
535 # Go until the end of `text` if `token` is null.
536 fun emit_text_until
(text
: Text, start
: Int, token
: nullable Token): Int do
537 var old_text
= current_text
538 var old_pos
= current_pos
541 while current_pos
< text
.length
do
542 if text
[current_pos
] == '\n' then
543 current_loc
.line_start
+= 1
544 current_loc
.column_start
= -current_pos
546 var mt
= processor
.token_at
(text
, current_pos
)
547 if (token
!= null and not token
isa TokenNone) and
548 (mt
.is_same_type
(token
) or
549 (token
isa TokenEmStar and mt
isa TokenStrongStar) or
550 (token
isa TokenEmUnderscore and mt
isa TokenStrongUnderscore)) then
556 current_text
= old_text
557 current_pos
= old_pos
561 # Currently processed position in `current_text`.
562 # Used when visiting inline production with `emit_text_until`.
563 private var current_pos
: Int = -1
565 # Currently processed text.
566 # Used when visiting inline production with `emit_text_until`.
567 private var current_text
: nullable Text = null
570 private var buffer_stack
= new List[FlatBuffer]
572 # Push a new buffer on the stack.
573 private fun push_buffer
: FlatBuffer do
574 var buffer
= new FlatBuffer
575 buffer_stack
.add buffer
579 # Pop the last buffer.
580 private fun pop_buffer
do buffer_stack
.pop
582 # Current output buffer.
583 private fun current_buffer
: FlatBuffer do
584 assert not buffer_stack
.is_empty
585 return buffer_stack
.last
589 private var loc_stack
= new List[MDLocation]
591 # Push a new MDLocation on the stack.
592 private fun push_loc
(location
: MDLocation) do loc_stack
.add location
594 # Pop the last location.
595 private fun pop_loc
: MDLocation do return loc_stack
.pop
597 # Current location (last element of the location stack).
598 private fun current_loc
: MDLocation do
599 assert not loc_stack
.is_empty
600 return loc_stack
.last
603 # Append `e` to current buffer.
604 fun add
(e
: Writable) do
606 current_buffer
.append e
608 current_buffer
.append e
.write_to_string
612 # Append `c` to current buffer.
613 fun addc
(c
: Char) do add c
.to_s
615 # Append a "\n" line break.
620 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
623 # [1]: http://example.com/ "Optional title"
630 # Optional link title
631 var title
: nullable String = null
633 # Is the link an abbreviation?
634 var is_abbrev
= false
636 # Create a link with a title.
637 init with_title
(link
: String, title
: nullable String) do
643 # A `Decorator` is used to emit markdown into a specific format.
644 # Default decorator used is `HTMLDecorator`.
647 # Kind of emitter used for decoration.
648 type EMITTER: MarkdownEmitter
650 # Render a single plain char.
652 # Redefine this method to add special escaping for plain text.
653 fun add_char
(v
: EMITTER, c
: Char) do v
.addc c
655 # Render a ruler block.
656 fun add_ruler
(v
: EMITTER, block
: BlockRuler) is abstract
658 # Render a headline block with corresponding level.
659 fun add_headline
(v
: EMITTER, block
: BlockHeadline) is abstract
661 # Render a paragraph block.
662 fun add_paragraph
(v
: EMITTER, block
: BlockParagraph) is abstract
664 # Render a code or fence block.
665 fun add_code
(v
: EMITTER, block
: BlockCode) is abstract
667 # Render a blockquote.
668 fun add_blockquote
(v
: EMITTER, block
: BlockQuote) is abstract
670 # Render an unordered list.
671 fun add_unorderedlist
(v
: EMITTER, block
: BlockUnorderedList) is abstract
673 # Render an ordered list.
674 fun add_orderedlist
(v
: EMITTER, block
: BlockOrderedList) is abstract
676 # Render a list item.
677 fun add_listitem
(v
: EMITTER, block
: BlockListItem) is abstract
679 # Render an emphasis text.
680 fun add_em
(v
: EMITTER, text
: Text) is abstract
682 # Render a strong text.
683 fun add_strong
(v
: EMITTER, text
: Text) is abstract
685 # Render a strike text.
687 # Extended mode only (see `MarkdownProcessor::ext_mode`)
688 fun add_strike
(v
: EMITTER, text
: Text) is abstract
691 fun add_link
(v
: EMITTER, link
: Text, name
: Text, comment
: nullable Text) is abstract
694 fun add_image
(v
: EMITTER, link
: Text, name
: Text, comment
: nullable Text) is abstract
696 # Render an abbreviation.
697 fun add_abbr
(v
: EMITTER, name
: Text, comment
: Text) is abstract
699 # Render a code span reading from a buffer.
700 fun add_span_code
(v
: EMITTER, buffer
: Text, from
, to
: Int) is abstract
702 # Render a text and escape it.
703 fun append_value
(v
: EMITTER, value
: Text) is abstract
705 # Render code text from buffer and escape it.
706 fun append_code
(v
: EMITTER, buffer
: Text, from
, to
: Int) is abstract
708 # Render a character escape.
709 fun escape_char
(v
: EMITTER, char
: Char) is abstract
711 # Render a line break
712 fun add_line_break
(v
: EMITTER) is abstract
714 # Generate a new html valid id from a `String`.
715 fun strip_id
(txt
: String): String is abstract
717 # Found headlines during the processing labeled by their ids.
718 fun headlines
: ArrayMap[String, HeadLine] is abstract
721 # Class representing a markdown headline.
723 # Unique identifier of this headline.
726 # Text of the headline.
729 # Level of this headline.
731 # According to the markdown specification, level must be in `[1..6]`.
735 # `Decorator` that outputs HTML.
739 redef var headlines
= new ArrayMap[String, HeadLine]
741 redef fun add_ruler
(v
, block
) do v
.add
"<hr/>\n"
743 redef fun add_headline
(v
, block
) do
745 var txt
= block
.block
.first_line
.value
746 var id
= strip_id
(txt
)
747 var lvl
= block
.depth
748 headlines
[id
] = new HeadLine(id
, txt
, lvl
)
750 v
.add
"<h{lvl} id=\"{id}\
">"
755 redef fun add_paragraph
(v
, block
) do
761 redef fun add_code
(v
, block
) do
762 var meta
= block
.meta
764 v
.add
"<pre class=\""
765 append_value(v, meta)
771 v
.add
"</code></pre>\n"
774 redef fun add_blockquote
(v
, block
) do
775 v
.add
"<blockquote>\n"
777 v
.add
"</blockquote>\n"
780 redef fun add_unorderedlist
(v
, block
) do
786 redef fun add_orderedlist
(v
, block
) do
792 redef fun add_listitem
(v
, block
) do
798 redef fun add_em
(v
, text
) do
804 redef fun add_strong
(v
, text
) do
810 redef fun add_strike
(v
, text
) do
816 redef fun add_image
(v
, link
, name
, comment
) do
818 append_value(v, link)
820 append_value(v, name)
822 if comment
!= null and not comment
.is_empty
then
824 append_value(v, comment)
830 redef fun add_link
(v
, link
, name
, comment
) do
832 append_value(v, link)
834 if comment
!= null and not comment
.is_empty
then
836 append_value(v, comment)
844 redef fun add_abbr
(v
, name
, comment
) do
845 v
.add
"<abbr title=\""
846 append_value(v, comment)
852 redef fun add_span_code
(v
, text
, from
, to
) do
854 append_code
(v
, text
, from
, to
)
858 redef fun add_line_break
(v
) do
862 redef fun append_value
(v
, text
) do for c
in text
do escape_char
(v
, c
)
864 redef fun escape_char
(v
, c
) do
867 else if c
== '<' then
869 else if c
== '>' then
871 else if c
== '"' then
873 else if c
== '\'' then
880 redef fun append_code(v, buffer, from, to) do
881 for i in [from..to[ do
885 else if c == '<' then
887 else if c == '>' then
895 redef fun strip_id(txt) do
897 var b = new FlatBuffer
902 if not c.is_letter and
904 not allowed_id_chars.has(c) then continue
910 # check for multiple id definitions
911 if headlines.has_key(key) then
914 while headlines.has_key(key) do
922 private var allowed_id_chars: Array[Char] = ['-', '_
', ':', '.']
925 # Location in a Markdown input.
928 # Starting line number (starting from 1).
931 # Starting column number (starting from 1).
932 var column_start: Int
934 # Stopping line number (starting from 1).
937 # Stopping column number (starting from 1).
940 redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
942 # Return a copy of `self`.
943 fun copy: MDLocation do
944 return new MDLocation(line_start, column_start, line_end, column_end)
948 # A block of markdown lines.
949 # A `MDBlock` can contain lines and/or sub-blocks.
952 # Position of `self` in the input.
953 var location: MDLocation
957 var kind: Block = new BlockNone(self) is writable
960 var first_line: nullable MDLine = null is writable
963 var last_line: nullable MDLine = null is writable
965 # First sub-block if any.
966 var first_block: nullable MDBlock = null is writable
968 # Last sub-block if any.
969 var last_block: nullable MDBlock = null is writable
971 # Previous block if any.
972 var prev: nullable MDBlock = null is writable
975 var next: nullable MDBlock = null is writable
977 # Does this block contain subblocks?
978 fun has_blocks: Bool do return first_block != null
981 fun count_blocks: Int do
983 var block = first_block
984 while block != null do
991 # Does this block contain lines?
992 fun has_lines: Bool do return first_line != null
995 fun count_lines: Int do
997 var line = first_line
998 while line != null do
1005 # Split `self` creating a new sub-block having `line` as `last_line`.
1006 fun split(line: MDLine): MDBlock do
1007 # location for new block
1008 var new_loc = new MDLocation(
1009 first_line.location.line_start,
1010 first_line.location.column_start,
1011 line.location.line_end,
1012 line.location.column_end)
1014 var block = new MDBlock(new_loc)
1015 block.first_line = first_line
1016 block.last_line = line
1017 first_line = line.next
1019 if first_line == null then
1022 first_line.prev = null
1023 # update current block loc
1024 location.line_start = first_line.location.line_start
1025 location.column_start = first_line.location.column_start
1027 if first_block == null then
1031 last_block.next = block
1037 # Add a `line` to this block.
1038 fun add_line(line: MDLine) do
1039 if last_line == null then
1043 last_line.next_empty = line.is_empty
1044 line.prev_empty = last_line.is_empty
1045 line.prev = last_line
1046 last_line.next = line
1051 # Remove `line` from this block.
1052 fun remove_line(line: MDLine) do
1053 if line.prev == null then
1054 first_line = line.next
1056 line.prev.next = line.next
1058 if line.next == null then
1059 last_line = line.prev
1061 line.next.prev = line.prev
1067 # Remove leading empty lines.
1068 fun remove_leading_empty_lines: Bool do
1069 var was_empty = false
1070 var line = first_line
1071 while line != null and line.is_empty do
1079 # Remove trailing empty lines.
1080 fun remove_trailing_empty_lines: Bool do
1081 var was_empty = false
1082 var line = last_line
1083 while line != null and line.is_empty do
1091 # Remove leading and trailing empty lines.
1092 fun remove_surrounding_empty_lines: Bool do
1093 var was_empty = false
1094 if remove_leading_empty_lines then was_empty = true
1095 if remove_trailing_empty_lines then was_empty = true
1099 # Remove list markers and up to 4 leading spaces.
1100 # Used to clean nested lists.
1101 fun remove_list_indent(v: MarkdownProcessor) do
1102 var line = first_line
1103 while line != null do
1104 if not line.is_empty then
1105 var kind = v.line_kind(line)
1106 if kind isa LineList then
1107 line.value = kind.extract_value(line)
1109 line.value = line.value.substring_from(line.leading.min(4))
1111 line.leading = line.process_leading
1117 # Collect block line text.
1119 var text = new FlatBuffer
1120 var line = first_line
1121 while line != null do
1122 if not line.is_empty then
1123 text.append line.text
1128 return text.write_to_string
1132 # Representation of a markdown block in the AST.
1133 # Each `Block` is linked to a `MDBlock` that contains markdown code.
1134 abstract class Block
1136 # The markdown block `self` is related to.
1139 # Output `self` using `v.decorator`.
1140 fun emit(v: MarkdownEmitter) do v.emit_in(self)
1142 # Emit the contents of `self`, lines or blocks.
1143 fun emit_in(v: MarkdownEmitter) do
1144 block.remove_surrounding_empty_lines
1145 if block.has_lines then
1152 # Emit lines contained in `block`.
1153 fun emit_lines(v: MarkdownEmitter) do
1154 var tpl = v.push_buffer
1155 var line = block.first_line
1156 while line != null do
1157 if not line.is_empty then
1158 v.add line.value.substring(line.leading, line.value.length - line.trailing)
1159 if line.trailing >= 2 then v.decorator.add_line_break(v)
1161 if line.next != null then
1170 # Emit sub-blocks contained in `block`.
1171 fun emit_blocks(v: MarkdownEmitter) do
1172 var block = self.block.first_block
1173 while block != null do
1174 v.push_loc(block.location)
1181 # The raw content of the block as a multi-line string.
1182 fun raw_content: String do
1183 var infence = self isa BlockFence
1184 var text = new FlatBuffer
1185 var line = self.block.first_line
1186 while line != null do
1187 if not line.is_empty then
1188 var str = line.value
1189 if not infence and str.has_prefix(" ") then
1190 text.append str.substring(4, str.length - line.trailing)
1198 return text.write_to_string
1202 # A block without any markdown specificities.
1204 # Actually uses the same implementation as `BlockCode`,
1205 # this class is only used for typing purposes.
1210 # A markdown blockquote.
1214 redef fun emit(v) do v.decorator.add_blockquote(v, self)
1216 # Remove blockquote markers.
1217 private fun remove_block_quote_prefix(block: MDBlock) do
1218 var line = block.first_line
1219 while line != null do
1220 if not line.is_empty then
1221 if line.value[line.leading] == '>' then
1222 var rem = line.leading + 1
1223 if line.leading + 1 < line.value.length and
1224 line.value[line.leading + 1] == ' ' then
1227 line.value = line.value.substring_from(rem)
1228 line.leading = line.process_leading
1236 # A markdown code block.
1240 # Any string found after fence token.
1241 var meta: nullable Text
1243 # Number of char to skip at the beginning of the line.
1245 # Block code lines start at 4 spaces.
1246 protected var line_start = 4
1248 redef fun emit(v) do v.decorator.add_code(v, self)
1250 redef fun emit_lines(v) do
1251 var line = block.first_line
1252 while line != null do
1253 if not line.is_empty then
1254 v.decorator.append_code(v, line.value, line_start, line.value.length)
1262 # A markdown code-fence block.
1264 # Actually uses the same implementation as `BlockCode`,
1265 # this class is only used for typing purposes.
1269 # Fence code lines start at 0 spaces.
1270 redef var line_start = 0
1273 # A markdown headline.
1277 redef fun emit(v) do
1278 var loc = block.location.copy
1279 loc.column_start += start
1281 v.decorator.add_headline(v, self)
1285 private var start = 0
1287 # Depth of the headline used to determine the headline level.
1290 # Remove headline marks from lines contained in `self`.
1291 private fun transform_headline(block: MDBlock) do
1292 if depth > 0 then return
1294 var line = block.first_line
1295 if line.is_empty then return
1296 var start = line.leading
1297 while start < line.value.length and line.value[start] == '#' do
1301 while start
< line
.value
.length
and line
.value
[start
] == ' ' do
1304 if start
>= line
.value
.length
then
1305 line
.is_empty
= true
1307 var nend
= line
.value
.length
- line
.trailing
- 1
1308 while line
.value
[nend
] == '#' do nend
-= 1
1309 while line
.value
[nend
] == ' ' do nend
-= 1
1310 line
.value
= line
.value
.substring
(start
, nend
- start
+ 1)
1315 depth
= level
.min
(6)
1319 # A markdown list item block.
1323 redef fun emit
(v
) do v
.decorator
.add_listitem
(v
, self)
1326 # A markdown list block.
1327 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
1328 abstract class BlockList
1331 # Split list block into list items sub-blocks.
1332 private fun init_block
(v
: MarkdownProcessor) do
1333 var line
= block
.first_line
1335 while line
!= null do
1336 var t
= v
.line_kind
(line
)
1337 if t
isa LineList or
1338 (not line
.is_empty
and (line
.prev_empty
and line
.leading
== 0 and
1339 not (t
isa LineList))) then
1340 var sblock
= block
.split
(line
.prev
.as(not null))
1341 sblock
.kind
= new BlockListItem(sblock
)
1345 var sblock
= block
.split
(block
.last_line
.as(not null))
1346 sblock
.kind
= new BlockListItem(sblock
)
1349 # Expand list items as paragraphs if needed.
1350 private fun expand_paragraphs
(block
: MDBlock) do
1351 var outer
= block
.first_block
1352 var inner
: nullable MDBlock
1353 var has_paragraph
= false
1354 while outer
!= null and not has_paragraph
do
1355 if outer
.kind
isa BlockListItem then
1356 inner
= outer
.first_block
1357 while inner
!= null and not has_paragraph
do
1358 if inner
.kind
isa BlockParagraph then
1359 has_paragraph
= true
1366 if has_paragraph
then
1367 outer
= block
.first_block
1368 while outer
!= null do
1369 if outer
.kind
isa BlockListItem then
1370 inner
= outer
.first_block
1371 while inner
!= null do
1372 if inner
.kind
isa BlockNone then
1373 inner
.kind
= new BlockParagraph(inner
)
1384 # A markdown ordered list.
1385 class BlockOrderedList
1388 redef fun emit
(v
) do v
.decorator
.add_orderedlist
(v
, self)
1391 # A markdown unordered list.
1392 class BlockUnorderedList
1395 redef fun emit
(v
) do v
.decorator
.add_unorderedlist
(v
, self)
1398 # A markdown paragraph block.
1399 class BlockParagraph
1402 redef fun emit
(v
) do v
.decorator
.add_paragraph
(v
, self)
1409 redef fun emit
(v
) do v
.decorator
.add_ruler
(v
, self)
1412 # Xml blocks that can be found in markdown markup.
1416 redef fun emit_lines
(v
) do
1417 var line
= block
.first_line
1418 while line
!= null do
1419 if not line
.is_empty
then v
.add line
.value
1429 # Location of `self` in the original input.
1430 var location
: MDLocation
1432 # Text contained in this line.
1433 var value
: String is writable
1435 # Is this line empty?
1436 # Lines containing only spaces are considered empty.
1437 var is_empty
: Bool = true is writable
1439 # Previous line in `MDBlock` or null if first line.
1440 var prev
: nullable MDLine = null is writable
1442 # Next line in `MDBlock` or null if last line.
1443 var next
: nullable MDLine = null is writable
1445 # Is the previous line empty?
1446 var prev_empty
: Bool = false is writable
1448 # Is the next line empty?
1449 var next_empty
: Bool = false is writable
1451 # Initialize a new MDLine from its string value
1453 self.leading
= process_leading
1454 if leading
!= value
.length
then
1455 self.is_empty
= false
1456 self.trailing
= process_trailing
1460 # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
1466 if prev
!= null then prev
.next_empty
= true
1467 if next
!= null then next
.prev_empty
= true
1470 # Number of leading spaces on this line.
1471 var leading
: Int = 0 is writable
1473 # Compute `leading` depending on `value`.
# Advances `count` over the run of ' ' characters at the start of `value`.
# The statement producing the result is not visible in this view;
# presumably `count` is returned — TODO confirm.
1474 fun process_leading
: Int do
1476 var value
= self.value
1477 while count
< value
.length
and value
[count
] == ' ' do count
+= 1
# NOTE(review): this test reads the stored `leading` attribute, not the
# `count` computed just above, so it reflects the previous value of
# `leading` — confirm this is intended before relying on `clear` here.
1478 if leading
== value
.length
then clear
1482 # Number of trailing spaces on this line.
1483 var trailing
: Int = 0 is writable
1485 # Compute `trailing` depending on `value`.
# Walks backwards from the last character of `value` while it reads ' '.
1486 fun process_trailing
: Int do
1488 var value
= self.value
# NOTE(review): the loop condition has no lower bound on the index, so a
# `value` made only of spaces (or an empty one) would index below 0.
# Presumably callers only invoke this on lines holding a non-space
# character — verify against the call sites.
1489 while value
[value
.length
- count
- 1] == ' ' do
1495 # Count the amount of `ch` in this line.
1496 # Return a value > 0 if this line consists only of `ch` and spaces.
1497 fun count_chars
(ch
: Char): Int do
1513 # Count the amount of `ch` at the start of this line ignoring spaces.
1514 fun count_chars_start
(ch
: Char): Int do
1529 # Last XML line if any.
1530 private var xml_end_line
: nullable MDLine = null
1532 # Does `value` contain valid XML markup?
1533 private fun check_html
: Bool do
1534 var tags
= new Array[String]
1535 var tmp
= new FlatBuffer
1537 if pos
+ 1 < value
.length
and value
[pos
+ 1] == '!' then
1538 if read_xml_comment
(self, pos
) > 0 then return true
1540 pos
= value
.read_xml
(tmp
, pos
, false)
1544 if not tag
.is_html_block
then
1552 var line
: nullable MDLine = self
1553 while line
!= null do
1554 while pos
< line
.value
.length
and line
.value
[pos
] != '<' do
1557 if pos
>= line
.value
.length
then
1558 if pos
- 2 >= 0 and line
.value
[pos
- 2] == '/' then
1560 if tags
.is_empty
then
1568 tmp
= new FlatBuffer
1569 var new_pos
= line
.value
.read_xml
(tmp
, pos
, false)
1572 if tag
.is_html_block
and not tag
== "hr" then
1573 if tmp
[1] == '/' then
1574 if tags
.last
!= tag
then
1582 if tags
.is_empty
then
1592 return tags
.is_empty
1597 # Read a XML comment.
1598 # Used by `check_html`.
# Scans `first_line` and the lines after it for the closing `-->`;
# when found, the line holding it is stored in `first_line.xml_end_line`.
1599 private fun read_xml_comment
(first_line
: MDLine, start
: Int): Int do
1600 var line
: nullable MDLine = first_line
1601 if start
+ 3 < line
.value
.length
then
# NOTE(review): the length guard above is relative to `start`, but the two
# `-` checks below use the fixed indices 2 and 3. They only line up with
# `start` when `start` is 0; `check_html` passes its own `pos` here.
# Confirm whether `start + 2` / `start + 3` was intended.
1602 if line
.value
[2] == '-' and line
.value
[3] == '-' then
1604 while line
!= null do
# Skip forward to the next '-' on the current line.
1605 while pos
< line
.value
.length
and line
.value
[pos
] != '-' do
# No '-' left on this line.
1608 if pos
== line
.value
.length
then
1612 if pos
+ 2 < line
.value
.length
then
# A '-' followed by "->" closes the comment.
1613 if line
.value
[pos
+ 1] == '-' and line
.value
[pos
+ 2] == '>' then
1614 first_line
.xml_end_line
= line
# Content of this line without its `leading` and `trailing` parts.
#
# NOTE(review): `substring` receives `leading` as its first argument and
# `value.length - trailing` as its second; confirm whether that second
# argument is expected to be a count or an end index.
fun text: String do
	var start = leading
	var span = value.length - trailing
	return value.substring(start, span)
end
1634 # See `MarkdownProcessor::recurse`.
1635 fun process
(v
: MarkdownProcessor) is abstract
1638 # An empty markdown line.
1642 redef fun process
(v
) do
1643 v
.current_line
= v
.current_line
.next
1647 # A non-specific markdown construction.
1648 # Mainly used as part of another line construct such as paragraphs or lists.
1652 redef fun process
(v
) do
1653 var line
= v
.current_line
1655 var was_empty
= line
.prev_empty
1656 while line
!= null and not line
.is_empty
do
1657 var t
= v
.line_kind
(line
)
1658 if (v
.in_list
or v
.ext_mode
) and t
isa LineList then
1661 if v
.ext_mode
and (t
isa LineCode or t
isa LineFence) then
1664 if t
isa LineHeadline or t
isa LineHeadline1 or t
isa LineHeadline2 or
1665 t
isa LineHR or t
isa LineBlockquote or t
isa LineXML then
1671 if line
!= null and not line
.is_empty
then
1672 var block
= v
.current_block
.split
(line
.prev
.as(not null))
1673 if v
.in_list
and not was_empty
then
1674 block
.kind
= new BlockNone(block
)
1676 block
.kind
= new BlockParagraph(block
)
1678 v
.current_block
.remove_leading_empty_lines
1681 if line
!= null then
1682 block
= v
.current_block
.split
(line
)
1684 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1686 if v
.in_list
and (line
== null or not line
.is_empty
) and not was_empty
then
1687 block
.kind
= new BlockNone(block
)
1689 block
.kind
= new BlockParagraph(block
)
1691 v
.current_block
.remove_leading_empty_lines
1693 v
.current_line
= v
.current_block
.first_line
1697 # A line of markdown code.
1701 redef fun process
(v
) do
1702 var line
= v
.current_line
1704 while line
!= null and (line
.is_empty
or v
.line_kind
(line
) isa LineCode) do
1707 # split at block end line
1709 if line
!= null then
1710 block
= v
.current_block
.split
(line
.prev
.as(not null))
1712 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1714 block
.kind
= new BlockCode(block
)
1715 block
.remove_surrounding_empty_lines
1716 v
.current_line
= v
.current_block
.first_line
1720 # A line of raw XML.
1724 redef fun process
(v
) do
1725 var line
= v
.current_line
1726 var prev
= line
.prev
1727 if prev
!= null then v
.current_block
.split
(prev
)
1728 var block
= v
.current_block
.split
(line
.xml_end_line
.as(not null))
1729 block
.kind
= new BlockXML(block
)
1730 v
.current_block
.remove_leading_empty_lines
1731 v
.current_line
= v
.current_block
.first_line
1735 # A markdown blockquote line.
1736 class LineBlockquote
1739 redef fun process
(v
) do
1740 var line
= v
.current_line
1742 while line
!= null do
1743 if not line
.is_empty
and (line
.prev_empty
and
1744 line
.leading
== 0 and
1745 not v
.line_kind
(line
) isa LineBlockquote) then break
1750 if line
!= null then
1751 block
= v
.current_block
.split
(line
.prev
.as(not null))
1753 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1755 var kind
= new BlockQuote(block
)
1757 block
.remove_surrounding_empty_lines
1758 kind
.remove_block_quote_prefix
(block
)
1759 v
.current_line
= line
1760 v
.recurse
(block
, false)
1761 v
.current_line
= v
.current_block
.first_line
1765 # A markdown ruler line.
1769 redef fun process
(v
) do
1770 var line
= v
.current_line
1771 if line
.prev
!= null then v
.current_block
.split
(line
.prev
.as(not null))
1772 var block
= v
.current_block
.split
(line
.as(not null))
1773 block
.kind
= new BlockRuler(block
)
1774 v
.current_block
.remove_leading_empty_lines
1775 v
.current_line
= v
.current_block
.first_line
1779 # A markdown fence code line.
1783 redef fun process
(v
) do
1785 var line
= v
.current_line
.next
1786 while line
!= null do
1787 if v
.line_kind
(line
) isa LineFence then break
1790 if line
!= null then
1795 if line
!= null then
1796 block
= v
.current_block
.split
(line
.prev
.as(not null))
1798 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1800 block
.remove_surrounding_empty_lines
1801 var meta
= block
.first_line
.value
.meta_from_fence
1802 block
.kind
= new BlockFence(block
, meta
)
1803 block
.first_line
.clear
1804 var last
= block
.last_line
1805 if last
!= null and v
.line_kind
(last
) isa LineFence then
1806 block
.last_line
.clear
1808 block
.remove_surrounding_empty_lines
1809 v
.current_line
= line
1813 # A markdown headline.
1817 redef fun process
(v
) do
1818 var line
= v
.current_line
1819 var lprev
= line
.prev
1820 if lprev
!= null then v
.current_block
.split
(lprev
)
1821 var block
= v
.current_block
.split
(line
.as(not null))
1822 var kind
= new BlockHeadline(block
)
1824 kind
.transform_headline
(block
)
1825 v
.current_block
.remove_leading_empty_lines
1826 v
.current_line
= v
.current_block
.first_line
1830 # A markdown headline of level 1.
1834 redef fun process
(v
) do
1835 var line
= v
.current_line
1836 var lprev
= line
.prev
1837 if lprev
!= null then v
.current_block
.split
(lprev
)
1839 var block
= v
.current_block
.split
(line
.as(not null))
1840 var kind
= new BlockHeadline(block
)
1842 kind
.transform_headline
(block
)
1844 v
.current_block
.remove_leading_empty_lines
1845 v
.current_line
= v
.current_block
.first_line
1849 # A markdown headline of level 2.
1853 redef fun process
(v
) do
1854 var line
= v
.current_line
1855 var lprev
= line
.prev
1856 if lprev
!= null then v
.current_block
.split
(lprev
)
1858 var block
= v
.current_block
.split
(line
.as(not null))
1859 var kind
= new BlockHeadline(block
)
1861 kind
.transform_headline
(block
)
1863 v
.current_block
.remove_leading_empty_lines
1864 v
.current_line
= v
.current_block
.first_line
1868 # A markdown list line.
1869 # Mainly used to factorize code between ordered and unordered lists.
1870 abstract class LineList
1873 redef fun process
(v
) do
1874 var line
= v
.current_line
1876 while line
!= null do
1877 var t
= v
.line_kind
(line
)
1878 if not line
.is_empty
and (line
.prev_empty
and line
.leading
== 0 and
1879 not t
isa LineList) then break
1884 if line
!= null then
1885 list
= v
.current_block
.split
(line
.prev
.as(not null))
1887 list
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1889 var kind
= block_kind
(list
)
1891 list
.first_line
.prev_empty
= false
1892 list
.last_line
.next_empty
= false
1893 list
.remove_surrounding_empty_lines
1894 list
.first_line
.prev_empty
= false
1895 list
.last_line
.next_empty
= false
1897 var block
= list
.first_block
1898 while block
!= null do
1899 block
.remove_list_indent
(v
)
1900 v
.recurse
(block
, true)
1903 kind
.expand_paragraphs
(list
)
1904 v
.current_line
= line
1907 # Create a new block kind based on this line.
1908 protected fun block_kind
(block
: MDBlock): BlockList is abstract
1910 # Extract string value from `MDLine`.
1911 protected fun extract_value
(line
: MDLine): String is abstract
1914 # An ordered list line.
# Build a `BlockOrderedList` kind for `block`.
redef fun block_kind(block) do
	var kind = new BlockOrderedList(block)
	return kind
end
1920 redef fun extract_value
(line
) do
1921 return line
.value
.substring_from
(line
.value
.index_of
('.') + 2)
1925 # An unordered list line.
# Build a `BlockUnorderedList` kind for `block`.
redef fun block_kind(block) do
	var kind = new BlockUnorderedList(block)
	return kind
end
1931 redef fun extract_value
(line
) do
1932 return line
.value
.substring_from
(line
.leading
+ 2)
1936 # A token represent a character in the markdown input.
1937 # Some tokens have a specific markup behaviour that is handled here.
1938 abstract class Token
1940 # Location of `self` in the original input.
1941 var location
: MDLocation
# Position of `self` in input independent from lines.
1946 # Character found at `pos` in the markdown input.
# Output this token through the decorator of `v`.
#
# The default behaviour hands `char` to `add_char` on `v.decorator`.
fun emit(v: MarkdownEmitter) do
	var decorator = v.decorator
	decorator.add_char(v, char)
end
1953 # A token without a specific meaning.
1958 # An emphasis token.
1959 abstract class TokenEm
1962 redef fun emit
(v
) do
1963 var tmp
= v
.push_buffer
1964 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 1, self)
1967 v
.decorator
.add_em
(v
, tmp
)
1975 # An emphasis star token.
1980 # An emphasis underscore token.
1981 class TokenEmUnderscore
1986 abstract class TokenStrong
1989 redef fun emit
(v
) do
1990 var tmp
= v
.push_buffer
1991 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 2, self)
1994 v
.decorator
.add_strong
(v
, tmp
)
1995 v
.current_pos
= b
+ 1
2002 # A strong star token.
2003 class TokenStrongStar
2007 # A strong underscore token.
2008 class TokenStrongUnderscore
2013 # This class is mainly used to factorize work between single and double quoted span codes.
2014 abstract class TokenCode
2017 redef fun emit
(v
) do
2018 var a
= pos
+ next_pos
+ 1
2019 var b
= v
.processor
.find_token
(v
.current_text
.as(not null), a
, self)
2021 v
.current_pos
= b
+ next_pos
2022 while a
< b
and v
.current_text
[a
] == ' ' do a
+= 1
2024 while v
.current_text
[b
- 1] == ' ' do b
-= 1
2025 v
.decorator
.add_span_code
(v
, v
.current_text
.as(not null), a
, b
)
2032 private fun next_pos
: Int is abstract
2035 # A span code token.
2036 class TokenCodeSingle
# Offset used by `TokenCode::emit` for this token kind: always 0.
redef fun next_pos do
	return 0
end
2042 # A doubled span code token.
2043 class TokenCodeDouble
# Offset used by `TokenCode::emit` for this token kind: always 1.
redef fun next_pos do
	return 1
end
2049 # A link or image token.
2050 # This class is mainly used to factorize work between images and links.
2051 abstract class TokenLinkOrImage
2055 var link
: nullable Text = null
2058 var name
: nullable Text = null
2061 var comment
: nullable Text = null
2063 # Is the link construct an abbreviation?
2064 var is_abbrev
= false
2066 redef fun emit
(v
) do
2067 var tmp
= new FlatBuffer
2068 var b
= check_link
(v
, tmp
, pos
, self)
2077 # Emit the hyperlink as link or image.
2078 private fun emit_hyper
(v
: MarkdownEmitter) is abstract
2080 # Check if the link is a valid link.
2081 private fun check_link
(v
: MarkdownEmitter, out
: FlatBuffer, start
: Int, token
: Token): Int do
2082 var md
= v
.current_text
2084 if token
isa TokenLink then
2089 var tmp
= new FlatBuffer
2090 pos
= md
.read_md_link_id
(tmp
, pos
)
2091 if pos
< start
then return -1
2095 pos
= md
.skip_spaces
(pos
)
2097 var tid
= name
.write_to_string
.to_lower
2098 if v
.processor
.link_refs
.has_key
(tid
) then
2099 var lr
= v
.processor
.link_refs
[tid
]
2100 is_abbrev
= lr
.is_abbrev
2107 else if md
[pos
] == '(' then
2109 pos
= md
.skip_spaces
(pos
)
2110 if pos
< start
then return -1
2111 tmp
= new FlatBuffer
2112 var use_lt
= md
[pos
] == '<'
2114 pos
= md
.read_until
(tmp
, pos
+ 1, '>')
2116 pos
= md
.read_md_link
(tmp
, pos
)
2118 if pos
< start
then return -1
2119 if use_lt
then pos
+= 1
2120 link
= tmp
.write_to_string
2121 if md
[pos
] == ' ' then
2122 pos
= md
.skip_spaces
(pos
)
2123 if pos
> start
and md
[pos
] == '"' then
2125 tmp
= new FlatBuffer
2126 pos
= md
.read_until
(tmp
, pos
, '"')
2127 if pos
< start
then return -1
2128 comment
= tmp
.write_to_string
2130 pos
= md
.skip_spaces
(pos
)
2131 if pos
== -1 then return -1
2134 if pos
< start
then return -1
2135 if md
[pos
] != ')' then return -1
2136 else if md
[pos
] == '[' then
2138 tmp
= new FlatBuffer
2139 pos
= md
.read_raw_until
(tmp
, pos
, ']')
2140 if pos
< start
then return -1
2142 if tmp
.length
> 0 then
2147 var tid
= id
.write_to_string
.to_lower
2148 if v
.processor
.link_refs
.has_key
(tid
) then
2149 var lr
= v
.processor
.link_refs
[tid
]
2154 var tid
= name
.write_to_string
.replace
("\n", " ").to_lower
2155 if v
.processor
.link_refs
.has_key
(tid
) then
2156 var lr
= v
.processor
.link_refs
[tid
]
2164 if link
== null then return -1
2169 # A markdown link token.
2171 super TokenLinkOrImage
2173 redef fun emit_hyper
(v
) do
2174 if is_abbrev
and comment
!= null then
2175 v
.decorator
.add_abbr
(v
, name
.as(not null), comment
.as(not null))
2177 v
.decorator
.add_link
(v
, link
.as(not null), name
.as(not null), comment
)
2182 # A markdown image token.
2184 super TokenLinkOrImage
2186 redef fun emit_hyper
(v
) do
2187 v
.decorator
.add_image
(v
, link
.as(not null), name
.as(not null), comment
)
2195 redef fun emit
(v
) do
2196 var tmp
= new FlatBuffer
2197 var b
= check_html
(v
, tmp
, v
.current_text
.as(not null), v
.current_pos
)
2202 v
.decorator
.escape_char
(v
, char
)
2206 # Is the HTML valid?
2207 # Also take care of link and mailto shortcuts.
2208 private fun check_html
(v
: MarkdownEmitter, out
: FlatBuffer, md
: Text, start
: Int): Int do
2209 # check for auto links
2210 var tmp
= new FlatBuffer
2211 var pos
= md
.read_until
(tmp
, start
+ 1, ':', ' ', '>', '\n')
2212 if pos
!= -1 and md
[pos
] == ':' and tmp
.is_link_prefix
then
2213 pos
= md
.read_until
(tmp
, pos
, '>')
2215 var link
= tmp
.write_to_string
2216 v
.decorator
.add_link
(v
, link
, link
, null)
2220 # TODO check for mailto
2221 # check for inline html
2222 if start
+ 2 < md
.length
then
2223 return md
.read_xml
(out
, start
, true)
2229 # An HTML entity token.
2233 redef fun emit
(v
) do
2234 var tmp
= new FlatBuffer
2235 var b
= check_entity
(tmp
, v
.current_text
.as(not null), pos
)
2240 v
.decorator
.escape_char
(v
, char
)
2244 # Is the entity valid?
2245 private fun check_entity
(out
: FlatBuffer, md
: Text, start
: Int): Int do
2246 var pos
= md
.read_until
(out
, start
, ';')
2247 if pos
< 0 or out
.length
< 3 then
2250 if out
[1] == '#' then
2251 if out
[2] == 'x' or out
[2] == 'X' then
2252 if out
.length
< 4 then return -1
2253 for i
in [3..out
.length
[ do
# True when `c` is not a hexadecimal digit (`0-9`, `a-f` or `A-F`).
# Each range test needs `or`: `c < 'a' and c > 'f'` can never hold, which
# made the whole condition always false and let any character through.
if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
2260 for i
in [2..out
.length
[ do
2262 if c
< '0' or c
> '9' then return -1
2267 for i
in [1..out
.length
[ do
2269 if not c
.is_digit
and not c
.is_letter
then return -1
2272 # TODO check entity is valid
2273 # if out.is_entity then
2283 # A markdown escape token.
2287 redef fun emit
(v
) do
2289 v
.addc v
.current_text
[v
.current_pos
]
2293 # A markdown strike token.
2295 # Extended mode only (see `MarkdownProcessor::ext_mode`)
2299 redef fun emit
(v
) do
2300 var tmp
= v
.push_buffer
2301 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 2, self)
2304 v
.decorator
.add_strike
(v
, tmp
)
2305 v
.current_pos
= b
+ 1
2314 # Get the position of the next non-space character.
2315 private fun skip_spaces
(start
: Int): Int do
2317 while pos
> -1 and pos
< length
and (self[pos
] == ' ' or self[pos
] == '\n') do
2320 if pos
< length
then return pos
2324 # Read `self` until `nend` and append it to the `out` buffer.
2325 # Escape markdown special chars.
2326 private fun read_until
(out
: FlatBuffer, start
: Int, nend
: Char...): Int do
2328 while pos
< length
do
2330 if c
== '\\' and pos
+ 1 < length
then
2331 pos
= escape
(out
, self[pos
+ 1], pos
)
2333 var end_reached
= false
2340 if end_reached
then break
2345 if pos
== length
then return -1
2349 # Read `self` as raw text until `nend` and append it to the `out` buffer.
2350 # No escape is made.
2351 private fun read_raw_until
(out
: FlatBuffer, start
: Int, nend
: Char...): Int do
2353 while pos
< length
do
2355 var end_reached
= false
2362 if end_reached
then break
2366 if pos
== length
then return -1
2370 # Read `self` as XML until `to` and append it to the `out` buffer.
2371 # Escape HTML special chars.
2372 private fun read_xml_until
(out
: FlatBuffer, from
: Int, to
: Char...): Int do
2375 var str_char
: nullable Char = null
2376 while pos
< length
do
2382 if pos
< length
then
2388 if c
== str_char
then
2395 if c
== '"' or c
== '\'' then
2400 var end_reached = false
2401 for n in [0..to.length[ do
2407 if end_reached then break
2412 if pos == length then return -1
2416 # Read `self` as XML and append it to the `out` buffer.
2417 # Safe mode can be activated to limit reading to valid xml.
2418 private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2421 var is_close_tag = false
2422 if start + 1 >= length then return -1
2423 if self[start + 1] == '/' then
2426 else if self[start + 1] == '!' then
2430 is_close_tag = false
2434 var tmp = new FlatBuffer
2435 pos = read_xml_until(tmp, pos, ' ', '/', '>')
2436 if pos == -1 then return -1
2437 var tag = tmp.write_to_string.trim.to_lower
2438 if not tag.is_valid_html_tag then
2441 else if tag.is_html_unsafe then
2444 if is_close_tag then out.add '/'
2448 if is_close_tag then out.add '/'
2453 if is_close_tag then out.add '/'
2454 pos = read_xml_until(out, pos, ' ', '/', '>')
2456 if pos == -1 then return -1
2457 pos = read_xml_until(out, pos, '/', '>')
2458 if pos == -1 then return -1
2459 if self[pos] == '/' then
2461 pos = self.read_xml_until(out, pos + 1, '>')
2462 if pos == -1 then return -1
2464 if self[pos] == '>' then
2475 # Read a markdown link address and append it to the `out` buffer.
2476 private fun read_md_link(out: FlatBuffer, start: Int): Int do
2479 while pos < length do
2481 if c == '\\
' and pos + 1 < length then
2482 pos = escape(out, self[pos + 1], pos)
2484 var end_reached = false
2487 else if c == ' ' then
2488 if counter == 1 then end_reached = true
2489 else if c == ')' then
2491 if counter == 0 then end_reached = true
2493 if end_reached then break
2498 if pos == length then return -1
2502 # Read a markdown link text and append it to the `out` buffer.
2503 private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2506 while pos < length do
2508 var end_reached = false
2512 else if c == ']' then
2514 if counter == 0 then
2522 if end_reached then break
2525 if pos == length then return -1
# Extract the XML tag name from an XML tag.
2530 private fun xml_tag: String do
2531 var tpl = new FlatBuffer
2533 if pos < length and self[1] == '/' then pos += 1
2534 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2538 return tpl.write_to_string.to_lower
2541 private fun is_valid_html_tag: Bool do
2542 if is_empty then return false
2544 if not c.is_alpha then return false
2549 # Read and escape the markdown contained in `self`.
2550 private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2551 if c == '\\
' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2552 c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2553 c
== '>' or c
== '*' or c
== '+' or c
== '-' or c
== '_' or c
== '!' or
2554 c
== '`' or c
== '~' or c
== '^' then
2562 # Extract string found at end of fence opening.
2563 private fun meta_from_fence
: nullable Text do
2564 for i
in [0..chars
.length
[ do
2566 if c
!= ' ' and c
!= '`' and c
!= '~' then
2567 return substring_from
(i
).trim
# Is `self` listed in `html_unsafe_tags`?
#
# The lookup is done on the string form of `self`.
private fun is_html_unsafe: Bool do
	var name = self.write_to_string
	return html_unsafe_tags.has(name)
end
# Is `self` an HTML block element, i.e. listed in `html_block_tags`?
#
# The lookup is done on the string form of `self`.
private fun is_html_block: Bool do
	var name = self.write_to_string
	return html_block_tags.has(name)
end
# Is `self` a link prefix, i.e. listed in `html_link_prefixes`?
#
# The lookup is done on the string form of `self`.
private fun is_link_prefix: Bool do
	var name = self.write_to_string
	return html_link_prefixes.has(name)
end
# Tag names looked up by `is_html_unsafe`.
#
# The array is built once and reused by later calls.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
# Tag names looked up by `is_html_block`.
#
# The array is built once and reused by later calls.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
# Prefixes looked up by `is_link_prefix`.
#
# The array is built once and reused by later calls.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2591 # Parse `self` as markdown and return the HTML representation
2593 # var md = "**Hello World!**"
2594 # var html = md.md_to_html
2595 # assert html == "<p><strong>Hello World!</strong></p>\n"
2596 fun md_to_html
: Writable do
2597 var processor
= new MarkdownProcessor
2598 return processor
.process
(self)