1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
20 # Parse a markdown string and split it in blocks.
22 # Blocks are then output by a `MarkdownEmitter`.
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
33 # `MarkdownEmitter` used for output.
34 var emitter
: MarkdownEmitter is noinit
, protected writable
36 # Work in extended mode (default).
38 # Behavior changes when using extended mode:
40 # * Lists and code blocks end a paragraph
42 # In normal markdown the following:
45 # * and this is not a list
49 # <p>This is a paragraph
50 # * and this is not a list</p>
52 # When using extended mode this changes to:
54 # <p>This is a paragraph</p>
56 # <li>and this is not a list</li>
59 # * Fences code blocks
61 # If you don't want to indent all your code with 4 spaces,
62 # you can wrap your code in ``` ``` ``` or `~~~`.
68 # print "Hello World!"
74 # If you want to use syntax highlighting tools, most of them need to know what kind
75 # of language they are highlighting.
76 # You can add an optional language identifier after the fence declaration to output
77 # it in the HTML render.
82 # print "# Hello World!".md_to_html
87 # <pre class="nit"><code>import markdown
89 # print "Hello World!".md_to_html
92 # * Underscores (Emphasis)
94 # Underscores in the middle of a word like:
98 # normally produces this:
100 # <p>Con<em>cat</em>this</p>
102 # With extended mode they don't result in emphasis.
104 # <p>Con_cat_this</p>
108 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
109 # strikethrough span is marked with `~~`.
115 # <del>Mistaken text.</del>
# Build the processor with a default `MarkdownEmitter` bound to `self`.
init do
	var default_emitter = new MarkdownEmitter(self)
	self.emitter = default_emitter
end
120 # Process the markdown `input` string and return the processed output.
121 fun process
(input
: String): Writable do
128 var parent
= read_lines
(input
)
129 parent
.remove_surrounding_empty_lines
130 recurse
(parent
, false)
131 # output processed text
132 return emitter
.emit
(parent
.kind
)
135 # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
136 private fun read_lines
(input
: String): MDBlock do
137 var block
= new MDBlock
138 var value
= new FlatBuffer
140 while i
< input
.length
do
144 while not eol
and i
< input
.length
do
149 else if c
== '\t' then
150 var np
= pos
+ (4 - (pos
.bin_and
(3)))
163 var line
= new MDLine(value
.write_to_string
)
164 var is_link_ref
= check_link_ref
(line
)
166 if not is_link_ref
then block
.add_line line
171 # Check if line is a block link definition.
172 # Return `true` if line contains a valid link ref and save it into `link_refs`.
173 private fun check_link_ref
(line
: MDLine): Bool do
175 var is_link_ref
= false
176 var id
= new FlatBuffer
177 var link
= new FlatBuffer
178 var comment
= new FlatBuffer
180 if not line
.is_empty
and line
.leading
< 4 and line
.value
[line
.leading
] == '[' then
181 pos
= line
.leading
+ 1
182 pos
= md
.read_until
(id
, pos
, ']')
183 if not id
.is_empty
and pos
+ 2 < line
.value
.length
then
184 if line
.value
[pos
+ 1] == ':' then
186 pos
= md
.skip_spaces
(pos
)
187 if line
.value
[pos
] == '<' then
189 pos
= md
.read_until
(link
, pos
, '>')
192 pos
= md
.read_until
(link
, pos
, ' ', '\n')
194 if not link
.is_empty
then
195 pos
= md
.skip_spaces
(pos
)
196 if pos
> 0 and pos
< line
.value
.length
then
197 var c
= line
.value
[pos
]
198 if c
== '\"' or c
== '\'' or c == '(' then
201 pos = md.read_until(comment, pos, ')')
203 pos = md.read_until(comment, pos, c)
205 if pos > 0 then is_link_ref = true
214 if is_link_ref and not id.is_empty and not link.is_empty then
215 var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
216 add_link_ref(id.write_to_string, lr)
217 if comment.is_empty then last_link_ref = lr
220 comment = new FlatBuffer
221 if not line.is_empty and last_link_ref != null then
223 var c = line.value[pos]
224 if c == '\
"' or c == '\'' or c == '(' then
227 pos = md.read_until(comment, pos, ')')
229 pos = md.read_until(comment, pos, c)
232 if not comment.is_empty then last_link_ref.title = comment.write_to_string
234 if comment.is_empty then return false
240 # This list will be needed during output to expand links.
241 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
243 # Last encountered link ref (for multiline definitions)
245 # Markdown allows link refs to be defined over two lines:
247 # [id]: http://example.com/longish/path/to/resource/here
248 # "Optional Title Here"
250 private var last_link_ref: nullable LinkRef = null
# Register `ref` in `link_refs`.
#
# The `key` is stored lower-cased.
fun add_link_ref(key: String, ref: LinkRef) do
	var normalized_key = key.to_lower
	link_refs[normalized_key] = ref
end
255 # Recursively split a `block`.
257 # The block is split according to the type of lines it contains.
258 # Some blocks can be split again recursively like lists.
259 # The `in_list` mode is used to recurse on list and build
260 # nested paragraphs or code blocks.
261 fun recurse(root: MDBlock, in_list: Bool) do
262 var old_mode = self.in_list
263 var old_root = self.current_block
264 self.in_list = in_list
266 var line = root.first_line
267 while line != null and line.is_empty do
269 if line == null then return
274 while current_line != null do
275 line_kind(current_line.as(not null)).process(self)
277 self.in_list = old_mode
278 self.current_block = old_root
281 # Currently processed line.
282 # Used when visiting blocks with `recurse`.
283 var current_line: nullable MDLine = null is writable
285 # Currently processed block.
286 # Used when visiting blocks with `recurse`.
287 var current_block: nullable MDBlock = null is writable
289 # Is the current recursion in list mode?
290 # Used when visiting blocks with `recurse`
291 private var in_list = false
295 fun line_kind(md: MDLine): Line do
297 var leading = md.leading
298 var trailing = md.trailing
299 if md.is_empty then return new LineEmpty
300 if md.leading > 3 then return new LineCode
301 if value[leading] == '#' then return new LineHeadline
302 if value[leading] == '>' then return new LineBlockquote
305 if value.length - leading - trailing > 2 then
306 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
309 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
315 if value.length - leading - trailing > 2 and
316 (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
317 if md.count_chars(value[leading]) >= 3 then
322 if value.length - leading >= 2 and value[leading + 1] == ' ' then
323 var c = value[leading]
324 if c == '*' or c == '-' or c == '+' then return new LineUList
327 if value.length - leading >= 3 and value[leading].is_digit then
329 while i < value.length and value[i].is_digit do i += 1
330 if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
335 if value[leading] == '<' and md.check_html then return new LineXML
338 if next != null and not next.is_empty then
339 if next.count_chars('=') > 0 then
340 return new LineHeadline1
342 if next.count_chars('-') > 0 then
343 return new LineHeadline2
349 # Get the token kind at `pos`.
350 fun token_at(text: Text, pos: Int): Token do
362 if pos + 1 < text.length then
367 if pos + 2 < text.length then
375 if c0 != ' ' or c2 != ' ' then
376 return new TokenStrongStar(pos, c)
378 return new TokenEmStar(pos, c)
381 if c0 != ' ' or c1 != ' ' then
382 return new TokenEmStar(pos, c)
384 return new TokenNone(pos, c)
386 else if c == '_' then
388 if c0 != ' ' or c2 != ' 'then
389 return new TokenStrongUnderscore(pos, c)
391 return new TokenEmUnderscore(pos, c)
395 if (c0.is_letter or c0.is_digit) and c0 != '_' and
396 (c1.is_letter or c1.is_digit) then
397 return new TokenNone(pos, c)
399 return new TokenEmUnderscore(pos, c)
402 if c0 != ' ' or c1 != ' ' then
403 return new TokenEmUnderscore(pos, c)
405 return new TokenNone(pos, c)
407 else if c == '!' then
408 if c1 == '[' then return new TokenImage(pos, c)
409 return new TokenNone(pos, c)
410 else if c == '[' then
411 return new TokenLink(pos, c)
412 else if c == ']' then
413 return new TokenNone(pos, c)
414 else if c == '`' then
416 return new TokenCodeDouble(pos, c)
418 return new TokenCodeSingle(pos, c)
420 else if c == '\\' then
421 if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\
'' or c1
== '.' or c1
== '<' or c1
== '>' or c1
== '*' or c1
== '+' or c1
== '-' or c1
== '_' or c1
== '!' or c1
== '`' or c1
== '~' or c1
== '^' then
422 return new TokenEscape(pos
, c
)
424 return new TokenNone(pos
, c
)
426 else if c
== '<' then
427 return new TokenHTML(pos
, c
)
428 else if c
== '&' then
429 return new TokenEntity(pos
, c
)
432 if c
== '~' and c1
== '~' then
433 return new TokenStrike(pos
, c
)
436 return new TokenNone(pos
, c
)
440 # Find the position of a `token` in `self`.
441 fun find_token
(text
: Text, start
: Int, token
: Token): Int do
443 while pos
< text
.length
do
444 if token_at
(text
, pos
).is_same_type
(token
) then
453 # Emit output corresponding to blocks content.
455 # Blocks are created by a previous pass in `MarkdownProcessor`.
456 # The emitter uses a `Decorator` to select the output format.
457 class MarkdownEmitter
459 # Kind of processor used for parsing.
460 type PROCESSOR: MarkdownProcessor
462 # Processor containing link refs.
463 var processor
: PROCESSOR
465 # Kind of decorator used for decoration.
466 type DECORATOR: Decorator
468 # Decorator used for output.
469 # Default is `HTMLDecorator`
470 var decorator
: DECORATOR is writable, lazy
do
471 return new HTMLDecorator
474 # Create a new `MarkdownEmitter` using a custom `decorator`.
475 init with_decorator
(processor
: PROCESSOR, decorator
: DECORATOR) do
477 self.decorator
= decorator
480 # Output `block` using `decorator` in the current buffer.
481 fun emit
(block
: Block): Text do
482 var buffer
= push_buffer
# Output the content of `block`.
#
# The work is delegated to `block.emit_in`, with `self` as the emitter.
fun emit_in(block: Block) do
	block.emit_in(self)
end
# Transform and emit the markdown `text`.
#
# Calls `emit_text_until` from position 0 with no stop token.
fun emit_text(text: Text) do
	emit_text_until(text, 0, null)
end
494 # Transform and emit markdown text starting at `start` and
495 # until a token with the same type as `token` is found.
496 # Go until the end of text if `token` is null.
497 fun emit_text_until
(text
: Text, start
: Int, token
: nullable Token): Int do
498 var old_text
= current_text
499 var old_pos
= current_pos
502 while current_pos
< text
.length
do
503 var mt
= processor
.token_at
(text
, current_pos
)
504 if (token
!= null and not token
isa TokenNone) and
505 (mt
.is_same_type
(token
) or
506 (token
isa TokenEmStar and mt
isa TokenStrongStar) or
507 (token
isa TokenEmUnderscore and mt
isa TokenStrongUnderscore)) then
513 current_text
= old_text
514 current_pos
= old_pos
518 # Currently processed position in `current_text`.
519 # Used when visiting inline production with `emit_text_until`.
520 private var current_pos
: Int = -1
522 # Currently processed text.
523 # Used when visiting inline production with `emit_text_until`.
524 private var current_text
: nullable Text = null
527 private var buffer_stack
= new List[FlatBuffer]
529 # Push a new buffer on the stack.
530 private fun push_buffer
: FlatBuffer do
531 var buffer
= new FlatBuffer
532 buffer_stack
.add buffer
# Pop the last buffer from `buffer_stack`.
private fun pop_buffer do
	buffer_stack.pop
end
539 # Current output buffer.
540 private fun current_buffer
: FlatBuffer do
541 assert not buffer_stack
.is_empty
542 return buffer_stack
.last
545 # Append `e` to current buffer.
546 fun add
(e
: Writable) do
548 current_buffer
.append e
550 current_buffer
.append e
.write_to_string
# Append the single character `c` to the current output buffer.
fun addc(c: Char) do
	var target = current_buffer
	target.add c
end
# Append a "\n" line break to the current output buffer.
fun addn do
	var target = current_buffer
	target.add '\n'
end
562 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
565 # [1]: http://example.com/ "Optional title"
572 # Optional link title
573 var title
: nullable String = null
575 # Is the link an abbreviation?
576 var is_abbrev
= false
578 # Create a link with a title.
579 init with_title
(link
: String, title
: nullable String) do
585 # A `Decorator` is used to emit markdown into a specific format.
586 # Default decorator used is `HTMLDecorator`.
589 # Kind of emitter used for decoration.
590 type EMITTER: MarkdownEmitter
592 # Render a ruler block.
593 fun add_ruler
(v
: EMITTER, block
: BlockRuler) is abstract
595 # Render a headline block with corresponding level.
596 fun add_headline
(v
: EMITTER, block
: BlockHeadline) is abstract
598 # Render a paragraph block.
599 fun add_paragraph
(v
: EMITTER, block
: BlockParagraph) is abstract
601 # Render a code or fence block.
602 fun add_code
(v
: EMITTER, block
: BlockCode) is abstract
604 # Render a blockquote.
605 fun add_blockquote
(v
: EMITTER, block
: BlockQuote) is abstract
607 # Render an unordered list.
608 fun add_unorderedlist
(v
: EMITTER, block
: BlockUnorderedList) is abstract
610 # Render an ordered list.
611 fun add_orderedlist
(v
: EMITTER, block
: BlockOrderedList) is abstract
613 # Render a list item.
614 fun add_listitem
(v
: EMITTER, block
: BlockListItem) is abstract
616 # Render an emphasis text.
617 fun add_em
(v
: EMITTER, text
: Text) is abstract
619 # Render a strong text.
620 fun add_strong
(v
: EMITTER, text
: Text) is abstract
622 # Render a strike text.
624 # Extended mode only (see `MarkdownProcessor::ext_mode`)
625 fun add_strike
(v
: EMITTER, text
: Text) is abstract
628 fun add_link
(v
: EMITTER, link
: Text, name
: Text, comment
: nullable Text) is abstract
631 fun add_image
(v
: EMITTER, link
: Text, name
: Text, comment
: nullable Text) is abstract
633 # Render an abbreviation.
634 fun add_abbr
(v
: EMITTER, name
: Text, comment
: Text) is abstract
636 # Render a code span reading from a buffer.
637 fun add_span_code
(v
: EMITTER, buffer
: Text, from
, to
: Int) is abstract
639 # Render a text and escape it.
640 fun append_value
(v
: EMITTER, value
: Text) is abstract
642 # Render code text from buffer and escape it.
643 fun append_code
(v
: EMITTER, buffer
: Text, from
, to
: Int) is abstract
645 # Render a character escape.
646 fun escape_char
(v
: EMITTER, char
: Char) is abstract
648 # Render a line break
649 fun add_line_break
(v
: EMITTER) is abstract
651 # Generate a new html valid id from a `String`.
652 fun strip_id
(txt
: String): String is abstract
654 # Found headlines during the processing labeled by their ids.
655 fun headlines
: ArrayMap[String, HeadLine] is abstract
658 # Class representing a markdown headline.
660 # Unique identifier of this headline.
663 # Text of the headline.
666 # Level of this headline.
668 # According to the markdown specification, level must be in `[1..6]`.
672 # `Decorator` that outputs HTML.
676 redef var headlines
= new ArrayMap[String, HeadLine]
# Render a ruler block as an HTML `<hr/>` followed by a line break.
redef fun add_ruler(v, block) do
	v.add "<hr/>\n"
end
680 redef fun add_headline
(v
, block
) do
682 var txt
= block
.block
.first_line
.value
683 var id
= strip_id
(txt
)
684 var lvl
= block
.depth
685 headlines
[id
] = new HeadLine(id
, txt
, lvl
)
687 v
.add
"<h{lvl} id=\"{id}\
">"
692 redef fun add_paragraph
(v
, block
) do
698 redef fun add_code
(v
, block
) do
699 if block
isa BlockFence and block
.meta
!= null then
700 v
.add
"<pre class=\"{block.meta.to_s}\
"><code>"
705 v
.add
"</code></pre>\n"
708 redef fun add_blockquote
(v
, block
) do
709 v
.add
"<blockquote>\n"
711 v
.add
"</blockquote>\n"
714 redef fun add_unorderedlist
(v
, block
) do
720 redef fun add_orderedlist
(v
, block
) do
726 redef fun add_listitem
(v
, block
) do
732 redef fun add_em
(v
, text
) do
738 redef fun add_strong
(v
, text
) do
744 redef fun add_strike
(v
, text
) do
750 redef fun add_image
(v
, link
, name
, comment
) do
752 append_value(v, link)
754 append_value(v, name)
756 if comment
!= null and not comment
.is_empty
then
758 append_value(v, comment)
764 redef fun add_link
(v
, link
, name
, comment
) do
766 append_value(v, link)
768 if comment
!= null and not comment
.is_empty
then
770 append_value(v, comment)
778 redef fun add_abbr
(v
, name
, comment
) do
779 v
.add
"<abbr title=\""
780 append_value(v, comment)
786 redef fun add_span_code
(v
, text
, from
, to
) do
788 append_code
(v
, text
, from
, to
)
792 redef fun add_line_break
(v
) do
# Emit `text` one character at a time through `escape_char`.
redef fun append_value(v, text) do
	for ch in text do
		escape_char(v, ch)
	end
end
798 redef fun escape_char
(v
, c
) do
801 else if c
== '<' then
803 else if c
== '>' then
805 else if c
== '"' then
807 else if c
== '\'' then
814 redef fun append_code(v, buffer, from, to) do
815 for i in [from..to[ do
819 else if c == '<' then
821 else if c == '>' then
829 redef fun strip_id(txt) do
831 var b = new FlatBuffer
836 if not c.is_letter and
838 not allowed_id_chars.has(c) then continue
844 # check for multiple id definitions
845 if headlines.has_key(key) then
848 while headlines.has_key(key) do
856 private var allowed_id_chars: Array[Char] = ['-', '_
', ':', '.']
859 # A block of markdown lines.
860 # An `MDBlock` can contain lines and/or sub-blocks.
864 var kind: Block = new BlockNone(self) is writable
867 var first_line: nullable MDLine = null is writable
870 var last_line: nullable MDLine = null is writable
872 # First sub-block if any.
873 var first_block: nullable MDBlock = null is writable
875 # Last sub-block if any.
876 var last_block: nullable MDBlock = null is writable
878 # Previous block if any.
879 var prev: nullable MDBlock = null is writable
882 var next: nullable MDBlock = null is writable
# Does this block contain sub-blocks?
#
# True as soon as `first_block` is set.
fun has_blocks: Bool do
	var head = first_block
	return head != null
end
888 fun count_blocks: Int do
890 var block = first_block
891 while block != null do
# Does this block contain lines?
#
# True as soon as `first_line` is set.
fun has_lines: Bool do
	var head = first_line
	return head != null
end
902 fun count_lines: Int do
904 var line = first_line
905 while line != null do
912 # Split `self` creating a new sub-block having `line` as its `last_line`.
913 fun split(line: MDLine): MDBlock do
914 var block = new MDBlock
915 block.first_line = first_line
916 block.last_line = line
917 first_line = line.next
919 if first_line == null then
922 first_line.prev = null
924 if first_block == null then
928 last_block.next = block
934 # Add a `line` to this block.
935 fun add_line(line: MDLine) do
936 if last_line == null then
940 last_line.next_empty = line.is_empty
941 line.prev_empty = last_line.is_empty
942 line.prev = last_line
943 last_line.next = line
948 # Remove `line` from this block.
949 fun remove_line(line: MDLine) do
950 if line.prev == null then
951 first_line = line.next
953 line.prev.next = line.next
955 if line.next == null then
956 last_line = line.prev
958 line.next.prev = line.prev
964 # Remove leading empty lines.
965 fun remove_leading_empty_lines: Bool do
966 var was_empty = false
967 var line = first_line
968 while line != null and line.is_empty do
976 # Remove trailing empty lines.
977 fun remove_trailing_empty_lines: Bool do
978 var was_empty = false
980 while line != null and line.is_empty do
988 # Remove leading and trailing empty lines.
989 fun remove_surrounding_empty_lines: Bool do
990 var was_empty = false
991 if remove_leading_empty_lines then was_empty = true
992 if remove_trailing_empty_lines then was_empty = true
996 # Remove list markers and up to 4 leading spaces.
997 # Used to clean nested lists.
998 fun remove_list_indent(v: MarkdownProcessor) do
999 var line = first_line
1000 while line != null do
1001 if not line.is_empty then
1002 var kind = v.line_kind(line)
1003 if kind isa LineList then
1004 line.value = kind.extract_value(line)
1006 line.value = line.value.substring_from(line.leading.min(4))
1008 line.leading = line.process_leading
1014 # Collect block line text.
1016 var text = new FlatBuffer
1017 var line = first_line
1018 while line != null do
1019 if not line.is_empty then
1020 text.append line.text
1025 return text.write_to_string
1029 # Representation of a markdown block in the AST.
1030 # Each `Block` is linked to a `MDBlock` that contains markdown code.
1031 abstract class Block
1033 # The markdown block `self` is related to.
# Output `self` through the emitter `v`.
#
# The default implementation simply asks `v` to emit the content of `self`.
fun emit(v: MarkdownEmitter) do
	v.emit_in(self)
end
1039 # Emit the contents of `self`, lines or blocks.
1040 fun emit_in(v: MarkdownEmitter) do
1041 block.remove_surrounding_empty_lines
1042 if block.has_lines then
1049 # Emit lines contained in `block`.
1050 fun emit_lines(v: MarkdownEmitter) do
1051 var tpl = v.push_buffer
1052 var line = block.first_line
1053 while line != null do
1054 if not line.is_empty then
1055 v.add line.value.substring(line.leading, line.value.length - line.trailing)
1056 if line.trailing >= 2 then v.decorator.add_line_break(v)
1058 if line.next != null then
1067 # Emit sub-blocks contained in `block`.
1068 fun emit_blocks(v: MarkdownEmitter) do
1069 var block = self.block.first_block
1070 while block != null do
1077 # A block without any markdown specificities.
1079 # Actually uses the same implementation as `BlockCode`,
1080 # this class is only used for typing purposes.
1085 # A markdown blockquote.
# Hand `self` to the decorator of `v` to be rendered as a blockquote.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_blockquote(v, self)
end
1091 # Remove blockquote markers.
1092 private fun remove_block_quote_prefix(block: MDBlock) do
1093 var line = block.first_line
1094 while line != null do
1095 if not line.is_empty then
1096 if line.value[line.leading] == '>' then
1097 var rem = line.leading + 1
1098 if line.leading + 1 < line.value.length and
1099 line.value[line.leading + 1] == ' ' then
1102 line.value = line.value.substring_from(rem)
1103 line.leading = line.process_leading
1111 # A markdown code block.
1115 # Number of char to skip at the beginning of the line.
1117 # Block code lines start at 4 spaces.
1118 protected var line_start = 4
# Hand `self` to the decorator of `v` to be rendered as a code block.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_code(v, self)
end
1122 redef fun emit_lines(v) do
1123 var line = block.first_line
1124 while line != null do
1125 if not line.is_empty then
1126 v.decorator.append_code(v, line.value, line_start, line.value.length)
1134 # A markdown code-fence block.
1136 # Actually uses the same implementation as `BlockCode`,
1137 # this class is only used for typing purposes.
1141 # Any string found after fence token.
1142 var meta: nullable Text
1144 # Fence code lines start at 0 spaces.
1145 redef var line_start = 0
1148 # A markdown headline.
# Hand `self` to the decorator of `v` to be rendered as a headline.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_headline(v, self)
end
1154 # Depth of the headline used to determine the headline level.
1157 # Remove headline marks from lines contained in `self`.
1158 private fun transform_headline(block: MDBlock) do
1159 if depth > 0 then return
1161 var line = block.first_line
1162 if line.is_empty then return
1163 var start = line.leading
1164 while start < line.value.length and line.value[start] == '#' do
1168 while start
< line
.value
.length
and line
.value
[start
] == ' ' do
1171 if start
>= line
.value
.length
then
1172 line
.is_empty
= true
1174 var nend
= line
.value
.length
- line
.trailing
- 1
1175 while line
.value
[nend
] == '#' do nend
-= 1
1176 while line
.value
[nend
] == ' ' do nend
-= 1
1177 line
.value
= line
.value
.substring
(start
, nend
- start
+ 1)
1181 depth
= level
.min
(6)
1185 # A markdown list item block.
# Hand `self` to the decorator of `v` to be rendered as a list item.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_listitem(v, self)
end
1192 # A markdown list block.
1193 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
1194 abstract class BlockList
1197 # Split list block into list items sub-blocks.
1198 private fun init_block
(v
: MarkdownProcessor) do
1199 var line
= block
.first_line
1201 while line
!= null do
1202 var t
= v
.line_kind
(line
)
1203 if t
isa LineList or
1204 (not line
.is_empty
and (line
.prev_empty
and line
.leading
== 0 and
1205 not (t
isa LineList))) then
1206 var sblock
= block
.split
(line
.prev
.as(not null))
1207 sblock
.kind
= new BlockListItem(sblock
)
1211 var sblock
= block
.split
(block
.last_line
.as(not null))
1212 sblock
.kind
= new BlockListItem(sblock
)
1215 # Expand list items as paragraphs if needed.
1216 private fun expand_paragraphs
(block
: MDBlock) do
1217 var outer
= block
.first_block
1218 var inner
: nullable MDBlock
1219 var has_paragraph
= false
1220 while outer
!= null and not has_paragraph
do
1221 if outer
.kind
isa BlockListItem then
1222 inner
= outer
.first_block
1223 while inner
!= null and not has_paragraph
do
1224 if inner
.kind
isa BlockParagraph then
1225 has_paragraph
= true
1232 if has_paragraph
then
1233 outer
= block
.first_block
1234 while outer
!= null do
1235 if outer
.kind
isa BlockListItem then
1236 inner
= outer
.first_block
1237 while inner
!= null do
1238 if inner
.kind
isa BlockNone then
1239 inner
.kind
= new BlockParagraph(inner
)
1250 # A markdown ordered list.
1251 class BlockOrderedList
# Hand `self` to the decorator of `v` to be rendered as an ordered list.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_orderedlist(v, self)
end
1257 # A markdown unordered list.
1258 class BlockUnorderedList
# Hand `self` to the decorator of `v` to be rendered as an unordered list.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_unorderedlist(v, self)
end
1264 # A markdown paragraph block.
1265 class BlockParagraph
# Hand `self` to the decorator of `v` to be rendered as a paragraph.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_paragraph(v, self)
end
# Hand `self` to the decorator of `v` to be rendered as a ruler.
redef fun emit(v) do
	var decorator = v.decorator
	decorator.add_ruler(v, self)
end
1278 # Xml blocks that can be found in markdown markup.
1282 redef fun emit_lines
(v
) do
1283 var line
= block
.first_line
1284 while line
!= null do
1285 if not line
.is_empty
then v
.add line
.value
1295 # Text contained in this line.
1296 var value
: String is writable
1298 # Is this line empty?
1299 # Lines containing only spaces are considered empty.
1300 var is_empty
: Bool = true is writable
1302 # Previous line in `MDBlock` or null if first line.
1303 var prev
: nullable MDLine = null is writable
1305 # Next line in `MDBlock` or null if last line.
1306 var next
: nullable MDLine = null is writable
1308 # Is the previous line empty?
1309 var prev_empty
: Bool = false is writable
1311 # Is the next line empty?
1312 var next_empty
: Bool = false is writable
1314 # Initialize a new MDLine from its string value
1316 self.leading
= process_leading
1317 if leading
!= value
.length
then
1318 self.is_empty
= false
1319 self.trailing
= process_trailing
1323 # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
1329 if prev
!= null then prev
.next_empty
= true
1330 if next
!= null then next
.prev_empty
= true
1333 # Number of leading spaces on this line.
1334 var leading
: Int = 0 is writable
1336 # Compute `leading` depending on `value`.
1337 fun process_leading
: Int do
1339 var value
= self.value
1340 while count
< value
.length
and value
[count
] == ' ' do count
+= 1
1341 if leading
== value
.length
then clear
1345 # Number of trailing spaces on this line.
1346 var trailing
: Int = 0 is writable
1348 # Compute `trailing` depending on `value`.
1349 fun process_trailing
: Int do
1351 var value
= self.value
1352 while value
[value
.length
- count
- 1] == ' ' do
1358 # Count the amount of `ch` in this line.
1359 # Return a value > 0 if this line only consists of `ch` and spaces.
1360 fun count_chars
(ch
: Char): Int do
1376 # Count the amount of `ch` at the start of this line ignoring spaces.
1377 fun count_chars_start
(ch
: Char): Int do
1392 # Last XML line if any.
1393 private var xml_end_line
: nullable MDLine = null
1395 # Does `value` contains valid XML markup?
1396 private fun check_html
: Bool do
1397 var tags
= new Array[String]
1398 var tmp
= new FlatBuffer
1400 if pos
+ 1 < value
.length
and value
[pos
+ 1] == '!' then
1401 if read_xml_comment
(self, pos
) > 0 then return true
1403 pos
= value
.read_xml
(tmp
, pos
, false)
1407 if not tag
.is_html_block
then
1415 var line
: nullable MDLine = self
1416 while line
!= null do
1417 while pos
< line
.value
.length
and line
.value
[pos
] != '<' do
1420 if pos
>= line
.value
.length
then
1421 if pos
- 2 >= 0 and line
.value
[pos
- 2] == '/' then
1423 if tags
.is_empty
then
1431 tmp
= new FlatBuffer
1432 var new_pos
= line
.value
.read_xml
(tmp
, pos
, false)
1435 if tag
.is_html_block
and not tag
== "hr" then
1436 if tmp
[1] == '/' then
1437 if tags
.last
!= tag
then
1445 if tags
.is_empty
then
1455 return tags
.is_empty
1460 # Read a XML comment.
1461 # Used by `check_html`.
1462 private fun read_xml_comment
(first_line
: MDLine, start
: Int): Int do
1463 var line
: nullable MDLine = first_line
1464 if start
+ 3 < line
.value
.length
then
1465 if line
.value
[2] == '-' and line
.value
[3] == '-' then
1467 while line
!= null do
1468 while pos
< line
.value
.length
and line
.value
[pos
] != '-' do
1471 if pos
== line
.value
.length
then
1475 if pos
+ 2 < line
.value
.length
then
1476 if line
.value
[pos
+ 1] == '-' and line
.value
[pos
+ 2] == '>' then
1477 first_line
.xml_end_line
= line
# Extract the text of `self` without leading and trailing spaces.
#
# `substring` takes a start index and a character *count*, not an end index.
# The count must therefore exclude both the `leading` and the `trailing`
# spaces; passing `value.length - trailing` kept trailing spaces whenever
# `leading >= trailing`.
#
# `value`, `leading` and `trailing` are independently writable, so the count
# is clamped to 0 to stay safe if they are momentarily out of sync.
fun text: String do
	var count = value.length - leading - trailing
	return value.substring(leading, count.max(0))
end
1497 # See `MarkdownProcessor::recurse`.
1498 fun process
(v
: MarkdownProcessor) is abstract
1501 # An empty markdown line.
1505 redef fun process
(v
) do
1506 v
.current_line
= v
.current_line
.next
1510 # A non-specific markdown construction.
1511 # Mainly used as part of another line construct such as paragraphs or lists.
1515 redef fun process
(v
) do
1516 var line
= v
.current_line
1518 var was_empty
= line
.prev_empty
1519 while line
!= null and not line
.is_empty
do
1520 var t
= v
.line_kind
(line
)
1521 if (v
.in_list
or v
.ext_mode
) and t
isa LineList then
1524 if v
.ext_mode
and (t
isa LineCode or t
isa LineFence) then
1527 if t
isa LineHeadline or t
isa LineHeadline1 or t
isa LineHeadline2 or
1528 t
isa LineHR or t
isa LineBlockquote or t
isa LineXML then
1534 if line
!= null and not line
.is_empty
then
1535 var block
= v
.current_block
.split
(line
.prev
.as(not null))
1536 if v
.in_list
and not was_empty
then
1537 block
.kind
= new BlockNone(block
)
1539 block
.kind
= new BlockParagraph(block
)
1541 v
.current_block
.remove_leading_empty_lines
1544 if line
!= null then
1545 block
= v
.current_block
.split
(line
)
1547 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1549 if v
.in_list
and (line
== null or not line
.is_empty
) and not was_empty
then
1550 block
.kind
= new BlockNone(block
)
1552 block
.kind
= new BlockParagraph(block
)
1554 v
.current_block
.remove_leading_empty_lines
1556 v
.current_line
= v
.current_block
.first_line
1560 # A line of markdown code.
1564 redef fun process
(v
) do
1565 var line
= v
.current_line
1567 while line
!= null and (line
.is_empty
or v
.line_kind
(line
) isa LineCode) do
1570 # split at block end line
1572 if line
!= null then
1573 block
= v
.current_block
.split
(line
.prev
.as(not null))
1575 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1577 block
.kind
= new BlockCode(block
)
1578 block
.remove_surrounding_empty_lines
1579 v
.current_line
= v
.current_block
.first_line
1583 # A line of raw XML.
1587 redef fun process
(v
) do
1588 var line
= v
.current_line
1589 var prev
= line
.prev
1590 if prev
!= null then v
.current_block
.split
(prev
)
1591 var block
= v
.current_block
.split
(line
.xml_end_line
.as(not null))
1592 block
.kind
= new BlockXML(block
)
1593 v
.current_block
.remove_leading_empty_lines
1594 v
.current_line
= v
.current_block
.first_line
1598 # A markdown blockquote line.
1599 class LineBlockquote
1602 redef fun process
(v
) do
1603 var line
= v
.current_line
1605 while line
!= null do
1606 if not line
.is_empty
and (line
.prev_empty
and
1607 line
.leading
== 0 and
1608 not v
.line_kind
(line
) isa LineBlockquote) then break
1613 if line
!= null then
1614 block
= v
.current_block
.split
(line
.prev
.as(not null))
1616 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1618 var kind
= new BlockQuote(block
)
1620 block
.remove_surrounding_empty_lines
1621 kind
.remove_block_quote_prefix
(block
)
1622 v
.current_line
= line
1623 v
.recurse
(block
, false)
1624 v
.current_line
= v
.current_block
.first_line
1628 # A markdown ruler line.
# Isolate the current line into a `BlockRuler`.
#
# Afterwards `v.current_line` is the first line left in `v.current_block`.
redef fun process(v) do
	var ruler_line = v.current_line
	# Detach the lines located before the ruler, if any.
	var before = ruler_line.prev
	if before != null then
		v.current_block.split(before)
	end
	var ruler = v.current_block.split(ruler_line.as(not null))
	ruler.kind = new BlockRuler(ruler)
	v.current_block.remove_leading_empty_lines
	v.current_line = v.current_block.first_line
end
1642 # A markdown fence code line.
1646 redef fun process
(v
) do
1648 var line
= v
.current_line
.next
1649 while line
!= null do
1650 if v
.line_kind
(line
) isa LineFence then break
1653 if line
!= null then
1658 if line
!= null then
1659 block
= v
.current_block
.split
(line
.prev
.as(not null))
1661 block
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1663 var meta
= block
.first_line
.value
.meta_from_fence
1664 block
.kind
= new BlockFence(block
, meta
)
1665 block
.first_line
.clear
1666 var last
= block
.last_line
1667 if last
!= null and v
.line_kind
(last
) isa LineFence then
1668 block
.last_line
.clear
1670 block
.remove_surrounding_empty_lines
1671 v
.current_line
= line
1675 # A markdown headline.
1679 redef fun process
(v
) do
1680 var line
= v
.current_line
1681 var lprev
= line
.prev
1682 if lprev
!= null then v
.current_block
.split
(lprev
)
1683 var block
= v
.current_block
.split
(line
.as(not null))
1684 var kind
= new BlockHeadline(block
)
1686 kind
.transform_headline
(block
)
1687 v
.current_block
.remove_leading_empty_lines
1688 v
.current_line
= v
.current_block
.first_line
1692 # A markdown headline of level 1.
1696 redef fun process
(v
) do
1697 var line
= v
.current_line
1698 var lprev
= line
.prev
1699 if lprev
!= null then v
.current_block
.split
(lprev
)
1701 var block
= v
.current_block
.split
(line
.as(not null))
1702 var kind
= new BlockHeadline(block
)
1704 kind
.transform_headline
(block
)
1706 v
.current_block
.remove_leading_empty_lines
1707 v
.current_line
= v
.current_block
.first_line
1711 # A markdown headline of level 2.
1715 redef fun process
(v
) do
1716 var line
= v
.current_line
1717 var lprev
= line
.prev
1718 if lprev
!= null then v
.current_block
.split
(lprev
)
1720 var block
= v
.current_block
.split
(line
.as(not null))
1721 var kind
= new BlockHeadline(block
)
1723 kind
.transform_headline
(block
)
1725 v
.current_block
.remove_leading_empty_lines
1726 v
.current_line
= v
.current_block
.first_line
1730 # A markdown list line.
1731 # Mainly used to factorize code between ordered and unordered lists.
1735 redef fun process
(v
) do
1736 var line
= v
.current_line
1738 while line
!= null do
1739 var t
= v
.line_kind
(line
)
1740 if not line
.is_empty
and (line
.prev_empty
and line
.leading
== 0 and
1741 not t
isa LineList) then break
1746 if line
!= null then
1747 list
= v
.current_block
.split
(line
.prev
.as(not null))
1749 list
= v
.current_block
.split
(v
.current_block
.last_line
.as(not null))
1751 var kind
= block_kind
(list
)
1753 list
.first_line
.prev_empty
= false
1754 list
.last_line
.next_empty
= false
1755 list
.remove_surrounding_empty_lines
1756 list
.first_line
.prev_empty
= false
1757 list
.last_line
.next_empty
= false
1759 var block
= list
.first_block
1760 while block
!= null do
1761 block
.remove_list_indent
(v
)
1762 v
.recurse
(block
, true)
1765 kind
.expand_paragraphs
(list
)
1766 v
.current_line
= line
# Build the `BlockList` kind that matches this kind of list line.
#
# `block` is the list block the returned kind is created for.
protected fun block_kind(block: MDBlock): BlockList is abstract
# Return the string value carried by `line`.
#
# Each kind of list line decides which part of `line.value` is kept.
protected fun extract_value(line: MDLine): String is abstract
1776 # An ordered list line.
# Ordered list lines produce a `BlockOrderedList`.
redef fun block_kind(block) do
	var kind = new BlockOrderedList(block)
	return kind
end
# Return what follows the first `.` of `line.value`, skipping one more character.
redef fun extract_value(line) do
	var text = line.value
	var dot = text.index_of('.')
	return text.substring_from(dot + 2)
end
1787 # An unordered list line.
# Unordered list lines produce a `BlockUnorderedList`.
redef fun block_kind(block) do
	var kind = new BlockUnorderedList(block)
	return kind
end
# Return `line.value` without its first `line.leading + 2` characters.
redef fun extract_value(line) do
	var skipped = line.leading + 2
	return line.value.substring_from(skipped)
end
1798 # A token represent a character in the markdown input.
1799 # Some tokens have a specific markup behaviour that is handled here.
1800 abstract class Token
1802 # Position of `self` in markdown input.
1805 # Character found at `pos` in the markdown input.
# Output this token on `v`.
#
# By default the token's `char` is appended as is.
fun emit(v: MarkdownEmitter) do
	v.addc(char)
end
1812 # A token without a specific meaning.
1817 # An emphasis token.
1818 abstract class TokenEm
1821 redef fun emit
(v
) do
1822 var tmp
= v
.push_buffer
1823 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 1, self)
1826 v
.decorator
.add_em
(v
, tmp
)
1834 # An emphasis star token.
1839 # An emphasis underscore token.
1840 class TokenEmUnderscore
1845 abstract class TokenStrong
1848 redef fun emit
(v
) do
1849 var tmp
= v
.push_buffer
1850 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 2, self)
1853 v
.decorator
.add_strong
(v
, tmp
)
1854 v
.current_pos
= b
+ 1
1861 # A strong star token.
1862 class TokenStrongStar
1866 # A strong underscore token.
1867 class TokenStrongUnderscore
1872 # This class is mainly used to factorize work between single and double quoted span codes.
1873 abstract class TokenCode
1876 redef fun emit
(v
) do
1877 var a
= pos
+ next_pos
+ 1
1878 var b
= v
.processor
.find_token
(v
.current_text
.as(not null), a
, self)
1880 v
.current_pos
= b
+ next_pos
1881 while a
< b
and v
.current_text
[a
] == ' ' do a
+= 1
1883 while v
.current_text
[b
- 1] == ' ' do b
-= 1
1884 v
.decorator
.add_span_code
(v
, v
.current_text
.as(not null), a
, b
)
# Extra offset used by `emit` to step over the span delimiter.
#
# It is added to `pos` to find where the content starts and to the closing
# position to compute the new `current_pos`.
private fun next_pos: Int is abstract
1894 # A span code token.
1895 class TokenCodeSingle
# No extra offset for this token.
redef fun next_pos do
	return 0
end
1901 # A doubled span code token.
1902 class TokenCodeDouble
# One extra character to step over for this token.
redef fun next_pos do
	return 1
end
1908 # A link or image token.
1909 # This class is mainly used to factorize work between images and links.
1910 abstract class TokenLinkOrImage
1914 var link
: nullable Text = null
1917 var name
: nullable Text = null
1920 var comment
: nullable Text = null
1922 # Is the link construct an abbreviation?
1923 var is_abbrev
= false
1925 redef fun emit
(v
) do
1926 var tmp
= new FlatBuffer
1927 var b
= check_link
(v
, tmp
, pos
, self)
# Emit this hyperlink on `v`, either as a link or as an image.
private fun emit_hyper(v: MarkdownEmitter) is abstract
1939 # Check if the link is a valid link.
1940 private fun check_link
(v
: MarkdownEmitter, out
: FlatBuffer, start
: Int, token
: Token): Int do
1941 var md
= v
.current_text
1943 if token
isa TokenLink then
1948 var tmp
= new FlatBuffer
1949 pos
= md
.read_md_link_id
(tmp
, pos
)
1950 if pos
< start
then return -1
1954 pos
= md
.skip_spaces
(pos
)
1956 var tid
= name
.write_to_string
.to_lower
1957 if v
.processor
.link_refs
.has_key
(tid
) then
1958 var lr
= v
.processor
.link_refs
[tid
]
1959 is_abbrev
= lr
.is_abbrev
1966 else if md
[pos
] == '(' then
1968 pos
= md
.skip_spaces
(pos
)
1969 if pos
< start
then return -1
1970 tmp
= new FlatBuffer
1971 var use_lt
= md
[pos
] == '<'
1973 pos
= md
.read_until
(tmp
, pos
+ 1, '>')
1975 pos
= md
.read_md_link
(tmp
, pos
)
1977 if pos
< start
then return -1
1978 if use_lt
then pos
+= 1
1979 link
= tmp
.write_to_string
1980 if md
[pos
] == ' ' then
1981 pos
= md
.skip_spaces
(pos
)
1982 if pos
> start
and md
[pos
] == '"' then
1984 tmp
= new FlatBuffer
1985 pos
= md
.read_until
(tmp
, pos
, '"')
1986 if pos
< start
then return -1
1987 comment
= tmp
.write_to_string
1989 pos
= md
.skip_spaces
(pos
)
1990 if pos
== -1 then return -1
1993 if md
[pos
] != ')' then return -1
1994 else if md
[pos
] == '[' then
1996 tmp
= new FlatBuffer
1997 pos
= md
.read_raw_until
(tmp
, pos
, ']')
1998 if pos
< start
then return -1
2000 if tmp
.length
> 0 then
2005 var tid
= id
.write_to_string
.to_lower
2006 if v
.processor
.link_refs
.has_key
(tid
) then
2007 var lr
= v
.processor
.link_refs
[tid
]
2012 var tid
= name
.write_to_string
.replace
("\n", " ").to_lower
2013 if v
.processor
.link_refs
.has_key
(tid
) then
2014 var lr
= v
.processor
.link_refs
[tid
]
2022 if link
== null then return -1
2027 # A markdown link token.
2029 super TokenLinkOrImage
# Emit the link through the decorator of `v`.
#
# An abbreviation that carries a `comment` goes to `add_abbr`;
# anything else goes to `add_link`.
redef fun emit_hyper(v) do
	var title = comment
	if is_abbrev and title != null then
		v.decorator.add_abbr(v, name.as(not null), title)
		return
	end
	v.decorator.add_link(v, link.as(not null), name.as(not null), title)
end
2040 # A markdown image token.
2042 super TokenLinkOrImage
# Emit the image through `add_image` of the decorator of `v`.
#
# `link` and `name` must be set; `comment` may be null.
redef fun emit_hyper(v) do
	var decorator = v.decorator
	var target = link.as(not null)
	var label = name.as(not null)
	decorator.add_image(v, target, label, comment)
end
2053 redef fun emit
(v
) do
2054 var tmp
= new FlatBuffer
2055 var b
= check_html
(v
, tmp
, v
.current_text
.as(not null), v
.current_pos
)
2060 v
.decorator
.escape_char
(v
, char
)
2064 # Is the HTML valid?
2065 # Also take care of link and mailto shortcuts.
2066 private fun check_html
(v
: MarkdownEmitter, out
: FlatBuffer, md
: Text, start
: Int): Int do
2067 # check for auto links
2068 var tmp
= new FlatBuffer
2069 var pos
= md
.read_until
(tmp
, start
+ 1, ':', ' ', '>', '\n')
2070 if pos
!= -1 and md
[pos
] == ':' and tmp
.is_link_prefix
then
2071 pos
= md
.read_until
(tmp
, pos
, '>')
2073 var link
= tmp
.write_to_string
2074 v
.decorator
.add_link
(v
, link
, link
, null)
2078 # TODO check for mailto
2079 # check for inline html
2080 if start
+ 2 < md
.length
then
2081 return md
.read_xml
(out
, start
, true)
2087 # An HTML entity token.
2091 redef fun emit
(v
) do
2092 var tmp
= new FlatBuffer
2093 var b
= check_entity
(tmp
, v
.current_text
.as(not null), pos
)
2098 v
.decorator
.escape_char
(v
, char
)
# Is the entity valid?
#
# Reads `md` from `start` up to the next `;` into `out`, then checks what was
# read:
#
# * `#x` or `#X` at index 1 and 2: every following character must be a
#   hexadecimal digit, and there must be at least one.
# * `#` at index 1 otherwise: every following character must be in `0..9`.
# * anything else: every character from index 1 must be a digit or a letter.
#
# Returns -1 when the text is not an entity, else the position returned by
# `read_until`.
private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
	var pos = md.read_until(out, start, ';')
	if pos < 0 or out.length < 3 then return -1
	if out[1] == '#' then
		if out[2] == 'x' or out[2] == 'X' then
			if out.length < 4 then return -1
			for i in [3..out.length[ do
				var c = out[i]
				# A character is rejected when it is in none of the three
				# ranges. The previous test combined the bounds of each range
				# with `and`, which can never hold, so nothing was rejected.
				var is_hex = (c >= '0' and c <= '9') or
					(c >= 'a' and c <= 'f') or
					(c >= 'A' and c <= 'F')
				if not is_hex then return -1
			end
		else
			for i in [2..out.length[ do
				var c = out[i]
				if c < '0' or c > '9' then return -1
			end
		end
		# NOTE(review): the terminator is appended here because the scan is
		# expected to stop before it; confirm against `read_until`.
		out.add ';'
	else
		for i in [1..out.length[ do
			var c = out[i]
			if not c.is_digit and not c.is_letter then return -1
		end
		out.add ';'
		# TODO check entity is valid
		# if out.is_entity then
	end
	return pos
end
2141 # A markdown escape token.
2145 redef fun emit
(v
) do
2147 v
.addc v
.current_text
[v
.current_pos
]
2151 # A markdown strike token.
2153 # Extended mode only (see `MarkdownProcessor::ext_mode`)
2157 redef fun emit
(v
) do
2158 var tmp
= v
.push_buffer
2159 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 2, self)
2162 v
.decorator
.add_strike
(v
, tmp
)
2163 v
.current_pos
= b
+ 1
# Get the position of the next non-space character.
#
# The scan begins at `start` and steps over `' '` and `'\n'`.
# Returns -1 when the end of `self` is reached.
private fun skip_spaces(start: Int): Int do
	var cursor = start
	loop
		if cursor <= -1 or cursor >= length then break
		var c = self[cursor]
		if c != ' ' and c != '\n' then break
		cursor += 1
	end
	if cursor >= length then return -1
	return cursor
end
2182 # Read `self` until `nend` and append it to the `out` buffer.
2183 # Escape markdown special chars.
2184 private fun read_until
(out
: FlatBuffer, start
: Int, nend
: Char...): Int do
2186 while pos
< length
do
2188 if c
== '\\' and pos
+ 1 < length
then
2189 pos
= escape
(out
, self[pos
+ 1], pos
)
2191 var end_reached
= false
2198 if end_reached
then break
2203 if pos
== length
then return -1
2207 # Read `self` as raw text until `nend` and append it to the `out` buffer.
2208 # No escape is made.
2209 private fun read_raw_until
(out
: FlatBuffer, start
: Int, nend
: Char...): Int do
2211 while pos
< length
do
2213 var end_reached
= false
2220 if end_reached
then break
2224 if pos
== length
then return -1
2228 # Read `self` as XML until `to` and append it to the `out` buffer.
2229 # Escape HTML special chars.
2230 private fun read_xml_until
(out
: FlatBuffer, from
: Int, to
: Char...): Int do
2233 var str_char
: nullable Char = null
2234 while pos
< length
do
2240 if pos
< length
then
2246 if c
== str_char
then
2253 if c
== '"' or c
== '\'' then
2258 var end_reached = false
2259 for n in [0..to.length[ do
2265 if end_reached then break
2270 if pos == length then return -1
2274 # Read `self` as XML and append it to the `out` buffer.
2275 # Safe mode can be activated to limit reading to valid xml.
2276 private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2279 var is_close_tag = false
2280 if start + 1 >= length then return -1
2281 if self[start + 1] == '/' then
2284 else if self[start + 1] == '!' then
2288 is_close_tag = false
2292 var tmp = new FlatBuffer
2293 pos = read_xml_until(tmp, pos, ' ', '/', '>')
2294 if pos == -1 then return -1
2295 var tag = tmp.write_to_string.trim.to_lower
2296 if not tag.is_valid_html_tag then
2299 else if tag.is_html_unsafe then
2302 if is_close_tag then out.add '/'
2306 if is_close_tag then out.add '/'
2311 if is_close_tag then out.add '/'
2312 pos = read_xml_until(out, pos, ' ', '/', '>')
2314 if pos == -1 then return -1
2315 pos = read_xml_until(out, pos, '/', '>')
2316 if pos == -1 then return -1
2317 if self[pos] == '/' then
2319 pos = self.read_xml_until(out, pos + 1, '>')
2320 if pos == -1 then return -1
2322 if self[pos] == '>' then
2333 # Read a markdown link address and append it to the `out` buffer.
2334 private fun read_md_link(out: FlatBuffer, start: Int): Int do
2337 while pos < length do
2339 if c == '\\
' and pos + 1 < length then
2340 pos = escape(out, self[pos + 1], pos)
2342 var end_reached = false
2345 else if c == ' ' then
2346 if counter == 1 then end_reached = true
2347 else if c == ')' then
2349 if counter == 0 then end_reached = true
2351 if end_reached then break
2356 if pos == length then return -1
2360 # Read a markdown link text and append it to the `out` buffer.
2361 private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2364 while pos < length do
2366 var end_reached = false
2370 else if c == ']' then
2372 if counter == 0 then
2380 if end_reached then break
2383 if pos == length then return -1
2387 # Extract the XML tag name from a XML tag.
2388 private fun xml_tag: String do
2389 var tpl = new FlatBuffer
2391 if pos < length and self[1] == '/' then pos += 1
2392 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2396 return tpl.write_to_string.to_lower
2399 private fun is_valid_html_tag: Bool do
2400 if is_empty then return false
2402 if not c.is_alpha then return false
2407 # Read and escape the markdown contained in `self`.
2408 private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2409 if c == '\\
' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2410 c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2411 c
== '>' or c
== '*' or c
== '+' or c
== '-' or c
== '_' or c
== '!' or
2412 c
== '`' or c
== '~' or c
== '^' then
# Extract the string found after the fence opening.
#
# Leading spaces, backticks and tildes are stepped over; the rest of `self`
# is returned trimmed. Returns `null` when `self` holds nothing else.
private fun meta_from_fence: nullable Text do
	var i = 0
	var n = chars.length
	while i < n do
		var c = chars[i]
		var is_fence_char = c == ' ' or c == '`' or c == '~'
		if not is_fence_char then return substring_from(i).trim
		i += 1
	end
	return null
end
# Is `self` listed in `html_unsafe_tags`?
private fun is_html_unsafe: Bool do
	var unsafe_tags = html_unsafe_tags
	return unsafe_tags.has(self.write_to_string)
end
# Is `self` an HTML block element, that is, listed in `html_block_tags`?
private fun is_html_block: Bool do
	var block_tags = html_block_tags
	return block_tags.has(self.write_to_string)
end
# Is `self` a link prefix, that is, listed in `html_link_prefixes`?
private fun is_link_prefix: Bool do
	var prefixes = html_link_prefixes
	return prefixes.has(self.write_to_string)
end
# Tag names checked by `is_html_unsafe`.
#
# The array is built a single time (`once`) and shared by all calls.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
# Tag names checked by `is_html_block`.
#
# The array is built a single time (`once`) and shared by all calls.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
# Prefixes checked by `is_link_prefix`.
#
# The array is built a single time (`once`) and shared by all calls.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
# Parse `self` as markdown and return the HTML representation.
#
# A fresh `MarkdownProcessor` is created for each call.
#
#     var md = "**Hello World!**"
#     var html = md.md_to_html
#     assert html == "<p><strong>Hello World!</strong></p>\n"
fun md_to_html: Writable do
	return (new MarkdownProcessor).process(self)
end