1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
20 # Parse a markdown string and split it in blocks.
22 # Blocks are then output by a `MarkdownEmitter`.
26 # var proc = new MarkdownProcessor
27 # var html = proc.process("**Hello World!**")
28 # assert html == "<p><strong>Hello World!</strong></p>\n"
30 # SEE: `String::md_to_html` for a shortcut.
31 class MarkdownProcessor
33 # Work in extended mode (default).
35 # Behavior changes when using extended mode:
37 # * Lists and code blocks end a paragraph
39 # In normal markdown the following:
43 # * and this is not a list
49 # <p>This is a paragraph
50 # * and this is not a list</p>
53 # When using extended mode this changes to:
56 # <p>This is a paragraph</p>
58 # <li>and this is not a list</li>
62 # * Fenced code blocks
64 # If you don't want to indent all your code with 4 spaces,
65 # you can wrap your code in ``` ``` ``` or `~~~`.
71 # print "Hello World!"
77 # If you want to use syntax highlighting tools, most of them need to know what kind
78 # of language they are highlighting.
79 # You can add an optional language identifier after the fence declaration to output
80 # it in the HTML render.
85 # print "# Hello World!".md_to_html
91 # <pre class="nit"><code>import markdown
93 # print "Hello World!".md_to_html
97 # * Underscores (Emphasis)
99 # Underscores in the middle of a word like:
105 # normally produces this:
108 # <p>Con<em>cat</em>this</p>
111 # With extended mode they don't result in emphasis.
114 # <p>Con_cat_this</p>
119 # Like in [GFM](https://help.github.com/articles/github-flavored-markdown),
120 # strikethrough span is marked with `~~`.
129 # <del>Mistaken text.</del>
133 # Disable attaching MDLocation to Tokens
135 # Locations are useful for some tools but they may
136 # cause a significant time and space overhead.
139 var no_location
= false is writable
141 # Process the markdown `input` string and return the processed output.
142 fun process
(input
: String): Writable do
149 var parent
= read_lines
(input
)
150 parent
.remove_surrounding_empty_lines
151 recurse
(parent
, false)
152 # output processed text
153 return emit
(parent
.kind
)
156 # Split `input` string into `MDLines` and create a parent `MDBlock` with it.
157 private fun read_lines
(input
: String): MDBlock do
158 var block
= new MDBlock(new MDLocation(1, 1, 1, 1))
159 var value
= new FlatBuffer
165 while i
< input
.length
do
169 while not eol
and i
< input
.length
do
174 else if c
== '\r' then
175 else if c
== '\t' then
176 var np
= pos
+ (4 - (pos
& 3))
189 var loc
= new MDLocation(line_pos
, 1, line_pos
, col_pos
)
190 var line
= new MDLine(loc
, value
.write_to_string
)
191 var is_link_ref
= check_link_ref
(line
)
193 if not is_link_ref
then block
.add_line line
199 # Check if line is a block link definition.
200 # Return `true` if the line contains a valid link ref, saving it into `link_refs`.
201 private fun check_link_ref
(line
: MDLine): Bool do
203 var is_link_ref
= false
204 var id
= new FlatBuffer
205 var link
= new FlatBuffer
206 var comment
= new FlatBuffer
208 if not line
.is_empty
and line
.leading
< 4 and line
.value
[line
.leading
] == '[' then
209 pos
= line
.leading
+ 1
210 pos
= md
.read_until
(id
, pos
, ']')
211 if not id
.is_empty
and pos
>= 0 and pos
+ 2 < line
.value
.length
then
212 if line
.value
[pos
+ 1] == ':' then
214 pos
= md
.skip_spaces
(pos
)
215 if pos
>= 0 and line
.value
[pos
] == '<' then
217 pos
= md
.read_until
(link
, pos
, '>')
219 else if pos
>= 0 then
220 pos
= md
.read_until
(link
, pos
, ' ', '\n')
222 if not link
.is_empty
then
223 pos
= md
.skip_spaces
(pos
)
224 if pos
> 0 and pos
< line
.value
.length
then
225 var c
= line
.value
[pos
]
226 if c
== '\"' or c
== '\'' or c == '(' then
229 pos = md.read_until(comment, pos, ')')
231 pos = md.read_until(comment, pos, c)
233 if pos > 0 then is_link_ref = true
242 if is_link_ref and not id.is_empty and not link.is_empty then
243 var lr = new LinkRef.with_title(link.write_to_string, comment.write_to_string)
244 add_link_ref(id.write_to_string, lr)
245 if comment.is_empty then last_link_ref = lr
248 comment = new FlatBuffer
249 if not line.is_empty and last_link_ref != null then
251 var c = line.value[pos]
252 if c == '\
"' or c == '\'' or c == '(' then
255 pos = md.read_until(comment, pos, ')')
257 pos = md.read_until(comment, pos, c)
260 var last_link_ref = self.last_link_ref
261 if not comment.is_empty and last_link_ref != null then
262 last_link_ref.title = comment.write_to_string
265 if comment.is_empty then return false
271 # This list will be needed during output to expand links.
272 var link_refs: Map[String, LinkRef] = new HashMap[String, LinkRef]
274 # Last encountered link ref (for multiline definitions)
276 # Markdown allows link refs to be defined over two lines:
279 # [id]: http://example.com/longish/path/to/resource/here
280 # "Optional Title Here"
283 private var last_link_ref: nullable LinkRef = null
285 # Add a link ref to the list
286 fun add_link_ref(key: String, ref: LinkRef) do link_refs[key.to_lower] = ref
288 # Recursively split a `block`.
290 # The block is split according to the type of lines it contains.
291 # Some blocks, like lists, can be split again recursively.
292 # The `in_list` mode is used to recurse on list and build
293 # nested paragraphs or code blocks.
294 fun recurse(root: MDBlock, in_list: Bool) do
295 var old_mode = self.in_list
296 var old_root = self.current_block
297 self.in_list = in_list
299 var line = root.first_line
300 while line != null and line.is_empty do
302 if line == null then return
307 while current_line != null do
308 line_kind(current_line.as(not null)).process(self)
310 self.in_list = old_mode
311 self.current_block = old_root
314 # Currently processed line.
315 # Used when visiting blocks with `recurse`.
316 var current_line: nullable MDLine = null is writable
318 # Currently processed block.
319 # Used when visiting blocks with `recurse`.
320 var current_block: nullable MDBlock = null is writable
322 # Is the current recursion in list mode?
323 # Used when visiting blocks with `recurse`
324 private var in_list = false
328 fun line_kind(md: MDLine): Line do
330 var leading = md.leading
331 var trailing = md.trailing
332 if md.is_empty then return new LineEmpty
333 if md.leading > 3 then return new LineCode
334 if value[leading] == '#' then return new LineHeadline
335 if value[leading] == '>' then return new LineBlockquote
338 if value.length - leading - trailing > 2 then
339 if value[leading] == '`' and md.count_chars_start('`') >= 3 then
342 if value[leading] == '~' and md.count_chars_start('~') >= 3 then
348 if value.length - leading - trailing > 2 and
349 (value[leading] == '*' or value[leading] == '-' or value[leading] == '_') then
350 if md.count_chars(value[leading]) >= 3 then
355 if value.length - leading >= 2 and value[leading + 1] == ' ' then
356 var c = value[leading]
357 if c == '*' or c == '-' or c == '+' then return new LineUList
360 if value.length - leading >= 3 and value[leading].is_digit then
362 while i < value.length and value[i].is_digit do i += 1
363 if i + 1 < value.length and value[i] == '.' and value[i + 1] == ' ' then
368 if value[leading] == '<' and md.check_html then return new LineXML
371 if next != null and not next.is_empty then
372 if next.count_chars('=') > 0 then
373 return new LineHeadline1
375 if next.count_chars('-') > 0 then
376 return new LineHeadline2
382 # Get the token kind at `pos`.
383 fun token_at(text: Text, pos: Int): Token do
395 if pos + 1 < text.length then
400 if pos + 2 < text.length then
410 loc = new MDLocation(
411 current_loc.line_start,
412 current_loc.column_start + pos,
413 current_loc.line_start,
414 current_loc.column_start + pos)
419 if c0 != ' ' or c2 != ' ' then
420 return new TokenStrongStar(loc, pos, c)
422 return new TokenEmStar(loc, pos, c)
425 if c0 != ' ' or c1 != ' ' then
426 return new TokenEmStar(loc, pos, c)
428 return new TokenNone(loc, pos, c)
430 else if c == '_' then
432 if c0 != ' ' or c2 != ' ' then
433 return new TokenStrongUnderscore(loc, pos, c)
435 return new TokenEmUnderscore(loc, pos, c)
439 if (c0.is_letter or c0.is_digit) and c0 != '_' and
440 (c1.is_letter or c1.is_digit) then
441 return new TokenNone(loc, pos, c)
443 return new TokenEmUnderscore(loc, pos, c)
446 if c0 != ' ' or c1 != ' ' then
447 return new TokenEmUnderscore(loc, pos, c)
449 return new TokenNone(loc, pos, c)
451 else if c == '!' then
452 if c1 == '[' then return new TokenImage(loc, pos, c)
453 return new TokenNone(loc, pos, c)
454 else if c == '[' then
455 return new TokenLink(loc, pos, c)
456 else if c == ']' then
457 return new TokenNone(loc, pos, c)
458 else if c == '`' then
460 return new TokenCodeDouble(loc, pos, c)
462 return new TokenCodeSingle(loc, pos, c)
464 else if c == '\\' then
465 if c1 == '\\' or c1 == '[' or c1 == ']' or c1 == '(' or c1 == ')' or c1 == '{' or c1 == '}' or c1 == '#' or c1 == '"' or c1 == '\
'' or c1
== '.' or c1
== '<' or c1
== '>' or c1
== '*' or c1
== '+' or c1
== '-' or c1
== '_' or c1
== '!' or c1
== '`' or c1
== '~' or c1
== '^' then
466 return new TokenEscape(loc
, pos
, c
)
468 return new TokenNone(loc
, pos
, c
)
470 else if c
== '<' then
471 return new TokenHTML(loc
, pos
, c
)
472 else if c
== '&' then
473 return new TokenEntity(loc
, pos
, c
)
476 if c
== '~' and c1
== '~' then
477 return new TokenStrike(loc
, pos
, c
)
480 return new TokenNone(loc
, pos
, c
)
484 # Find the position of a `token` in `self`.
485 fun find_token
(text
: Text, start
: Int, token
: Token): Int do
487 while pos
< text
.length
do
488 if token_at
(text
, pos
).is_same_type
(token
) then
496 # Kind of decorator used for decoration.
497 type DECORATOR: Decorator
499 # Decorator used for output.
500 # Default is `HTMLDecorator`
501 var decorator
: DECORATOR is writable, lazy
do
502 return new HTMLDecorator
505 # Create a new `MarkdownProcessor` using a custom `decorator`.
506 init with_decorator
(decorator
: DECORATOR) do
507 self.decorator
= decorator
510 # Output `block` using `decorator` in the current buffer.
511 fun emit
(block
: Block): Text do
512 var buffer
= push_buffer
518 # Output the content of `block`.
519 fun emit_in
(block
: Block) do block
.emit_in
(self)
521 # Transform and emit markdown text.
522 fun emit_text
(text
: Text) do emit_text_until
(text
, 0, null)
524 # Transform and emit markdown text starting at `start` and
525 # until a token with the same type as `token` is found.
526 # Go until the end of `text` if `token` is null.
527 fun emit_text_until
(text
: Text, start
: Int, token
: nullable Token): Int do
528 var old_text
= current_text
529 var old_pos
= current_pos
532 while current_pos
< text
.length
do
533 if text
[current_pos
] == '\n' then
534 current_loc
.line_start
+= 1
535 current_loc
.column_start
= -current_pos
537 var mt
= token_at
(text
, current_pos
)
538 if (token
!= null and not token
isa TokenNone) and
539 (mt
.is_same_type
(token
) or
540 (token
isa TokenEmStar and mt
isa TokenStrongStar) or
541 (token
isa TokenEmUnderscore and mt
isa TokenStrongUnderscore)) then
547 current_text
= old_text
548 current_pos
= old_pos
552 # Currently processed position in `current_text`.
553 # Used when visiting inline production with `emit_text_until`.
554 private var current_pos
: Int = -1
556 # Currently processed text.
557 # Used when visiting inline production with `emit_text_until`.
558 private var current_text
: nullable Text = null
561 private var buffer_stack
= new List[FlatBuffer]
563 # Push a new buffer on the stack.
564 private fun push_buffer
: FlatBuffer do
565 var buffer
= new FlatBuffer
566 buffer_stack
.add buffer
570 # Pop the last buffer.
571 private fun pop_buffer
do buffer_stack
.pop
573 # Current output buffer.
574 private fun current_buffer
: FlatBuffer do
575 assert not buffer_stack
.is_empty
576 return buffer_stack
.last
580 private var loc_stack
= new List[MDLocation]
582 # Push a new MDLocation on the stack.
583 private fun push_loc
(location
: MDLocation) do loc_stack
.add location
585 # Pop the last location.
586 private fun pop_loc
: MDLocation do return loc_stack
.pop
588 # Current location, i.e. the last `MDLocation` of `loc_stack`.
589 private fun current_loc
: MDLocation do
590 assert not loc_stack
.is_empty
591 return loc_stack
.last
594 # Append `e` to current buffer.
595 fun add
(e
: Writable) do
597 current_buffer
.append e
599 current_buffer
.append e
.write_to_string
603 # Append `c` to current buffer.
608 # Append a "\n" line break.
609 fun addn
do addc
'\n'
613 # Links that are specified somewhere in the markdown document to be reused as shortcuts.
616 # [1]: http://example.com/ "Optional title"
623 # Optional link title
624 var title
: nullable String = null
626 # Is the link an abbreviation?
627 var is_abbrev
= false
629 # Create a link with a title.
630 init with_title
(link
: String, title
: nullable String) do
636 # A `Decorator` is used to emit markdown into a specific format.
637 # Default decorator used is `HTMLDecorator`.
640 # Kind of processor used
641 type PROCESSOR: MarkdownProcessor
643 # Render a single plain char.
645 # Redefine this method to add special escaping for plain text.
646 fun add_char
(v
: PROCESSOR, c
: Char) do v
.addc c
648 # Render a ruler block.
649 fun add_ruler
(v
: PROCESSOR, block
: BlockRuler) is abstract
651 # Render a headline block with corresponding level.
652 fun add_headline
(v
: PROCESSOR, block
: BlockHeadline) is abstract
654 # Render a paragraph block.
655 fun add_paragraph
(v
: PROCESSOR, block
: BlockParagraph) is abstract
657 # Render a code or fence block.
658 fun add_code
(v
: PROCESSOR, block
: BlockCode) is abstract
660 # Render a blockquote.
661 fun add_blockquote
(v
: PROCESSOR, block
: BlockQuote) is abstract
663 # Render an unordered list.
664 fun add_unorderedlist
(v
: PROCESSOR, block
: BlockUnorderedList) is abstract
666 # Render an ordered list.
667 fun add_orderedlist
(v
: PROCESSOR, block
: BlockOrderedList) is abstract
669 # Render a list item.
670 fun add_listitem
(v
: PROCESSOR, block
: BlockListItem) is abstract
672 # Render an emphasis text.
673 fun add_em
(v
: PROCESSOR, text
: Text) is abstract
675 # Render a strong text.
676 fun add_strong
(v
: PROCESSOR, text
: Text) is abstract
678 # Render a strike text.
680 # Extended mode only (see `MarkdownProcessor::ext_mode`)
681 fun add_strike
(v
: PROCESSOR, text
: Text) is abstract
684 fun add_link
(v
: PROCESSOR, link
: Text, name
: Text, comment
: nullable Text) is abstract
687 fun add_image
(v
: PROCESSOR, link
: Text, name
: Text, comment
: nullable Text) is abstract
689 # Render an abbreviation.
690 fun add_abbr
(v
: PROCESSOR, name
: Text, comment
: Text) is abstract
692 # Render a code span reading from a buffer.
693 fun add_span_code
(v
: PROCESSOR, buffer
: Text, from
, to
: Int) is abstract
695 # Render a text and escape it.
696 fun append_value
(v
: PROCESSOR, value
: Text) is abstract
698 # Render code text from buffer and escape it.
699 fun append_code
(v
: PROCESSOR, buffer
: Text, from
, to
: Int) is abstract
701 # Render a character escape.
702 fun escape_char
(v
: PROCESSOR, char
: Char) is abstract
704 # Render a line break
705 fun add_line_break
(v
: PROCESSOR) is abstract
707 # Generate a new html valid id from a `String`.
708 fun strip_id
(txt
: String): String is abstract
710 # Found headlines during the processing labeled by their ids.
711 fun headlines
: ArrayMap[String, HeadLine] is abstract
714 # Class representing a markdown headline.
716 # Unique identifier of this headline.
719 # Text of the headline.
722 # Level of this headline.
724 # According to the markdown specification, level must be in `[1..6]`.
728 # `Decorator` that outputs HTML.
732 redef var headlines
= new ArrayMap[String, HeadLine]
734 redef fun add_ruler
(v
, block
) do v
.add
"<hr/>\n"
736 redef fun add_headline
(v
, block
) do
738 var line
= block
.block
.first_line
739 if line
== null then return
741 var id
= strip_id
(txt
)
742 var lvl
= block
.depth
743 headlines
[id
] = new HeadLine(id
, txt
, lvl
)
745 v
.add
"<h{lvl} id=\"{id}\
">"
750 redef fun add_paragraph
(v
, block
) do
756 redef fun add_code
(v
, block
) do
757 var meta
= block
.meta
759 v
.add
"<pre class=\""
760 append_value(v, meta)
766 v
.add
"</code></pre>\n"
769 redef fun add_blockquote
(v
, block
) do
770 v
.add
"<blockquote>\n"
772 v
.add
"</blockquote>\n"
775 redef fun add_unorderedlist
(v
, block
) do
781 redef fun add_orderedlist
(v
, block
) do
787 redef fun add_listitem
(v
, block
) do
793 redef fun add_em
(v
, text
) do
799 redef fun add_strong
(v
, text
) do
805 redef fun add_strike
(v
, text
) do
811 redef fun add_image
(v
, link
, name
, comment
) do
813 append_value(v, link)
815 append_value(v, name)
817 if comment
!= null and not comment
.is_empty
then
819 append_value(v, comment)
825 redef fun add_link
(v
, link
, name
, comment
) do
827 append_value(v, link)
829 if comment
!= null and not comment
.is_empty
then
831 append_value(v, comment)
839 redef fun add_abbr
(v
, name
, comment
) do
840 v
.add
"<abbr title=\""
841 append_value(v, comment)
847 redef fun add_span_code
(v
, text
, from
, to
) do
849 append_code
(v
, text
, from
, to
)
853 redef fun add_line_break
(v
) do
857 redef fun append_value
(v
, text
) do for c
in text
do escape_char
(v
, c
)
859 redef fun escape_char
(v
, c
) do
862 else if c
== '<' then
864 else if c
== '>' then
866 else if c
== '"' then
868 else if c
== '\'' then
875 redef fun append_code(v, buffer, from, to) do
876 for i in [from..to[ do
880 else if c == '<' then
882 else if c == '>' then
890 redef fun strip_id(txt) do
892 var b = new FlatBuffer
897 if not c.is_letter and
899 not allowed_id_chars.has(c) then continue
905 # check for multiple id definitions
906 if headlines.has_key(key) then
909 while headlines.has_key(key) do
917 private var allowed_id_chars: Array[Char] = ['-', '_
', ':', '.']
920 # Location in a Markdown input.
923 # Starting line number (starting from 1).
926 # Starting column number (starting from 1).
927 var column_start: Int
929 # Stopping line number (starting from 1).
932 # Stopping column number (starting from 1).
935 redef fun to_s do return "{line_start},{column_start}--{line_end},{column_end}"
937 # Return a copy of `self`.
938 fun copy: MDLocation do
939 return new MDLocation(line_start, column_start, line_end, column_end)
943 # A block of markdown lines.
944 # A `MDBlock` can contain lines and/or sub-blocks.
947 # Position of `self` in the input.
948 var location: MDLocation
952 var kind: Block = new BlockNone(self) is writable
955 var first_line: nullable MDLine = null is writable
958 var last_line: nullable MDLine = null is writable
960 # First sub-block if any.
961 var first_block: nullable MDBlock = null is writable
963 # Last sub-block if any.
964 var last_block: nullable MDBlock = null is writable
966 # Previous block if any.
967 var prev: nullable MDBlock = null is writable
970 var next: nullable MDBlock = null is writable
972 # Does this block contain subblocks?
973 fun has_blocks: Bool do return first_block != null
976 fun count_blocks: Int do
978 var block = first_block
979 while block != null do
986 # Does this block contain lines?
987 fun has_lines: Bool do return first_line != null
990 fun count_lines: Int do
992 var line = first_line
993 while line != null do
1000 # Split `self` creating a new sub-block having `line` as `last_line`.
1001 fun split(line: MDLine): MDBlock do
1002 # location for new block
1003 var new_loc = new MDLocation(
1004 first_line.as(not null).location.line_start,
1005 first_line.as(not null).location.column_start,
1006 line.location.line_end,
1007 line.location.column_end)
1009 var block = new MDBlock(new_loc)
1010 block.first_line = first_line
1011 block.last_line = line
1012 first_line = line.next
1014 if first_line == null then
1017 first_line.as(not null).prev = null
1018 # update current block loc
1019 location.line_start = first_line.as(not null).location.line_start
1020 location.column_start = first_line.as(not null).location.column_start
1022 if first_block == null then
1026 last_block.as(not null).next = block
1032 # Add a `line` to this block.
1033 fun add_line(line: MDLine) do
1034 if last_line == null then
1038 last_line.as(not null).next_empty = line.is_empty
1039 line.prev_empty = last_line.as(not null).is_empty
1040 line.prev = last_line
1041 last_line.as(not null).next = line
1046 # Remove `line` from this block.
1047 fun remove_line(line: MDLine) do
1048 if line.prev == null then
1049 first_line = line.next
1051 line.prev.as(not null).next = line.next
1053 if line.next == null then
1054 last_line = line.prev
1056 line.next.as(not null).prev = line.prev
1062 # Remove leading empty lines.
1063 fun remove_leading_empty_lines: Bool do
1064 var was_empty = false
1065 var line = first_line
1066 while line != null and line.is_empty do
1074 # Remove trailing empty lines.
1075 fun remove_trailing_empty_lines: Bool do
1076 var was_empty = false
1077 var line = last_line
1078 while line != null and line.is_empty do
1086 # Remove leading and trailing empty lines.
1087 fun remove_surrounding_empty_lines: Bool do
1088 var was_empty = false
1089 if remove_leading_empty_lines then was_empty = true
1090 if remove_trailing_empty_lines then was_empty = true
1094 # Remove list markers and up to 4 leading spaces.
1095 # Used to clean nested lists.
1096 fun remove_list_indent(v: MarkdownProcessor) do
1097 var line = first_line
1098 while line != null do
1099 if not line.is_empty then
1100 var kind = v.line_kind(line)
1101 if kind isa LineList then
1102 line.value = kind.extract_value(line)
1104 line.value = line.value.substring_from(line.leading.min(4))
1106 line.leading = line.process_leading
1112 # Collect block line text.
1114 var text = new FlatBuffer
1115 var line = first_line
1116 while line != null do
1117 if not line.is_empty then
1118 text.append line.text
1123 return text.write_to_string
1127 # Representation of a markdown block in the AST.
1128 # Each `Block` is linked to a `MDBlock` that contains markdown code.
1129 abstract class Block
1131 # The markdown block `self` is related to.
1134 # Output `self` using `v.decorator`.
1135 fun emit(v: MarkdownProcessor) do v.emit_in(self)
1137 # Emit the contents of `self`, lines or blocks.
1138 fun emit_in(v: MarkdownProcessor) do
1139 block.remove_surrounding_empty_lines
1140 if block.has_lines then
1147 # Emit lines contained in `block`.
1148 fun emit_lines(v: MarkdownProcessor) do
1149 var tpl = v.push_buffer
1150 var line = block.first_line
1151 while line != null do
1152 if not line.is_empty then
1153 v.add line.value.substring(line.leading, line.value.length - line.trailing)
1154 if line.trailing >= 2 then v.decorator.add_line_break(v)
1156 if line.next != null then
1165 # Emit sub-blocks contained in `block`.
1166 fun emit_blocks(v: MarkdownProcessor) do
1167 var block = self.block.first_block
1168 while block != null do
1169 v.push_loc(block.location)
1176 # The raw content of the block as a multi-line string.
1177 fun raw_content: String do
1178 var infence = self isa BlockFence
1179 var text = new FlatBuffer
1180 var line = self.block.first_line
1181 while line != null do
1182 if not line.is_empty then
1183 var str = line.value
1184 if not infence and str.has_prefix(" ") then
1185 text.append str.substring(4, str.length - line.trailing)
1193 return text.write_to_string
1197 # A block without any markdown specificities.
1199 # Actually uses the same implementation as `BlockCode`,
1200 # this class is only used for typing purposes.
1205 # A markdown blockquote.
1209 redef fun emit(v) do v.decorator.add_blockquote(v, self)
1211 # Remove blockquote markers.
1212 private fun remove_block_quote_prefix(block: MDBlock) do
1213 var line = block.first_line
1214 while line != null do
1215 if not line.is_empty then
1216 if line.value[line.leading] == '>' then
1217 var rem = line.leading + 1
1218 if line.leading + 1 < line.value.length and
1219 line.value[line.leading + 1] == ' ' then
1222 line.value = line.value.substring_from(rem)
1223 line.leading = line.process_leading
1231 # A markdown code block.
1235 # Any string found after fence token.
1236 var meta: nullable Text
1238 # Number of char to skip at the beginning of the line.
1240 # Block code lines start at 4 spaces.
1241 protected var line_start = 4
1243 redef fun emit(v) do v.decorator.add_code(v, self)
1245 redef fun emit_lines(v) do
1246 var line = block.first_line
1247 while line != null do
1248 if not line.is_empty then
1249 v.decorator.append_code(v, line.value, line_start, line.value.length)
1257 # A markdown code-fence block.
1259 # Actually uses the same implementation as `BlockCode`,
1260 # this class is only used for typing purposes.
1264 # Fence code lines start at 0 spaces.
1265 redef var line_start = 0
1268 # A markdown headline.
1272 redef fun emit(v) do
1273 var loc = block.location.copy
1274 loc.column_start += start
1276 v.decorator.add_headline(v, self)
1280 private var start = 0
1282 # Depth of the headline used to determine the headline level.
1285 # Remove headline marks from lines contained in `self`.
1286 private fun transform_headline(block: MDBlock) do
1287 if depth > 0 then return
1289 var line = block.first_line
1290 if line == null then return
1291 if line.is_empty then return
1292 var start = line.leading
1293 while start < line.value.length and line.value[start] == '#' do
1297 while start
< line
.value
.length
and line
.value
[start
] == ' ' do
1300 if start
>= line
.value
.length
then
1301 line
.is_empty
= true
1303 var nend
= line
.value
.length
- line
.trailing
- 1
1304 while line
.value
[nend
] == '#' do nend
-= 1
1305 while line
.value
[nend
] == ' ' do nend
-= 1
1306 line
.value
= line
.value
.substring
(start
, nend
- start
+ 1)
1311 depth
= level
.min
(6)
1315 # A markdown list item block.
1319 redef fun emit
(v
) do v
.decorator
.add_listitem
(v
, self)
1322 # A markdown list block.
1323 # Can be either an ordered or unordered list, this class is mainly used to factorize code.
1324 abstract class BlockList
1327 # Split list block into list items sub-blocks.
1328 private fun init_block
(v
: MarkdownProcessor) do
1329 var line
= block
.first_line
1330 if line
== null then return
1332 while line
!= null do
1333 var t
= v
.line_kind
(line
)
1334 if t
isa LineList or
1335 (not line
.is_empty
and (line
.prev_empty
and line
.leading
== 0 and
1336 not (t
isa LineList))) then
1337 var sblock
= block
.split
(line
.prev
.as(not null))
1338 sblock
.kind
= new BlockListItem(sblock
)
1342 var sblock
= block
.split
(block
.last_line
.as(not null))
1343 sblock
.kind
= new BlockListItem(sblock
)
1346 # Expand list items as paragraphs if needed.
1347 private fun expand_paragraphs
(block
: MDBlock) do
1348 var outer
= block
.first_block
1349 var inner
: nullable MDBlock
1350 var has_paragraph
= false
1351 while outer
!= null and not has_paragraph
do
1352 if outer
.kind
isa BlockListItem then
1353 inner
= outer
.first_block
1354 while inner
!= null and not has_paragraph
do
1355 if inner
.kind
isa BlockParagraph then
1356 has_paragraph
= true
1363 if has_paragraph
then
1364 outer
= block
.first_block
1365 while outer
!= null do
1366 if outer
.kind
isa BlockListItem then
1367 inner
= outer
.first_block
1368 while inner
!= null do
1369 if inner
.kind
isa BlockNone then
1370 inner
.kind
= new BlockParagraph(inner
)
1381 # A markdown ordered list.
1382 class BlockOrderedList
1385 redef fun emit
(v
) do v
.decorator
.add_orderedlist
(v
, self)
1388 # A markdown unordered list.
1389 class BlockUnorderedList
1392 redef fun emit
(v
) do v
.decorator
.add_unorderedlist
(v
, self)
1395 # A markdown paragraph block.
1396 class BlockParagraph
1399 redef fun emit
(v
) do v
.decorator
.add_paragraph
(v
, self)
1406 redef fun emit
(v
) do v
.decorator
.add_ruler
(v
, self)
1409 # Xml blocks that can be found in markdown markup.
1413 redef fun emit_lines
(v
) do
1414 var line
= block
.first_line
1415 while line
!= null do
1416 if not line
.is_empty
then v
.add line
.value
1426 # Location of `self` in the original input.
1427 var location
: MDLocation
1429 # Text contained in this line.
1430 var value
: String is writable
1432 # Is this line empty?
1433 # Lines containing only spaces are considered empty.
1434 var is_empty
: Bool = true is writable
1436 # Previous line in `MDBlock` or null if first line.
1437 var prev
: nullable MDLine = null is writable
1439 # Next line in `MDBlock` or null if last line.
1440 var next
: nullable MDLine = null is writable
1442 # Is the previous line empty?
1443 var prev_empty
: Bool = false is writable
1445 # Is the next line empty?
1446 var next_empty
: Bool = false is writable
1448 # Initialize a new MDLine from its string value
1450 self.leading
= process_leading
1451 if leading
!= value
.length
then
1452 self.is_empty
= false
1453 self.trailing
= process_trailing
1457 # Set `value` as an empty String and update `leading`, `trailing` and `is_empty`.
1463 if prev
!= null then prev
.as(not null).next_empty
= true
1464 if next
!= null then next
.as(not null).prev_empty
= true
1467 # Number of leading spaces on this line.
1468 var leading
: Int = 0 is writable
1470 # Compute `leading` depending on `value`.
1471 fun process_leading
: Int do
1473 var value
= self.value
1474 while count
< value
.length
and value
[count
] == ' ' do count
+= 1
1475 if leading
== value
.length
then clear
1479 # Number of trailing spaces on this line.
1480 var trailing
: Int = 0 is writable
1482 # Compute `trailing` depending on `value`.
1483 fun process_trailing
: Int do
1485 var value
= self.value
1486 while value
[value
.length
- count
- 1] == ' ' do
1492 # Count the amount of `ch` in this line.
1493 # Return a value > 0 if this line only consists of `ch` and spaces.
1494 fun count_chars
(ch
: Char): Int do
1510 # Count the amount of `ch` at the start of this line ignoring spaces.
1511 fun count_chars_start
(ch
: Char): Int do
1526 # Last XML line if any.
1527 private var xml_end_line
: nullable MDLine = null
1529 # Does `value` contain valid XML markup?
1530 private fun check_html
: Bool do
1531 var tags
= new Array[String]
1532 var tmp
= new FlatBuffer
1534 if pos
+ 1 < value
.length
and value
[pos
+ 1] == '!' then
1535 if read_xml_comment
(self, pos
) > 0 then return true
1537 pos
= value
.read_xml
(tmp
, pos
, false)
1541 if not tag
.is_html_block
then
1549 var line
: nullable MDLine = self
1550 while line
!= null do
1551 while pos
< line
.value
.length
and line
.value
[pos
] != '<' do
1554 if pos
>= line
.value
.length
then
1555 if pos
- 2 >= 0 and line
.value
[pos
- 2] == '/' then
1557 if tags
.is_empty
then
1565 tmp
= new FlatBuffer
1566 var new_pos
= line
.value
.read_xml
(tmp
, pos
, false)
1569 if tag
.is_html_block
and not tag
== "hr" then
1570 if tmp
[1] == '/' then
1571 if tags
.last
!= tag
then
1579 if tags
.is_empty
then
1589 return tags
.is_empty
1594 # Read an XML comment.
1595 # Used by `check_html`.
1596 private fun read_xml_comment
(first_line
: MDLine, start
: Int): Int do
1597 var line
: nullable MDLine = first_line
1598 if start
+ 3 < line
.as(not null).value
.length
then
1599 if line
.as(not null).value
[2] == '-' and line
.as(not null).value
[3] == '-' then
1601 while line
!= null do
1602 while pos
< line
.value
.length
and line
.value
[pos
] != '-' do
1605 if pos
== line
.value
.length
then
1609 if pos
+ 2 < line
.value
.length
then
1610 if line
.value
[pos
+ 1] == '-' and line
.value
[pos
+ 2] == '>' then
1611 first_line
.xml_end_line
= line
# Extract the text of `self` without its `leading` and `trailing` characters.
#
# `substring` takes a start index and a character *count* (not an end index),
# so the count has to exclude both the `leading` and the `trailing` parts.
# Returns an empty string when nothing remains between the two.
fun text: String do
	var count = value.length - leading - trailing
	# Guard against a non-positive count (e.g. a line made only of spaces).
	if count <= 0 then return ""
	return value.substring(leading, count)
end
# Process this line using the markdown processor `v`.
#
# Abstract: each concrete kind of line redefines this method to consume
# the lines it is responsible for and to update `v.current_line`.
#
# See `MarkdownProcessor::recurse`.
fun process(v: MarkdownProcessor) is abstract
1635 # An empty markdown line.
redef fun process(v) do
	# Nothing to emit here: simply move the processor to the following line.
	var current = v.current_line.as(not null)
	v.current_line = current.next
end
1644 # A non-specific markdown construction.
1645 # Mainly used as part of another line construct such as paragraphs or lists.
1649 redef fun process
(v
) do
1650 var line
= v
.current_line
1652 var was_empty
= line
.as(not null).prev_empty
1653 while line
!= null and not line
.is_empty
do
1654 var t
= v
.line_kind
(line
)
1655 if (v
.in_list
or v
.ext_mode
) and t
isa LineList then
1658 if v
.ext_mode
and (t
isa LineCode or t
isa LineFence) then
1661 if t
isa LineHeadline or t
isa LineHeadline1 or t
isa LineHeadline2 or
1662 t
isa LineHR or t
isa LineBlockquote or t
isa LineXML then
1668 var current_block
= v
.current_block
.as(not null)
1669 if line
!= null and not line
.is_empty
then
1670 var block
= current_block
.split
(line
.prev
.as(not null))
1671 if v
.in_list
and not was_empty
then
1672 block
.kind
= new BlockNone(block
)
1674 block
.kind
= new BlockParagraph(block
)
1676 current_block
.remove_leading_empty_lines
1679 if line
!= null then
1680 block
= current_block
.split
(line
)
1682 block
= current_block
.split
(current_block
.last_line
.as(not null))
1684 if v
.in_list
and (line
== null or not line
.is_empty
) and not was_empty
then
1685 block
.kind
= new BlockNone(block
)
1687 block
.kind
= new BlockParagraph(block
)
1689 current_block
.remove_leading_empty_lines
1691 v
.current_line
= current_block
.first_line
1695 # A line of markdown code.
1699 redef fun process
(v
) do
1700 var line
= v
.current_line
1702 while line
!= null and (line
.is_empty
or v
.line_kind
(line
) isa LineCode) do
1705 # split at block end line
1706 var current_block
= v
.current_block
.as(not null)
1708 if line
!= null then
1709 block
= current_block
.split
(line
.prev
.as(not null))
1711 block
= current_block
.split
(current_block
.last_line
.as(not null))
1713 block
.kind
= new BlockCode(block
)
1714 block
.remove_surrounding_empty_lines
1715 v
.current_line
= current_block
.first_line
1719 # A line of raw XML.
redef fun process(v) do
	var xml_start = v.current_line
	if xml_start == null then return
	var parent = v.current_block.as(not null)
	# Split the enclosing block at the previous line, when there is one.
	var before = xml_start.prev
	if before != null then
		parent.split(before)
	end
	# Split again at the line recorded as the end of the XML markup
	# and mark the resulting block as XML.
	var xml_end = xml_start.xml_end_line.as(not null)
	var xml_block = parent.split(xml_end)
	xml_block.kind = new BlockXML(xml_block)
	parent.remove_leading_empty_lines
	# Resume processing at the first line left in the enclosing block.
	v.current_line = parent.first_line
end
1736 # A markdown blockquote line.
1737 class LineBlockquote
1740 redef fun process
(v
) do
1741 var line
= v
.current_line
1742 var current_block
= v
.current_block
.as(not null)
1744 while line
!= null do
1745 if not line
.is_empty
and (line
.prev_empty
and
1746 line
.leading
== 0 and
1747 not v
.line_kind
(line
) isa LineBlockquote) then break
1752 if line
!= null then
1753 block
= current_block
.split
(line
.prev
.as(not null))
1755 block
= current_block
.split
(current_block
.last_line
.as(not null))
1757 var kind
= new BlockQuote(block
)
1759 block
.remove_surrounding_empty_lines
1760 kind
.remove_block_quote_prefix
(block
)
1761 v
.current_line
= line
1762 v
.recurse
(block
, false)
1763 v
.current_line
= current_block
.first_line
1767 # A markdown ruler line.
redef fun process(v) do
	var ruler = v.current_line
	if ruler == null then return
	var parent = v.current_block.as(not null)
	# Split the enclosing block at the previous line, when there is one.
	var before = ruler.prev
	if before != null then
		parent.split(before)
	end
	# Split at the ruler line itself and mark the resulting block as a ruler.
	var ruler_block = parent.split(ruler)
	ruler_block.kind = new BlockRuler(ruler_block)
	parent.remove_leading_empty_lines
	# Resume processing at the first line left in the enclosing block.
	v.current_line = parent.first_line
end
1783 # A markdown fence code line.
1787 redef fun process
(v
) do
1789 var line
= v
.current_line
.as(not null).next
1790 var current_block
= v
.current_block
.as(not null)
1791 while line
!= null do
1792 if v
.line_kind
(line
) isa LineFence then break
1795 if line
!= null then
1800 if line
!= null then
1801 block
= current_block
.split
(line
.prev
.as(not null))
1803 block
= current_block
.split
(current_block
.last_line
.as(not null))
1805 block
.remove_surrounding_empty_lines
1806 var meta
= block
.first_line
.as(not null).value
.meta_from_fence
1807 block
.kind
= new BlockFence(block
, meta
)
1808 block
.first_line
.as(not null).clear
1809 var last
= block
.last_line
1810 if last
!= null and v
.line_kind
(last
) isa LineFence then
1811 block
.last_line
.as(not null).clear
1813 block
.remove_surrounding_empty_lines
1814 v
.current_line
= line
1818 # A markdown headline.
1822 redef fun process
(v
) do
1823 var line
= v
.current_line
1824 if line
== null then return
1825 var current_block
= v
.current_block
.as(not null)
1826 var lprev
= line
.prev
1827 if lprev
!= null then current_block
.split
(lprev
)
1828 var block
= current_block
.split
(line
)
1829 var kind
= new BlockHeadline(block
)
1831 kind
.transform_headline
(block
)
1832 current_block
.remove_leading_empty_lines
1833 v
.current_line
= current_block
.first_line
1837 # A markdown headline of level 1.
1841 redef fun process
(v
) do
1842 var line
= v
.current_line
1843 if line
== null then return
1844 var current_block
= v
.current_block
.as(not null)
1845 var lprev
= line
.prev
1846 if lprev
!= null then current_block
.split
(lprev
)
1847 line
.next
.as(not null).clear
1848 var block
= current_block
.split
(line
)
1849 var kind
= new BlockHeadline(block
)
1851 kind
.transform_headline
(block
)
1853 current_block
.remove_leading_empty_lines
1854 v
.current_line
= current_block
.first_line
1858 # A markdown headline of level 2.
1862 redef fun process
(v
) do
1863 var line
= v
.current_line
1864 if line
== null then return
1865 var current_block
= v
.current_block
.as(not null)
1866 var lprev
= line
.prev
1867 if lprev
!= null then current_block
.split
(lprev
)
1868 line
.next
.as(not null).clear
1869 var block
= current_block
.split
(line
)
1870 var kind
= new BlockHeadline(block
)
1872 kind
.transform_headline
(block
)
1874 current_block
.remove_leading_empty_lines
1875 v
.current_line
= current_block
.first_line
1879 # A markdown list line.
1880 # Mainly used to factorize code between ordered and unordered lists.
1881 abstract class LineList
1884 redef fun process
(v
) do
1885 var line
= v
.current_line
1887 while line
!= null do
1888 var t
= v
.line_kind
(line
)
1889 if not line
.is_empty
and (line
.prev_empty
and line
.leading
== 0 and
1890 not t
isa LineList) then break
1894 var current_block
= v
.current_block
.as(not null)
1896 if line
!= null then
1897 list
= current_block
.split
(line
.prev
.as(not null))
1899 list
= current_block
.split
(current_block
.last_line
.as(not null))
1901 var kind
= block_kind
(list
)
1903 list
.first_line
.as(not null).prev_empty
= false
1904 list
.last_line
.as(not null).next_empty
= false
1905 list
.remove_surrounding_empty_lines
1906 list
.first_line
.as(not null).prev_empty
= false
1907 list
.last_line
.as(not null).next_empty
= false
1909 var block
= list
.first_block
1910 while block
!= null do
1911 block
.remove_list_indent
(v
)
1912 v
.recurse
(block
, true)
1915 kind
.expand_paragraphs
(list
)
1916 v
.current_line
= line
# Create a new block kind based on this line.
#
# Abstract: the redefinitions in this file return a `BlockOrderedList`
# or a `BlockUnorderedList` built on `block`.
protected fun block_kind(block: MDBlock): BlockList is abstract
# Extract string value from `MDLine`.
#
# Abstract: the redefinitions in this file return the part of `line.value`
# that follows the list marker.
protected fun extract_value(line: MDLine): String is abstract
1926 # An ordered list line.
# Lines of this kind build ordered list blocks.
redef fun block_kind(block) do
	var kind = new BlockOrderedList(block)
	return kind
end
redef fun extract_value(line) do
	var raw = line.value
	# The returned text starts two characters after the first '.' of the line.
	var start = raw.index_of('.') + 2
	return raw.substring_from(start)
end
1937 # An unordered list line.
# Lines of this kind build unordered list blocks.
redef fun block_kind(block) do
	var kind = new BlockUnorderedList(block)
	return kind
end
redef fun extract_value(line) do
	# The returned text starts two characters after the leading part of the line.
	var start = line.leading + 2
	return line.value.substring_from(start)
end
1948 # A token represent a character in the markdown input.
1949 # Some tokens have a specific markup behaviour that is handled here.
1950 abstract class Token
1952 # Location of `self` in the original input.
1953 var location
: nullable MDLocation
# Position of `self` in the input, independent of lines.
1958 # Character found at `pos` in the markdown input.
# Output this token through the decorator of `v`.
#
# By default the token's `char` is handed to the decorator's `add_char`.
fun emit(v: MarkdownProcessor) do
	var decorator = v.decorator
	decorator.add_char(v, char)
end
1965 # A token without a specific meaning.
1970 # An emphasis token.
1971 abstract class TokenEm
1974 redef fun emit
(v
) do
1975 var tmp
= v
.push_buffer
1976 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 1, self)
1979 v
.decorator
.add_em
(v
, tmp
)
1987 # An emphasis star token.
1992 # An emphasis underscore token.
1993 class TokenEmUnderscore
1998 abstract class TokenStrong
2001 redef fun emit
(v
) do
2002 var tmp
= v
.push_buffer
2003 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 2, self)
2006 v
.decorator
.add_strong
(v
, tmp
)
2007 v
.current_pos
= b
+ 1
2014 # A strong star token.
2015 class TokenStrongStar
2019 # A strong underscore token.
2020 class TokenStrongUnderscore
2025 # This class is mainly used to factorize work between single and double quoted span codes.
2026 abstract class TokenCode
2029 redef fun emit
(v
) do
2030 var current_text
= v
.current_text
.as(not null)
2031 var a
= pos
+ next_pos
+ 1
2032 var b
= v
.find_token
(current_text
, a
, self)
2034 v
.current_pos
= b
+ next_pos
2035 while a
< b
and current_text
[a
] == ' ' do a
+= 1
2037 while current_text
[b
- 1] == ' ' do b
-= 1
2038 v
.decorator
.add_span_code
(v
, current_text
, a
, b
)
# Extra offset used by `emit` when locating the code span.
#
# `emit` starts reading at `pos + next_pos + 1` and moves the processor to
# `b + next_pos` once the closing token is found.
# The redefinitions in this file return 0 for a span code token and 1 for
# a doubled span code token.
private fun next_pos: Int is abstract
2048 # A span code token.
2049 class TokenCodeSingle
# A single-character delimiter needs no extra offset.
redef fun next_pos do
	return 0
end
2055 # A doubled span code token.
2056 class TokenCodeDouble
# A doubled delimiter needs one extra position of offset.
redef fun next_pos do
	return 1
end
2062 # A link or image token.
2063 # This class is mainly used to factorize work between images and links.
2064 abstract class TokenLinkOrImage
2068 var link
: nullable Text = null
2071 var name
: nullable Text = null
2074 var comment
: nullable Text = null
2076 # Is the link construct an abbreviation?
2077 var is_abbrev
= false
2079 redef fun emit
(v
) do
2080 var tmp
= new FlatBuffer
2081 var b
= check_link
(v
, tmp
, pos
, self)
# Emit the hyperlink as link or image.
#
# Abstract: the redefinitions in this file forward `link`, `name` and
# `comment` to `add_link`, `add_abbr` or `add_image` on `v.decorator`.
private fun emit_hyper(v: MarkdownProcessor) is abstract
2093 # Check if the link is a valid link.
2094 private fun check_link
(v
: MarkdownProcessor, out
: FlatBuffer, start
: Int, token
: Token): Int do
2095 var md
= v
.current_text
2096 if md
== null then return -1
2098 if token
isa TokenLink then
2103 var tmp
= new FlatBuffer
2104 pos
= md
.read_md_link_id
(tmp
, pos
)
2105 if pos
< start
then return -1
2109 pos
= md
.skip_spaces
(pos
)
2111 var tid
= name
.as(not null).write_to_string
.to_lower
2112 if v
.link_refs
.has_key
(tid
) then
2113 var lr
= v
.link_refs
[tid
]
2114 is_abbrev
= lr
.is_abbrev
2121 else if md
[pos
] == '(' then
2123 pos
= md
.skip_spaces
(pos
)
2124 if pos
< start
then return -1
2125 tmp
= new FlatBuffer
2126 var use_lt
= md
[pos
] == '<'
2128 pos
= md
.read_until
(tmp
, pos
+ 1, '>')
2130 pos
= md
.read_md_link
(tmp
, pos
)
2132 if pos
< start
then return -1
2133 if use_lt
then pos
+= 1
2134 link
= tmp
.write_to_string
2135 if md
[pos
] == ' ' then
2136 pos
= md
.skip_spaces
(pos
)
2137 if pos
> start
and md
[pos
] == '"' then
2139 tmp
= new FlatBuffer
2140 pos
= md
.read_until
(tmp
, pos
, '"')
2141 if pos
< start
then return -1
2142 comment
= tmp
.write_to_string
2144 pos
= md
.skip_spaces
(pos
)
2145 if pos
== -1 then return -1
2148 if pos
< start
then return -1
2149 if md
[pos
] != ')' then return -1
2150 else if md
[pos
] == '[' then
2152 tmp
= new FlatBuffer
2153 pos
= md
.read_raw_until
(tmp
, pos
, ']')
2154 if pos
< start
then return -1
2156 if tmp
.length
> 0 then
2161 var tid
= id
.as(not null).write_to_string
.to_lower
2162 if v
.link_refs
.has_key
(tid
) then
2163 var lr
= v
.link_refs
[tid
]
2168 var tid
= name
.as(not null).write_to_string
.replace
("\n", " ").to_lower
2169 if v
.link_refs
.has_key
(tid
) then
2170 var lr
= v
.link_refs
[tid
]
2178 if link
== null then return -1
2183 # A markdown link token.
2185 super TokenLinkOrImage
# Emit `self` as an abbreviation when it is flagged as such and carries
# a comment, as a regular link otherwise.
redef fun emit_hyper(v) do
	var title = comment
	if is_abbrev and title != null then
		v.decorator.add_abbr(v, name.as(not null), title)
		return
	end
	v.decorator.add_link(v, link.as(not null), name.as(not null), title)
end
2196 # A markdown image token.
2198 super TokenLinkOrImage
# Emit `self` as an image through the decorator of `v`.
redef fun emit_hyper(v) do
	var decorator = v.decorator
	var target = link.as(not null)
	var label = name.as(not null)
	decorator.add_image(v, target, label, comment)
end
2209 redef fun emit
(v
) do
2210 var tmp
= new FlatBuffer
2211 var b
= check_html
(v
, tmp
, v
.current_text
.as(not null), v
.current_pos
)
2216 v
.decorator
.escape_char
(v
, char
)
2220 # Is the HTML valid?
2221 # Also take care of link and mailto shortcuts.
2222 private fun check_html
(v
: MarkdownProcessor, out
: FlatBuffer, md
: Text, start
: Int): Int do
2223 # check for auto links
2224 var tmp
= new FlatBuffer
2225 var pos
= md
.read_until
(tmp
, start
+ 1, ':', ' ', '>', '\n')
2226 if pos
!= -1 and md
[pos
] == ':' and tmp
.is_link_prefix
then
2227 pos
= md
.read_until
(tmp
, pos
, '>')
2229 var link
= tmp
.write_to_string
2230 v
.decorator
.add_link
(v
, link
, link
, null)
2234 # TODO check for mailto
2235 # check for inline html
2236 if start
+ 2 < md
.length
then
2237 return md
.read_xml
(out
, start
, true)
2243 # An HTML entity token.
2247 redef fun emit
(v
) do
2248 var tmp
= new FlatBuffer
2249 var b
= check_entity
(tmp
, v
.current_text
.as(not null), pos
)
2254 v
.decorator
.escape_char
(v
, char
)
# Is the entity valid?
#
# Reads `md` from `start` up to the next `;` into `out`, then checks that
# what was read looks like a character reference:
#
# * `#x` or `#X` followed by at least one hexadecimal digit,
# * `#` followed by decimal digits,
# * or a name made only of letters and digits.
#
# Returns the position given by `read_until` when the entity is accepted
# (the closing `;` is then appended to `out`), or `-1` otherwise.
private fun check_entity(out: FlatBuffer, md: Text, start: Int): Int do
	var pos = md.read_until(out, start, ';')
	# `out[0]` is the character found at `start` (presumably `&`);
	# at least two more characters are required.
	if pos < 0 or out.length < 3 then return -1
	if out[1] == '#' then
		if out[2] == 'x' or out[2] == 'X' then
			# Hexadecimal reference: at least one digit after the `x`.
			if out.length < 4 then return -1
			for i in [3..out.length[ do
				var c = out[i]
				# Reject `c` when it lies outside all three hexadecimal ranges.
				# Each range test needs `or`: `c < 'a' and c > 'f'` can never
				# hold, which used to let any character through.
				if (c < '0' or c > '9') and (c < 'a' or c > 'f') and (c < 'A' or c > 'F') then
					return -1
				end
			end
		else
			# Decimal reference: digits only.
			for i in [2..out.length[ do
				var c = out[i]
				if c < '0' or c > '9' then return -1
			end
		end
	else
		# Named reference: letters and digits only.
		for i in [1..out.length[ do
			var c = out[i]
			if not c.is_digit and not c.is_letter then return -1
		end
		# TODO check entity is valid (i.e. that the name is a known entity).
	end
	out.add ';'
	return pos
end
2297 # A markdown escape token.
2301 redef fun emit
(v
) do
2303 v
.addc v
.current_text
.as(not null)[v
.current_pos
]
2307 # A markdown strike token.
2309 # Extended mode only (see `MarkdownProcessor::ext_mode`)
2313 redef fun emit
(v
) do
2314 var tmp
= v
.push_buffer
2315 var b
= v
.emit_text_until
(v
.current_text
.as(not null), pos
+ 2, self)
2318 v
.decorator
.add_strike
(v
, tmp
)
2319 v
.current_pos
= b
+ 1
2328 # Get the position of the next non-space character.
2329 private fun skip_spaces
(start
: Int): Int do
2331 while pos
> -1 and pos
< length
and (self[pos
] == ' ' or self[pos
] == '\n') do
2334 if pos
< length
then return pos
2338 # Read `self` until `nend` and append it to the `out` buffer.
2339 # Escape markdown special chars.
2340 private fun read_until
(out
: FlatBuffer, start
: Int, nend
: Char...): Int do
2342 while pos
< length
do
2344 if c
== '\\' and pos
+ 1 < length
then
2345 pos
= escape
(out
, self[pos
+ 1], pos
)
2347 for n
in nend
do if c
== n
then break label
2352 if pos
== length
then return -1
2356 # Read `self` as raw text until `nend` and append it to the `out` buffer.
2357 # No escape is made.
2358 private fun read_raw_until
(out
: FlatBuffer, start
: Int, nend
: Char...): Int do
2360 while pos
< length
do
2362 var end_reached
= false
2369 if end_reached
then break
2373 if pos
== length
then return -1
2377 # Read `self` as XML until `to` and append it to the `out` buffer.
2378 # Escape HTML special chars.
2379 private fun read_xml_until
(out
: FlatBuffer, from
: Int, to
: Char...): Int do
2382 var str_char
: nullable Char = null
2383 while pos
< length
do
2389 if pos
< length
then
2395 if c
== str_char
then
2402 if c
== '"' or c
== '\'' then
2407 var end_reached = false
2408 for n in [0..to.length[ do
2414 if end_reached then break
2419 if pos == length then return -1
2423 # Read `self` as XML and append it to the `out` buffer.
2424 # Safe mode can be activated to limit reading to valid xml.
2425 private fun read_xml(out: FlatBuffer, start: Int, safe_mode: Bool): Int do
2428 var is_close_tag = false
2429 if start + 1 >= length then return -1
2430 if self[start + 1] == '/' then
2433 else if self[start + 1] == '!' then
2437 is_close_tag = false
2441 var tmp = new FlatBuffer
2442 pos = read_xml_until(tmp, pos, ' ', '/', '>')
2443 if pos == -1 then return -1
2444 var tag = tmp.write_to_string.trim.to_lower
2445 if not tag.is_valid_html_tag then
2448 else if tag.is_html_unsafe then
2451 if is_close_tag then out.add '/'
2455 if is_close_tag then out.add '/'
2460 if is_close_tag then out.add '/'
2461 pos = read_xml_until(out, pos, ' ', '/', '>')
2463 if pos == -1 then return -1
2464 pos = read_xml_until(out, pos, '/', '>')
2465 if pos == -1 then return -1
2466 if self[pos] == '/' then
2468 pos = self.read_xml_until(out, pos + 1, '>')
2469 if pos == -1 then return -1
2471 if self[pos] == '>' then
2482 # Read a markdown link address and append it to the `out` buffer.
2483 private fun read_md_link(out: FlatBuffer, start: Int): Int do
2486 while pos < length do
2488 if c == '\\
' and pos + 1 < length then
2489 pos = escape(out, self[pos + 1], pos)
2491 var end_reached = false
2494 else if c == ' ' then
2495 if counter == 1 then end_reached = true
2496 else if c == ')' then
2498 if counter == 0 then end_reached = true
2500 if end_reached then break
2505 if pos == length then return -1
2509 # Read a markdown link text and append it to the `out` buffer.
2510 private fun read_md_link_id(out: FlatBuffer, start: Int): Int do
2513 while pos < length do
2515 var end_reached = false
2519 else if c == ']' then
2521 if counter == 0 then
2529 if end_reached then break
2532 if pos == length then return -1
2536 # Extract the XML tag name from a XML tag.
2537 private fun xml_tag: String do
2538 var tpl = new FlatBuffer
2540 if pos < length and self[1] == '/' then pos += 1
2541 while pos < length - 1 and (self[pos].is_digit or self[pos].is_letter) do
2545 return tpl.write_to_string.to_lower
2548 private fun is_valid_html_tag: Bool do
2549 if is_empty then return false
2551 if not c.is_alpha then return false
2556 # Read and escape the markdown contained in `self`.
2557 private fun escape(out: FlatBuffer, c: Char, pos: Int): Int do
2558 if c == '\\
' or c == '[' or c == ']' or c == '(' or c == ')' or c == '{' or
2559 c == '}' or c == '#' or c == '"' or c == '\'' or c == '.' or c == '<' or
2560 c
== '>' or c
== '*' or c
== '+' or c
== '-' or c
== '_' or c
== '!' or
2561 c
== '`' or c
== '~' or c
== '^' then
2569 # Extract string found at end of fence opening.
2570 private fun meta_from_fence
: nullable Text do
2571 for i
in [0..chars
.length
[ do
2573 if c
!= ' ' and c
!= '`' and c
!= '~' then
2574 return substring_from
(i
).trim
# Is `self` the name of an unsafe HTML element?
#
# True when the string form of `self` is listed in `html_unsafe_tags`.
private fun is_html_unsafe: Bool do
	var unsafe = html_unsafe_tags
	return unsafe.has(self.write_to_string)
end
# Is `self` the name of an HTML block element?
#
# True when the string form of `self` is listed in `html_block_tags`.
private fun is_html_block: Bool do
	var blocks = html_block_tags
	return blocks.has(self.write_to_string)
end
# Is `self` a link prefix?
#
# True when the string form of `self` is listed in `html_link_prefixes`.
private fun is_link_prefix: Bool do
	var prefixes = html_link_prefixes
	return prefixes.has(self.write_to_string)
end
# Names of the HTML elements treated as unsafe (see `is_html_unsafe`).
#
# The array is built once and shared between calls.
private fun html_unsafe_tags: Array[String] do
	var tags = once ["applet", "head", "body", "frame", "frameset", "iframe", "script", "object"]
	return tags
end
# Names of the HTML elements treated as block elements (see `is_html_block`).
#
# The array is built once and shared between calls.
private fun html_block_tags: Array[String] do
	var tags = once ["address", "article", "aside", "audio", "blockquote", "canvas", "dd", "div", "dl", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]
	return tags
end
# Prefixes recognized as links (see `is_link_prefix`).
#
# The array is built once and shared between calls.
private fun html_link_prefixes: Array[String] do
	var prefixes = once ["http", "https", "ftp", "ftps"]
	return prefixes
end
2598 # Parse `self` as markdown and return the HTML representation
2600 # var md = "**Hello World!**"
2601 # var html = md.md_to_html
2602 # assert html == "<p><strong>Hello World!</strong></p>\n"
2603 fun md_to_html
: Writable do
2604 var processor
= new MarkdownProcessor
2605 return processor
.process
(self)