1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Generate bench suites for markdown parsers.
20 # Decorate a markdown plain text for bench purposes.
22 # *This is not the markdown you are looking for.*
23 interface MarkdownDecorator
25 # Parse the text and decorate it.
26 # Behavior depends on `LineDecorator` and `WordDecorator`.
27 fun decorate_text
(txt
: Text): Text is abstract
29 # Is `c` something other than a letter, a digit or a `-`?
30 fun is_word_break
(c
: Char): Bool do
31 return not (c
.is_letter
or c
.is_digit
or c
== '-')
34 # Parses the next word from `pos` and returns the ending position.
35 # Returns `-1` if the next word is a word break symbol or there is no more text.
36 private fun parse_word
(txt
: Text, pos
: Int, res
: FlatBuffer): Int do
37 while pos
>= 0 and pos
< txt
.length
do
39 if is_word_break
(c
) then break
47 # Breaks text into lines of ~80 chars and calls `decorate_line` for each.
48 abstract class LineDecorator
49 super MarkdownDecorator
51 redef fun decorate_text
(txt
) do return decorate_lines
(txt
)
53 private fun decorate_lines
(txt
: Text): Text do
55 var res
= new FlatBuffer
56 var line
= new FlatBuffer
57 var word
= new FlatBuffer
58 while pos
< txt
.length
do
60 if is_word_break
(c
) then
64 res
.append decorate_line
(line
)
68 pos
= parse_word
(txt
, pos
, word
)
69 if line
.length
+ word
.length
> 80 then
70 res
.append decorate_line
(line
)
80 # Returns the decorated version of `line`.
81 fun decorate_line
(line
: Text): Text is abstract
84 # Add a `\n` after each line.
85 class ManualBreakDecorator
88 redef fun decorate_line
(line
) do return "{line}\n"
91 # Add a `> ` before each line.
92 class BlockQuoteDecorator
93 super ManualBreakDecorator
95 redef fun decorate_line
(line
) do return super("> {line}")
98 # Add four spaces before each line.
99 class CodeBlockDecorator
100 super ManualBreakDecorator
102 redef fun decorate_line
(line
) do return super(" {line}")
105 # Add "* " before each line.
106 class UnorderedListDecorator
107 super ManualBreakDecorator
109 redef fun decorate_line
(line
) do return super("* {line}")
113 # WordDecorator is used to decorate each word of a text.
114 abstract class WordDecorator
115 super MarkdownDecorator
117 redef fun decorate_text
(txt
): Text do return decorate_words
(txt
)
119 private fun decorate_words
(txt
: Text): Text do
121 var res
= new FlatBuffer
122 var tmp
= new FlatBuffer
123 while pos
< txt
.length
do
125 if is_word_break
(c
) then
129 pos
= parse_word
(txt
, pos
, tmp
)
130 res
.append decorate_word
(tmp
)
137 # Returns the decorated version of `word`.
138 fun decorate_word
(word
: Text): Text is abstract
141 # Returns the word as is.
145 redef fun decorate_word
(word
) do return word
148 # Wraps the word with `*emphasis*`.
149 class EmphasisDecorator
152 redef fun decorate_word
(w
) do return "*{w}*"
155 # Wraps the word with `**strong**`.
156 class StrongDecorator
159 redef fun decorate_word
(w
) do return "**{w}**"
162 # Wraps the word with ````inline code````.
163 class InlineCodeDecorator
166 redef fun decorate_word
(w
) do return "`{w}`"
169 # Wraps the word with `<fastlink>`.
170 class FastLinkDecorator
173 redef fun decorate_word
(w
) do return "<{w}>"
176 # Replaces the word letters by random special XML chars.
177 class SpecialXmlCharsDecorator
181 var chars
: Array[Char] = ['<', '>', '&']
183 redef fun decorate_word
(w
) do
184 var res
= new FlatBuffer
186 var i
= chars
.length
.rand
193 # Wraps the word with `<blink>inline html</blink>`.
194 class InlineHtmlDecorator
197 redef fun decorate_word
(w
) do return "<blink>{w}</blink>"
200 # Replaces the word with `[link](http://example.com/link "link Title")`.
201 class FullLinkDecorator
204 redef fun decorate_word
(w
) do
205 return "[{w}](http://example.com/{w} \"{w} Title\
")"
209 # Replaces the word with `![link](http://example.com/link "link Title")`.
210 class FullImageDecorator
213 redef fun decorate_word
(w
) do
214 return "![{w}](http://example.com/{w} \"{w} Title\
")"
218 # Replaces the word with `[reflink][id123]`.
219 class RefLinkDecorator
222 redef fun decorate_word
(w
) do
223 return "[{w}][id123]"
227 # Uses other decorators randomly.
232 redef fun decorate_text
(txt
) do
233 return decorate_lines
(txt
)
236 private var line_decs
: Array[LineDecorator] is lazy
do
237 return [new ManualBreakDecorator, new BlockQuoteDecorator,
238 new CodeBlockDecorator, new UnorderedListDecorator: LineDecorator]
241 private var current_dec
: LineDecorator = line_decs
.first
is lazy
243 redef fun decorate_line
(line
) do
245 var txt
= current_dec
.decorate_line
(line
)
247 var i
= line_decs
.length
.rand
248 current_dec
= line_decs
[i
]
250 return decorate_words
(txt
)
253 private var word_decs
: Array[WordDecorator] is lazy
do
254 return [new PlainDecorator, new EmphasisDecorator, new StrongDecorator,
255 new InlineCodeDecorator, new FastLinkDecorator, new SpecialXmlCharsDecorator,
256 new InlineHtmlDecorator, new FullLinkDecorator, new FullImageDecorator,
257 new RefLinkDecorator: WordDecorator]
260 redef fun decorate_word
(w
) do
261 var i
= word_decs
.length
.rand
262 return word_decs
[i
].decorate_word
(w
)
266 var opt_dir
= new OptionString("Output directory", "-o", "--output")
267 var ctx
= new OptionContext
268 ctx
.add_option
(opt_dir
)
271 if ctx
.rest
.length
!= 1 then
273 print
"gen_benches path/to/base/text.md"
277 var out_dir
= opt_dir
.value
or else "markdown.out/"
280 var txt
= ctx
.rest
.first
.to_path
.read_all
282 var lst
= [new ManualBreakDecorator, new BlockQuoteDecorator, new CodeBlockDecorator,
283 new UnorderedListDecorator, new MixedDecorator, new EmphasisDecorator,
284 new StrongDecorator, new InlineCodeDecorator, new FastLinkDecorator,
285 # FIXME XML is too slow with Nit
286 # new SpecialXmlCharsDecorator,
287 new InlineHtmlDecorator, new FullLinkDecorator,
288 new FullImageDecorator, new RefLinkDecorator: MarkdownDecorator]
291 var name
= dec
.class_name
.to_snake_case
.basename
("_decorator")
292 dec
.decorate_text
(txt
).write_to_file
("{out_dir}/{name}.md")