1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Generate bench suites for markdown parsers.
# Decorate a markdown plain text for bench purposes.
22 # *This is not the markdown you are looking for.*
23 interface MarkdownDecorator
# Decorate `txt` and return the decorated text.
#
# Abstract: each decorator supplies its own implementation.
# Behavior depends on `LineDecorator` and `WordDecorator`.
fun decorate_text(txt: Text): Text is abstract
# Is `c` something other than a letter, a digit or a `-`?
30 fun is_word_break
(c
: Char): Bool do
31 return not (c
.is_letter
or c
.is_digit
or c
== '-')
# Parses the next word from `pos` and returns the ending position.
35 # Returns `-1` if next word is a word break symbol or there is no more text.
36 private fun parse_word
(txt
: Text, pos
: Int, res
: FlatBuffer): Int do
37 while pos
>= 0 and pos
< txt
.length
do
39 if is_word_break
(c
) then break
47 # Break text in lines of ~80 chars and call `decorate_line` for each.
48 abstract class LineDecorator
49 super MarkdownDecorator
# Decorating a text is delegated to `decorate_lines`.
redef fun decorate_text(txt) do
	return decorate_lines(txt)
end
53 private fun decorate_lines
(txt
: Text): Text do
55 var res
= new FlatBuffer
56 var line
= new FlatBuffer
57 var word
= new FlatBuffer
58 while pos
< txt
.length
do
60 if is_word_break
(c
) then
64 res
.append decorate_line
(line
)
68 pos
= parse_word
(txt
, pos
, word
)
69 if line
.length
+ word
.length
> 80 then
70 res
.append decorate_line
(line
)
# Decorate a single `line` and return the result.
#
# Abstract: each line decorator supplies its own implementation.
fun decorate_line(line: Text): Text is abstract
84 # Add a `\n` after each line.
85 class ManualBreakDecorator
# Terminate `line` with a newline character.
redef fun decorate_line(line) do
	return "{line}\n"
end
91 # Add a `> ` before each line.
92 class BlockQuoteDecorator
93 super ManualBreakDecorator
# Prefix `line` with `> `, then hand it to the super implementation.
redef fun decorate_line(line) do
	var quoted = "> {line}"
	return super(quoted)
end
98 # Add four spaces before each line.
99 class CodeBlockDecorator
100 super ManualBreakDecorator
# Prefix `line` with four spaces, then hand it to the super implementation.
#
# Four spaces is the indentation Markdown requires for an indented code
# block; a shorter prefix would leave the line as plain paragraph text.
redef fun decorate_line(line) do
	return super("    {line}")
end
105 # Add "* " before each line.
106 class UnorderedListDecorator
107 super ManualBreakDecorator
# Prefix `line` with `* `, then hand it to the super implementation.
redef fun decorate_line(line) do
	var item = "* {line}"
	return super(item)
end
113 # WordDecorator is used to decorate each word of a text.
114 abstract class WordDecorator
115 super MarkdownDecorator
# Decorating a text is delegated to `decorate_words`.
redef fun decorate_text(txt): Text do
	return decorate_words(txt)
end
119 private fun decorate_words
(txt
: Text): Text do
121 var res
= new FlatBuffer
122 var tmp
= new FlatBuffer
123 while pos
< txt
.length
do
125 if is_word_break
(c
) then
129 pos
= parse_word
(txt
, pos
, tmp
)
130 res
.append decorate_word
(tmp
)
# Decorate a single `word` and return the result.
#
# Abstract: each word decorator supplies its own implementation.
fun decorate_word(word: Text): Text is abstract
# Returns the word as is.
# Identity decoration: `word` is returned unchanged.
redef fun decorate_word(word) do
	return word
end
# Wraps the word with `*emphasis*`.
149 class EmphasisDecorator
# Surround `w` with single asterisks.
redef fun decorate_word(w) do
	return "*{w}*"
end
155 # Wraps the word with `**strong**`.
156 class StrongDecorator
# Surround `w` with double asterisks.
redef fun decorate_word(w) do
	return "**{w}**"
end
162 # Wraps the word with ````inline code````.
163 class InlineCodeDecorator
# Surround `w` with single backticks.
redef fun decorate_word(w) do
	return "`{w}`"
end
169 # Wraps the word with `<fastlink>`.
170 class FastLinkDecorator
# Surround `w` with angle brackets.
redef fun decorate_word(w) do
	return "<{w}>"
end
176 # Replaces the word letters by random special XML chars.
177 class SpecialXmlCharsDecorator
181 var chars
: Array[Char] = ['<', '>', '&']
183 redef fun decorate_word
(w
) do
184 var res
= new FlatBuffer
186 var i
= chars
.length
.rand
# Wraps the word with `<blink>inline html</blink>`.
194 class InlineHtmlDecorator
# Surround `w` with an opening and a closing `blink` tag.
redef fun decorate_word(w) do
	return "<blink>{w}</blink>"
end
# Replaces the word with `[link](http://example.com/link "link Title")`.
201 class FullLinkDecorator
204 redef fun decorate_word
(w
) do
205 return "[{w}](http://example.com/{w} \"{w} Title\
")"
# Replaces the word with `![link](http://example.com/link "link Title")`.
210 class FullImageDecorator
213 redef fun decorate_word
(w
) do
214 return "![{w}](http://example.com/{w} \"{w} Title\
")"
218 # Replaces the word with `[reflink][id123]`.
219 class RefLinkDecorator
222 redef fun decorate_word
(w
) do
223 return "[{w}][id123]"
227 # Uses other decorators randomly.
232 redef fun decorate_text
(txt
) do
233 return decorate_lines
(txt
)
236 private var line_decs
: Array[LineDecorator] is lazy
do
237 return [new ManualBreakDecorator, new BlockQuoteDecorator,
238 new CodeBlockDecorator, new UnorderedListDecorator: LineDecorator]
# Line decorator currently in use; lazily initialized to the first entry of `line_decs`.
private var current_dec: LineDecorator is lazy do
	return line_decs.first
end
243 redef fun decorate_line
(line
) do
245 var txt
= current_dec
.decorate_line
(line
)
247 var i
= line_decs
.length
.rand
248 current_dec
= line_decs
[i
]
250 return decorate_words
(txt
)
253 private var word_decs
: Array[WordDecorator] is lazy
do
254 return [new PlainDecorator, new EmphasisDecorator, new StrongDecorator,
255 new InlineCodeDecorator, new FastLinkDecorator, new SpecialXmlCharsDecorator,
256 new InlineHtmlDecorator, new FullLinkDecorator, new FullImageDecorator,
257 new RefLinkDecorator: WordDecorator]
260 redef fun decorate_word
(w
) do
261 var i
= word_decs
.length
.rand
262 return word_decs
[i
].decorate_word
(w
)
266 var opt_dir
= new OptionString("Output directory", "-o", "--output")
267 var ctx
= new OptionContext
268 ctx
.add_option
(opt_dir
)
271 if ctx
.rest
.length
!= 1 then
273 print
"gen_benches path/to/base/text.md"
277 var out_dir
= opt_dir
.value
or else "markdown.out/"
280 var txt
= ctx
.rest
.first
.to_path
.read_all
282 var lst
= [new ManualBreakDecorator, new BlockQuoteDecorator, new CodeBlockDecorator,
283 new UnorderedListDecorator, new MixedDecorator, new EmphasisDecorator,
284 new StrongDecorator, new InlineCodeDecorator, new FastLinkDecorator,
285 new SpecialXmlCharsDecorator, new InlineHtmlDecorator, new FullLinkDecorator,
286 new FullImageDecorator, new RefLinkDecorator: MarkdownDecorator]
289 var name
= dec
.class_name
.to_snake_case
.basename
("_decorator")
290 dec
.decorate_text
(txt
).write_to_file
("{out_dir}/{name}.md")