benches/markdown: introduces markdown benches
[nit.git] / benchmarks / markdown / benches / gen_benches.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Generate bench suites for markdown parsers.
16 module gen_benches
17
18 import opts
19
# Decorates plain markdown text for benchmarking purposes.
#
# *This is not the markdown you are looking for.*
interface MarkdownDecorator

	# Parses `txt` and returns its decorated version.
	#
	# The actual behavior is provided by `LineDecorator` and `WordDecorator`.
	fun decorate_text(txt: Text): Text is abstract

	# Is `c` anything other than a letter, a digit or a `-`?
	fun is_word_break(c: Char): Bool do
		return not c.is_letter and not c.is_digit and c != '-'
	end

	# Copies the word starting at `pos` in `txt` into `res`.
	#
	# Chars are added to `res` until a word break (see `is_word_break`) or the
	# end of `txt` is reached.
	#
	# Returns the position following the last copied char.
	# This is `pos` itself when no char was copied.
	private fun parse_word(txt: Text, pos: Int, res: FlatBuffer): Int do
		var cursor = pos
		loop
			if cursor < 0 or cursor >= txt.length then break
			var c = txt[cursor]
			if is_word_break(c) then break
			res.add c
			cursor += 1
		end
		return cursor
	end
end
46
# Breaks text into lines of ~80 chars and calls `decorate_line` for each.
abstract class LineDecorator
	super MarkdownDecorator

	redef fun decorate_text(txt) do return decorate_lines(txt)

	# Splits `txt` into lines and returns the concatenation of their decorated versions.
	#
	# A line is closed when a `\n` is read, or when appending the next word
	# would make it longer than 80 chars.
	# Whatever is still buffered once the end of `txt` is reached is decorated
	# as a last line.
	private fun decorate_lines(txt: Text): Text do
		var pos = 0
		var res = new FlatBuffer
		var line = new FlatBuffer
		var word = new FlatBuffer
		while pos < txt.length do
			var c = txt[pos]
			if is_word_break(c) then
				# Word breaks are copied unchanged into the current line.
				pos += 1
				line.add c
				if c == '\n' then
					res.append decorate_line(line)
					line.clear
				end
			else
				pos = parse_word(txt, pos, word)
				# Close the current line before it grows past 80 chars.
				if line.length + word.length > 80 then
					res.append decorate_line(line)
					line.clear
				end
				line.append word
				word.clear
			end
		end
		# Flush the last line: without this, the trailing text of an input
		# that does not end with a `\n` would be silently dropped.
		if line.length > 0 then res.append decorate_line(line)
		return res
	end

	# Returns the decorated version of `line`.
	fun decorate_line(line: Text): Text is abstract
end
83
# Appends a `\n` to each line.
class ManualBreakDecorator
	super LineDecorator

	redef fun decorate_line(line) do
		var broken = "{line}\n"
		return broken
	end
end
90
# Prefixes each line with `> `.
class BlockQuoteDecorator
	super ManualBreakDecorator

	redef fun decorate_line(line) do
		var quoted = "> {line}"
		return super(quoted)
	end
end
97
# Adds four spaces before each line.
class CodeBlockDecorator
	super ManualBreakDecorator

	# Prefixes `line` with four spaces, then lets the parent append the `\n`.
	#
	# The prefix must be four spaces long to match the documented behavior.
	redef fun decorate_line(line) do return super("    {line}")
end
104
# Prefixes each line with `* `.
class UnorderedListDecorator
	super ManualBreakDecorator

	redef fun decorate_line(line) do
		var item = "* {line}"
		return super(item)
	end
end
111
112
# Decorates each word of a text with `decorate_word`.
abstract class WordDecorator
	super MarkdownDecorator

	redef fun decorate_text(txt): Text do return decorate_words(txt)

	# Returns `txt` with each word replaced by its decorated version.
	#
	# Word breaks (see `is_word_break`) are copied unchanged.
	private fun decorate_words(txt: Text): Text do
		var out = new FlatBuffer
		var word = new FlatBuffer
		var cursor = 0
		while cursor < txt.length do
			var c = txt[cursor]
			if not is_word_break(c) then
				# `parse_word` fills `word` and gives the position to resume from.
				cursor = parse_word(txt, cursor, word)
				out.append decorate_word(word)
				word.clear
			else
				out.add c
				cursor += 1
			end
		end
		return out
	end

	# Returns the decorated version of `word`.
	fun decorate_word(word: Text): Text is abstract
end
140
# Leaves each word unchanged.
class PlainDecorator
	super WordDecorator

	redef fun decorate_word(word) do
		return word
	end
end
147
# Wraps each word as `*emphasis*`.
class EmphasisDecorator
	super WordDecorator

	redef fun decorate_word(w) do
		var emphasized = "*{w}*"
		return emphasized
	end
end
154
# Wraps each word as `**strong**`.
class StrongDecorator
	super WordDecorator

	redef fun decorate_word(w) do
		var strong = "**{w}**"
		return strong
	end
end
161
# Wraps each word between two backticks, as markdown inline code.
class InlineCodeDecorator
	super WordDecorator

	redef fun decorate_word(w) do
		var code = "`{w}`"
		return code
	end
end
168
# Wraps each word as `<fastlink>`.
class FastLinkDecorator
	super WordDecorator

	redef fun decorate_word(w) do
		var link = "<{w}>"
		return link
	end
end
175
# Replaces each char of a word by a random special XML char.
class SpecialXmlCharsDecorator
	super WordDecorator

	# Chars the replacements are randomly picked from.
	var chars: Array[Char] = ['<', '>', '&']

	redef fun decorate_word(w) do
		var res = new FlatBuffer
		# One random char is emitted for each char of `w`;
		# the original chars themselves are not used.
		var remaining = w.length
		while remaining > 0 do
			res.add chars[chars.length.rand]
			remaining -= 1
		end
		return res
	end
end
192
# Wraps each word as `<blink>inline html</blink>`.
class InlineHtmlDecorator
	super WordDecorator

	redef fun decorate_word(w) do
		var html = "<blink>{w}</blink>"
		return html
	end
end
199
# Replaces each word with `[link](http://example.com/link "link Title")`.
class FullLinkDecorator
	super WordDecorator

	redef fun decorate_word(w) do return "[{w}](http://example.com/{w} \"{w} Title\")"
end
208
# Replaces each word with `![link](http://example.com/link "link Title")`.
class FullImageDecorator
	super WordDecorator

	redef fun decorate_word(w) do return "![{w}](http://example.com/{w} \"{w} Title\")"
end
217
# Replaces each word with `[reflink][id123]`.
#
# The reference id is always `id123`.
class RefLinkDecorator
	super WordDecorator

	redef fun decorate_word(w) do return "[{w}][id123]"
end
226
# Uses other decorators randomly.
class MixedDecorator
	super LineDecorator
	super WordDecorator

	# Lines are processed first; words are decorated from `decorate_line`.
	redef fun decorate_text(txt) do
		return decorate_lines(txt)
	end

	# Line decorators to pick from.
	private var line_decs: Array[LineDecorator] is lazy do
		return [new ManualBreakDecorator, new BlockQuoteDecorator,
			new CodeBlockDecorator, new UnorderedListDecorator: LineDecorator]
	end

	# Line decorator currently applied; starts with the first of `line_decs`.
	private var current_dec: LineDecorator = line_decs.first is lazy

	# Matches a line ending with a `\n`.
	#
	# Kept as an attribute so the regex is not rebuilt for each decorated line.
	private var line_end_re: Regex = "\n$".to_re

	# Decorates `line` with `current_dec`, then decorates each word of the result.
	#
	# When `line` ends with a `\n`, another line decorator is randomly picked
	# for the following lines.
	redef fun decorate_line(line) do
		var txt = current_dec.decorate_line(line)
		if line.has(line_end_re) then
			var i = line_decs.length.rand
			current_dec = line_decs[i]
		end
		return decorate_words(txt)
	end

	# Word decorators to pick from.
	private var word_decs: Array[WordDecorator] is lazy do
		return [new PlainDecorator, new EmphasisDecorator, new StrongDecorator,
			new InlineCodeDecorator, new FastLinkDecorator, new SpecialXmlCharsDecorator,
			new InlineHtmlDecorator, new FullLinkDecorator, new FullImageDecorator,
			new RefLinkDecorator: WordDecorator]
	end

	# Decorates `w` with a word decorator randomly picked from `word_decs`.
	redef fun decorate_word(w) do
		var i = word_decs.length.rand
		return word_decs[i].decorate_word(w)
	end
end
265
# Command line options.
var opt_dir = new OptionString("Output directory", "-o", "--output")
var ctx = new OptionContext
ctx.add_option(opt_dir)
ctx.parse(args)

# Exactly one positional argument is expected: the base text to decorate.
if ctx.rest.length != 1 then
	print "Usage:"
	print "gen_benches path/to/base/text.md"
	# A wrong invocation is a failure: exit with a non-zero status so that
	# calling scripts can detect it.
	exit 1
end

# Generated files go to the `--output` directory, or `markdown.out/` by default.
var out_dir = opt_dir.value or else "markdown.out/"
out_dir.mkdir

var txt = ctx.rest.first.to_path.read_all

var lst = [new ManualBreakDecorator, new BlockQuoteDecorator, new CodeBlockDecorator,
	new UnorderedListDecorator, new MixedDecorator, new EmphasisDecorator,
	new StrongDecorator, new InlineCodeDecorator, new FastLinkDecorator,
	new SpecialXmlCharsDecorator, new InlineHtmlDecorator, new FullLinkDecorator,
	new FullImageDecorator, new RefLinkDecorator: MarkdownDecorator]

# Write one decorated file per decorator.
# The file name is derived from the decorator class name: snake case, then
# `basename("_decorator")`.
for dec in lst do
	var name = dec.class_name.to_snake_case.basename("_decorator")
	dec.decorate_text(txt).write_to_file("{out_dir}/{name}.md")
end