Merge remote-tracking branch 'origin/master' into init_auto
[nit.git] / benchmarks / markdown / benches / gen_benches.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Generate bench suites for markdown parsers.
16 module gen_benches
17
18 import opts
19
# Decorate a markdown plain text for bench purposes.
#
# *This is not the markdown you are looking for.*
interface MarkdownDecorator

	# Parse the text and decorate it.
	# Behavior depends on `LineDecorator` and `WordDecorator`.
	fun decorate_text(txt: Text): Text is abstract

	# Is `c` something other than a letter, a digit or a `-`?
	fun is_word_break(c: Char): Bool do
		if c.is_letter then return false
		if c.is_digit then return false
		return c != '-'
	end

	# Copies the word starting at `pos` in `txt` into `res`.
	#
	# Chars are copied until a word break char or the end of `txt` is reached.
	# Returns the position of the first char that was not copied, so `pos`
	# itself is returned when no char could be read.
	private fun parse_word(txt: Text, pos: Int, res: FlatBuffer): Int do
		var cur = pos
		loop
			# Stop as soon as the cursor leaves the text.
			if cur < 0 or cur >= txt.length then break
			var c = txt[cur]
			if is_word_break(c) then break
			res.add c
			cur += 1
		end
		return cur
	end
end
46
# Break text in lines of ~80 chars and call `decorate_line` for each.
abstract class LineDecorator
	super MarkdownDecorator

	redef fun decorate_text(txt) do return decorate_lines(txt)

	# Splits `txt` into lines and returns the concatenation of their
	# decorated versions.
	#
	# A line is closed when a `\n` is read, or when adding the next word
	# would make it longer than 80 chars.
	# Text remaining after the last line break is decorated as a final line.
	private fun decorate_lines(txt: Text): Text do
		var pos = 0
		var res = new FlatBuffer
		var line = new FlatBuffer
		var word = new FlatBuffer
		while pos < txt.length do
			var c = txt[pos]
			if is_word_break(c) then
				pos += 1
				line.add c
				# An explicit line break always closes the current line.
				if c == '\n' then
					res.append decorate_line(line)
					line.clear
				end
			else
				pos = parse_word(txt, pos, word)
				# Wrap before the word if it does not fit in the current line.
				if line.length + word.length > 80 then
					res.append decorate_line(line)
					line.clear
				end
				line.append word
				word.clear
			end
		end
		# Flush what is left when `txt` does not end with a `\n`,
		# otherwise the end of the text would be lost.
		if line.length > 0 then res.append decorate_line(line)
		return res
	end

	# Returns the decorated version of `line`.
	fun decorate_line(line: Text): Text is abstract
end
83
# Add a `\n` after each line.
class ManualBreakDecorator
	super LineDecorator

	# Returns `line` followed by a line break.
	redef fun decorate_line(line) do
		var terminated = "{line}\n"
		return terminated
	end
end
90
# Add a `> ` before each line.
class BlockQuoteDecorator
	super ManualBreakDecorator

	# Prefixes `line` with the quote marker, then lets the parent end the line.
	redef fun decorate_line(line) do
		var quoted = "> {line}"
		return super(quoted)
	end
end
97
# Add four spaces before each line.
#
# Four leading spaces is the indentation Markdown expects for an
# indented code block.
class CodeBlockDecorator
	super ManualBreakDecorator

	# Indents `line` by four spaces, then lets the parent end the line.
	redef fun decorate_line(line) do return super("    {line}")
end
104
# Add "* " before each line.
class UnorderedListDecorator
	super ManualBreakDecorator

	# Prefixes `line` with a list bullet, then lets the parent end the line.
	redef fun decorate_line(line) do
		var item = "* {line}"
		return super(item)
	end
end
111
112
# WordDecorator is used to decorate each word of a text.
abstract class WordDecorator
	super MarkdownDecorator

	redef fun decorate_text(txt): Text do
		return decorate_words(txt)
	end

	# Returns a copy of `txt` where each word is replaced by its decorated
	# version.
	#
	# Word break chars are copied as they are.
	private fun decorate_words(txt: Text): Text do
		var out = new FlatBuffer
		var word = new FlatBuffer
		var cursor = 0
		while cursor < txt.length do
			var c = txt[cursor]
			if not is_word_break(c) then
				# Read the whole word, `cursor` ends up right after it.
				cursor = parse_word(txt, cursor, word)
				out.append decorate_word(word)
				word.clear
				continue
			end
			out.add c
			cursor += 1
		end
		return out
	end

	# Returns the decorated version of `word`.
	fun decorate_word(word: Text): Text is abstract
end
140
# Returns the word as it is.
class PlainDecorator
	super WordDecorator

	# No decoration: `word` itself is returned.
	redef fun decorate_word(word) do
		return word
	end
end
147
# Wraps the word with `*emphasis*`.
class EmphasisDecorator
	super WordDecorator

	# Returns `w` between two single asterisks.
	redef fun decorate_word(w) do
		var emphasized = "*{w}*"
		return emphasized
	end
end
154
# Wraps the word with `**strong**`.
class StrongDecorator
	super WordDecorator

	# Returns `w` between two double asterisks.
	redef fun decorate_word(w) do
		var strong = "**{w}**"
		return strong
	end
end
161
# Wraps the word with backquotes, as Markdown inline code.
class InlineCodeDecorator
	super WordDecorator

	# Returns `w` between two backquotes.
	redef fun decorate_word(w) do
		var code = "`{w}`"
		return code
	end
end
168
# Wraps the word with `<fastlink>`.
class FastLinkDecorator
	super WordDecorator

	# Returns `w` between angle brackets.
	redef fun decorate_word(w) do
		var link = "<{w}>"
		return link
	end
end
175
# Replaces the word letters by random special XML chars.
#
# The result has as many chars as the word, each one drawn from `chars`.
class SpecialXmlCharsDecorator
	super WordDecorator

	# Random chars used
	var chars: Array[Char] = ['<', '>', '&']

	redef fun decorate_word(w) do
		var buf = new FlatBuffer
		# One random pick per char of `w`; the chars of `w` are not used.
		var left = w.length
		while left > 0 do
			buf.add chars[chars.length.rand]
			left -= 1
		end
		return buf
	end
end
192
# Wraps the word with `<blink>inline html</blink>`.
class InlineHtmlDecorator
	super WordDecorator

	# Returns `w` enclosed in a `blink` HTML element.
	redef fun decorate_word(w) do
		var html = "<blink>{w}</blink>"
		return html
	end
end
199
# Replaces the word with `[link](http://example.com/link "link Title")`.
class FullLinkDecorator
	super WordDecorator

	# The word is used as link text, as last URL segment and in the title.
	redef fun decorate_word(w) do
		var link = "[{w}](http://example.com/{w} \"{w} Title\")"
		return link
	end
end
208
# Replaces the word with `![link](http://example.com/link "link Title")`.
class FullImageDecorator
	super WordDecorator

	# The word is used as alt text, as last URL segment and in the title.
	redef fun decorate_word(w) do
		var image = "![{w}](http://example.com/{w} \"{w} Title\")"
		return image
	end
end
217
# Replaces the word with `[reflink][id123]`.
class RefLinkDecorator
	super WordDecorator

	# The word is the link text; the reference id is always `id123`.
	redef fun decorate_word(w) do return "[{w}][id123]"
end
226
# Uses other decorators randomly.
#
# Each line is decorated by one of `line_decs`, then each word of the
# decorated line is decorated by a random decorator from `word_decs`.
class MixedDecorator
	super LineDecorator
	super WordDecorator

	# Both super classes redefine `decorate_text`; the line based one is used.
	redef fun decorate_text(txt) do
		return decorate_lines(txt)
	end

	# Line decorators `current_dec` is chosen from.
	private var line_decs: Array[LineDecorator] is lazy do
		return [new ManualBreakDecorator, new BlockQuoteDecorator,
			new CodeBlockDecorator, new UnorderedListDecorator: LineDecorator]
	end

	# Line decorator currently in use.
	#
	# Starts with the first of `line_decs` and is replaced by a random one
	# after each line ending with a `\n`.
	private var current_dec: LineDecorator = line_decs.first is lazy

	# Decorates `line` with `current_dec`, then decorates the words of the result.
	redef fun decorate_line(line) do
		var txt = current_dec.decorate_line(line)
		# Only a line ending with a line break triggers a new random pick.
		# A suffix test avoids compiling a regular expression for each line.
		if line.has_suffix("\n") then
			var i = line_decs.length.rand
			current_dec = line_decs[i]
		end
		return decorate_words(txt)
	end

	# Word decorators `decorate_word` picks from.
	private var word_decs: Array[WordDecorator] is lazy do
		return [new PlainDecorator, new EmphasisDecorator, new StrongDecorator,
			new InlineCodeDecorator, new FastLinkDecorator, new SpecialXmlCharsDecorator,
			new InlineHtmlDecorator, new FullLinkDecorator, new FullImageDecorator,
			new RefLinkDecorator: WordDecorator]
	end

	# Decorates `w` with a random decorator from `word_decs`.
	redef fun decorate_word(w) do
		var i = word_decs.length.rand
		return word_decs[i].decorate_word(w)
	end
end
265
# Command line: `gen_benches [-o output_dir] path/to/base/text.md`
var opt_dir = new OptionString("Output directory", "-o", "--output")
var ctx = new OptionContext
ctx.add_option(opt_dir)
ctx.parse(args)

# Exactly one positional argument is expected: the base text to decorate.
if ctx.rest.length != 1 then
	print "Usage:"
	print "gen_benches path/to/base/text.md"
	# A misuse is reported to the calling script with a failure status.
	exit 1
end

var out_dir = opt_dir.value or else "markdown.out/"
out_dir.mkdir

var txt = ctx.rest.first.to_path.read_all

# Decorators to generate a bench file for, one file per decorator.
var lst = [new ManualBreakDecorator, new BlockQuoteDecorator, new CodeBlockDecorator,
	new UnorderedListDecorator, new MixedDecorator, new EmphasisDecorator,
	new StrongDecorator, new InlineCodeDecorator, new FastLinkDecorator,
	# FIXME XML is too slow with Nit
	# new SpecialXmlCharsDecorator,
	new InlineHtmlDecorator, new FullLinkDecorator,
	new FullImageDecorator, new RefLinkDecorator: MarkdownDecorator]

for dec in lst do
	# File name is the class name in snake case without the `_decorator` suffix.
	var name = dec.class_name.to_snake_case.basename("_decorator")
	# `join_path` avoids a doubled `/` when `out_dir` already ends with one.
	dec.decorate_text(txt).write_to_file(out_dir.join_path("{name}.md"))
end