nitester: do not use nit copy dir, it doesn't affect performance
[nit.git] / contrib / nitester / src / nitester.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Tester of Nit engines on an MPI cluster
18 module nitester
19
20 import mpi
21 import signals
22 import opts
23
# Any processor, worker or controller
#
# All data and methods within this class are shared by the controller and the
# workers.
abstract class Processor
	super SignalHandler

	# Controller rank is always 0
	var controller_rank: Rank = 0.rank

	# Where to store data for transfer between nodes
	#
	# Require: `buffer.length % 4 == 0`
	var buffer = new CIntArray(1024)

	# Verbosity level, set from the number of `--verbose` options (0 by default)
	var verbose = 0

	init
	do
		# OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
		# on the first process.
		handle_signal(sigterm, true)
	end

	# Tag of a new task packet of size `tasks_per_packet`
	var task_tag: Tag = 0.tag

	# Tag to return a set of `Result` through `buffer`
	var result_tag: Tag = 1.tag

	# Tag to notify `Worker` when to quit
	var quit_tag: Tag = 2.tag

	# Tag to request more work from the `Controller` by a `Worker`
	var need_work_tag: Tag = 4.tag

	# Tag to notify `Controller` that the sender `Worker` is done
	var done_tag: Tag = 5.tag

	# Number of tasks within each task assignation with `task_tag`
	var tasks_per_packet = 4

	# Run the main logic of this node
	fun run is abstract

	# Engines targeted by this execution
	var engines: Array[String] is noinit

	# All known engines, used to detect errors in `engines`
	var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]

	# Programs to test in this execution
	var test_programs: Array[String] is noinit

	# Root of the temporary directory
	var tmp_dir = "/dev/shm/"

	# `ccache` directory
	var ccache_dir = "/dev/shm/nit_ccache"

	# Read command line options
	#
	# Sets `verbose`, `test_programs` and `engines`.
	# Exits the program on `--help`, on `--cleanup` (after deleting the
	# files) and on any usage error.
	fun read_cli_options
	do
		var opt_ctx = new OptionContext
		var opt_engines = new OptionString(
			"Engines to test, separated with commas ({all_engines.join(", ")} or all)",
			"--engine", "-e")
		var opt_help = new OptionBool("Print this help message", "--help", "-h")
		var opt_verbose = new OptionCount(
			"Be verbose, repeat to increase verbose level (max with -vvv)",
			"--verbose", "-v")
		var opt_cleanup = new OptionBool(
			"Clean up all nitester files (and do not run tests)",
			"--cleanup", "-C")

		opt_ctx.add_option(opt_engines, opt_help, opt_verbose, opt_cleanup)
		opt_ctx.parse args

		# --help?
		if opt_help.value then opt_ctx.usage_error null

		# --verbose?
		verbose = opt_verbose.value

		# --cleanup?
		if opt_cleanup.value then
			assert tmp_dir.file_exists

			# Delete everything prefixed by "nit" in `tmp_dir`, except `ccache_dir`
			for file in tmp_dir.files do if file.has_prefix("nit") then
				var full_path = tmp_dir / file
				if full_path == ccache_dir then continue

				assert full_path.file_exists

				var stat = full_path.file_lstat
				if stat.is_dir then
					full_path.rmdir
				else
					full_path.file_delete
				end
				stat.free
			end
			mpi.finalize
			exit 0
		end

		# any files?
		var rest = opt_ctx.rest
		if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
		test_programs = rest

		# gather and check engines
		var engines_str = opt_engines.value
		var engines
		if engines_str == null then
			# default
			engines = ["nitg-s"]
		else
			engines = engines_str.split(',')

			if engines.has("all") then
				# all engines
				engines = all_engines
			end
		end

		# check validity of targeted engines
		var unknown_engines = new Array[String]
		for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine

		if not unknown_engines.is_empty then
			opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or more of {all_engines.join(", ")})"
		end
		self.engines = engines
	end

	# All tasks to be performed
	#
	# The ids exchanged between the `Controller` and the `Worker`s are
	# indexes in this array.
	var tasks = new Array[Task]

	# Gather and register all tasks
	#
	# Adds one `Task` per pair of engine (from `engines`) and program (from
	# `test_programs`), grouped by engine.
	fun create_tasks
	do
		for engine in engines do for prog in test_programs do
			tasks.add new Task(engine, prog)
		end
	end
end
173
# Single controller to dispatch tasks, gather results and produce stats
class Controller
	super Processor

	# Index in `tasks` of the first task of the next packet to distribute
	var next_task_id = 0

	# On a handled signal: ask the workers to quit, print what was gathered so far and exit
	redef fun receive_signal(signal)
	do
		shutdown
		print_results

		mpi.finalize
		exit 0
	end

	redef fun run
	do
		read_cli_options
		create_tasks
		distribute_tasks
		print_results
	end

	# Cumulated results from workers
	var results = new ResultSet

	# Maintain communication with workers to distribute tasks and receive results
	#
	# Returns once every worker which received work has sent `done_tag`,
	# or as soon as a message with an unexpected tag is probed.
	fun distribute_tasks
	do
		var at_work = new Array[Rank]

		# send initial tasks
		for r in [1..comm_world.size[ do
			var sent = send_task_to(r.rank)
			if sent then
				at_work.add r.rank
			else
				# More workers than packets, this one has nothing to do
				mpi.send_empty(r.rank, quit_tag, comm_world)
			end
		end

		var status = new Status
		# await results and send new tasks
		while not at_work.is_empty do
			check_signals

			# Double probe to avoid bug with some implementation of MPI
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)

			if status.tag == result_tag then
				# Receive results from a worker
				var count = status.count(new DataType.int)
				mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)

				# Parse results from C array to `Result` instances
				#
				# Each result is on 4 ints: task id, arg, alt and result.
				#
				# See the comments where the data is produced in `Worker::work_on_tasks` for more information
				assert count % 4 == 0
				for t in (count/4).times do
					var tt = t*4

					var task_id = buffer[tt]
					var arg = buffer[tt+1]
					var alt = buffer[tt+2]
					var res = buffer[tt+3]

					var result = new Result(tasks[task_id], arg, alt)

					# Same codes as those written by `Worker::work_on_tasks`
					if res == 1 then result.ok = true
					if res == 2 then result.ok_empty = true
					if res == 3 then result.no_sav = true
					if res == 4 then result.fixme = true
					if res == 5 then result.fail = true
					if res == 6 then result.soso = true
					if res == 7 then result.skip = true
					if res == 0 then result.unknown = true

					results.add result

					if verbose > 0 and results.length % 25 == 0 then print_short_results
				end

			else if status.tag == need_work_tag then
				# A worker needs more work
				mpi.recv_empty(status.source, status.tag, comm_world)
				var sent = send_task_to(status.source)
				if not sent then
					# no more work, quit
					mpi.send_empty(status.source, quit_tag, comm_world)
				end
			else if status.tag == done_tag then
				# A worker is done and will quit
				mpi.recv_empty(status.source, status.tag, comm_world)
				at_work.remove(status.source)

				if verbose > 1 then print "worker {status.source} is done ({at_work.length} still at work)"
			else
				print "Unexpected tag {status.tag}"
				shutdown
				break
			end
		end
		status.free
	end

	# Send a packet of tasks to worker at `rank`
	#
	# Only the id of the first task of the packet is sent.
	# Returns `false`, without sending anything, when all tasks were
	# already distributed.
	fun send_task_to(rank: Rank): Bool
	do
		if next_task_id >= tasks.length then return false

		buffer[0] = next_task_id
		next_task_id += tasks_per_packet

		mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)

		if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
		return true
	end

	# Display the accumulated results received from workers
	fun print_results
	do
		print "# results #"
		print "* {results.length} total"
		print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
		print "* {results.fails.length} fails"
		print "* {results.no_savs.length} no savs"
		print "* {results.fixmes.length} fixmes"
		print "* {results.sosos.length} sosos"
		print "* {results.skips.length} skips"
		print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
	end

	# Display a one line summary of the results received so far
	fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
		"& {results.fails.length} / {results.length}"

	# Abnormally shut down the running tests
	#
	# Sends `quit_tag` to every worker rank.
	fun shutdown
	do
		print "Shutting down"

		# A send needs a real destination rank, the `any` wildcard is only
		# meaningful when receiving. Address each worker individually, over the
		# same range as the initial dispatch in `distribute_tasks`.
		for r in [1..comm_world.size[ do
			mpi.send_empty(r.rank, quit_tag, comm_world)
		end
	end
end
321
# A worker node which actually executes the tests
class Worker
	super Processor

	# The `Rank` of `self`
	var rank: Rank

	# Compilation directory
	var comp_dir = "/dev/shm/nit_compile{rank}" is lazy

	# Output file directory
	var out_dir = "/dev/shm/nit_out{rank}" is lazy

	# Directory to store the xml files produced for Jenkins
	var xml_dir = "~/jenkins_xml/"

	# Output file of the `tests.sh` script
	var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy

	# Source Nit repository, must be already updated and `make` before execution
	var nit_source_dir = "~/nit"

	# Compiled `Regex` to detect the argument of an execution
	var re_arg: Regex = "arg [0-9]+".to_re

	# Compiled `Regex` to detect the alternative of an execution
	var re_alt: Regex = "_alt[0-9]+".to_re

	redef fun run
	do
		read_cli_options
		setup
		create_tasks
		work_on_tasks
		cleanup
	end

	# Setup the testing environment
	#
	# Currently this only prints the hostname when `verbose > 0`.
	fun setup
	do
		if verbose > 0 then sys.system "hostname"
	end

	# Clean up the testing environment
	#
	# Delete `comp_dir`, `out_dir` and `tests_sh_out` if they exist.
	# `ccache_dir` is left untouched.
	fun cleanup
	do
		if comp_dir.file_exists then comp_dir.rmdir
		if out_dir.file_exists then out_dir.rmdir
		if tests_sh_out.file_exists then tests_sh_out.file_delete
	end

	# Single C `int` to hold the next task id received from the `Controller`
	var task_buffer = new CIntArray(1)

	# Manage communication with the `Controller` and execute dispatched `Task`s
	#
	# Loops until `quit_tag` is received, or a message with an unexpected
	# tag is probed.
	fun work_on_tasks
	do
		var status = new Status
		loop
			check_signals

			# We double probe to prevent bug where a single probes does not receive the
			# real next read.
			mpi.probe(controller_rank, new Tag.any, comm_world, status)
			mpi.probe(controller_rank, new Tag.any, comm_world, status)

			if status.tag == task_tag then
				# Receive tasks to execute
				mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
				var first_id = task_buffer[0]

				# A packet holds exactly `tasks_per_packet` tasks, so the upper
				# bound is excluded. The `Controller` advances its next id by the
				# same amount, an inclusive range would run the first task of
				# the next packet twice.
				for task_id in [first_id .. first_id + tasks_per_packet[ do

					# If id is over all known tasks, stop right here
					if task_id >= tasks.length then break
					var task = tasks[task_id]

					# Command line to execute test
					var cmd = "XMLDIR={xml_dir} ERRLIST={out_dir}/errlist TMPDIR={out_dir} " +
						"CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
						"./tests.sh --compdir {comp_dir} --outdir {out_dir} " +
						" --node --engine {task.engine} {task.test_program} > {tests_sh_out}"

					# Execute test
					sys.system cmd

					# Test results were written to file, read them
					var fstream = new IFStream.open(tests_sh_out)
					var content = fstream.read_all
					fstream.close

					# Parse result and prepare them for sending
					#
					# The structure is composed of 4 ints for each result.
					# 1. task id
					# 2. arg number
					# 3. alt number
					# 4. test result as int
					var c = results_count
					for line in content.split('\n') do if not line.is_empty then
						var cc = c*4

						buffer[cc] = task_id

						# Skip the 4 chars of "arg " to keep only the number
						var arg_match = line.search(re_arg)
						var arg = 0
						if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
						buffer[cc+1] = arg

						# Skip the 4 chars of "_alt" to keep only the number
						var alt_match = line.search(re_alt)
						var alt = 0
						if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
						buffer[cc+2] = alt

						# Same codes as those read by `Controller::distribute_tasks`
						var res = null
						if line.has("[ok]") then res = 1
						if line.has("[0k]") then res = 2
						if line.has("[=== no sav ===]") then res = 3
						if line.has("[fixme]") then res = 4
						if line.has("[======= fail") then res = 5
						if line.has("[======= soso") then res = 6
						if line.has("[skip]") then res = 7

						if res == null then
							res = 0
							if verbose > 1 then print "Unknown result: '{line}'"
						end
						buffer[cc+3] = res

						c += 1

						if verbose > 2 then print "tests.sh output line: {line}"

						# If result buffer is full, send to `Controller`
						if c*4 == buffer.length then
							# `send_results` relies on `results_count`, publish the
							# local count first or the full buffer would not be sent
							results_count = c
							send_results
							c = 0
						end
					end

					self.results_count = c
				end

				mpi.send_empty(controller_rank, need_work_tag, comm_world)
			else if status.tag == quit_tag then
				# Notification from the `Controller` to quit
				mpi.recv_empty(status.source, status.tag, comm_world)

				# Send remaining results
				send_results

				# Notify `Controller` that `self` is done and will quit
				mpi.send_empty(controller_rank, done_tag, comm_world)
				break
			else
				print "Unexpected tag {status.tag}"
				break
			end
		end
		status.free
	end

	# Total results listed in `buffer` and ready to send
	var results_count = 0

	# Send all results in `buffer` to the `Controller`
	#
	# Does nothing if `results_count == 0`, resets `results_count` otherwise.
	fun send_results
	do
		if results_count > 0 then
			if verbose > 1 then print "sending {results_count} results"
			mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
			results_count = 0
		end
	end

	# On a handled signal: delete the temporary files and exit
	redef fun receive_signal(signal)
	do
		cleanup
		mpi.finalize
		exit 0
	end
end
507
# One unit of work: a `test_program` to run with a given `engine`
#
# A single task can yield many results, one for each combination of alts and
# args of the `test_program`.
class Task
	# Engine used to execute `test_program`
	var engine: String

	# Program to test with `engine`
	var test_program: String

	# The `engine` followed by the `test_program`, separated by a space
	redef fun to_s
	do
		return "{engine} {test_program}"
	end
end
521
# Result of a `Task`
#
# There may be more than one result per `Task`.
class Result
	# `Task` associated to `self`
	var task: Task

	# Argument index of the execution resulting in `self`
	var arg: Int

	# Alternative index of the execution resulting in `self`
	var alt: Int

	# Is `self` result an _ok_?
	var ok = false

	# Is `self` result an _0k_?
	var ok_empty = false

	# Is `self` result a _no sav_?
	var no_sav = false

	# Is `self` result a _fixme_?
	var fixme = false

	# Is `self` result a _fail_?
	var fail = false

	# Is `self` result a _soso_?
	var soso = false

	# Is `self` skipped test?
	var skip = false

	# Is `self` an unknown result, probably an error
	var unknown = false

	# The `task`, `arg` and `alt` followed by the name of the status of `self`
	#
	# The status is "Unknown" when no other flag is set.
	redef fun to_s
	do
		var err = "Unknown"
		if no_sav then err = "no sav"
		if ok then err = "ok"
		if ok_empty then err = "0k"
		if fixme then err = "fixme"
		if fail then err = "fail"
		# _soso_ and _skip_ are known statuses too, do not report them as "Unknown"
		if soso then err = "soso"
		if skip then err = "skip"

		return "{task} arg{arg} alt{alt} => {err}"
	end
end
571
# Global collection of `Result`, also classified by status
#
# Results can only be added, `remove` and `clear` abort.
class ResultSet
	super HashSet[Result]

	# Results flagged `no_sav`
	var no_savs = new HashSet[Result]

	# Results flagged `ok`
	var oks = new HashSet[Result]

	# Results flagged `ok_empty`
	var ok_empties = new HashSet[Result]

	# Results flagged `fixme`
	var fixmes = new HashSet[Result]

	# Results flagged `fail`
	var fails = new HashSet[Result]

	# Results flagged `soso`
	var sosos = new HashSet[Result]

	# Results flagged `skip`
	var skips = new HashSet[Result]

	# Results flagged `unknown`
	var unknowns = new HashSet[Result]

	# TODO remove
	var per_engines = new HashMap[String, Result]

	# Register `result` in the set of each of its flags, then in `self`
	redef fun add(result)
	do
		if result.ok then oks.add result
		if result.ok_empty then ok_empties.add result
		if result.no_sav then no_savs.add result
		if result.fixme then fixmes.add result
		if result.fail then fails.add result
		if result.soso then sosos.add result
		if result.skip then skips.add result
		if result.unknown then unknowns.add result

		super
	end

	# Unsupported, always aborts
	redef fun remove(r)
	do
		abort
	end

	# Unsupported, always aborts
	redef fun clear
	do
		abort
	end
end
606
redef class OptionContext

	# Print usage with a possible error `message`, then exit
	#
	# The error `message` is printed by every node, the usage itself only by
	# the controller. The exit status is 1 with a `message` and 0 without.
	private fun usage_error(message: nullable String)
	do
		var ret = 0
		if message != null then
			print "Error: {message}"
			ret = 1
		end

		# Compare against a `Rank`, as everywhere else in this module, rather
		# than against a raw `Int`
		if comm_world.rank == 0.rank then
			print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
			usage
		end

		mpi.finalize
		exit ret
	end
end
627
# Print `msg`, prefixed by `rank/comm_world.size` on every node but the one of rank 0
redef fun print(msg: Object)
do
	if comm_world.rank == 0.rank then
		super msg
	else
		super "{comm_world.rank}/{comm_world.size}: {msg}"
	end
end
635
# The running MPI instance, created on the first call and reused afterwards
fun mpi: MPI
do
	return once new MPI
end
638
# Launch mpi, the first call creates the instance
mpi

# Rank of the local node
var rank = comm_world.rank

# Rank 0 is the `Controller`, every other rank is a `Worker`
var processor: Processor
if rank != 0.rank then
	processor = new Worker(rank)
else
	processor = new Controller
end
processor.run

mpi.finalize