3431a8645c3e9d47a2f0e14d56684eaaa7e5b17a
[nit.git] / contrib / nitester / src / nitester.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Tester of Nit engines on an MPI cluster
18 module nitester
19
20 import mpi
21 import signals
22 import opts
23
24 # Any processor, worker or controller
25 #
26 # All data and methods within this class are shared by the controller and the
27 # workers.
abstract class Processor
	super SignalHandler

	# Rank of the controller, always 0
	var controller_rank: Rank = 0.rank

	# Rank of this processor
	fun rank: Rank is abstract

	# Where to store data for transfer between nodes
	#
	# Require: `buffer.length % 4 == 0`
	var buffer = new CIntArray(1024)

	# Verbosity level, 0 is quiet and higher values display more text
	var verbose = 0

	init
	do
		# OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
		# on the first process.
		handle_signal(sigterm, true)
	end

	# Tag of a new task packet of size `tasks_per_packet`
	var task_tag: Tag = 0.tag

	# Tag to return a set of `Result` through `buffer`
	var result_tag: Tag = 1.tag

	# Tag to notify `Worker` when to quit
	var quit_tag: Tag = 2.tag

	# Tag to request more work from the `Controller` by a `Worker`
	var need_work_tag: Tag = 4.tag

	# Tag to notify `Controller` that the sender `Worker` is done
	var done_tag: Tag = 5.tag

	# Number of tasks within each task assignation with `task_tag`
	var tasks_per_packet = 1

	# Run the main logic of this node
	fun run is abstract

	# Hash or name of the branch to test
	var branch_hash: String is noinit

	# Engines targeted by this execution
	var engines: Array[String] is noinit

	# All known engines, used to detect errors in `engines`
	var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]

	# Programs to test in this execution
	var test_programs: Array[String] is noinit

	# Root of the temporary directory
	var tmp_dir = "/dev/shm/"

	# `ccache` directory
	var ccache_dir = "/dev/shm/nit_ccache"

	# Read command line options
	#
	# Sets `verbose`, `test_programs`, `branch_hash` and `engines`.
	# With `--help`, or on invalid input, the usage is printed and the
	# process exits. With `--cleanup`, the `nit*` entries of `tmp_dir`
	# (except `ccache_dir`) are deleted and the process exits.
	fun read_cli_options
	do
		var opt_ctx = new OptionContext
		# `-h` is reserved to `--help` below; `--hash` has no short name so
		# that both options do not compete for the same letter.
		var opt_hash = new OptionString(
			"Branch to test",
			"--hash")
		opt_hash.mandatory = true
		var opt_engines = new OptionString(
			"Engines to test, separated with commas ({all_engines.join(", ")} or all)",
			"--engine", "-e")
		var opt_help = new OptionBool("Print this help message", "--help", "-h")
		var opt_verbose = new OptionCount(
			"Be verbose, repeat to increase verbose level (max with -vvv)",
			"--verbose", "-v")
		var opt_cleanup = new OptionBool(
			"Clean up all nitester files (and do not run tests)",
			"--cleanup", "-C")

		opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
		opt_ctx.parse args

		# --help?
		if opt_help.value then opt_ctx.usage_error null

		# --verbose?
		verbose = opt_verbose.value

		# --cleanup?
		if opt_cleanup.value then
			assert tmp_dir.file_exists
			for file in tmp_dir.files do if file.has_prefix("nit") then
				var full_path = tmp_dir / file
				# The ccache is kept between executions
				if full_path == ccache_dir then continue

				assert full_path.file_exists

				var stat = full_path.file_lstat
				if stat.is_dir then
					full_path.rmdir
				else
					full_path.file_delete
				end
				stat.free
			end
			mpi.finalize
			exit 0
		end

		# any files?
		var rest = opt_ctx.rest
		if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
		test_programs = rest

		# hash, report a clean usage error instead of failing on a cast
		var hash = opt_hash.value
		if hash == null then
			opt_ctx.usage_error "Missing mandatory option --hash"
			return
		end
		branch_hash = hash

		# gather and check engines
		var engines_str = opt_engines.value
		var engines
		if engines_str == null then
			# default
			engines = ["nitg-s"]
		else
			engines = engines_str.split(',')

			if engines.has("all") then
				# all engines
				engines = all_engines
			end
		end

		# check validity of targeted engines
		var unknown_engines = new Array[String]
		for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine

		if not unknown_engines.is_empty then
			opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
		end
		self.engines = engines
	end

	# All tasks to be performed
	var tasks = new Array[Task]

	# Gather and register all tasks
	#
	# One `Task` is added to `tasks` for each pair of program in
	# `test_programs` and engine in `engines`, unless the program name
	# contains a non-empty line of `tests/turing.skip`.
	fun create_tasks
	do
		# At this point we are in our local nit
		var skip_path = "tests/turing.skip"
		var skip
		if skip_path.file_exists then
			var skip_file = new IFStream.open(skip_path)
			skip = skip_file.read_lines
			skip_file.close
		else
			skip = new Array[String]
		end

		for prog in test_programs do for engine in engines do

			# Is it blacklisted?
			for s in skip do if not s.is_empty and prog.has(s) then
				# Only the controller reports skipped tests. The comparison is
				# made between two `Rank`, as everywhere else in this module.
				if verbose > 0 and rank == controller_rank then print "Skipping test '{prog}' because of '{s}' in turing.skip"
				continue label
			end

			tasks.add new Task(engine, prog)
		end label
	end
end
202
203 # Single controller to dispatch tasks, gather results and produce stats
class Controller
	super Processor

	redef fun rank do return controller_rank

	# Id as `Int` of the next task to distribute
	var next_task_id = 0

	# On a signal, ask the workers to quit, print the results gathered so far and exit
	redef fun receive_signal(signal)
	do
		shutdown
		print_results

		mpi.finalize
		exit 0
	end

	redef fun run
	do
		read_cli_options
		create_tasks
		distribute_tasks
		print_results
	end

	# Cumulated results from workers
	var results = new ResultSet

	# Maintain communication with workers to distribute tasks and receive results
	#
	# Returns once every worker that received a task has sent `done_tag`,
	# or as soon as a message with an unexpected tag is probed.
	fun distribute_tasks
	do
		var at_work = new Array[Rank]

		# send initial tasks
		for r in [1..comm_world.size[ do
			var sent = send_task_to(r.rank)
			if sent then
				at_work.add r.rank
			else
				# nothing left for this worker, dismiss it right away
				mpi.send_empty(r.rank, quit_tag, comm_world)
			end
		end

		var status = new Status
		# await results and send new tasks
		while not at_work.is_empty do
			check_signals

			# Double probe to avoid bug with some implementation of MPI
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)

			if status.tag == result_tag then
				# Receive results from a worker
				var count = status.count(new DataType.int)
				mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)

				# Parse results from C array to `Result` instances
				#
				# Each result is on 4 ints: task id, arg, alt and result.
				#
				# See the comments where the data is produced in `Worker::work_on_tasks` for more information
				assert count % 4 == 0
				for t in (count/4).times do
					var tt = t*4

					var task_id = buffer[tt]
					var arg = buffer[tt+1]
					var alt = buffer[tt+2]
					var res = buffer[tt+3]

					var result = new Result(tasks[task_id], arg, alt)

					if res == 1 then result.ok = true
					if res == 2 then result.ok_empty = true
					if res == 3 then result.no_sav = true
					if res == 4 then result.fixme = true
					if res == 5 then result.fail = true
					if res == 6 then result.soso = true
					if res == 7 then result.skip = true
					if res == 8 then result.todo = true
					if res == 9 then result.skip_exec = true
					if res == 0 then result.unknown = true

					results.add result

					if verbose > 0 and results.length % 25 == 0 then print_short_results
				end

			else if status.tag == need_work_tag then
				# A worker needs more work
				mpi.recv_empty(status.source, status.tag, comm_world)
				var sent = send_task_to(status.source)
				if not sent then
					# no more work, quit
					mpi.send_empty(status.source, quit_tag, comm_world)
				end
			else if status.tag == done_tag then
				# A worker is done and will quit
				mpi.recv_empty(status.source, status.tag, comm_world)
				at_work.remove(status.source)

				if verbose > 1 then print "worker {status.source} is done ({at_work.length} still at work)"
			else
				print "Unexpected tag {status.tag}"
				shutdown
				break
			end
		end
		status.free
	end

	# Send a packet of tasks to worker at `rank`
	#
	# Only the id of the first task of the packet is transmitted.
	# Returns `false`, and sends nothing, when all tasks were already distributed.
	fun send_task_to(rank: Rank): Bool
	do
		if next_task_id >= tasks.length then return false

		buffer[0] = next_task_id
		next_task_id += tasks_per_packet

		mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)

		if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
		return true
	end

	# Display the accumulated results received from workers
	fun print_results
	do
		print "# results #"
		print "* {results.length} total"
		print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
		print "* {results.fails.length} fails"
		print "* {results.no_savs.length} no savs"
		print "* {results.fixmes.length} fixmes"
		print "* {results.sosos.length} sosos"
		print "* {results.skips.length} skips"
		print "* {results.todos.length} todos"
		print "* {results.skip_execs.length} skip execs"
		print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
	end

	# Display a one line summary of the results received so far
	fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
		"& {results.fails.length} / {results.length}"

	# Abnormally shut down the running tests
	#
	# Sends `quit_tag` to every worker.
	fun shutdown
	do
		print "Shutting down"
		# A send needs an explicit destination: `Rank.any` is a wildcard meant
		# for the receiving side. Hence each worker is notified individually.
		for r in [1..comm_world.size[ do
			mpi.send_empty(r.rank, quit_tag, comm_world)
		end
	end
end
356
357 # A worker node which actually execute the tests
class Worker
	super Processor

	# The `Rank` of `self`
	redef var rank: Rank

	# Compilation directory
	var comp_dir = "/dev/shm/nit_compile{rank}" is lazy

	# Directory to store the xml files produced for Jenkins
	var xml_dir = "~/jenkins_xml/"

	# Output file of the `tests.sh` script
	var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy

	# Source Nit repository, must be already updated and `make` before execution
	var local_nit = "/dev/shm/nit{rank}" is lazy

	# Remote Nit repository (actually the local source)
	var remote_nit = "~/nit/"

	# Compiled `Regex` to detect the argument of an execution
	var re_arg: Regex = "arg [0-9]+".to_re

	# Compiled `Regex` to detect the alternative of an execution
	var re_alt: Regex = "_alt[0-9]+".to_re

	redef fun run
	do
		read_cli_options
		setup
		create_tasks
		work_on_tasks
		cleanup
	end

	# Setup the testing environment
	#
	# Clone the nit repository into `local_nit`, move into it, checkout
	# `branch_hash` and compile the tools with the `nitg` of `remote_nit`.
	fun setup
	do
		if verbose > 0 then sys.system "hostname"

		if local_nit.file_exists then local_nit.rmdir

		exec_and_check "git clone {remote_nit} {local_nit}"
		local_nit.chdir
		exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
		exec_and_check "git fetch origin --quiet"
		exec_and_check "git checkout {branch_hash}"
		exec_and_check "cp {remote_nit}/bin/nitg bin/"
		exec_and_check "src/git-gen-version.sh"
		exec_and_check "bin/nitg --dir bin/ src/nit.nit src/nitvm.nit"
	end

	# Execute `cmd` with `sys.system` and assert that it returns 0
	#
	# The command is always executed; it is also echoed when `verbose > 0`.
	private fun exec_and_check(cmd: String)
	do
		if verbose > 0 then print "+ {cmd}"

		var res = sys.system(cmd)
		assert res == 0 else print "Command '{cmd}' failed."
	end

	# Clean up the testing environment
	#
	# Delete all temporary files, except `ccache_dir`.
	fun cleanup
	do
		if comp_dir.file_exists then comp_dir.rmdir
		if tests_sh_out.file_exists then tests_sh_out.file_delete
		# `local_nit` is the directory of the clone, remove it as `setup` does
		if local_nit.file_exists then local_nit.rmdir
	end

	# Single C `int` to hold the next task id received from the `Controller`
	var task_buffer = new CIntArray(1)

	# Manage communication with the `Controller` and execute dispatched `Task`s
	#
	# Loops until `quit_tag`, or an unexpected tag, is received from the controller.
	fun work_on_tasks
	do
		var status = new Status
		loop
			check_signals

			# We double probe to prevent a bug where a single probe does not
			# receive the real next read.
			mpi.probe(controller_rank, new Tag.any, comm_world, status)
			mpi.probe(controller_rank, new Tag.any, comm_world, status)

			if status.tag == task_tag then
				# Receive tasks to execute
				mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
				var first_id = task_buffer[0]
				for task_id in [first_id .. first_id + tasks_per_packet[ do

					# If id is over all known tasks, stop right here
					if task_id >= tasks.length then break
					var task = tasks[task_id]

					# Absolute path: this is executed for each task, a relative
					# "tests" would only be right from the root of `local_nit`.
					(local_nit / "tests").chdir

					# Command line to execute test
					var cmd = "XMLDIR={xml_dir} " +
						"CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
						"./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"

					# Execute test
					sys.system cmd

					# Test results were written to file, read them
					var fstream = new IFStream.open(tests_sh_out)
					var content = fstream.read_all
					fstream.close

					# Parse result and prepare them for sending
					#
					# The structure is composed of 4 ints for each result.
					# 1. task id
					# 2. arg number
					# 3. alt number
					# 4. test result as int
					var c = results_count
					for line in content.split('\n') do if not line.is_empty then
						var cc = c*4

						buffer[cc] = task_id

						# Both "arg " and "_alt" are 4 chars long, the number follows
						var arg_match = line.search(re_arg)
						var arg = 0
						if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
						buffer[cc+1] = arg

						var alt_match = line.search(re_alt)
						var alt = 0
						if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
						buffer[cc+2] = alt

						# When a line holds many markers, the last matching test wins
						var res = null
						if line.has("[ok]") then res = 1
						if line.has("[0k]") then res = 2
						if line.has("[=== no sav ===]") then res = 3
						if line.has("[fixme]") then res = 4
						if line.has("[======= fail") then res = 5
						if line.has("[======= soso") then res = 6
						if line.has("[skip]") then res = 7
						if line.has("[todo]") then res = 8
						if line.has("[skip exec]") then res = 9

						if res == null then
							res = 0
							if verbose > 1 then print "Unknown result: '{line}'"
						end
						buffer[cc+3] = res

						c += 1

						if verbose > 2 then print "tests.sh output line: {line}"

						# If result buffer is full, send to `Controller`
						if c*4 == buffer.length then
							send_results
							c = 0
						end
					end

					self.results_count = c
				end

				mpi.send_empty(controller_rank, need_work_tag, comm_world)
			else if status.tag == quit_tag then
				# Notification from the `Controller` to quit
				mpi.recv_empty(status.source, status.tag, comm_world)

				# Send remaining results
				send_results

				# Notify `Controller` that `self` is done and will quit
				mpi.send_empty(controller_rank, done_tag, comm_world)
				break
			else
				print "Unexpected tag {status.tag}"
				break
			end
		end
		status.free
	end

	# Total results listed in `buffer` and ready to send
	var results_count = 0

	# Send all results in `buffer` to the `Controller`
	#
	# Does nothing when `results_count` is 0, resets it to 0 otherwise.
	fun send_results
	do
		if results_count > 0 then
			if verbose > 1 then print "sending {results_count} results"
			mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
			results_count = 0
		end
	end

	# On a signal, delete the temporary files and exit
	redef fun receive_signal(signal)
	do
		cleanup
		mpi.finalize
		exit 0
	end
end
565
566 # A single test task, on a `test_program` with an `engine`
567 #
568 # Note that a task may involve more than one program to test considering the
569 # alts and args for the `test_program`.
class Task
	# Name of the engine used to run `test_program`
	var engine: String

	# Path of the program to run with `engine`
	var test_program: String

	# The engine followed by the program, separated by a space
	redef fun to_s
	do
		return engine + " " + test_program
	end
end
579
580 # Result of a `Task`
581 #
582 # There may be more than one result per `Task`.
class Result
	# The `Task` that produced `self`
	var task: Task

	# Index of the argument set used by the execution
	var arg: Int

	# Index of the alternative used by the execution
	var alt: Int

	# Was the outcome an _ok_?
	var ok = false

	# Was the outcome an _0k_?
	var ok_empty = false

	# Was the outcome a _no sav_?
	var no_sav = false

	# Was the outcome a _fixme_?
	var fixme = false

	# Was the outcome a _fail_?
	var fail = false

	# Was the outcome a _soso_?
	var soso = false

	# Was the test skipped?
	var skip = false

	# Was the test marked as _todo_?
	var todo = false

	# Was the execution of the test skipped?
	var skip_exec = false

	# Is the outcome unknown, probably because of an error?
	var unknown = false

	# `task`, `arg` and `alt` followed by the name of the outcome
	#
	# When many flags are set, the one tested first below is reported.
	redef fun to_s
	do
		var label
		if skip_exec then
			label = "skip_exec"
		else if todo then
			label = "todo"
		else if skip then
			label = "skip"
		else if soso then
			label = "soso"
		else if fail then
			label = "fail"
		else if fixme then
			label = "fixme"
		else if ok_empty then
			label = "0k"
		else if ok then
			label = "ok"
		else if no_sav then
			label = "no sav"
		else
			label = "Unknown"
		end

		return "{task} arg{arg} alt{alt} => {label}"
	end
end
639
640 # A global and sorted collection of `Result`
class ResultSet
	super HashSet[Result]

	# Results flagged `no_sav`
	var no_savs = new HashSet[Result]

	# Results flagged `ok`
	var oks = new HashSet[Result]

	# Results flagged `ok_empty`
	var ok_empties = new HashSet[Result]

	# Results flagged `fixme`
	var fixmes = new HashSet[Result]

	# Results flagged `fail`
	var fails = new HashSet[Result]

	# Results flagged `soso`
	var sosos = new HashSet[Result]

	# Results flagged `skip`
	var skips = new HashSet[Result]

	# Results flagged `todo`
	var todos = new HashSet[Result]

	# Results flagged `skip_exec`
	var skip_execs = new HashSet[Result]

	# Results flagged `unknown`
	var unknowns = new HashSet[Result]

	# TODO remove
	var per_engines = new HashMap[String, Result]

	# Register `result` in the set of each of its flags, then in `self`
	redef fun add(result)
	do
		if result.no_sav then
			no_savs.add(result)
		end
		if result.ok then
			oks.add(result)
		end
		if result.ok_empty then
			ok_empties.add(result)
		end
		if result.fixme then
			fixmes.add(result)
		end
		if result.fail then
			fails.add(result)
		end
		if result.soso then
			sosos.add(result)
		end
		if result.skip then
			skips.add(result)
		end
		if result.todo then
			todos.add(result)
		end
		if result.skip_exec then
			skip_execs.add(result)
		end
		if result.unknown then
			unknowns.add(result)
		end

		super
	end

	# Removal is not supported, abort
	redef fun remove(r)
	do
		abort
	end

	# Clearing is not supported, abort
	redef fun clear
	do
		abort
	end
end
678
redef class OptionContext

	# Print usage with a possible error `message`, then finalize MPI and exit
	#
	# The exit status is 1 when `message` is given, 0 otherwise.
	# Every node prints the error, only the node of rank 0 prints the usage.
	private fun usage_error(message: nullable String)
	do
		var ret = 0
		if message != null then
			print "Error: {message}"
			ret = 1
		end

		# Compare with a `Rank`, like the other rank tests of this module
		if comm_world.rank == 0.rank then
			print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
			usage
		end

		mpi.finalize
		exit ret
	end
end
699
700 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
redef fun print(msg: Object)
do
	if comm_world.rank == 0.rank then
		# The controller prints messages as is
		super msg
	else
		super "{comm_world.rank}/{comm_world.size}: {msg}"
	end
end
707
708 # Running MPI instance
fun mpi: MPI
do
	# `once` evaluates `new MPI` a single time, the same instance is then reused
	return once new MPI
end
710
711 # Launch mpi
mpi

# Rank of the local node
var local_rank = comm_world.rank

# Rank 0 is the `Controller`, all other ranks are `Worker`s
var processor: Processor
if local_rank != 0.rank then
	processor = new Worker(local_rank)
else
	processor = new Controller
end
processor.run

mpi.finalize