1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # Tester of Nit engines on an MPI cluster
24 # Any processor, worker or controller
26 # All data and methods within this class are shared by the controller and the
28 abstract class Processor
31 # Controller rank is always 0
32 var controller_rank
: Rank = 0.rank
34 # Rank on this processor
35 fun rank
: Rank is abstract
37 # Where to store data for transfer between nodes
39 # Require: `buffer.length % 4 == 0`
40 var buffer
= new CIntArray(1024)
42 # Run in verbose mode, display more text
47 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
48 # on the first process.
49 handle_signal
(sigterm
, true)
52 # Tag of a new task packet of size `tasks_per_packet`
53 var task_tag
: Tag = 0.tag
55 # Tag to return a set of `Result` through `buffer`
56 var result_tag
: Tag = 1.tag
58 # Tag to notify `Worker` when to quit
59 var quit_tag
: Tag = 2.tag
61 # Tag to request more work from the `Controller` by a `Worker`
62 var need_work_tag
: Tag = 4.tag
64 # Tag to notify `Controller` that the sender `Worker` is done
65 var done_tag
: Tag = 5.tag
67 # Number of tasks within each task assignment with `task_tag`
68 var tasks_per_packet
= 1
70 # Run the main logic of this node
73 # Hash or name of the branch to test
74 var branch_hash
: String is noinit
76 # Engines targeted by this execution
77 var engines
: Array[String] is noinit
79 # All known engines, used to detect errors in `engines`
80 var all_engines
: Array[String] = ["nitcs", "nitcsg", "nitcg", "nitce", "niti", "emscripten"]
82 # Programs to test in this execution
83 var test_programs
: Array[String] is noinit
85 # Root of the temporary directory
86 var tmp_dir
= "/dev/shm/"
89 var ccache_dir
= "/dev/shm/nit_ccache"
91 # Read command line options
94 var opt_ctx
= new OptionContext
95 var opt_hash
= new OptionString(
98 opt_hash
.mandatory
= true
99 var opt_engines
= new OptionString(
100 "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
102 var opt_help
= new OptionBool("Print this help message", "--help", "-h")
103 var opt_verbose
= new OptionCount(
104 "Be verbose, repeat to increase verbose level (max with -vvv)",
106 var opt_cleanup
= new OptionBool(
107 "Clean up all nitester files (and do not run tests)",
110 opt_ctx
.add_option
(opt_hash
, opt_engines
, opt_help
, opt_verbose
, opt_cleanup
)
114 if opt_help
.value
then opt_ctx
.usage_error
null
117 verbose
= opt_verbose
.value
120 if opt_cleanup
.value
then
121 assert tmp_dir
.file_exists
122 for file
in tmp_dir
.files
do if file
.has_prefix
("nit") then
123 var full_path
= tmp_dir
/ file
124 if full_path
== ccache_dir
then continue
126 assert full_path
.file_exists
128 var stat
= full_path
.file_lstat
132 full_path
.file_delete
140 var rest
= opt_ctx
.rest
141 if rest
.is_empty
then opt_ctx
.usage_error
"This tool needs at least one test_program.nit"
145 branch_hash
= opt_hash
.value
.as(not null)
147 # gather and check engines
148 var engines_str
= opt_engines
.value
150 if engines_str
== null then
154 engines
= engines_str
.split
(',')
156 if engines
.has
("all") then
158 engines
= all_engines
162 # check validity of targeted engines
163 var unknown_engines
= new Array[String]
164 for engine
in engines
do if not all_engines
.has
(engine
) then unknown_engines
.add engine
166 if not unknown_engines
.is_empty
then
# Report every unrecognized engine at once, along with the accepted names
opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or more of {all_engines.join(", ")})"
169 self.engines
= engines
172 # All tasks to be performed
173 var tasks
= new Array[Task]
175 # Gather and register all tasks
178 # At this point we are in our local nit
179 var skip_path
= "tests/turing.skip"
181 if skip_path
.file_exists
then
182 var skip_file
= new FileReader.open
(skip_path
)
183 skip
= skip_file
.read_lines
186 skip
= new Array[String]
189 for engine
in engines
do for prog
in test_programs
do
191 for s
in skip
do if not s
.is_empty
and prog
.has
(s
) then
192 if verbose
> 0 and rank
== 0 then print
"Skipping test '{prog}' because of '{s}' in turing.skip"
196 tasks
.add
new Task(engine
, prog
)
201 # Single controller to dispatch tasks, gather results and produce stats
# The rank of this processor is `controller_rank`
redef fun rank
do
	return controller_rank
end
207 # Id as `Int` of the next task to distribute
210 redef fun receive_signal
(signal
)
227 # Cumulated results from workers
228 var results
= new ResultSet
230 # Maintain communication with workers to distribute tasks and receive results
233 var at_work
= new Array[Rank]
236 for r
in [1..comm_world
.size
[ do
237 var sent
= send_task_to
(r
.rank
)
241 mpi
.send_empty
(r
.rank
, quit_tag
, comm_world
)
245 var status
= new Status
246 # await results and send new tasks
247 while not at_work
.is_empty
do
250 # Double probe to avoid a bug with some implementations of MPI
251 mpi
.probe
(new Rank.any
, new Tag.any
, comm_world
, status
)
252 mpi
.probe
(new Rank.any
, new Tag.any
, comm_world
, status
)
254 if status
.tag
== result_tag
then
255 # Receive results from a worker
256 var count
= status
.count
(new DataType.int
)
257 mpi
.recv_into
(buffer
, 0, count
, status
.source
, status
.tag
, comm_world
)
259 # Parse results from C array to `Result` instances
261 # Each result is on 4 ints: task id, arg, alt and result.
263 # See the comments where the data is produced in `Worker::work_on_tasks` for more information
264 assert count
% 4 == 0
265 for t
in (count
/4).times
do
268 var task_id
= buffer
[tt
]
269 var arg
= buffer
[tt
+1]
270 var alt
= buffer
[tt
+2]
271 var res
= buffer
[tt
+3]
273 var result
= new Result(tasks
[task_id
], arg
, alt
)
275 if res
== 1 then result
.ok
= true
276 if res
== 2 then result
.ok_empty
= true
277 if res
== 3 then result
.no_sav
= true
278 if res
== 4 then result
.fixme
= true
279 if res
== 5 then result
.fail
= true
280 if res
== 6 then result
.soso
= true
281 if res
== 7 then result
.skip
= true
282 if res
== 8 then result
.todo
= true
283 if res
== 9 then result
.skip_exec
= true
284 if res
== 0 then result
.unknown
= true
288 if verbose
> 0 and results
.length
% 25 == 0 then print_short_results
291 else if status
.tag
== need_work_tag
then
292 # A worker needs more work
293 mpi
.recv_empty
(status
.source
, status
.tag
, comm_world
)
294 var sent
= send_task_to
(status
.source
)
297 mpi
.send_empty
(status
.source
, quit_tag
, comm_world
)
299 else if status
.tag
== done_tag
then
300 # A worker is done and will quit
301 mpi
.recv_empty
(status
.source
, status
.tag
, comm_world
)
302 at_work
.remove
(status
.source
)
304 if verbose
> 0 then print
"Worker {status.source} is done ({at_work.length} still at work)"
306 print
"Unexpected tag {status.tag}"
314 # Send a packet of tasks to worker at `rank`
315 fun send_task_to
(rank
: Rank): Bool
317 if next_task_id
>= tasks
.length
then return false
319 buffer
[0] = next_task_id
320 next_task_id
+= tasks_per_packet
322 mpi
.send_from
(buffer
, 0, 1, rank
, task_tag
, comm_world
)
324 if verbose
> 1 then print
"sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
328 # Display the accumulated results received from workers
332 print
"* {results.length} total"
333 print
"* {results.oks.length + results.ok_empties.length} oks & 0ks"
334 print
"* {results.fails.length} fails"
335 print
"* {results.no_savs.length} no savs"
336 print
"* {results.fixmes.length} fixmes"
337 print
"* {results.sosos.length} sosos"
338 print
"* {results.skips.length} skips"
339 print
"* {results.todos.length} todos"
340 print
"* {results.skip_execs.length} skip execs"
341 print
"* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
# Print a one-line summary of `results`: oks (including 0ks) and fails over the total
fun print_short_results
do
	var passed = results.oks.length + results.ok_empties.length
	var failed = results.fails.length
	var total = results.length
	print "oks & fails / total: {passed} & {failed} / {total}"
end
347 # Abnormally shut down the running tests
print "Shutting down"

# A send needs a concrete destination rank; the wildcard rank is only
# meaningful when receiving or probing. Notify each worker in turn, using
# the same worker range as the initial task distribution (rank 0 is the
# controller itself).
for r in [1..comm_world.size[ do
	mpi.send_empty(r.rank, quit_tag, comm_world)
end
355 # A worker node which actually executes the tests
359 # The `Rank` of `self`
362 # Compilation directory
363 var comp_dir
= "/dev/shm/nit_compile{rank}" is lazy
365 # Directory to store the xml files produced for Jenkins
366 var xml_dir
= "~/jenkins_xml/"
368 # Output file of the `tests.sh` script
369 var tests_sh_out
= "/dev/shm/nit_local_out{rank}" is lazy
371 # Source Nit repository, must be already updated and `make` before execution
372 var local_nit
= "/dev/shm/nit{rank}" is lazy
374 # Remote Nit repository (actually the local source)
375 var remote_nit
= "~/nit/"
377 # Compiled `Regex` to detect the argument of an execution
378 var re_arg
: Regex = "arg [0-9]+".to_re
380 # Compiled `Regex` to detect the alternative of an execution
381 var re_alt
: Regex = "_alt[0-9]+".to_re
392 # Setup the testing environment
394 # Clone the nit repository.
397 if verbose
> 0 then sys
.system
"hostname"
399 if local_nit
.file_exists
then local_nit
.rmdir
401 exec_and_check
"git clone {remote_nit} {local_nit}"
403 exec_and_check
"git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
404 exec_and_check
"git fetch origin --quiet"
405 exec_and_check
"git checkout {branch_hash}"
406 exec_and_check
"cp {remote_nit}/bin/* bin/"
407 exec_and_check
"src/git-gen-version.sh"
410 private fun exec_and_check
(cmd
: String)
414 var res
= sys
.system
(cmd
)
415 assert res
== 0 else print
"Command '{cmd}' failed."
419 # Clean up the testing environment
421 # Delete all temporary files, except `ccache_dir`.
# Compilation directory: remove it along with its content
if comp_dir.file_exists then comp_dir.rmdir

# Output of `tests.sh` is a plain file
if tests_sh_out.file_exists then tests_sh_out.file_delete

# `local_nit` is the target of `git clone`, hence a directory: it must be
# removed with `rmdir` (as done before cloning), not `file_delete`.
if local_nit.file_exists then local_nit.rmdir
429 # Single C `int` to hold the next task id received from the `Controller`
430 var task_buffer
= new CIntArray(1)
432 # Manage communication with the `Controller` and execute dispatched `Task`s
435 var status
= new Status
439 # We double probe to prevent a bug where a single probe does not receive the
441 mpi
.probe
(controller_rank
, new Tag.any
, comm_world
, status
)
442 mpi
.probe
(controller_rank
, new Tag.any
, comm_world
, status
)
444 if status
.tag
== task_tag
then
445 # Receive tasks to execute
446 mpi
.recv_into
(task_buffer
, 0, 1, status
.source
, status
.tag
, comm_world
)
447 var first_id
= task_buffer
[0]
448 for task_id
in [first_id
.. first_id
+ tasks_per_packet
[ do
450 # If id is over all known tasks, stop right here
451 if task_id
>= tasks
.length
then break
452 var task
= tasks
[task_id
]
456 # Command line to execute test
457 var cmd
= "XMLDIR={xml_dir} " +
458 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
459 "./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
464 # Test results were written to file, read them
465 var fstream
= new FileReader.open
(tests_sh_out
)
466 var content
= fstream
.read_all
469 # Parse result and prepare them for sending
471 # The structure is composed of 4 ints for each result.
475 # 4. test result as int
476 var c
= results_count
477 for line
in content
.split
('\n') do if not line
.is_empty
then
482 var arg_match
= line
.search
(re_arg
)
484 if arg_match
!= null then arg
= arg_match
.to_s
.substring_from
(4).to_i
487 var alt_match
= line
.search
(re_alt
)
489 if alt_match
!= null then alt
= alt_match
.to_s
.substring_from
(4).to_i
493 if line
.has
("[ok]") then res
= 1
494 if line
.has
("[0k]") then res
= 2
495 if line
.has
("[=== no sav ===]") then res
= 3
496 if line
.has
("[fixme]") then res
= 4
497 if line
.has
("[======= fail") then res
= 5
498 if line
.has
("[======= soso") then res
= 6
499 if line
.has
("[skip]") then res
= 7
500 if line
.has
("[todo]") then res
= 8
501 if line
.has
("[skip exec]") then res
= 9
505 if verbose
> 1 then print
"Unknown result: '{line}'"
511 if verbose
> 2 then print
"tests.sh output line: {line}"
513 # If result buffer is full, send to `Controller`
514 if c
*4 == buffer
.length
then
520 if verbose
> 1 then print
"Done testing: {task}"
522 self.results_count
= c
525 mpi
.send_empty
(controller_rank
, need_work_tag
, comm_world
)
526 else if status
.tag
== quit_tag
then
527 # Notification from the `Controller` to quit
528 mpi
.recv_empty
(status
.source
, status
.tag
, comm_world
)
530 # Send remaining results
533 # Notify `Controller` that `self` is done and will quit
534 mpi
.send_empty
(controller_rank
, done_tag
, comm_world
)
537 print
"Unexpected tag {status.tag}"
544 # Total results listed in `buffer` and ready to send
545 var results_count
= 0
547 # Send all results in `buffer` to the `Controller`
550 if results_count
> 0 then
551 if verbose
> 2 then print
"Sending {results_count} results"
552 mpi
.send_from
(buffer
, 0, results_count
*4, controller_rank
, result_tag
, comm_world
)
557 redef fun receive_signal
(signal
)
565 # A single test task, on a `test_program` with an `engine`
567 # Note that a task may involve more than one program to test considering the
568 # alts and args for the `test_program`.
570 # Engine to test executing `test_program`
573 # Program to execute with `engine`
574 var test_program
: String
# Textual form: the engine followed by the test program, separated by a space
redef fun to_s
do
	return "{engine} {test_program}"
end
581 # There may be more than one result per `Task`.
583 # `Task` associated to `self`
586 # Argument index of the execution resulting in `self`
589 # Alternative index of the execution resulting in `self`
592 # Is `self` result an _ok_?
595 # Is `self` result an _0k_?
598 # Is `self` result a _no sav_?
601 # Is `self` result a _fixme_?
604 # Is `self` result a _fail_?
607 # Is `self` result a _soso_?
610 # Has `self` been skipped?
616 # Has the execution of `self` been skipped?
617 var skip_exec
= false
619 # Is `self` an unknown result, probably an error
625 if no_sav
then err
= "no sav"
626 if ok
then err
= "ok"
627 if ok_empty
then err
= "0k"
628 if fixme
then err
= "fixme"
629 if fail
then err
= "fail"
630 if soso
then err
= "soso"
631 if skip
then err
= "skip"
632 if todo
then err
= "todo"
633 if skip_exec
then err
= "skip_exec"
635 return "{task} arg{arg} alt{alt} => {err}"
639 # A global and sorted collection of `Result`
641 super HashSet[Result]
643 var no_savs
= new HashSet[Result]
644 var oks
= new HashSet[Result]
645 var ok_empties
= new HashSet[Result]
646 var fixmes
= new HashSet[Result]
647 var fails
= new HashSet[Result]
648 var sosos
= new HashSet[Result]
649 var skips
= new HashSet[Result]
650 var todos
= new HashSet[Result]
651 var skip_execs
= new HashSet[Result]
652 var unknowns
= new HashSet[Result]
655 var per_engines
= new HashMap[String, Result]
657 redef fun add
(result
)
659 if result
.no_sav
then no_savs
.add result
660 if result
.ok
then oks
.add result
661 if result
.ok_empty
then ok_empties
.add result
662 if result
.fixme
then fixmes
.add result
663 if result
.fail
then fails
.add result
664 if result
.soso
then sosos
.add result
665 if result
.skip
then skips
.add result
666 if result
.todo
then todos
.add result
667 if result
.skip_exec
then skip_execs
.add result
668 if result
.unknown
then unknowns
.add result
# Removing a result is not supported: always aborts
redef fun remove(r)
do
	abort
end
# Clearing the collection is not supported: always aborts
redef fun clear
do
	abort
end
678 redef class OptionContext
680 # Print usage with a possible error `message`
681 private fun usage_error
(message
: nullable String)
684 if message
!= null then
685 print
"Error: {message}"
689 if comm_world
.rank
== 0 then
690 print
"Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
699 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
700 redef fun print
(msg
: Object)
702 if comm_world
.rank
!= 0.rank
then
703 super "{comm_world.rank}/{comm_world.size}: {msg}"
# Running MPI instance
#
# The `once` expression evaluates `new MPI` a single time; later calls
# return that same instance.
fun mpi: MPI
do
	return once new MPI
end
714 var rank
= comm_world
.rank
716 var processor
: Processor
717 if rank
== 0.rank
then
718 # If rank == 0, this is the `Controller`
719 processor
= new Controller
722 processor
= new Worker(rank
)