3431a8645c3e9d47a2f0e14d56684eaaa7e5b17a
1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # Tester of Nit engines on an MPI cluster
24 # Any processor, worker or controller
26 # All data and methods within this class are shared by the controller and the
28 abstract class Processor
31 # Controller rank is always 0
32 var controller_rank
: Rank = 0.rank
34 # Rank on this processor
35 fun rank
: Rank is abstract
37 # Where to store data for transfer between nodes
39 # Require: `buffer.length % 4 == 0`
40 var buffer
= new CIntArray(1024)
42 # Run in verbose mode, display more text
47 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
48 # on the first process.
49 handle_signal
(sigterm
, true)
52 # Tag of a new task packet of size `tasks_per_packet`
53 var task_tag
: Tag = 0.tag
55 # Tag to return a set of `Result` thought `buffer`
56 var result_tag
: Tag = 1.tag
58 # Tag to notify `Worker` when to quit
59 var quit_tag
: Tag = 2.tag
61 # Tag to request more work from the `Controller` by a `Worker`
62 var need_work_tag
: Tag = 4.tag
64 # Tag to notify `Controller` that the sender `Worker` is done
65 var done_tag
: Tag = 5.tag
67 # Number of tasks within each task assignation with `task_tag`
68 var tasks_per_packet
= 1
70 # Run the main logic of this node
73 # Hash or name of the branch to test
74 var branch_hash
: String is noinit
76 # Engines targeted by this execution
77 var engines
: Array[String] is noinit
79 # All known engines, used to detect errors in `engines`
80 var all_engines
: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
82 # Programs to test in this execution
83 var test_programs
: Array[String] is noinit
85 # Root of the temporary directory
86 var tmp_dir
= "/dev/shm/"
89 var ccache_dir
= "/dev/shm/nit_ccache"
91 # Read command line options
94 var opt_ctx
= new OptionContext
95 var opt_hash
= new OptionString(
98 opt_hash
.mandatory
= true
99 var opt_engines
= new OptionString(
100 "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
102 var opt_help
= new OptionBool("Print this help message", "--help", "-h")
103 var opt_verbose
= new OptionCount(
104 "Be verbose, repeat to increase verbose level (max with -vvv)",
106 var opt_cleanup
= new OptionBool(
107 "Clean up all nitester files (and do not run tests)",
110 opt_ctx
.add_option
(opt_hash
, opt_engines
, opt_help
, opt_verbose
, opt_cleanup
)
114 if opt_help
.value
then opt_ctx
.usage_error
null
117 verbose
= opt_verbose
.value
120 if opt_cleanup
.value
then
121 assert tmp_dir
.file_exists
122 for file
in tmp_dir
.files
do if file
.has_prefix
("nit") then
123 var full_path
= tmp_dir
/ file
124 if full_path
== ccache_dir
then continue
126 assert full_path
.file_exists
128 var stat
= full_path
.file_lstat
132 full_path
.file_delete
141 var rest
= opt_ctx
.rest
142 if rest
.is_empty
then opt_ctx
.usage_error
"This tool needs at least one test_program.nit"
146 branch_hash
= opt_hash
.value
.as(not null)
148 # gather and check engines
149 var engines_str
= opt_engines
.value
151 if engines_str
== null then
155 engines
= engines_str
.split
(',')
157 if engines
.has
("all") then
159 engines
= all_engines
163 # check validity of targetted engines
164 var unknown_engines
= new Array[String]
165 for engine
in engines
do if not all_engines
.has
(engine
) then unknown_engines
.add engine
167 if not unknown_engines
.is_empty
then
168 opt_ctx
.usage_error
"Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
170 self.engines
= engines
173 # All tasks to be performed
174 var tasks
= new Array[Task]
176 # Gather and register all tasks
179 # At this point we are in our local nit
180 var skip_path
= "tests/turing.skip"
182 if skip_path
.file_exists
then
183 var skip_file
= new IFStream.open
(skip_path
)
184 skip
= skip_file
.read_lines
187 skip
= new Array[String]
190 for prog
in test_programs
do for engine
in engines
do
193 for s
in skip
do if not s
.is_empty
and prog
.has
(s
) then
194 if verbose
> 0 and rank
== 0 then print
"Skipping test '{prog}' because of '{s}' in turing.skip"
198 tasks
.add
new Task(engine
, prog
)
203 # Single controller to dispatch tasks, gather results and produce stats
207 redef fun rank
do return controller_rank
209 # Id as `Int` of the next task to distribute
212 redef fun receive_signal
(signal
)
229 # Cumulated results from workers
230 var results
= new ResultSet
232 # Maintain communication with workers to distribute tasks and receiver results
235 var at_work
= new Array[Rank]
238 for r
in [1..comm_world
.size
[ do
239 var sent
= send_task_to
(r
.rank
)
243 mpi
.send_empty
(r
.rank
, quit_tag
, comm_world
)
247 var status
= new Status
248 # await results and send new tasks
249 while not at_work
.is_empty
do
252 # Double probe to avoid bug with some implementation of MPI
253 mpi
.probe
(new Rank.any
, new Tag.any
, comm_world
, status
)
254 mpi
.probe
(new Rank.any
, new Tag.any
, comm_world
, status
)
256 if status
.tag
== result_tag
then
257 # Receive results fron a worker
258 var count
= status
.count
(new DataType.int
)
259 mpi
.recv_into
(buffer
, 0, count
, status
.source
, status
.tag
, comm_world
)
261 # Parse results from C array to `Result` instances
263 # Each result is on 4 ints: task id, arg, alt and result.
265 # See the comments where the data is produced in `Worker::work_on_tasks` for more informationé
266 assert count
% 4 == 0
267 for t
in (count
/4).times
do
270 var task_id
= buffer
[tt
]
271 var arg
= buffer
[tt
+1]
272 var alt
= buffer
[tt
+2]
273 var res
= buffer
[tt
+3]
275 var result
= new Result(tasks
[task_id
], arg
, alt
)
277 if res
== 1 then result
.ok
= true
278 if res
== 2 then result
.ok_empty
= true
279 if res
== 3 then result
.no_sav
= true
280 if res
== 4 then result
.fixme
= true
281 if res
== 5 then result
.fail
= true
282 if res
== 6 then result
.soso
= true
283 if res
== 7 then result
.skip
= true
284 if res
== 8 then result
.todo
= true
285 if res
== 9 then result
.skip_exec
= true
286 if res
== 0 then result
.unknown
= true
290 if verbose
> 0 and results
.length
% 25 == 0 then print_short_results
293 else if status
.tag
== need_work_tag
then
294 # A worker needs more work
295 mpi
.recv_empty
(status
.source
, status
.tag
, comm_world
)
296 var sent
= send_task_to
(status
.source
)
299 mpi
.send_empty
(status
.source
, quit_tag
, comm_world
)
301 else if status
.tag
== done_tag
then
302 # A worker is done and will quit
303 mpi
.recv_empty
(status
.source
, status
.tag
, comm_world
)
304 at_work
.remove
(status
.source
)
306 if verbose
> 1 then print
"worker {status.source} is done ({at_work.length} still at work)"
308 print
"Unexpected tag {status.tag}"
316 # Send a packet of tasks to worker at `rank`
317 fun send_task_to
(rank
: Rank): Bool
319 if next_task_id
>= tasks
.length
then return false
321 buffer
[0] = next_task_id
322 next_task_id
+= tasks_per_packet
324 mpi
.send_from
(buffer
, 0, 1, rank
, task_tag
, comm_world
)
326 if verbose
> 1 then print
"sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
330 # Display the accumulated results received from workers
334 print
"* {results.length} total"
335 print
"* {results.oks.length + results.ok_empties.length} oks & 0ks"
336 print
"* {results.fails.length} fails"
337 print
"* {results.no_savs.length} no savs"
338 print
"* {results.fixmes.length} fixmes"
339 print
"* {results.sosos.length} sosos"
340 print
"* {results.skips.length} skips"
341 print
"* {results.todos.length} todos"
342 print
"* {results.skip_execs.length} skip execs"
343 print
"* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
346 fun print_short_results
do print
"oks & fails / total: {results.oks.length + results.ok_empties.length} " +
347 "& {results.fails.length} / {results.length}"
349 # Shutdown anormaly the running tests
352 print
"Shutting down"
353 mpi
.send_empty
(new Rank.any
, quit_tag
, comm_world
)
357 # A worker node which actually execute the tests
361 # The `Rank` of `self`
364 # Compilation directory
365 var comp_dir
= "/dev/shm/nit_compile{rank}" is lazy
367 # Directory to store the xml files produced for Jenkins
368 var xml_dir
= "~/jenkins_xml/"
370 # Output file of the `tests.sh` script
371 var tests_sh_out
= "/dev/shm/nit_local_out{rank}" is lazy
373 # Source Nit repository, must be already updated and `make` before execution
374 var local_nit
= "/dev/shm/nit{rank}" is lazy
376 # Remote Nit repository (actually the local source)
377 var remote_nit
= "~/nit/"
379 # Compiled `Regex` to detect the argument of an execution
380 var re_arg
: Regex = "arg [0-9]+".to_re
382 # Compiled `Regex` to detect the alternative of an execution
383 var re_alt
: Regex = "_alt[0-9]+".to_re
394 # Setup the testing environment
396 # Clone the nit repository.
399 if verbose
> 0 then sys
.system
"hostname"
401 if local_nit
.file_exists
then local_nit
.rmdir
403 exec_and_check
"git clone {remote_nit} {local_nit}"
405 exec_and_check
"git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
406 exec_and_check
"git fetch origin --quiet"
407 exec_and_check
"git checkout {branch_hash}"
408 exec_and_check
"cp {remote_nit}/bin/nitg bin/"
409 exec_and_check
"src/git-gen-version.sh"
410 exec_and_check
"bin/nitg --dir bin/ src/nit.nit src/nitvm.nit"
413 private fun exec_and_check
(cmd
: String)
417 var res
= sys
.system
(cmd
)
418 assert res
== 0 else print
"Command '{cmd}' failed."
422 # Clean up the testing environment
424 # Delete all temporary files, except `ccache_dir`.
427 if comp_dir
.file_exists
then comp_dir
.rmdir
428 if tests_sh_out
.file_exists
then tests_sh_out
.file_delete
429 if local_nit
.file_exists
then local_nit
.file_delete
432 # Single C `int` to hold the next task id received from the `Controller`
433 var task_buffer
= new CIntArray(1)
435 # Manage communication with the `Controller` and execute dispatched `Task`s
438 var status
= new Status
442 # We double probe to prevent bug where a single probes does not receive the
444 mpi
.probe
(controller_rank
, new Tag.any
, comm_world
, status
)
445 mpi
.probe
(controller_rank
, new Tag.any
, comm_world
, status
)
447 if status
.tag
== task_tag
then
448 # Receive tasks to execute
449 mpi
.recv_into
(task_buffer
, 0, 1, status
.source
, status
.tag
, comm_world
)
450 var first_id
= task_buffer
[0]
451 for task_id
in [first_id
.. first_id
+ tasks_per_packet
[ do
453 # If id is over all known tasks, stop right here
454 if task_id
>= tasks
.length
then break
455 var task
= tasks
[task_id
]
459 # Command line to execute test
460 var cmd
= "XMLDIR={xml_dir} " +
461 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
462 "./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
467 # Test results were written to file, read them
468 var fstream
= new IFStream.open
(tests_sh_out
)
469 var content
= fstream
.read_all
472 # Parse result and prepare them for sending
474 # The structure is composed of 4 ints for each result.
478 # 4. test result as int
479 var c
= results_count
480 for line
in content
.split
('\n') do if not line
.is_empty
then
485 var arg_match
= line
.search
(re_arg
)
487 if arg_match
!= null then arg
= arg_match
.to_s
.substring_from
(4).to_i
490 var alt_match
= line
.search
(re_alt
)
492 if alt_match
!= null then alt
= alt_match
.to_s
.substring_from
(4).to_i
496 if line
.has
("[ok]") then res
= 1
497 if line
.has
("[0k]") then res
= 2
498 if line
.has
("[=== no sav ===]") then res
= 3
499 if line
.has
("[fixme]") then res
= 4
500 if line
.has
("[======= fail") then res
= 5
501 if line
.has
("[======= soso") then res
= 6
502 if line
.has
("[skip]") then res
= 7
503 if line
.has
("[todo]") then res
= 8
504 if line
.has
("[skip exec]") then res
= 9
508 if verbose
> 1 then print
"Unknown result: '{line}'"
514 if verbose
> 2 then print
"tests.sh output line: {line}"
516 # If result buffer is full, send to `Controller`
517 if c
*4 == buffer
.length
then
523 self.results_count
= c
526 mpi
.send_empty
(controller_rank
, need_work_tag
, comm_world
)
527 else if status
.tag
== quit_tag
then
528 # Notification from the `Controller` to quit
529 mpi
.recv_empty
(status
.source
, status
.tag
, comm_world
)
531 # Send remaining results
534 # Notify `Controller` that `self` is done and will quit
535 mpi
.send_empty
(controller_rank
, done_tag
, comm_world
)
538 print
"Unexpected tag {status.tag}"
545 # Total results listed in `buffer` and ready to send
546 var results_count
= 0
548 # Send all results in `buffer` to the `Controller`
551 if results_count
> 0 then
552 if verbose
> 1 then print
"sending {results_count} results"
553 mpi
.send_from
(buffer
, 0, results_count
*4, controller_rank
, result_tag
, comm_world
)
558 redef fun receive_signal
(signal
)
566 # A single test task, on a `test_program` with an `engine`
568 # Note that a task may involve more than one program to test considering the
569 # alts and args for the `test_program`.
571 # Engine to test executing `test_program`
574 # Program to execute with `engine`
575 var test_program
: String
577 redef fun to_s
do return "{engine} {test_program}"
582 # There may be more than one result per `Task`.
584 # `Task` associated to `self`
587 # Argument index of the execution resulting in `self`
590 # Alternative index of the execution resulting in `self`
593 # Is `self` result an _ok_?
596 # Is `self` result an _0k_?
599 # Is `self` result a _no sav_?
602 # Is `self` result a _fixme_?
605 # Is `self` result a _fail_?
608 # Is `self` result a _soso_?
611 # Has `self` been skipped?
617 # Has the execution of `self` been skipped?
618 var skip_exec
= false
620 # Is `self` an unknown result, probably an error
626 if no_sav
then err
= "no sav"
627 if ok
then err
= "ok"
628 if ok_empty
then err
= "0k"
629 if fixme
then err
= "fixme"
630 if fail
then err
= "fail"
631 if soso
then err
= "soso"
632 if skip
then err
= "skip"
633 if todo
then err
= "todo"
634 if skip_exec
then err
= "skip_exec"
636 return "{task} arg{arg} alt{alt} => {err}"
640 # A global and sorted collection of `Result`
642 super HashSet[Result]
644 var no_savs
= new HashSet[Result]
645 var oks
= new HashSet[Result]
646 var ok_empties
= new HashSet[Result]
647 var fixmes
= new HashSet[Result]
648 var fails
= new HashSet[Result]
649 var sosos
= new HashSet[Result]
650 var skips
= new HashSet[Result]
651 var todos
= new HashSet[Result]
652 var skip_execs
= new HashSet[Result]
653 var unknowns
= new HashSet[Result]
656 var per_engines
= new HashMap[String, Result]
658 redef fun add
(result
)
660 if result
.no_sav
then no_savs
.add result
661 if result
.ok
then oks
.add result
662 if result
.ok_empty
then ok_empties
.add result
663 if result
.fixme
then fixmes
.add result
664 if result
.fail
then fails
.add result
665 if result
.soso
then sosos
.add result
666 if result
.skip
then skips
.add result
667 if result
.todo
then todos
.add result
668 if result
.skip_exec
then skip_execs
.add result
669 if result
.unknown
then unknowns
.add result
674 redef fun remove
(r
) do abort
676 redef fun clear
do abort
679 redef class OptionContext
681 # Print usage with a possible error `message`
682 private fun usage_error
(message
: nullable String)
685 if message
!= null then
686 print
"Error: {message}"
690 if comm_world
.rank
== 0 then
691 print
"Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
700 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
701 redef fun print
(msg
: Object)
703 if comm_world
.rank
!= 0.rank
then
704 super "{comm_world.rank}/{comm_world.size}: {msg}"
708 # Running MPI instance
709 fun mpi
: MPI do return once
new MPI
715 var rank
= comm_world
.rank
717 var processor
: Processor
718 if rank
== 0.rank
then
719 # If rank == 0, this is the `Controller`
720 processor
= new Controller
723 processor
= new Worker(rank
)